diff --git a/.github/workflows/pr_build_and_test.yml b/.github/workflows/pr_build_and_test.yml index 25e5369a7..0d6383784 100644 --- a/.github/workflows/pr_build_and_test.yml +++ b/.github/workflows/pr_build_and_test.yml @@ -84,12 +84,12 @@ jobs: FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" - # stable_test: - # name: Run Stable Tests - # needs: [clone,build] - # uses: ./.github/workflows/_stable_test.yml - # with: - # DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate - # FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} - # FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} - # MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" + stable_test: + name: Run Stable Tests + needs: [clone,build] + uses: ./.github/workflows/_stable_test.yml + with: + DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate + FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} + FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} + MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" diff --git a/fastdeploy/engine/engine.py b/fastdeploy/engine/engine.py index 529188d05..49f776577 100644 --- a/fastdeploy/engine/engine.py +++ b/fastdeploy/engine/engine.py @@ -1108,6 +1108,7 @@ class LLMEngine: f" --load_strategy {self.cfg.load_config.load_strategy}" f" --early_stop_config '{self.cfg.early_stop_config.to_json_string()}'" f" --load_choices {self.cfg.load_config.load_choices}" + f" --ips {self.cfg.ips}" ) worker_append_flag = { @@ -1292,7 +1293,7 @@ class LLMEngine: ) ) llm_logger.info( - f"Engine is initialized successfully with {self.cfg.tensor_parallel_size}" + f"Engine is initialized successfully with {self.cfg.parallel_config.tensor_parallel_size}" + f" data parallel id {i}" ) self.dp_processed[-1].start() diff --git a/fastdeploy/engine/sched/resource_manager_v1.py b/fastdeploy/engine/sched/resource_manager_v1.py index ec8703ee0..95f2c235d 100644 --- a/fastdeploy/engine/sched/resource_manager_v1.py +++ b/fastdeploy/engine/sched/resource_manager_v1.py @@ -132,7 +132,7 @@ class ResourceManagerV1(ResourceManager): num_new_tokens = request.need_prefill_tokens - request.num_computed_tokens num_new_tokens = min(num_new_tokens, token_budget) - if not self.config.enable_mm: + if not self.config.model_config.enable_mm: return num_new_tokens inputs = request.multimodal_inputs @@ -290,7 +290,7 @@ class ResourceManagerV1(ResourceManager): while self.waiting and token_budget > 0: if len(self.running) == self.max_num_seqs: break - if self.config.enable_mm and self.exist_prefill(scheduled_reqs): + if self.config.model_config.enable_mm and self.exist_prefill(scheduled_reqs): break request = self.waiting[0] if request.status == RequestStatus.WAITING: diff --git a/fastdeploy/rl/rollout_config.py b/fastdeploy/rl/rollout_config.py index 0b17b2911..3db6f5b87 100644 --- a/fastdeploy/rl/rollout_config.py +++ b/fastdeploy/rl/rollout_config.py @@ -102,6 +102,7 @@ class RolloutModelConfig: self.graph_optimization_config = graph_optimization_config self.local_rank = local_rank self.early_stop_config = early_stop_config + self.ips = None def __str__(self): return "\n".join(f"{k}: {v}" for k, v in self.__dict__.items()) diff --git a/fastdeploy/worker/worker_process.py b/fastdeploy/worker/worker_process.py index b9ac09648..d6f332a7a 100644 --- a/fastdeploy/worker/worker_process.py +++ b/fastdeploy/worker/worker_process.py @@ -606,6 +606,13 @@ def parse_args(): help="The format of the model weights to load. default/new_loader.", ) + parser.add_argument( + "--ips", + type=str, + default=None, + help="The ips of multinode deployment.", + ) + args = parser.parse_args() return args @@ -721,6 +728,7 @@ def initialize_fd_config(args, ranks: int = 1, local_rank: int = 0) -> FDConfig: graph_opt_config=graph_opt_config, early_stop_config=early_stop_config, cache_config=cache_config, + ips=args.ips, ) update_fd_config_for_mm(fd_config)