Fix fdconfig bugs (#3528)

* fix config

* fix parallel

* fix ips

* fix rl

* open code
Commit 5b66462f0e, authored by YuanRisheng on 2025-08-22 16:17:15 +08:00 and committed via GitHub; parent commit: 7ae41e9daf.
5 changed files with 22 additions and 12 deletions

View File

@@ -84,12 +84,12 @@ jobs:
FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"
# stable_test:
# name: Run Stable Tests
# needs: [clone,build]
# uses: ./.github/workflows/_stable_test.yml
# with:
# DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate
# FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
# FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
# MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"
stable_test:
name: Run Stable Tests
needs: [clone,build]
uses: ./.github/workflows/_stable_test.yml
with:
DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate
FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"

View File

@@ -1108,6 +1108,7 @@ class LLMEngine:
f" --load_strategy {self.cfg.load_config.load_strategy}"
f" --early_stop_config '{self.cfg.early_stop_config.to_json_string()}'"
f" --load_choices {self.cfg.load_config.load_choices}"
f" --ips {self.cfg.ips}"
)
worker_append_flag = {
@@ -1292,7 +1293,7 @@ class LLMEngine:
)
)
llm_logger.info(
f"Engine is initialized successfully with {self.cfg.tensor_parallel_size}"
f"Engine is initialized successfully with {self.cfg.parallel_config.tensor_parallel_size}"
+ f" data parallel id {i}"
)
self.dp_processed[-1].start()

View File

@@ -132,7 +132,7 @@ class ResourceManagerV1(ResourceManager):
num_new_tokens = request.need_prefill_tokens - request.num_computed_tokens
num_new_tokens = min(num_new_tokens, token_budget)
if not self.config.enable_mm:
if not self.config.model_config.enable_mm:
return num_new_tokens
inputs = request.multimodal_inputs
@@ -290,7 +290,7 @@ class ResourceManagerV1(ResourceManager):
while self.waiting and token_budget > 0:
if len(self.running) == self.max_num_seqs:
break
if self.config.enable_mm and self.exist_prefill(scheduled_reqs):
if self.config.model_config.enable_mm and self.exist_prefill(scheduled_reqs):
break
request = self.waiting[0]
if request.status == RequestStatus.WAITING:

View File

@@ -102,6 +102,7 @@ class RolloutModelConfig:
self.graph_optimization_config = graph_optimization_config
self.local_rank = local_rank
self.early_stop_config = early_stop_config
self.ips = None
def __str__(self):
return "\n".join(f"{k}: {v}" for k, v in self.__dict__.items())

View File

@@ -606,6 +606,13 @@ def parse_args():
help="The format of the model weights to load. default/new_loader.",
)
parser.add_argument(
"--ips",
type=str,
default=None,
help="The ips of multinode deployment.",
)
args = parser.parse_args()
return args
@@ -721,6 +728,7 @@ def initialize_fd_config(args, ranks: int = 1, local_rank: int = 0) -> FDConfig:
graph_opt_config=graph_opt_config,
early_stop_config=early_stop_config,
cache_config=cache_config,
ips=args.ips,
)
update_fd_config_for_mm(fd_config)