mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
Fix fdconfig bugs (#3528)
* fix config
* fix parallel
* fix ips
* fix rl
* open code
This commit is contained in:
18
.github/workflows/pr_build_and_test.yml
vendored
18
.github/workflows/pr_build_and_test.yml
vendored
@@ -84,12 +84,12 @@ jobs:
|
||||
FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
|
||||
MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"
|
||||
|
||||
# stable_test:
|
||||
# name: Run Stable Tests
|
||||
# needs: [clone,build]
|
||||
# uses: ./.github/workflows/_stable_test.yml
|
||||
# with:
|
||||
# DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate
|
||||
# FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
|
||||
# FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
|
||||
# MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"
|
||||
stable_test:
|
||||
name: Run Stable Tests
|
||||
needs: [clone,build]
|
||||
uses: ./.github/workflows/_stable_test.yml
|
||||
with:
|
||||
DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate
|
||||
FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
|
||||
FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
|
||||
MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"
|
||||
|
@@ -1108,6 +1108,7 @@ class LLMEngine:
|
||||
f" --load_strategy {self.cfg.load_config.load_strategy}"
|
||||
f" --early_stop_config '{self.cfg.early_stop_config.to_json_string()}'"
|
||||
f" --load_choices {self.cfg.load_config.load_choices}"
|
||||
f" --ips {self.cfg.ips}"
|
||||
)
|
||||
|
||||
worker_append_flag = {
|
||||
@@ -1292,7 +1293,7 @@ class LLMEngine:
|
||||
)
|
||||
)
|
||||
llm_logger.info(
|
||||
f"Engine is initialized successfully with {self.cfg.tensor_parallel_size}"
|
||||
f"Engine is initialized successfully with {self.cfg.parallel_config.tensor_parallel_size}"
|
||||
+ f" data parallel id {i}"
|
||||
)
|
||||
self.dp_processed[-1].start()
|
||||
|
@@ -132,7 +132,7 @@ class ResourceManagerV1(ResourceManager):
|
||||
num_new_tokens = request.need_prefill_tokens - request.num_computed_tokens
|
||||
num_new_tokens = min(num_new_tokens, token_budget)
|
||||
|
||||
if not self.config.enable_mm:
|
||||
if not self.config.model_config.enable_mm:
|
||||
return num_new_tokens
|
||||
|
||||
inputs = request.multimodal_inputs
|
||||
@@ -290,7 +290,7 @@ class ResourceManagerV1(ResourceManager):
|
||||
while self.waiting and token_budget > 0:
|
||||
if len(self.running) == self.max_num_seqs:
|
||||
break
|
||||
if self.config.enable_mm and self.exist_prefill(scheduled_reqs):
|
||||
if self.config.model_config.enable_mm and self.exist_prefill(scheduled_reqs):
|
||||
break
|
||||
request = self.waiting[0]
|
||||
if request.status == RequestStatus.WAITING:
|
||||
|
@@ -102,6 +102,7 @@ class RolloutModelConfig:
|
||||
self.graph_optimization_config = graph_optimization_config
|
||||
self.local_rank = local_rank
|
||||
self.early_stop_config = early_stop_config
|
||||
self.ips = None
|
||||
|
||||
def __str__(self):
|
||||
return "\n".join(f"{k}: {v}" for k, v in self.__dict__.items())
|
||||
|
@@ -606,6 +606,13 @@ def parse_args():
|
||||
help="The format of the model weights to load. default/new_loader.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--ips",
|
||||
type=str,
|
||||
default=None,
|
||||
help="The ips of multinode deployment.",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
@@ -721,6 +728,7 @@ def initialize_fd_config(args, ranks: int = 1, local_rank: int = 0) -> FDConfig:
|
||||
graph_opt_config=graph_opt_config,
|
||||
early_stop_config=early_stop_config,
|
||||
cache_config=cache_config,
|
||||
ips=args.ips,
|
||||
)
|
||||
update_fd_config_for_mm(fd_config)
|
||||
|
||||
|
Reference in New Issue
Block a user