Unify server-side and model-side Config (Part3) (#3047)

* merge model config

* fix arch

* fix rl
Author: YuanRisheng
Date: 2025-07-29 17:07:44 +08:00
Committed by: GitHub
Parent: 907d561523
Commit: 502ee92a0a
14 changed files with 116 additions and 199 deletions
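
Part 3 of the config unification moves the loading-related options out of the server-side model config into a dedicated load config, and switches the layer count to the Hugging Face-style name num_hidden_layers. A minimal sketch of the resulting layout, assuming dataclass-style configs; only load_strategy, dynamic_load_weight, model_name_or_path and num_hidden_layers are taken from the diff below, while class names and defaults are assumptions, not the actual FastDeploy definitions:

from dataclasses import dataclass

# Sketch only: field names mirror the diff, class names and defaults are assumed.
@dataclass
class LoadConfig:
    load_strategy: str = "normal"       # moved here from the model config
    dynamic_load_weight: bool = False   # moved here from the model config

@dataclass
class ModelConfig:
    model_name_or_path: str = ""
    num_hidden_layers: int = 0          # replaces the old num_layers field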


@@ -1064,7 +1064,7 @@ class LLMEngine:
 f" --devices {self.cfg.device_ids} {py_script}"
 f" --max_num_seqs {self.cfg.max_num_seqs} --max_model_len {self.cfg.max_model_len}"
 f" --gpu_memory_utilization {self.cfg.cache_config.gpu_memory_utilization}"
-f" --model_name_or_path {self.cfg.model_name_or_path!s}"
+f" --model {self.cfg.model_name_or_path!s}"
 f" --device_ids {self.cfg.device_ids}"
 f" --tensor_parallel_size {self.cfg.tensor_parallel_size}"
 f" --engine_worker_queue_port {self.cfg.engine_worker_queue_port!s}"
@@ -1084,7 +1084,7 @@ class LLMEngine:
 f" --speculative_config '{self.cfg.speculative_config.to_json_string()}'"
 f" --graph_optimization_config '{self.cfg.graph_optimization_config.to_json_string()}'"
 f" --guided_decoding_backend {self.cfg.guided_decoding_backend}"
-f" --load_strategy {self.cfg.model_config.load_strategy}"
+f" --load_strategy {self.cfg.load_config.load_strategy}"
 )
 worker_append_flag = {
@@ -1092,7 +1092,7 @@ class LLMEngine:
 "enable_prefix_caching": self.cfg.cache_config.enable_prefix_caching,
 "enable_chunked_prefill": self.cfg.cache_config.enable_chunked_prefill,
 "do_profile": self.do_profile,
-"dynamic_load_weight": self.cfg.model_config.dynamic_load_weight,
+"dynamic_load_weight": self.cfg.load_config.dynamic_load_weight,
 "disable_any_whitespace": self.cfg.disable_any_whitespace,
 "enable_custom_all_reduce": self.cfg.parallel_config.enable_custom_all_reduce,
 "enable_logprob": self.cfg.enable_logprob,
@@ -1231,9 +1231,9 @@ class LLMEngine:
 elif (match := re.search(r"Start load layer (\d+)", line)) or (
 match := re.search(r"set state for layer (\d+)", line)
 ):
-progress = eval(match.group(1)) * 1.0 / self.cfg.model_config.num_layers
+progress = eval(match.group(1)) * 1.0 / self.cfg.model_config.num_hidden_layers
 self.worker_init_status["layer_loadding"] = progress
-if self.worker_init_status["layer_loadding"] == self.cfg.model_config.num_layers - 1:
+if self.worker_init_status["layer_loadding"] == self.cfg.model_config.num_hidden_layers - 1:
 self.worker_init_status["finished"] = True
 self.checking_worker_status_thread = threading.Thread(target=detect_thread, daemon=True)
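
The last hunk tracks weight-loading progress from worker log lines and now divides by model_config.num_hidden_layers instead of the removed num_layers. A simplified, standalone sketch of that parsing; the layer count and the use of int() instead of eval() are choices made for the sketch, not part of the commit:

import re

num_hidden_layers = 32          # assumed value for illustration
worker_init_status = {}

def track_layer_progress(line: str) -> None:
    # Match either of the two log formats handled in the hunk above.
    match = re.search(r"Start load layer (\d+)", line) or re.search(r"set state for layer (\d+)", line)
    if match:
        layer = int(match.group(1))
        worker_init_status["layer_loadding"] = layer / num_hidden_layers
        if layer == num_hidden_layers - 1:
            worker_init_status["finished"] = True

track_layer_progress("Start load layer 31")
print(worker_init_status)   # {'layer_loadding': 0.96875, 'finished': True}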