Unify server-side and model-side Config (Part3) (#3047)
* merge model config
* fix arch
* fix rl
@@ -1064,7 +1064,7 @@ class LLMEngine:
             f" --devices {self.cfg.device_ids} {py_script}"
             f" --max_num_seqs {self.cfg.max_num_seqs} --max_model_len {self.cfg.max_model_len}"
             f" --gpu_memory_utilization {self.cfg.cache_config.gpu_memory_utilization}"
-            f" --model_name_or_path {self.cfg.model_name_or_path!s}"
+            f" --model {self.cfg.model_name_or_path!s}"
             f" --device_ids {self.cfg.device_ids}"
             f" --tensor_parallel_size {self.cfg.tensor_parallel_size}"
             f" --engine_worker_queue_port {self.cfg.engine_worker_queue_port!s}"
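For readers outside the FastDeploy codebase: the f-strings in this hunk are concatenated into the worker launch command, and the PR renames the model path flag from --model_name_or_path to --model. Below is a minimal, self-contained sketch of the same concatenation pattern; SimpleCfg, its default values, and build_worker_arguments are illustrative stand-ins, not FastDeploy APIs.

import shlex
from dataclasses import dataclass

@dataclass
class SimpleCfg:
    # Illustrative stand-in for the engine config; field names mirror the flags
    # used in the diff above.  Default values are placeholders.
    model_name_or_path: str = "./models/demo"
    device_ids: str = "0"
    tensor_parallel_size: int = 1
    max_num_seqs: int = 8
    max_model_len: int = 2048

def build_worker_arguments(cfg: SimpleCfg, py_script: str) -> list:
    # Same f-string concatenation pattern as LLMEngine; after this PR the model
    # path travels as --model rather than --model_name_or_path.
    arguments = (
        f" --devices {cfg.device_ids} {py_script}"
        f" --max_num_seqs {cfg.max_num_seqs} --max_model_len {cfg.max_model_len}"
        f" --model {cfg.model_name_or_path!s}"
        f" --tensor_parallel_size {cfg.tensor_parallel_size}"
    )
    return shlex.split(arguments)

if __name__ == "__main__":
    print(build_worker_arguments(SimpleCfg(), "worker_process.py"))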
@@ -1084,7 +1084,7 @@ class LLMEngine:
             f" --speculative_config '{self.cfg.speculative_config.to_json_string()}'"
             f" --graph_optimization_config '{self.cfg.graph_optimization_config.to_json_string()}'"
             f" --guided_decoding_backend {self.cfg.guided_decoding_backend}"
-            f" --load_strategy {self.cfg.model_config.load_strategy}"
+            f" --load_strategy {self.cfg.load_config.load_strategy}"
         )

         worker_append_flag = {
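The substantive change in this hunk is the config unification named in the PR title: load_strategy is now read from cfg.load_config rather than cfg.model_config. A minimal sketch of that access-pattern change, assuming a simple dataclass shape for the config objects; LoadConfig and EngineCfg here are illustrative, and the "normal" default is an assumption, not taken from the source.

from dataclasses import dataclass, field

@dataclass
class LoadConfig:
    # Assumed shape: weight-loading options grouped on their own config object.
    load_strategy: str = "normal"      # placeholder default, not from the source
    dynamic_load_weight: bool = False

@dataclass
class EngineCfg:
    load_config: LoadConfig = field(default_factory=LoadConfig)

cfg = EngineCfg()
# Pre-PR access pattern:  cfg.model_config.load_strategy
# Post-PR access pattern used in the diff:
print(f" --load_strategy {cfg.load_config.load_strategy}")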
@@ -1092,7 +1092,7 @@ class LLMEngine:
             "enable_prefix_caching": self.cfg.cache_config.enable_prefix_caching,
             "enable_chunked_prefill": self.cfg.cache_config.enable_chunked_prefill,
             "do_profile": self.do_profile,
-            "dynamic_load_weight": self.cfg.model_config.dynamic_load_weight,
+            "dynamic_load_weight": self.cfg.load_config.dynamic_load_weight,
             "disable_any_whitespace": self.cfg.disable_any_whitespace,
             "enable_custom_all_reduce": self.cfg.parallel_config.enable_custom_all_reduce,
             "enable_logprob": self.cfg.enable_logprob,
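worker_append_flag maps switch names to booleans; the dict itself is shown in the diff, but how it is consumed is not. A plausible reading, shown here purely as an assumption, is that each truthy entry is appended to the argument string as a bare --<name> switch:

def append_boolean_flags(arguments: str, flags: dict) -> str:
    # Assumed usage: every entry whose value is True becomes a bare switch
    # appended to the already-built worker argument string.
    for name, enabled in flags.items():
        if enabled:
            arguments += f" --{name}"
    return arguments

worker_append_flag = {
    "enable_prefix_caching": True,
    "enable_chunked_prefill": False,
    "do_profile": False,
    "dynamic_load_weight": False,   # now sourced from cfg.load_config per this hunk
}
print(append_boolean_flags(" --model ./models/demo", worker_append_flag))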
@@ -1231,9 +1231,9 @@ class LLMEngine:
                 elif (match := re.search(r"Start load layer (\d+)", line)) or (
                     match := re.search(r"set state for layer (\d+)", line)
                 ):
-                    progress = eval(match.group(1)) * 1.0 / self.cfg.model_config.num_layers
+                    progress = eval(match.group(1)) * 1.0 / self.cfg.model_config.num_hidden_layers
                     self.worker_init_status["layer_loadding"] = progress
-                    if self.worker_init_status["layer_loadding"] == self.cfg.model_config.num_layers - 1:
+                    if self.worker_init_status["layer_loadding"] == self.cfg.model_config.num_hidden_layers - 1:
                         self.worker_init_status["finished"] = True

         self.checking_worker_status_thread = threading.Thread(target=detect_thread, daemon=True)
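The last hunk only switches the layer-count denominator from model_config.num_layers to model_config.num_hidden_layers. A standalone re-implementation of the progress computation for illustration; layer_load_progress is a hypothetical helper, and it uses int() where the original uses eval(), which gives the same result for these \d+ matches.

import re

def layer_load_progress(line: str, num_hidden_layers: int):
    # Mirrors the parsing above: a worker log line reporting a layer index is
    # turned into a fraction of num_hidden_layers; unrelated lines yield None.
    match = re.search(r"Start load layer (\d+)", line) or re.search(
        r"set state for layer (\d+)", line
    )
    if match is None:
        return None
    return int(match.group(1)) / num_hidden_layers

# With 32 hidden layers, the final layer line reports (32 - 1) / 32 of the way done,
# which matches the "num_hidden_layers - 1" completion check in the diff.
assert layer_load_progress("Start load layer 31", 32) == 31 / 32
assert layer_load_progress("unrelated log line", 32) is None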