mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
[Feature] Support mtp ep in fd (#3340)
* [Optimize] Add metrics for analysing perf * Fix bug in mtp
This commit is contained in:
@@ -866,10 +866,10 @@ class LLMEngine:
|
||||
is_prefill = True
|
||||
self.token_processor.number_of_input_tokens += tasks[i].prompt_token_ids_len
|
||||
|
||||
for task in tasks:
|
||||
task.inference_start_time = time.time()
|
||||
if not is_decode:
|
||||
llm_logger.info(f"Tasks are sent to engine, req_ids={req_ids}")
|
||||
for task in tasks:
|
||||
task.inference_start_time = time.time()
|
||||
if not self.cfg.enable_mm:
|
||||
self.update_requests_chunk_size(tasks)
|
||||
else:
|
||||
|
Reference in New Issue
Block a user