[Feature] Support mtp ep in fd (#3340)

* [Optimize] Add metrics for analysing perf
* Fix bug in mtp
2025-10-05 16:48:03 +08:00 · 2025-08-11 21:49:44 +08:00
parent 110f33a530
commit 7573802a88
5 changed files with 24 additions and 8 deletions
--- a/fastdeploy/engine/engine.py
+++ b/fastdeploy/engine/engine.py
@@ -866,10 +866,10 @@ class LLMEngine:
                    is_prefill = True
            self.token_processor.number_of_input_tokens += tasks[i].prompt_token_ids_len

+        for task in tasks:
+            task.inference_start_time = time.time()
        if not is_decode:
            llm_logger.info(f"Tasks are sent to engine, req_ids={req_ids}")
-            for task in tasks:
-                task.inference_start_time = time.time()
            if not self.cfg.enable_mm:
                self.update_requests_chunk_size(tasks)
            else: