[BugFix] fix qwen3-embedding model tp>1 (#4223)

* support qwen3-embedding * fix ci bug * fix * fix ci bug * fix ci bug * fix * fix qwen3-embedding * fix * fix * fix
2025-10-05 16:48:03 +08:00 · 2025-09-24 14:13:26 +08:00
parent 3161014e49
commit e8318b7477
3 changed files with 11 additions and 4 deletions
--- a/fastdeploy/worker/gpu_model_runner.py
+++ b/fastdeploy/worker/gpu_model_runner.py
@@ -1321,6 +1321,7 @@ class GPUModelRunner(ModelRunnerBase):

            logits = None
            if hasattr(self.model, "is_pooling_model") and self.model.is_pooling_model:
+                # TODO(lizexu123): Preheating (warm-up) of the pooling function has not been implemented yet.
                pass
            else:
                # 4. Execute spec decode
@@ -1632,9 +1633,9 @@ class GPUModelRunner(ModelRunnerBase):
        logits = None
        # 4. Compute logits, Sample
        if hasattr(self.model, "is_pooling_model") and self.model.is_pooling_model:
+            # TODO(lizexu123): Execution of the pooling function has not been implemented yet.
            pass
        else:
-            # 4. Execute spec decode
            logits = self.model.compute_logits(hidden_states)

        if not self.speculative_decoding: