[BugFix] fix qwen3-embedding model tp>1 (#4223)

* support qwen3-embedding

* fix ci bug

* fix

* fix ci bug

* fix ci bug

* fix

* fix qwen3-embedding

* fix

* fix

* fix
This commit is contained in:
lizexu123
2025-09-24 14:13:26 +08:00
committed by GitHub
parent 3161014e49
commit e8318b7477
3 changed files with 11 additions and 4 deletions

View File

@@ -1321,6 +1321,7 @@ class GPUModelRunner(ModelRunnerBase):
logits = None
if hasattr(self.model, "is_pooling_model") and self.model.is_pooling_model:
# TODO(lizexu123) The preheating of the pooling function has not been implemented yet.
pass
else:
# 4. Execute spec decode
@@ -1632,9 +1633,9 @@ class GPUModelRunner(ModelRunnerBase):
logits = None
# 4. Compute logits, Sample
if hasattr(self.model, "is_pooling_model") and self.model.is_pooling_model:
# TODO(lizexu123) The execution of the pooling function has not been implemented yet.
pass
else:
# 4. Execute spec decode
logits = self.model.compute_logits(hidden_states)
if not self.speculative_decoding: