mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
[BugFix] fix qwen3-embedding model tp>1 (#4223)
* support qwen3-embedding * fix ci bug * fix * fix ci bug * fix ci bug * fix * fix qwen3-embedding * fix * fix * fix
This commit is contained in:
@@ -1321,6 +1321,7 @@ class GPUModelRunner(ModelRunnerBase):
|
||||
|
||||
logits = None
|
||||
if hasattr(self.model, "is_pooling_model") and self.model.is_pooling_model:
|
||||
# TODO(lizexu123): Preheating of the pooling function has not been implemented yet.
|
||||
pass
|
||||
else:
|
||||
# 4. Execute spec decode
|
||||
@@ -1632,9 +1633,9 @@ class GPUModelRunner(ModelRunnerBase):
|
||||
logits = None
|
||||
# 4. Compute logits, Sample
|
||||
if hasattr(self.model, "is_pooling_model") and self.model.is_pooling_model:
|
||||
# TODO(lizexu123): Execution of the pooling function has not been implemented yet.
|
||||
pass
|
||||
else:
|
||||
# 4. Execute spec decode
|
||||
logits = self.model.compute_logits(hidden_states)
|
||||
|
||||
if not self.speculative_decoding:
|
||||
|
Reference in New Issue
Block a user