mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-09-27 04:46:16 +08:00
fix mtp bug in pd-split mode (#2970)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
This commit is contained in:
@@ -340,7 +340,7 @@ class MTPProposer(Proposer):
|
||||
self.model_inputs["pre_ids"][idx : idx + 1] = request.prompt_token_ids[-1]
|
||||
prefill_token_num = self.max_draft_token_num + 1
|
||||
self.model_inputs["draft_tokens"][idx : idx + 1, 0:1] = paddle.to_tensor(
|
||||
- request.draft_token_ids[0:1], dtype="int64"
+ request.draft_token_ids[1:2], dtype="int64"
|
||||
)
|
||||
|
||||
self.model_inputs["seq_lens_encoder"][idx : idx + 1] = 0
|
||||
|
Reference in New Issue
Block a user