From 8d629568f2777d2440ebd0364aa839c1ea6ed69c Mon Sep 17 00:00:00 2001 From: freeliuzc Date: Sat, 11 Oct 2025 17:16:57 +0800 Subject: [PATCH] [MTP]fix speculate-decoding in dpep mode (#4351) --- fastdeploy/output/token_processor.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fastdeploy/output/token_processor.py b/fastdeploy/output/token_processor.py index 73585ef77..5192cd2ab 100644 --- a/fastdeploy/output/token_processor.py +++ b/fastdeploy/output/token_processor.py @@ -156,7 +156,13 @@ class TokenProcessor: try: is_blocking = True if self.speculative_decoding: - speculate_get_output(self.output_tokens, rank_id, is_blocking, False) + if ( + self.cfg.parallel_config.enable_expert_parallel + and self.cfg.parallel_config.data_parallel_size > 1 + ): + speculate_get_output(self.output_tokens, rank_id, is_blocking, True) + else: + speculate_get_output(self.output_tokens, rank_id, is_blocking, False) if self.output_tokens[0] == -2: continue