[Speculative Decoding][MTP]Support attn mask offset (#4641)

* [MTP]Merge support attn (#4591) * support mask_offset in speculative decoding * fix dummy run output * add unit test * fix unit test import * support attn_mask_offset in mtp mode * add update_attn_mask op * fix unit test && fix code-style
2025-12-24 13:28:13 +08:00 · 2025-11-03 10:08:01 +08:00
parent f44f4bafd1
commit 11398790d3
13 changed files with 638 additions and 111 deletions
--- a/fastdeploy/spec_decode/base.py
+++ b/fastdeploy/spec_decode/base.py
@@ -69,6 +69,8 @@ class Proposer(ABC):
        self.max_ngram_size = self.speculative_config.max_ngram_size
        self.min_ngram_size = self.speculative_config.min_ngram_size

+        self.enable_mm = self.model_config.enable_mm
+
        spec_logger.info(f"Speculate config: {self.speculative_config}")

    def run(self, *args, **kwargs) -> Any: