[Feature] Add speculative decoding simulation benchmark. (#2751)

* Add speculative decoding simulation benchmark
* Fix the name of the parameter
2025-10-05 08:37:06 +08:00 · 2025-07-09 12:08:43 +08:00
parent 6b10c19482
commit f7cad30a38
8 changed files with 246 additions and 7 deletions
--- a/fastdeploy/model_executor/layers/sample/sampler.py
+++ b/fastdeploy/model_executor/layers/sample/sampler.py
@@ -235,6 +235,7 @@ class SpeculativeSampler(nn.Layer):
            raise NotImplementedError()
        self.speculative_verify_window = fd_config.speculative_config.verify_window
        self.speculative_max_candidate_len = fd_config.speculative_config.max_candidate_len
+        self.speculative_benchmark_mode = fd_config.speculative_config.benchmark_mode

    def pre_process(self, skip_idx_list: List[int] = []):
        """ pre process before running """
@@ -309,6 +310,7 @@ class SpeculativeSampler(nn.Layer):
            max_model_len,
            self.speculative_verify_window,
            True,  # enable_topp
+            self.speculative_benchmark_mode,
        )

        return None