[Feature] Add speculative decoding simulation benchmark. (#2751)

* Add speculative decoding simulation benchmark
* Fix the name of the parameter
2025-09-27 12:52:29 +08:00 · 2025-07-09 12:08:43 +08:00
parent 6b10c19482
commit f7cad30a38
8 changed files with 246 additions and 7 deletions
--- a/fastdeploy/config.py
+++ b/fastdeploy/config.py
@@ -238,6 +238,10 @@ class SpeculativeConfig:
    # A trick method is currently used to enable this sharing.
    # This will be replaced with a more standardized solution in the future.
    sharing_model = None
+    # In benchmark mode, the number of accepted tokens is forced to 1,
+    # i.e. no tokens from MTP are accepted, so that the specified
+    # simulated acceptance rate is not affected.
+    benchmark_mode: bool = False


@dataclass