mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
[Feature] Add speculative decoding simulation benchmark. (#2751)
* Add speculative decoding simulation benchmark
* Fix the name of the parameter
This commit is contained in:
@@ -235,6 +235,7 @@ class SpeculativeSampler(nn.Layer):
|
||||
raise NotImplementedError()
|
||||
self.speculative_verify_window = fd_config.speculative_config.verify_window
|
||||
self.speculative_max_candidate_len = fd_config.speculative_config.max_candidate_len
|
||||
self.speculative_benchmark_mode = fd_config.speculative_config.benchmark_mode
|
||||
|
||||
def pre_process(self, skip_idx_list: List[int] = []):
|
||||
""" pre process before running """
|
||||
@@ -309,6 +310,7 @@ class SpeculativeSampler(nn.Layer):
|
||||
max_model_len,
|
||||
self.speculative_verify_window,
|
||||
True, # enable_topp
|
||||
self.speculative_benchmark_mode,
|
||||
)
|
||||
|
||||
return None
|
||||
|
Reference in New Issue
Block a user