mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-04 16:22:57 +08:00
@@ -83,10 +83,11 @@ class RolloutModelConfig:
|
||||
self.pad_token_id = pad_token_id
|
||||
self.eos_tokens_lens = eos_tokens_lens
|
||||
self.enable_chunked_prefill = enable_chunked_prefill
|
||||
self.speculative_method = speculative_method
|
||||
self.speculative_max_draft_token_num = speculative_max_draft_token_num
|
||||
self.speculative_model_name_or_path = speculative_model_name_or_path
|
||||
self.speculative_model_quantization = speculative_model_quantization
|
||||
self.speculative_config = {}
|
||||
self.speculative_config["method"] = speculative_method
|
||||
self.speculative_config["max_draft_token_num"] = speculative_max_draft_token_num
|
||||
self.speculative_config["model"] = speculative_model_name_or_path
|
||||
self.speculative_config["quantization"] = speculative_model_quantization
|
||||
self.max_num_batched_tokens = max_num_batched_tokens
|
||||
self.enable_prefix_caching = enable_prefix_caching
|
||||
self.splitwise_role = splitwise_role
|
||||
|
Reference in New Issue
Block a user