mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-04 08:16:42 +08:00
Unify server-side and model-side Config (Part1) (#3018)
* move cache config
* fix mtp
This commit is contained in:
@@ -19,8 +19,8 @@ from dataclasses import asdict, dataclass
|
||||
from dataclasses import fields as dataclass_fields
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from fastdeploy.config import CacheConfig
|
||||
from fastdeploy.engine.config import (
|
||||
CacheConfig,
|
||||
Config,
|
||||
GraphOptimizationConfig,
|
||||
ModelConfig,
|
||||
@@ -770,28 +770,6 @@ class EngineArgs:
|
||||
load_strategy=self.load_strategy,
|
||||
)
|
||||
|
||||
def create_cache_config(self, model_cfg) -> CacheConfig:
|
||||
"""
|
||||
Create and return a CacheConfig object based on the current settings.
|
||||
"""
|
||||
return CacheConfig(
|
||||
block_size=self.block_size,
|
||||
tensor_parallel_size=self.tensor_parallel_size,
|
||||
gpu_memory_utilization=self.gpu_memory_utilization,
|
||||
num_gpu_blocks_override=self.num_gpu_blocks_override,
|
||||
kv_cache_ratio=self.kv_cache_ratio,
|
||||
prealloc_dec_block_slot_num_threshold=self.prealloc_dec_block_slot_num_threshold,
|
||||
enable_prefix_caching=self.enable_prefix_caching,
|
||||
swap_space=self.swap_space,
|
||||
cache_queue_port=self.cache_queue_port,
|
||||
model_cfg=model_cfg,
|
||||
enable_chunked_prefill=self.enable_chunked_prefill,
|
||||
enc_dec_block_num=self.static_decode_blocks,
|
||||
rdma_comm_ports=self.rdma_comm_ports,
|
||||
cache_transfer_protocol=self.cache_transfer_protocol,
|
||||
pd_comm_port=self.pd_comm_port,
|
||||
)
|
||||
|
||||
def create_speculative_config(self) -> SpeculativeConfig:
|
||||
""" """
|
||||
if self.speculative_config is not None:
|
||||
@@ -864,12 +842,16 @@ class EngineArgs:
|
||||
self.tensor_parallel_size <= 1 and self.enable_custom_all_reduce
|
||||
), "enable_custom_all_reduce must be used with tensor_parallel_size>1"
|
||||
|
||||
all_dict = asdict(self)
|
||||
all_dict["model_cfg"] = model_cfg
|
||||
cache_cfg = CacheConfig(all_dict)
|
||||
|
||||
return Config(
|
||||
model_name_or_path=self.model,
|
||||
model_config=model_cfg,
|
||||
scheduler_config=scheduler_cfg,
|
||||
tokenizer=self.tokenizer,
|
||||
cache_config=self.create_cache_config(model_cfg),
|
||||
cache_config=cache_cfg,
|
||||
parallel_config=self.create_parallel_config(),
|
||||
max_model_len=self.max_model_len,
|
||||
tensor_parallel_size=self.tensor_parallel_size,
|
||||
|
Reference in New Issue
Block a user