Unify server-side and model-side Config (Part1) (#3018)

* move cache config

* fix mtp
YuanRisheng
2025-07-28 10:51:52 +08:00
committed by GitHub
parent 8f426c1690
commit 6ccc10ad47
23 changed files with 243 additions and 289 deletions


@@ -19,8 +19,8 @@ from dataclasses import asdict, dataclass
 from dataclasses import fields as dataclass_fields
 from typing import Any, Dict, List, Optional
+from fastdeploy.config import CacheConfig
 from fastdeploy.engine.config import (
-    CacheConfig,
     Config,
     GraphOptimizationConfig,
     ModelConfig,
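
Note: for callers, the visible effect of this hunk is only the import path. CacheConfig now lives beside the model-side configs in fastdeploy.config, while the engine-side classes stay in fastdeploy.engine.config. A minimal sketch of an updated import site (both paths are taken verbatim from the hunk above):

    # CacheConfig moved to the top-level config module in this commit:
    from fastdeploy.config import CacheConfig
    # Engine-side configs keep their old home:
    from fastdeploy.engine.config import Config, GraphOptimizationConfig, ModelConfig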
@@ -770,28 +770,6 @@ class EngineArgs:
             load_strategy=self.load_strategy,
         )
-    def create_cache_config(self, model_cfg) -> CacheConfig:
-        """
-        Create and return a CacheConfig object based on the current settings.
-        """
-        return CacheConfig(
-            block_size=self.block_size,
-            tensor_parallel_size=self.tensor_parallel_size,
-            gpu_memory_utilization=self.gpu_memory_utilization,
-            num_gpu_blocks_override=self.num_gpu_blocks_override,
-            kv_cache_ratio=self.kv_cache_ratio,
-            prealloc_dec_block_slot_num_threshold=self.prealloc_dec_block_slot_num_threshold,
-            enable_prefix_caching=self.enable_prefix_caching,
-            swap_space=self.swap_space,
-            cache_queue_port=self.cache_queue_port,
-            model_cfg=model_cfg,
-            enable_chunked_prefill=self.enable_chunked_prefill,
-            enc_dec_block_num=self.static_decode_blocks,
-            rdma_comm_ports=self.rdma_comm_ports,
-            cache_transfer_protocol=self.cache_transfer_protocol,
-            pd_comm_port=self.pd_comm_port,
-        )
-
     def create_speculative_config(self) -> SpeculativeConfig:
         """ """
         if self.speculative_config is not None:
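
Note: the deleted helper forwarded every cache option as an explicit keyword argument, so each new option had to be wired through both EngineArgs and this call site. The replacement in the next hunk instead flattens EngineArgs with dataclasses.asdict and hands CacheConfig a single dict. A runnable illustration of that standard-library call, using hypothetical field names:

    from dataclasses import asdict, dataclass

    @dataclass
    class Args:  # hypothetical two-field stand-in for EngineArgs
        block_size: int = 64
        swap_space: int = 4

    # asdict() turns the dataclass into a plain dict, which is what the
    # new code passes to CacheConfig instead of hand-written kwargs.
    print(asdict(Args()))  # {'block_size': 64, 'swap_space': 4}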
@@ -864,12 +842,16 @@ class EngineArgs:
             self.tensor_parallel_size <= 1 and self.enable_custom_all_reduce
         ), "enable_custom_all_reduce must be used with tensor_parallel_size>1"
+        all_dict = asdict(self)
+        all_dict["model_cfg"] = model_cfg
+        cache_cfg = CacheConfig(all_dict)
         return Config(
             model_name_or_path=self.model,
             model_config=model_cfg,
             scheduler_config=scheduler_cfg,
             tokenizer=self.tokenizer,
-            cache_config=self.create_cache_config(model_cfg),
+            cache_config=cache_cfg,
             parallel_config=self.create_parallel_config(),
             max_model_len=self.max_model_len,
             tensor_parallel_size=self.tensor_parallel_size,
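
Note: putting the hunk together, EngineArgs is flattened with asdict, the model config is threaded through the same dict under the "model_cfg" key, and CacheConfig extracts what it needs. A self-contained sketch of that flow; the diff does not show CacheConfig's new body, so the assumption that its __init__ accepts such a dict and binds known keys as attributes is hypothetical:

    from dataclasses import asdict, dataclass

    class CacheConfigSketch:
        """Hypothetical stand-in for the unified CacheConfig."""

        def __init__(self, args: dict):
            # Assumption: the real class picks out only the keys it knows;
            # here every key is bound as an attribute for illustration.
            for key, value in args.items():
                setattr(self, key, value)

    @dataclass
    class EngineArgsSketch:  # hypothetical subset of EngineArgs
        block_size: int = 64
        gpu_memory_utilization: float = 0.9

    args = EngineArgsSketch()
    all_dict = asdict(args)
    all_dict["model_cfg"] = None  # the real code injects the ModelConfig here
    cache_cfg = CacheConfigSketch(all_dict)
    print(cache_cfg.block_size)  # 64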