Files
FastDeploy/fastdeploy/scheduler/config.py
lddfym 94e1a895e3 fix spelling error (#2826)
* fix spelling error

* fix scheduler reset error
2025-07-14 13:13:08 +08:00

280 lines
10 KiB
Python

"""
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
import redis
from fastdeploy.utils import llm_logger
from .global_scheduler import GlobalScheduler
from .local_scheduler import LocalScheduler
from .splitwise_scheduler import SplitWiseScheduler, SplitWiseSchedulerConfig
class LocalSchedulerConfig:
    """
    Configuration holder for the in-process LocalScheduler.

    Attributes:
        max_size: Maximum number of concurrent requests (-1 for unlimited)
        ttl: Time-to-live in seconds for request expiration
        max_model_len: Maximum model context length in tokens
        enable_chunked_prefill: Whether chunked prefill processing is enabled
        max_num_partial_prefills: Max partial prefill operations allowed
        max_long_partial_prefills: Max long-running partial prefill ops
        long_prefill_token_threshold: Token count threshold for long prefill
    """

    def __init__(self,
                 max_size: int = -1,
                 ttl: int = 900,
                 max_model_len: int = 8192,
                 enable_chunked_prefill: bool = False,
                 max_num_partial_prefills: int = 1,
                 max_long_partial_prefills: int = 1,
                 long_prefill_token_threshold: int = 0,
                 **kwargs):
        """
        Build a LocalScheduler configuration.

        Args:
            max_size: Maximum concurrent requests (-1 for unlimited, 0 for disabled)
            ttl: Time-to-live in seconds for request expiration (default 900s)
            max_model_len: Maximum model context length in tokens
            enable_chunked_prefill: Whether to enable chunked prefill processing
            max_num_partial_prefills: Max partial prefill operations allowed
            max_long_partial_prefills: Max long-running partial prefill ops
            long_prefill_token_threshold: Token count threshold for long prefill;
                0 means auto-derive as 4% of max_model_len
            **kwargs: Extra arguments accepted and ignored (forward compatibility)
        """
        self.max_size = max_size
        self.ttl = ttl
        self.max_model_len = max_model_len
        self.enable_chunked_prefill = enable_chunked_prefill
        self.max_num_partial_prefills = max_num_partial_prefills
        self.max_long_partial_prefills = max_long_partial_prefills
        # A threshold of 0 means "auto": use 4% of the model context length.
        if long_prefill_token_threshold == 0:
            long_prefill_token_threshold = int(max_model_len * 0.04)
        self.long_prefill_token_threshold = long_prefill_token_threshold

    def check(self):
        """
        Validate the configuration values.

        Currently a no-op: every representable value is accepted.
        """
        pass

    def print(self):
        """
        Emit the current configuration to the LLM logger, one line per field.
        """
        llm_logger.info("LocalScheduler Configuration Information :")
        for k, v in self.__dict__.items():
            llm_logger.info("{:<20}:{:<6}{}".format(k, "", v))
        llm_logger.info(
            "=============================================================")
class GlobalSchedulerConfig:
    """
    Configuration class for GlobalScheduler (Redis-based).

    Attributes:
        host: Redis server hostname
        port: Redis server port
        db: Redis database number
        password: Optional Redis password
        topic: Namespace prefix for queues
        ttl: Time-to-live in seconds for Redis keys
        min_load_score: Minimum load score for task assignment
        load_shards_num: Number of load balancing shards
        max_model_len: Maximum model context length in tokens
        enable_chunked_prefill: Whether chunked prefill processing is enabled
        max_num_partial_prefills: Max partial prefill operations allowed
        max_long_partial_prefills: Max long-running partial prefill ops
        long_prefill_token_threshold: Token count threshold for long prefill
    """

    def __init__(self,
                 host: str = "127.0.0.1",
                 port: int = 6379,
                 db: int = 0,
                 password=None,
                 topic: str = "default",
                 ttl: int = 900,
                 min_load_score: float = 3,
                 max_model_len: int = 8192,
                 load_shards_num: int = 1,
                 enable_chunked_prefill: bool = False,
                 max_num_partial_prefills: int = 1,
                 max_long_partial_prefills: int = 1,
                 long_prefill_token_threshold: int = 0,
                 **kwargs):
        """
        Initialize GlobalScheduler (Redis-based) configuration.

        Args:
            host: Redis server hostname (default "127.0.0.1")
            port: Redis server port (default 6379)
            db: Redis database number (default 0)
            password: Optional Redis password
            topic: Namespace prefix for queues (default "default")
            ttl: Time-to-live in seconds for Redis keys (default 900s)
            min_load_score: Minimum load score for task assignment (default 3)
            max_model_len: Maximum model context length in tokens
            load_shards_num: Number of load balancing shards
            enable_chunked_prefill: Whether to enable chunked prefill processing
            max_num_partial_prefills: Max partial prefill operations allowed
            max_long_partial_prefills: Max long-running partial prefill ops
            long_prefill_token_threshold: Token count threshold for long prefill;
                0 means auto-derive as 4% of max_model_len
            **kwargs: Extra arguments accepted and ignored (forward compatibility)
        """
        self.host = host
        self.port = port
        self.db = db
        self.password = password
        self.topic = topic
        self.ttl = ttl
        self.min_load_score = min_load_score
        self.load_shards_num = load_shards_num
        self.max_model_len = max_model_len
        self.enable_chunked_prefill = enable_chunked_prefill
        self.max_num_partial_prefills = max_num_partial_prefills
        self.max_long_partial_prefills = max_long_partial_prefills
        self.long_prefill_token_threshold = long_prefill_token_threshold
        # A threshold of 0 means "auto": use 4% of the model context length.
        if self.long_prefill_token_threshold == 0:
            self.long_prefill_token_threshold = int(self.max_model_len * 0.04)

    def check(self):
        """
        Validate configuration values and test the Redis connection.

        Raises:
            ValueError: If ttl, min_load_score or load_shards_num is out of range
            Exception: If the connection to Redis fails (ping returns falsy
                or the client raises)
        """
        # Fixed: messages now match the conditions actually tested
        # (previously "ttl should be greater than 60" for a ttl <= 0 check).
        if self.ttl <= 0:
            raise ValueError("ttl should be greater than 0")
        if self.min_load_score < 1:
            raise ValueError("min_load_score should not be less than 1")
        if self.load_shards_num < 1:
            raise ValueError("load_shards_num should not be less than 1")
        r = redis.Redis(host=self.host,
                        port=self.port,
                        db=self.db,
                        password=self.password)
        try:
            response = r.ping()
            if not response:
                raise Exception("connect to redis failed")
        finally:
            # Always release the connection, even when ping raises.
            r.close()

    def print(self):
        """
        Emit the current configuration to the LLM logger, one line per field.

        The password value is masked in the log output. Unlike the previous
        implementation, self.password is never mutated, so an exception during
        logging cannot leave the stored password clobbered with "******".
        """
        llm_logger.info("GlobalScheduler Configuration Information :")
        for k, v in self.__dict__.items():
            if k == "password":
                v = "******"
            llm_logger.info("{:<20}:{:<6}{}".format(k, "", v))
        llm_logger.info(
            "=============================================================")
class SchedulerConfig:
"""
Factory class for scheduler configurations.
Creates appropriate config based on scheduler type (local/global).
"""
def __init__(self, name="local", **kwargs):
"""
Initialize scheduler configuration factory.
Args:
name: Scheduler type ("local" for LocalScheduler or "global" for GlobalScheduler)
**kwargs: Configuration parameters for the specific scheduler type
Initializes:
- Appropriate config object based on scheduler type
- Validates configuration parameters
Raises:
Exception: If invalid scheduler type is specified
"""
self.name = name
self.config = None
if name == "local":
self.config = LocalSchedulerConfig(**kwargs)
if name == "global":
self.config = GlobalSchedulerConfig(**kwargs)
if name == "splitwise":
self.config = SplitWiseSchedulerConfig(**kwargs)
def check(self):
"""
Validate the configuration.
Raises:
Exception: If invalid scheduler type is specified
"""
if self.name not in ["local", "global", "splitwise"]:
raise Exception(f'Unknown scheduler type {self.name}')
self.config.check()
def print(self):
"""
Print the current configuration to logs.
"""
self.config.print()
def scheduler(self):
"""
Create a scheduler instance based on the configuration.
Returns:
Initialized scheduler instance (LocalScheduler or GlobalScheduler)
"""
if self.name == "global":
return GlobalScheduler(host=self.config.host,
port=self.config.port,
db=self.config.db,
password=self.config.password,
topic=self.config.topic,
ttl=self.config.ttl,
min_load_score=self.config.min_load_score,
load_shards_num=self.config.load_shards_num,
enable_chunked_prefill=self.config.enable_chunked_prefill,
max_num_partial_prefills=self.config.max_num_partial_prefills,
max_long_partial_prefills=self.config.max_long_partial_prefills,
long_prefill_token_threshold=self.config.long_prefill_token_threshold,)
if self.name == "splitwise":
return SplitWiseScheduler(self.config)
return LocalScheduler(max_size=self.config.max_size,
ttl=self.config.ttl,
enable_chunked_prefill=self.config.enable_chunked_prefill,
max_num_partial_prefills=self.config.max_num_partial_prefills,
max_long_partial_prefills=self.config.max_long_partial_prefills,
long_prefill_token_threshold=self.config.long_prefill_token_threshold,)