mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 00:33:03 +08:00
Sync v2.0 version of code to github repo
This commit is contained in:
@@ -18,35 +18,68 @@ import redis
|
||||
from fastdeploy.utils import llm_logger
|
||||
from .global_scheduler import GlobalScheduler
|
||||
from .local_scheduler import LocalScheduler
|
||||
from .splitwise_scheduler import SplitWiseScheduler, SplitWiseSchedulerConfig
|
||||
|
||||
|
||||
|
||||
class LocalSchedulerConfig:
|
||||
"""
|
||||
LocalSchedulerConfig class
|
||||
Configuration class for LocalScheduler.
|
||||
|
||||
Attributes:
|
||||
max_size: Maximum number of concurrent requests (-1 for unlimited)
|
||||
ttl: Time-to-live in seconds for request expiration
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
max_size: int = -1,
|
||||
ttl: int = 900,
|
||||
wait_response_timeout: float = 1,
|
||||
max_model_len: int = 8192,
|
||||
enable_chunked_prefill: bool = False,
|
||||
max_num_partial_prefills: int = 1,
|
||||
max_long_partial_prefills: int = 1,
|
||||
long_prefill_token_threshold: int = 0,
|
||||
**kwargs
|
||||
):
|
||||
"""
|
||||
Initialize LocalScheduler configuration.
|
||||
|
||||
Args:
|
||||
max_size: Maximum concurrent requests (-1 for unlimited, 0 for disabled)
|
||||
ttl: Time-to-live in seconds for request expiration (default 900s)
|
||||
max_model_len: Maximum model context length in tokens
|
||||
enable_chunked_prefill: Whether to enable chunked prefill processing
|
||||
max_num_partial_prefills: Max partial prefill operations allowed
|
||||
max_long_partial_prefills: Max long-running partial prefill ops
|
||||
long_prefill_token_threshold: Token count threshold for long prefill
|
||||
**kwargs: Additional unused arguments (for forward compatibility)
|
||||
|
||||
Note:
|
||||
- If long_prefill_token_threshold is 0, it's auto-calculated as 4% of max_model_len
|
||||
- See LocalScheduler class for implementation details
|
||||
"""
|
||||
self.max_size = max_size
|
||||
self.ttl = ttl
|
||||
self.wait_response_timeout = wait_response_timeout
|
||||
|
||||
self.max_model_len = max_model_len
|
||||
self.enable_chunked_prefill = enable_chunked_prefill
|
||||
self.max_num_partial_prefills = max_num_partial_prefills
|
||||
self.max_long_partial_prefills = max_long_partial_prefills
|
||||
self.long_prefill_token_threshold = long_prefill_token_threshold
|
||||
if self.long_prefill_token_threshold == 0:
|
||||
self.long_prefill_token_threshold = int(self.max_model_len * 0.04)
|
||||
|
||||
def check(self):
|
||||
"""
|
||||
check config
|
||||
Validate the configuration values.
|
||||
|
||||
Currently performs no validation as all values are acceptable.
|
||||
"""
|
||||
assert self.wait_response_timeout > 0, \
|
||||
"LocalScheduler: `wait_response_timeout` must be greater than zero"
|
||||
assert self.ttl > self.wait_response_timeout, \
|
||||
"LocalScheduler: `ttl` must be greater than `wait_response_timeout`"
|
||||
pass
|
||||
|
||||
def print(self):
|
||||
"""
|
||||
print config
|
||||
Print the current configuration to logs.
|
||||
"""
|
||||
llm_logger.info("LocalScheduler Configuration Information :")
|
||||
for k, v in self.__dict__.items():
|
||||
@@ -57,7 +90,15 @@ class LocalSchedulerConfig:
|
||||
|
||||
class GlobalSchedulerConfig:
|
||||
"""
|
||||
GlobalSchedulerConfig class
|
||||
Configuration class for GlobalScheduler (Redis-based).
|
||||
|
||||
Attributes:
|
||||
host: Redis server hostname
|
||||
port: Redis server port
|
||||
db: Redis database number
|
||||
password: Optional Redis password
|
||||
topic: Namespace prefix for queues
|
||||
ttl: Time-to-live in seconds for Redis keys
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
@@ -67,31 +108,69 @@ class GlobalSchedulerConfig:
|
||||
password=None,
|
||||
topic: str = "default",
|
||||
ttl: int = 900,
|
||||
wait_response_timeout: float = 1,
|
||||
remote_write_time: int = 3,
|
||||
min_load_score: float = 3,
|
||||
max_model_len: int = 8192,
|
||||
load_shrads_num: int = 1,
|
||||
enable_chunked_prefill: bool = False,
|
||||
max_num_partial_prefills: int = 1,
|
||||
max_long_partial_prefills: int = 1,
|
||||
long_prefill_token_threshold: int = 0,
|
||||
**kwargs
|
||||
):
|
||||
"""
|
||||
Initialize GlobalScheduler (Redis-based) configuration.
|
||||
|
||||
Args:
|
||||
host: Redis server hostname (default "127.0.0.1")
|
||||
port: Redis server port (default 6379)
|
||||
db: Redis database number (default 0)
|
||||
password: Optional Redis password
|
||||
topic: Namespace prefix for queues (default "default")
|
||||
ttl: Time-to-live in seconds for Redis keys (default 900s)
|
||||
min_load_score: Minimum load score for task assignment (default 3)
|
||||
max_model_len: Maximum model context length in tokens
|
||||
load_shrads_num: Number of load balancing shards
|
||||
enable_chunked_prefill: Whether to enable chunked prefill processing
|
||||
max_num_partial_prefills: Max partial prefill operations allowed
|
||||
max_long_partial_prefills: Max long-running partial prefill ops
|
||||
long_prefill_token_threshold: Token count threshold for long prefill
|
||||
**kwargs: Additional unused arguments (for forward compatibility)
|
||||
|
||||
Note:
|
||||
- If long_prefill_token_threshold is 0, it's auto-calculated as 4% of max_model_len
|
||||
- See GlobalScheduler class for implementation details
|
||||
"""
|
||||
self.host = host
|
||||
self.port = port
|
||||
self.db = db
|
||||
self.password = password
|
||||
self.topic = topic
|
||||
self.ttl = ttl
|
||||
self.wait_response_timeout = wait_response_timeout
|
||||
self.remote_write_time = remote_write_time
|
||||
self.min_load_score = min_load_score
|
||||
self.load_shrads_num = load_shrads_num
|
||||
|
||||
self.max_model_len = max_model_len
|
||||
self.enable_chunked_prefill = enable_chunked_prefill
|
||||
self.max_num_partial_prefills = max_num_partial_prefills
|
||||
self.max_long_partial_prefills = max_long_partial_prefills
|
||||
self.long_prefill_token_threshold = long_prefill_token_threshold
|
||||
if self.long_prefill_token_threshold == 0:
|
||||
self.long_prefill_token_threshold = int(self.max_model_len * 0.04)
|
||||
|
||||
def check(self):
|
||||
"""
|
||||
check config
|
||||
Validate the configuration by testing Redis connection.
|
||||
|
||||
Raises:
|
||||
Exception: If connection to Redis fails
|
||||
"""
|
||||
assert self.wait_response_timeout > 0, \
|
||||
"GlobalScheduler: `wait_response_timeout` must be greater than zero"
|
||||
assert self.remote_write_time > 0, \
|
||||
"GlobalScheduler: `remote_write_time` must be greater than zero"
|
||||
assert self.ttl > self.remote_write_time, \
|
||||
"GlobalScheduler: `ttl` must be greater than `remote_write_time`"
|
||||
assert self.ttl > self.wait_response_timeout, \
|
||||
"GlobalScheduler: `ttl` must be greater than `wait_response_timeout`"
|
||||
|
||||
if self.ttl <= 0:
|
||||
raise ValueError("ttl should be greater than 60")
|
||||
if self.min_load_score < 1:
|
||||
raise ValueError("min_load_score should be greater than 0")
|
||||
if self.load_shrads_num < 1:
|
||||
raise ValueError("load_shrads_num should be greater than 0")
|
||||
|
||||
r = redis.Redis(self.host, self.port, self.db, self.password)
|
||||
try:
|
||||
@@ -103,21 +182,40 @@ class GlobalSchedulerConfig:
|
||||
|
||||
def print(self):
|
||||
"""
|
||||
print config
|
||||
Print the current configuration to logs.
|
||||
"""
|
||||
llm_logger.info("GlobalScheduler Configuration Information :")
|
||||
password = self.password
|
||||
self.password = "******"
|
||||
for k, v in self.__dict__.items():
|
||||
llm_logger.info("{:<20}:{:<6}{}".format(k, "", v))
|
||||
self.password = password
|
||||
llm_logger.info(
|
||||
"=============================================================")
|
||||
|
||||
|
||||
class SchedulerConfig:
|
||||
"""
|
||||
SchedulerConfig class
|
||||
Factory class for scheduler configurations.
|
||||
|
||||
Creates appropriate config based on scheduler type (local/global).
|
||||
"""
|
||||
|
||||
def __init__(self, name="local", **kwargs):
|
||||
"""
|
||||
Initialize scheduler configuration factory.
|
||||
|
||||
Args:
|
||||
name: Scheduler type ("local" for LocalScheduler or "global" for GlobalScheduler)
|
||||
**kwargs: Configuration parameters for the specific scheduler type
|
||||
|
||||
Initializes:
|
||||
- Appropriate config object based on scheduler type
|
||||
- Validates configuration parameters
|
||||
|
||||
Raises:
|
||||
Exception: If invalid scheduler type is specified
|
||||
"""
|
||||
self.name = name
|
||||
self.config = None
|
||||
|
||||
@@ -126,26 +224,34 @@ class SchedulerConfig:
|
||||
|
||||
if name == "global":
|
||||
self.config = GlobalSchedulerConfig(**kwargs)
|
||||
|
||||
if name == "splitwise":
|
||||
self.config = SplitWiseSchedulerConfig(**kwargs)
|
||||
|
||||
def check(self):
|
||||
"""
|
||||
check config
|
||||
Validate the configuration.
|
||||
|
||||
Raises:
|
||||
Exception: If invalid scheduler type is specified
|
||||
"""
|
||||
if self.name not in ["local", "global"]:
|
||||
raise Exception(
|
||||
"SchedulerConfig: `name` must be `local` or `global`")
|
||||
if self.name not in ["local", "global", "splitwise"]:
|
||||
raise Exception(f'Unknown scheduler type {self.name}')
|
||||
|
||||
self.config.check()
|
||||
|
||||
def print(self):
|
||||
"""
|
||||
print config
|
||||
Print the current configuration to logs.
|
||||
"""
|
||||
self.config.print()
|
||||
|
||||
def scheduler(self):
|
||||
"""
|
||||
create scheduler by config
|
||||
Create a scheduler instance based on the configuration.
|
||||
|
||||
Returns:
|
||||
Initialized scheduler instance (LocalScheduler or GlobalScheduler)
|
||||
"""
|
||||
|
||||
if self.name == "global":
|
||||
@@ -155,10 +261,19 @@ class SchedulerConfig:
|
||||
password=self.config.password,
|
||||
topic=self.config.topic,
|
||||
ttl=self.config.ttl,
|
||||
remote_write_time=self.config.remote_write_time,
|
||||
wait_response_timeout=self.config.wait_response_timeout)
|
||||
min_load_score=self.config.min_load_score,
|
||||
load_shrads_num=self.config.load_shrads_num,
|
||||
enable_chunked_prefill=self.config.enable_chunked_prefill,
|
||||
max_num_partial_prefills=self.config.max_num_partial_prefills,
|
||||
max_long_partial_prefills=self.config.max_long_partial_prefills,
|
||||
long_prefill_token_threshold=self.config.long_prefill_token_threshold,)
|
||||
|
||||
if self.name == "splitwise":
|
||||
return SplitWiseScheduler(self.config)
|
||||
|
||||
return LocalScheduler(max_size=self.config.max_size,
|
||||
ttl=self.config.ttl,
|
||||
wait_response_timeout=self.config.wait_response_timeout
|
||||
)
|
||||
enable_chunked_prefill=self.config.enable_chunked_prefill,
|
||||
max_num_partial_prefills=self.config.max_num_partial_prefills,
|
||||
max_long_partial_prefills=self.config.max_long_partial_prefills,
|
||||
long_prefill_token_threshold=self.config.long_prefill_token_threshold,)
|
||||
|
Reference in New Issue
Block a user