Sync v2.0 version of code to GitHub repo

This commit is contained in:
Jiang-Jia-Jun
2025-06-29 23:29:37 +00:00
parent d151496038
commit 92c2cfa2e7
597 changed files with 78776 additions and 22905 deletions

View File

@@ -18,35 +18,68 @@ import redis
from fastdeploy.utils import llm_logger
from .global_scheduler import GlobalScheduler
from .local_scheduler import LocalScheduler
from .splitwise_scheduler import SplitWiseScheduler, SplitWiseSchedulerConfig
class LocalSchedulerConfig:
"""
LocalSchedulerConfig class
Configuration class for LocalScheduler.
Attributes:
max_size: Maximum number of concurrent requests (-1 for unlimited)
ttl: Time-to-live in seconds for request expiration
"""
def __init__(self,
max_size: int = -1,
ttl: int = 900,
wait_response_timeout: float = 1,
max_model_len: int = 8192,
enable_chunked_prefill: bool = False,
max_num_partial_prefills: int = 1,
max_long_partial_prefills: int = 1,
long_prefill_token_threshold: int = 0,
**kwargs
):
"""
Initialize LocalScheduler configuration.
Args:
max_size: Maximum concurrent requests (-1 for unlimited, 0 for disabled)
ttl: Time-to-live in seconds for request expiration (default 900s)
max_model_len: Maximum model context length in tokens
enable_chunked_prefill: Whether to enable chunked prefill processing
max_num_partial_prefills: Max partial prefill operations allowed
max_long_partial_prefills: Max long-running partial prefill ops
long_prefill_token_threshold: Token count threshold for long prefill
**kwargs: Additional unused arguments (for forward compatibility)
Note:
- If long_prefill_token_threshold is 0, it's auto-calculated as 4% of max_model_len
- See LocalScheduler class for implementation details
"""
self.max_size = max_size
self.ttl = ttl
self.wait_response_timeout = wait_response_timeout
self.max_model_len = max_model_len
self.enable_chunked_prefill = enable_chunked_prefill
self.max_num_partial_prefills = max_num_partial_prefills
self.max_long_partial_prefills = max_long_partial_prefills
self.long_prefill_token_threshold = long_prefill_token_threshold
if self.long_prefill_token_threshold == 0:
self.long_prefill_token_threshold = int(self.max_model_len * 0.04)
def check(self):
"""
check config
Validate the configuration values.
Currently performs no validation as all values are acceptable.
"""
assert self.wait_response_timeout > 0, \
"LocalScheduler: `wait_response_timeout` must be greater than zero"
assert self.ttl > self.wait_response_timeout, \
"LocalScheduler: `ttl` must be greater than `wait_response_timeout`"
pass
def print(self):
"""
print config
Print the current configuration to logs.
"""
llm_logger.info("LocalScheduler Configuration Information :")
for k, v in self.__dict__.items():
@@ -57,7 +90,15 @@ class LocalSchedulerConfig:
class GlobalSchedulerConfig:
"""
GlobalSchedulerConfig class
Configuration class for GlobalScheduler (Redis-based).
Attributes:
host: Redis server hostname
port: Redis server port
db: Redis database number
password: Optional Redis password
topic: Namespace prefix for queues
ttl: Time-to-live in seconds for Redis keys
"""
def __init__(self,
@@ -67,31 +108,69 @@ class GlobalSchedulerConfig:
password=None,
topic: str = "default",
ttl: int = 900,
wait_response_timeout: float = 1,
remote_write_time: int = 3,
min_load_score: float = 3,
max_model_len: int = 8192,
load_shrads_num: int = 1,
enable_chunked_prefill: bool = False,
max_num_partial_prefills: int = 1,
max_long_partial_prefills: int = 1,
long_prefill_token_threshold: int = 0,
**kwargs
):
"""
Initialize GlobalScheduler (Redis-based) configuration.
Args:
host: Redis server hostname (default "127.0.0.1")
port: Redis server port (default 6379)
db: Redis database number (default 0)
password: Optional Redis password
topic: Namespace prefix for queues (default "default")
ttl: Time-to-live in seconds for Redis keys (default 900s)
min_load_score: Minimum load score for task assignment (default 3)
max_model_len: Maximum model context length in tokens
load_shrads_num: Number of load balancing shards
enable_chunked_prefill: Whether to enable chunked prefill processing
max_num_partial_prefills: Max partial prefill operations allowed
max_long_partial_prefills: Max long-running partial prefill ops
long_prefill_token_threshold: Token count threshold for long prefill
**kwargs: Additional unused arguments (for forward compatibility)
Note:
- If long_prefill_token_threshold is 0, it's auto-calculated as 4% of max_model_len
- See GlobalScheduler class for implementation details
"""
self.host = host
self.port = port
self.db = db
self.password = password
self.topic = topic
self.ttl = ttl
self.wait_response_timeout = wait_response_timeout
self.remote_write_time = remote_write_time
self.min_load_score = min_load_score
self.load_shrads_num = load_shrads_num
self.max_model_len = max_model_len
self.enable_chunked_prefill = enable_chunked_prefill
self.max_num_partial_prefills = max_num_partial_prefills
self.max_long_partial_prefills = max_long_partial_prefills
self.long_prefill_token_threshold = long_prefill_token_threshold
if self.long_prefill_token_threshold == 0:
self.long_prefill_token_threshold = int(self.max_model_len * 0.04)
def check(self):
"""
check config
Validate the configuration by testing Redis connection.
Raises:
Exception: If connection to Redis fails
"""
assert self.wait_response_timeout > 0, \
"GlobalScheduler: `wait_response_timeout` must be greater than zero"
assert self.remote_write_time > 0, \
"GlobalScheduler: `remote_write_time` must be greater than zero"
assert self.ttl > self.remote_write_time, \
"GlobalScheduler: `ttl` must be greater than `remote_write_time`"
assert self.ttl > self.wait_response_timeout, \
"GlobalScheduler: `ttl` must be greater than `wait_response_timeout`"
if self.ttl <= 0:
raise ValueError("ttl should be greater than 60")
if self.min_load_score < 1:
raise ValueError("min_load_score should be greater than 0")
if self.load_shrads_num < 1:
raise ValueError("load_shrads_num should be greater than 0")
r = redis.Redis(self.host, self.port, self.db, self.password)
try:
@@ -103,21 +182,40 @@ class GlobalSchedulerConfig:
def print(self):
    """
    Print the current configuration to logs.

    The Redis password is masked in the output. Masking is done at log
    time rather than by temporarily overwriting `self.password`: the
    original in-place mutation was not exception-safe (a failure while
    logging would leave the real password permanently replaced by
    "******").
    """
    llm_logger.info("GlobalScheduler Configuration Information :")
    for k, v in self.__dict__.items():
        shown = "******" if k == "password" else v
        llm_logger.info("{:<20}:{:<6}{}".format(k, "", shown))
    llm_logger.info(
        "=============================================================")
class SchedulerConfig:
"""
SchedulerConfig class
Factory class for scheduler configurations.
Creates appropriate config based on scheduler type (local/global).
"""
def __init__(self, name="local", **kwargs):
"""
Initialize scheduler configuration factory.
Args:
name: Scheduler type ("local" for LocalScheduler or "global" for GlobalScheduler)
**kwargs: Configuration parameters for the specific scheduler type
Initializes:
- Appropriate config object based on scheduler type
- Validates configuration parameters
Raises:
Exception: If invalid scheduler type is specified
"""
self.name = name
self.config = None
@@ -126,26 +224,34 @@ class SchedulerConfig:
if name == "global":
self.config = GlobalSchedulerConfig(**kwargs)
if name == "splitwise":
self.config = SplitWiseSchedulerConfig(**kwargs)
def check(self):
    """
    Validate the configuration.

    Ensures the scheduler type is one of the supported backends and then
    delegates to the backend config object's own `check()`.

    Raises:
        Exception: If an unknown scheduler type is specified, or if the
            backend configuration itself is invalid.
    """
    # The stale local/global-only guard has been removed: it raised for
    # "splitwise" before the newer three-way check could accept it, even
    # though splitwise is a supported scheduler type (see __init__ and
    # scheduler()).
    if self.name not in ["local", "global", "splitwise"]:
        raise Exception(f'Unknown scheduler type {self.name}')
    self.config.check()
def print(self):
    """
    Print the current configuration to logs.

    Delegates to the underlying config object's `print()` method —
    whichever of LocalSchedulerConfig / GlobalSchedulerConfig /
    SplitWiseSchedulerConfig was selected by `name` at construction time.
    """
    self.config.print()
def scheduler(self):
"""
create scheduler by config
Create a scheduler instance based on the configuration.
Returns:
Initialized scheduler instance (LocalScheduler or GlobalScheduler)
"""
if self.name == "global":
@@ -155,10 +261,19 @@ class SchedulerConfig:
password=self.config.password,
topic=self.config.topic,
ttl=self.config.ttl,
remote_write_time=self.config.remote_write_time,
wait_response_timeout=self.config.wait_response_timeout)
min_load_score=self.config.min_load_score,
load_shrads_num=self.config.load_shrads_num,
enable_chunked_prefill=self.config.enable_chunked_prefill,
max_num_partial_prefills=self.config.max_num_partial_prefills,
max_long_partial_prefills=self.config.max_long_partial_prefills,
long_prefill_token_threshold=self.config.long_prefill_token_threshold,)
if self.name == "splitwise":
return SplitWiseScheduler(self.config)
return LocalScheduler(max_size=self.config.max_size,
ttl=self.config.ttl,
wait_response_timeout=self.config.wait_response_timeout
)
enable_chunked_prefill=self.config.enable_chunked_prefill,
max_num_partial_prefills=self.config.max_num_partial_prefills,
max_long_partial_prefills=self.config.max_long_partial_prefills,
long_prefill_token_threshold=self.config.long_prefill_token_threshold,)