diff --git a/fastdeploy/scheduler/local_scheduler.py b/fastdeploy/scheduler/local_scheduler.py index 5d79e5009..20e53317b 100644 --- a/fastdeploy/scheduler/local_scheduler.py +++ b/fastdeploy/scheduler/local_scheduler.py @@ -208,6 +208,9 @@ class LocalScheduler: """ return (token_num + block_size - 1) // block_size + def get_unhandled_request_num(self): + return len(self.requests) + def get_requests( self, available_blocks, diff --git a/fastdeploy/splitwise/internal_adapter_utils.py b/fastdeploy/splitwise/internal_adapter_utils.py index 6288a30f9..d52edf897 100644 --- a/fastdeploy/splitwise/internal_adapter_utils.py +++ b/fastdeploy/splitwise/internal_adapter_utils.py @@ -56,9 +56,9 @@ class InternalAdapter: "splitwise_role": self.cfg.splitwise_role, "block_size": int(self.cfg.cache_config.block_size), "block_num": int(available_block_num), - "max_block_num": self.cfg.cache_config.total_block_num, + "max_block_num": int(self.cfg.cache_config.total_block_num), "dec_token_num": int(self.cfg.cache_config.dec_token_num), - "available_resource": 1.0 * available_block_num / self.cfg.cache_config.total_block_num, + "available_resource": float(1.0 * available_block_num / self.cfg.cache_config.total_block_num), "max_batch_size": int(available_batch_size), "max_input_token_num": self.cfg.max_num_batched_tokens, "unhandled_request_num": self.engine.scheduler.get_unhandled_request_num(),