mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-09-27 04:46:16 +08:00
[Feature] Support mixed deployment with adapter (#3517)
This commit is contained in:
@@ -208,6 +208,9 @@ class LocalScheduler:
|
|||||||
"""
|
"""
|
||||||
return (token_num + block_size - 1) // block_size
|
return (token_num + block_size - 1) // block_size
|
||||||
|
|
||||||
|
def get_unhandled_request_num(self):
|
||||||
|
return len(self.requests)
|
||||||
|
|
||||||
def get_requests(
|
def get_requests(
|
||||||
self,
|
self,
|
||||||
available_blocks,
|
available_blocks,
|
||||||
|
@@ -56,9 +56,9 @@ class InternalAdapter:
|
|||||||
"splitwise_role": self.cfg.splitwise_role,
|
"splitwise_role": self.cfg.splitwise_role,
|
||||||
"block_size": int(self.cfg.cache_config.block_size),
|
"block_size": int(self.cfg.cache_config.block_size),
|
||||||
"block_num": int(available_block_num),
|
"block_num": int(available_block_num),
|
||||||
"max_block_num": self.cfg.cache_config.total_block_num,
|
"max_block_num": int(self.cfg.cache_config.total_block_num),
|
||||||
"dec_token_num": int(self.cfg.cache_config.dec_token_num),
|
"dec_token_num": int(self.cfg.cache_config.dec_token_num),
|
||||||
"available_resource": 1.0 * available_block_num / self.cfg.cache_config.total_block_num,
|
"available_resource": float(1.0 * available_block_num / self.cfg.cache_config.total_block_num),
|
||||||
"max_batch_size": int(available_batch_size),
|
"max_batch_size": int(available_batch_size),
|
||||||
"max_input_token_num": self.cfg.max_num_batched_tokens,
|
"max_input_token_num": self.cfg.max_num_batched_tokens,
|
||||||
"unhandled_request_num": self.engine.scheduler.get_unhandled_request_num(),
|
"unhandled_request_num": self.engine.scheduler.get_unhandled_request_num(),
|
||||||
|
Reference in New Issue
Block a user