mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-09-28 05:12:24 +08:00
fix spelling error (#2827)
This commit is contained in:
@@ -348,7 +348,7 @@ def reset_scheduler():
|
|||||||
|
|
||||||
if llm_engine is None:
|
if llm_engine is None:
|
||||||
return Response("Engine not loaded", status_code=500)
|
return Response("Engine not loaded", status_code=500)
|
||||||
llm_engine.scheduler.reset_scheduler()
|
llm_engine.scheduler.reset()
|
||||||
return Response("Scheduler Reset Successfully", status_code=200)
|
return Response("Scheduler Reset Successfully", status_code=200)
|
||||||
|
|
||||||
|
|
||||||
@@ -366,7 +366,7 @@ def control_scheduler(request: ControlSchedulerRequest):
|
|||||||
return JSONResponse(content=content.model_dump(), status_code=500)
|
return JSONResponse(content=content.model_dump(), status_code=500)
|
||||||
|
|
||||||
if request.reset:
|
if request.reset:
|
||||||
llm_engine.scheduler.reset_scheduler()
|
llm_engine.scheduler.reset()
|
||||||
|
|
||||||
if request.load_shards_num or request.reallocate_shard:
|
if request.load_shards_num or request.reallocate_shard:
|
||||||
if hasattr(llm_engine.scheduler, "update_config") and callable(llm_engine.scheduler.update_config):
|
if hasattr(llm_engine.scheduler, "update_config") and callable(llm_engine.scheduler.update_config):
|
||||||
|
@@ -110,7 +110,7 @@ class GlobalSchedulerConfig:
|
|||||||
ttl: int = 900,
|
ttl: int = 900,
|
||||||
min_load_score: float = 3,
|
min_load_score: float = 3,
|
||||||
max_model_len: int = 8192,
|
max_model_len: int = 8192,
|
||||||
load_shrads_num: int = 1,
|
load_shards_num: int = 1,
|
||||||
enable_chunked_prefill: bool = False,
|
enable_chunked_prefill: bool = False,
|
||||||
max_num_partial_prefills: int = 1,
|
max_num_partial_prefills: int = 1,
|
||||||
max_long_partial_prefills: int = 1,
|
max_long_partial_prefills: int = 1,
|
||||||
@@ -129,7 +129,7 @@ class GlobalSchedulerConfig:
|
|||||||
ttl: Time-to-live in seconds for Redis keys (default 900s)
|
ttl: Time-to-live in seconds for Redis keys (default 900s)
|
||||||
min_load_score: Minimum load score for task assignment (default 3)
|
min_load_score: Minimum load score for task assignment (default 3)
|
||||||
max_model_len: Maximum model context length in tokens
|
max_model_len: Maximum model context length in tokens
|
||||||
load_shrads_num: Number of load balancing shards
|
load_shards_num: Number of load balancing shards
|
||||||
enable_chunked_prefill: Whether to enable chunked prefill processing
|
enable_chunked_prefill: Whether to enable chunked prefill processing
|
||||||
max_num_partial_prefills: Max partial prefill operations allowed
|
max_num_partial_prefills: Max partial prefill operations allowed
|
||||||
max_long_partial_prefills: Max long-running partial prefill ops
|
max_long_partial_prefills: Max long-running partial prefill ops
|
||||||
@@ -147,7 +147,7 @@ class GlobalSchedulerConfig:
|
|||||||
self.topic = topic
|
self.topic = topic
|
||||||
self.ttl = ttl
|
self.ttl = ttl
|
||||||
self.min_load_score = min_load_score
|
self.min_load_score = min_load_score
|
||||||
self.load_shrads_num = load_shrads_num
|
self.load_shards_num = load_shards_num
|
||||||
|
|
||||||
self.max_model_len = max_model_len
|
self.max_model_len = max_model_len
|
||||||
self.enable_chunked_prefill = enable_chunked_prefill
|
self.enable_chunked_prefill = enable_chunked_prefill
|
||||||
@@ -169,8 +169,8 @@ class GlobalSchedulerConfig:
|
|||||||
raise ValueError("ttl should be greater than 60")
|
raise ValueError("ttl should be greater than 60")
|
||||||
if self.min_load_score < 1:
|
if self.min_load_score < 1:
|
||||||
raise ValueError("min_load_score should be greater than 0")
|
raise ValueError("min_load_score should be greater than 0")
|
||||||
if self.load_shrads_num < 1:
|
if self.load_shards_num < 1:
|
||||||
raise ValueError("load_shrads_num should be greater than 0")
|
raise ValueError("load_shards_num should be greater than 0")
|
||||||
|
|
||||||
r = redis.Redis(self.host, self.port, self.db, self.password)
|
r = redis.Redis(self.host, self.port, self.db, self.password)
|
||||||
try:
|
try:
|
||||||
@@ -262,7 +262,7 @@ class SchedulerConfig:
|
|||||||
topic=self.config.topic,
|
topic=self.config.topic,
|
||||||
ttl=self.config.ttl,
|
ttl=self.config.ttl,
|
||||||
min_load_score=self.config.min_load_score,
|
min_load_score=self.config.min_load_score,
|
||||||
load_shrads_num=self.config.load_shrads_num,
|
load_shards_num=self.config.load_shards_num,
|
||||||
enable_chunked_prefill=self.config.enable_chunked_prefill,
|
enable_chunked_prefill=self.config.enable_chunked_prefill,
|
||||||
max_num_partial_prefills=self.config.max_num_partial_prefills,
|
max_num_partial_prefills=self.config.max_num_partial_prefills,
|
||||||
max_long_partial_prefills=self.config.max_long_partial_prefills,
|
max_long_partial_prefills=self.config.max_long_partial_prefills,
|
||||||
|
Reference in New Issue
Block a user