[BugFix] Fix race condition on `is_fetching` that caused multiple fetch requests (#5238)

* Change the type of RouterArgs.port from str to int

* Fix race condition on `is_fetching` that caused multiple fetch requests

* Delete the duplicate input_ids tensor creation
This commit is contained in:
Daci
2025-11-28 13:41:36 +08:00
committed by GitHub
parent 35479b691f
commit 7dc06cac6e
3 changed files with 4 additions and 4 deletions

View File

@@ -680,7 +680,6 @@ class EngineService:
def _fetch_request():
try:
nonlocal is_fetching
is_fetching = True
num_prefill_batch = min(
int(self.resource_manager.available_batch()),
self.cfg.max_prefill_batch,
@@ -803,6 +802,7 @@ class EngineService:
continue
if self.cfg.scheduler_config.splitwise_role != "mixed":
if not is_fetching:
is_fetching = True
get_request_pool.submit(_fetch_request)
else:
@@ -813,6 +813,7 @@ class EngineService:
):
# Check if the thread pool is still available to avoid submitting tasks to a shutdown thread pool.
try:
is_fetching = True
get_request_pool.submit(_fetch_request)
except RuntimeError as e:
if "shutdown" in str(e):

View File

@@ -414,7 +414,6 @@ class ResourceManagerV1(ResourceManager):
):
input_ids_lst = request.prompt_token_ids + request.output_token_ids
input_ids = paddle.to_tensor(input_ids_lst, dtype="int64")
input_ids = paddle.to_tensor(input_ids_lst, dtype="int64")
image_patch_id = inputs["image_patch_id"]
if request.multimodal_img_boundaries is None:

View File

@@ -32,7 +32,7 @@ class RouterArgs:
"""
Host address to bind the router server
"""
port: str = "9000"
port: int = 9000
"""
Port to bind the router server.
"""
@@ -55,7 +55,7 @@ class RouterArgs:
)
parser.add_argument(
"--port",
type=str,
type=int,
default=RouterArgs.port,
help="Port number to bind the router server",
)