[BugFix] Fix race condition on `is_fetching` that caused multiple concurrent fetch requests (#5238)

* Change `RouterArgs.port` from str to int
* Fix race condition on `is_fetching` causing multiple fetch requests
* Delete duplicate `input_ids` tensor creation
2025-12-24 13:28:13 +08:00 · 2025-11-28 13:41:36 +08:00
parent 35479b691f
commit 7dc06cac6e
3 changed files with 4 additions and 4 deletions
--- a/fastdeploy/engine/common_engine.py
+++ b/fastdeploy/engine/common_engine.py
@@ -680,7 +680,6 @@ class EngineService:
        def _fetch_request():
            try:
                nonlocal is_fetching
-                is_fetching = True
                num_prefill_batch = min(
                    int(self.resource_manager.available_batch()),
                    self.cfg.max_prefill_batch,
@@ -803,6 +802,7 @@ class EngineService:
                    continue
                if self.cfg.scheduler_config.splitwise_role != "mixed":
                    if not is_fetching:
+                        is_fetching = True
                        get_request_pool.submit(_fetch_request)

                else:
@@ -813,6 +813,7 @@ class EngineService:
                    ):
                        # Check if the thread pool is still available to avoid submitting tasks to a shutdown thread pool.
                        try:
+                            is_fetching = True
                            get_request_pool.submit(_fetch_request)
                        except RuntimeError as e:
                            if "shutdown" in str(e):
--- a/fastdeploy/engine/sched/resource_manager_v1.py
+++ b/fastdeploy/engine/sched/resource_manager_v1.py
@@ -414,7 +414,6 @@ class ResourceManagerV1(ResourceManager):
        ):
            input_ids_lst = request.prompt_token_ids + request.output_token_ids
            input_ids = paddle.to_tensor(input_ids_lst, dtype="int64")
-            input_ids = paddle.to_tensor(input_ids_lst, dtype="int64")
            image_patch_id = inputs["image_patch_id"]

            if request.multimodal_img_boundaries is None:
--- a/fastdeploy/router/router.py
+++ b/fastdeploy/router/router.py
@@ -32,7 +32,7 @@ class RouterArgs:
    """
    Host address to bind the router server
    """
-    port: str = "9000"
+    port: int = 9000
    """
    Port to bind the router server.
    """
@@ -55,7 +55,7 @@ class RouterArgs:
        )
        parser.add_argument(
            "--port",
-            type=str,
+            type=int,
            default=RouterArgs.port,
            help="Port number to bind the router server",
        )