[Feature] Multimodal Model P / D Separation (#5323)

* RouterArgs port str -> int
* fix race condition [is_fetching] causing multiple fetch requests
* bugfix: Delete duplicate input_ids tensor creation
* mm pd splitwise json -> pickle5; multimodal_inputs only pos id; debuglog f to %s
* fix ENABLE_V1_KVCACHE_SCHEDULER=0 mm model lack pos_id, ...
* update cr
* Apply suggestions from code review
  Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
* pre-commit fix
* rm multimodal_inputs deepcopy & fix rdma_cache_transfer.py tpsize=0

---------

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
2025-12-24 13:28:13 +08:00 · 2025-12-09 10:47:42 +08:00
parent a8ffc22032
commit 2f208db4e9
5 changed files with 80 additions and 33 deletions
--- a/fastdeploy/scheduler/splitwise_scheduler.py
+++ b/fastdeploy/scheduler/splitwise_scheduler.py
@@ -17,6 +17,7 @@
 import copy
 import hashlib
 import math
+import pickle
 import random
 import threading
 import time
@@ -545,7 +546,7 @@ class APIScheduler:
            pkey, dkey = f"ReqQ_{pnode.nodeid}", f"ReqQ_{dnode.nodeid}"
            req_dict = req.to_dict()
            req_dict["group"] = group
-            req_str = orjson.dumps(req_dict)
+            req_str = pickle.dumps(req_dict, protocol=5)
            # logger.info(f"Schedule Req {req_str}")
            self.client.lpush(dkey, req_str)
            self.client.lpush(pkey, req_str)
@@ -795,7 +796,7 @@ class InferScheduler:
                    reqs = [ret[1]]

                for req_str in reqs:
-                    req = orjson.loads(req_str)
+                    req = pickle.loads(req_str)
                    group = req.get("group", "")
                    req = Request.from_dict(req)
                    writer_idx = select_writer(req)