[Feature] Multimodal Model P / D Separation (#5323)

* RouterArgs port str -> int

* fix race condition [is_fetching] causing multiple fetch requests

* bugfix: Delete duplicate input_ids tensor creation

* mm pd splitwise: json -> pickle (protocol 5); multimodal_inputs only pos id;
debug log f-string -> %s

* fix mm model lacking pos_id when ENABLE_V1_KVCACHE_SCHEDULER=0, ...

* update cr

* Apply suggestions from code review

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>

* pre-commit fix

* rm multimodal_inputs deepcopy & fix rdma_cache_transfer.py tpsize=0

---------

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Daci
2025-12-09 10:47:42 +08:00
committed by GitHub
parent a8ffc22032
commit 2f208db4e9
5 changed files with 80 additions and 33 deletions

View File

@@ -17,6 +17,7 @@
import copy
import hashlib
import math
import pickle
import random
import threading
import time
@@ -545,7 +546,7 @@ class APIScheduler:
pkey, dkey = f"ReqQ_{pnode.nodeid}", f"ReqQ_{dnode.nodeid}"
req_dict = req.to_dict()
req_dict["group"] = group
req_str = orjson.dumps(req_dict)
req_str = pickle.dumps(req_dict, protocol=5)
# logger.info(f"Schedule Req {req_str}")
self.client.lpush(dkey, req_str)
self.client.lpush(pkey, req_str)
@@ -795,7 +796,7 @@ class InferScheduler:
reqs = [ret[1]]
for req_str in reqs:
req = orjson.loads(req_str)
req = pickle.loads(req_str)
group = req.get("group", "")
req = Request.from_dict(req)
writer_idx = select_writer(req)