[Bug fix] Fix bug when D blocks are not enough (#3479)

* Support batched tokens for EP

* Support batched tokens for EP

* Support batched tokens for EP

* Support batched tokens for EP

* Support batched tokens for EP and fix bug

* Support batched tokens for EP and fix bug

* Support batched tokens for EP and fix bug

* Support batched tokens for EP and fix bug

* Fix bug for memory allocation

* Fix bug when D blocks are not enough

* Fix bug when D blocks are not enough

* Fix bug when D blocks are not enough

* fix cache message recycle step

* fix cache message recycle step

* Fix step_idx recycle
This commit is contained in:
chenjian
2025-08-21 11:36:16 +08:00
committed by GitHub
parent c487b62ee0
commit 6854506533
4 changed files with 120 additions and 51 deletions

View File

@@ -252,6 +252,9 @@ class CacheMessager:
self.last_step_idx = -1
self.last_layer_idx = -1 # int32
max_step_idx = 100003
engine_recycled_count = 0
while True:
cache_info = self.engine_worker_queue.get_cache_info()
@@ -271,7 +274,6 @@ class CacheMessager:
current_info["status"] = "init"
logger.info(f"start cache_infos: {current_info}")
self.cache_info[info["request_id"]] = current_info
self.last_step_idx = min(self.last_step_idx, current_info["current_id"])
else:
self.cache_info[info["request_id"]] = info
prefilled_layer_idx = layer_shm_value.value[0]
@@ -287,7 +289,18 @@ class CacheMessager:
if not self.cache_info:
time.sleep(0.001)
continue
logger.debug(f"prefilled_layer_idx: {prefilled_layer_idx}, prefilled_step_idx: {prefilled_step_idx}")
if self.last_step_idx > prefilled_step_idx:
engine_recycled_count += 1
self.last_step_idx = prefilled_step_idx # only copy value read from shm memory
prefilled_step_idx = (
prefilled_step_idx + max_step_idx * engine_recycled_count
) # remap prefilled_step_idx for comparison
logger.debug(
f"prefilled_layer_idx: {prefilled_layer_idx}, prefilled_step_idx in shm: {self.last_step_idx},"
f"prefilled_step_idx: {prefilled_step_idx} engine_recycled_count {engine_recycled_count}"
)
for req_id, item in list(self.cache_info.items()):
if "status" not in item:
continue
@@ -318,7 +331,8 @@ class CacheMessager:
if item["current_id"] < prefilled_step_idx:
current_layer_idx = self.num_hidden_layers
else:
current_layer_idx = prefilled_layer_idx + 1
if item["current_id"] == prefilled_step_idx:
current_layer_idx = prefilled_layer_idx + 1
for layer_idx in range(item["layer_idx"], current_layer_idx):
tic = time.time()
@@ -361,9 +375,7 @@ class CacheMessager:
self.engine_worker_queue.put_finished_req([(item["request_id"], "finished")])
logger.info(f"put write cache {item['request_id']}")
del self.cache_info[req_id]
self.last_step_idx = prefilled_step_idx
self.last_layer_idx = prefilled_layer_idx
self.last_layer_idx = prefilled_layer_idx
except Exception as e:
logger.info(f"prefill layerwise send cache thread has exception: {e}")