mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 00:57:33 +08:00
[Bug fix] Fix bug for d blocks not enough (#3479)
* Support batched tokens for EP * Support batched tokens for EP * Support batched tokens for EP * Support batched tokens for EP * Support batched tokens for EP and fix bug * Support batched tokens for EP and fix bug * Support batched tokens for EP and fix bug * Support batched tokens for EP and fix bug * Fix bug for memory allocation * Fix bug for D blocks not enough * fix bug when d blocks not enough * fix bug when d blocks not enough * fix cache message recycle step * fix cache message recycle step * Fix step_idx recycle
This commit is contained in:
@@ -252,6 +252,9 @@ class CacheMessager:
|
||||
self.last_step_idx = -1
|
||||
self.last_layer_idx = -1 # int32
|
||||
|
||||
max_step_idx = 100003
|
||||
engine_recycled_count = 0
|
||||
|
||||
while True:
|
||||
|
||||
cache_info = self.engine_worker_queue.get_cache_info()
|
||||
@@ -271,7 +274,6 @@ class CacheMessager:
|
||||
current_info["status"] = "init"
|
||||
logger.info(f"start cache_infos: {current_info}")
|
||||
self.cache_info[info["request_id"]] = current_info
|
||||
self.last_step_idx = min(self.last_step_idx, current_info["current_id"])
|
||||
else:
|
||||
self.cache_info[info["request_id"]] = info
|
||||
prefilled_layer_idx = layer_shm_value.value[0]
|
||||
@@ -287,7 +289,18 @@ class CacheMessager:
|
||||
if not self.cache_info:
|
||||
time.sleep(0.001)
|
||||
continue
|
||||
logger.debug(f"prefilled_layer_idx: {prefilled_layer_idx}, prefilled_step_idx: {prefilled_step_idx}")
|
||||
if self.last_step_idx > prefilled_step_idx:
|
||||
engine_recycled_count += 1
|
||||
self.last_step_idx = prefilled_step_idx # only copy value read from shm memory
|
||||
prefilled_step_idx = (
|
||||
prefilled_step_idx + max_step_idx * engine_recycled_count
|
||||
) # remap prefilled_step_idx for comparison
|
||||
|
||||
logger.debug(
|
||||
f"prefilled_layer_idx: {prefilled_layer_idx}, prefilled_step_idx in shm: {self.last_step_idx},"
|
||||
f"prefilled_step_idx: {prefilled_step_idx} engine_recycled_count {engine_recycled_count}"
|
||||
)
|
||||
|
||||
for req_id, item in list(self.cache_info.items()):
|
||||
if "status" not in item:
|
||||
continue
|
||||
@@ -318,7 +331,8 @@ class CacheMessager:
|
||||
if item["current_id"] < prefilled_step_idx:
|
||||
current_layer_idx = self.num_hidden_layers
|
||||
else:
|
||||
current_layer_idx = prefilled_layer_idx + 1
|
||||
if item["current_id"] == prefilled_step_idx:
|
||||
current_layer_idx = prefilled_layer_idx + 1
|
||||
|
||||
for layer_idx in range(item["layer_idx"], current_layer_idx):
|
||||
tic = time.time()
|
||||
@@ -361,9 +375,7 @@ class CacheMessager:
|
||||
self.engine_worker_queue.put_finished_req([(item["request_id"], "finished")])
|
||||
logger.info(f"put write cache {item['request_id']}")
|
||||
del self.cache_info[req_id]
|
||||
|
||||
self.last_step_idx = prefilled_step_idx
|
||||
self.last_layer_idx = prefilled_layer_idx
|
||||
self.last_layer_idx = prefilled_layer_idx
|
||||
|
||||
except Exception as e:
|
||||
logger.info(f"prefill layerwise send cache thread has exception: {e}")
|
||||
|
Reference in New Issue
Block a user