Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-05 08:37:06 +08:00
* Lazy import of Config * Support chunked_prefill
@@ -145,15 +145,31 @@ class ResourceManagerV1(ResourceManager):
         if inputs.get("patch_idx", None) is not None and inputs.get("patch_map", None) is not None:
             pre_end_idx = request.num_computed_tokens
             new_end_idx = pre_end_idx + num_new_tokens
+
+            prompt_token_ids_len = len(request.prompt_token_ids)
+            assert prompt_token_ids_len == len(inputs["patch_idx"]), (prompt_token_ids_len, len(inputs["patch_idx"]))
+
             # start
-            start_patch_idx = inputs["patch_idx"][pre_end_idx]
+            if pre_end_idx >= prompt_token_ids_len:
+                start_patch_idx = inputs["patch_idx"][-1]
+            else:
+                start_patch_idx = inputs["patch_idx"][pre_end_idx]
             start_patch_map = inputs["patch_map"][start_patch_idx]
             request.image_start = start_patch_map["image_num"]
             request.video_start = start_patch_map["video_num"]
             request.audio_start = start_patch_map["audio_num"]
 
             # end
-            end_patch_idx = inputs["patch_idx"][new_end_idx]
+            if new_end_idx >= prompt_token_ids_len:
+                end_patch_idx = inputs["patch_idx"][-1]
+            else:
+                end_patch_idx = inputs["patch_idx"][new_end_idx]
+                if request.prompt_token_ids[new_end_idx] in [
+                    inputs["image_end_id"],
+                    inputs["video_end_id"],
+                    inputs["audio_end_id"],
+                ]:
+                    end_patch_idx -= 1
             end_patch_map = inputs["patch_map"][end_patch_idx]
             end_modal_id = end_patch_map["modal_id"]
             if end_modal_id > 0:
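The hunk above is the chunked_prefill part of the change: when a multimodal prompt is split into prefill chunks, the scheduler looks up which image/video/audio patch the chunk starts and ends in. The data layout is not spelled out in the diff, but from the lookups it appears that inputs["patch_idx"] maps every prompt token position to an index into inputs["patch_map"], and each patch_map entry records how many image/video/audio items precede that patch plus a modal_id that is non-zero inside a multimodal span; if the chunk boundary lands exactly on an image/video/audio end token, the end patch index is stepped back by one. A toy sketch of the start-side lookup, with entirely made-up values:

    # Hypothetical data shapes inferred from the diff; not taken from FastDeploy.
    inputs = {
        "patch_idx": [0, 1, 1, 1, 2, 2],  # prompt token position -> patch index
        "patch_map": [
            {"image_num": 0, "video_num": 0, "audio_num": 0, "modal_id": 0},  # leading text
            {"image_num": 0, "video_num": 0, "audio_num": 0, "modal_id": 1},  # image placeholder tokens
            {"image_num": 1, "video_num": 0, "audio_num": 0, "modal_id": 0},  # text after the image
        ],
    }
    pre_end_idx = 2  # this prefill chunk resumes at token 2, inside the image span
    start_patch_map = inputs["patch_map"][inputs["patch_idx"][pre_end_idx]]
    print(start_patch_map["image_num"])  # 0 -> request.image_start would begin at image 0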
@@ -18,7 +18,6 @@ from abc import abstractmethod
 
 import paddle
 from paddle import nn
-from paddle.base.core import Config
 from paddleformers.utils.log import logger
 
 try:
@@ -103,6 +102,8 @@ class DeepEPEngine:
 
         self.deepep_engine = None
 
+        from paddle.base.core import Config
+
         self.ep_config = Config(24, 6, 256)
         self.num_max_dispatch_tokens_per_rank = num_max_dispatch_tokens_per_rank
 
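The two hunks above implement the "lazy import of Config" item from the commit title: the module-level from paddle.base.core import Config is dropped, and the import is deferred into DeepEPEngine, so merely importing the module no longer requires that symbol; it is only resolved when an expert-parallel engine is actually constructed. A minimal sketch of the pattern under that reading (the class below is illustrative, not the FastDeploy code; only the Config import and the Config(24, 6, 256) call are taken from the diff):

    class LazyEPEngineSketch:
        """Illustrative only: defer an optional import to construction time."""

        def __init__(self, num_max_dispatch_tokens_per_rank: int):
            # Resolved only here, so an ImportError surfaces when the engine is
            # built, not when the surrounding module is imported.
            from paddle.base.core import Config

            self.ep_config = Config(24, 6, 256)  # values copied verbatim from the diff
            self.num_max_dispatch_tokens_per_rank = num_max_dispatch_tokens_per_rank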