diff --git a/fastdeploy/engine/sched/resource_manager_v1.py b/fastdeploy/engine/sched/resource_manager_v1.py
index 734d99a5f..d753ae6aa 100644
--- a/fastdeploy/engine/sched/resource_manager_v1.py
+++ b/fastdeploy/engine/sched/resource_manager_v1.py
@@ -145,15 +145,31 @@ class ResourceManagerV1(ResourceManager):
         if inputs.get("patch_idx", None) is not None and inputs.get("patch_map", None) is not None:
             pre_end_idx = request.num_computed_tokens
             new_end_idx = pre_end_idx + num_new_tokens
+
+            prompt_token_ids_len = len(request.prompt_token_ids)
+            assert prompt_token_ids_len == len(inputs["patch_idx"]), (prompt_token_ids_len, len(inputs["patch_idx"]))
+
             # start
-            start_patch_idx = inputs["patch_idx"][pre_end_idx]
+            if pre_end_idx >= prompt_token_ids_len:
+                start_patch_idx = inputs["patch_idx"][-1]
+            else:
+                start_patch_idx = inputs["patch_idx"][pre_end_idx]
             start_patch_map = inputs["patch_map"][start_patch_idx]
             request.image_start = start_patch_map["image_num"]
             request.video_start = start_patch_map["video_num"]
             request.audio_start = start_patch_map["audio_num"]
             # end
-            end_patch_idx = inputs["patch_idx"][new_end_idx]
+            if new_end_idx >= prompt_token_ids_len:
+                end_patch_idx = inputs["patch_idx"][-1]
+            else:
+                end_patch_idx = inputs["patch_idx"][new_end_idx]
+                if request.prompt_token_ids[new_end_idx] in [
+                    inputs["image_end_id"],
+                    inputs["video_end_id"],
+                    inputs["audio_end_id"],
+                ]:
+                    end_patch_idx -= 1
             end_patch_map = inputs["patch_map"][end_patch_idx]
             end_modal_id = end_patch_map["modal_id"]
             if end_modal_id > 0:
diff --git a/fastdeploy/model_executor/layers/moe/ep.py b/fastdeploy/model_executor/layers/moe/ep.py
index f4d36b674..5b10b68ec 100644
--- a/fastdeploy/model_executor/layers/moe/ep.py
+++ b/fastdeploy/model_executor/layers/moe/ep.py
@@ -18,7 +18,6 @@
 from abc import abstractmethod
 import paddle
 from paddle import nn
-from paddle.base.core import Config
 from paddleformers.utils.log import logger
 
 try:
@@ -103,6 +102,8 @@ class DeepEPEngine:
 
         self.deepep_engine = None
 
+        from paddle.base.core import Config
+        self.ep_config = Config(24, 6, 256)
         self.num_max_dispatch_tokens_per_rank = num_max_dispatch_tokens_per_rank