diff --git a/fastdeploy/engine/sched/resource_manager_v1.py b/fastdeploy/engine/sched/resource_manager_v1.py
index 734d99a5f..d753ae6aa 100644
--- a/fastdeploy/engine/sched/resource_manager_v1.py
+++ b/fastdeploy/engine/sched/resource_manager_v1.py
@@ -145,15 +145,31 @@ class ResourceManagerV1(ResourceManager):
         if inputs.get("patch_idx", None) is not None and inputs.get("patch_map", None) is not None:
             pre_end_idx = request.num_computed_tokens
             new_end_idx = pre_end_idx + num_new_tokens
+
+            prompt_token_ids_len = len(request.prompt_token_ids)
+            assert prompt_token_ids_len == len(inputs["patch_idx"]), (prompt_token_ids_len, len(inputs["patch_idx"]))
+
             # start
-            start_patch_idx = inputs["patch_idx"][pre_end_idx]
+            if pre_end_idx >= prompt_token_ids_len:
+                start_patch_idx = inputs["patch_idx"][-1]
+            else:
+                start_patch_idx = inputs["patch_idx"][pre_end_idx]
             start_patch_map = inputs["patch_map"][start_patch_idx]
             request.image_start = start_patch_map["image_num"]
             request.video_start = start_patch_map["video_num"]
             request.audio_start = start_patch_map["audio_num"]
             # end
-            end_patch_idx = inputs["patch_idx"][new_end_idx]
+            if new_end_idx >= prompt_token_ids_len:
+                end_patch_idx = inputs["patch_idx"][-1]
+            else:
+                end_patch_idx = inputs["patch_idx"][new_end_idx]
+                if request.prompt_token_ids[new_end_idx] in [
+                    inputs["image_end_id"],
+                    inputs["video_end_id"],
+                    inputs["audio_end_id"],
+                ]:
+                    end_patch_idx -= 1
             end_patch_map = inputs["patch_map"][end_patch_idx]
             end_modal_id = end_patch_map["modal_id"]
             if end_modal_id > 0:
diff --git a/fastdeploy/model_executor/layers/moe/ep.py b/fastdeploy/model_executor/layers/moe/ep.py
index f4d36b674..5b10b68ec 100644
--- a/fastdeploy/model_executor/layers/moe/ep.py
+++ b/fastdeploy/model_executor/layers/moe/ep.py
@@ -18,7 +18,6 @@
 from abc import abstractmethod
 import paddle
 from paddle import nn
-from paddle.base.core import Config
 from paddleformers.utils.log import logger
 
 try:
@@ -103,6 +102,8 @@ class DeepEPEngine:
 
         self.deepep_engine = None
 
+        from paddle.base.core import Config
+        self.ep_config = Config(24, 6, 256)
         self.num_max_dispatch_tokens_per_rank = num_max_dispatch_tokens_per_rank