diff --git a/fastdeploy/model_executor/graph_optimization/decorator.py b/fastdeploy/model_executor/graph_optimization/decorator.py
index 2937579b0..562164aae 100644
--- a/fastdeploy/model_executor/graph_optimization/decorator.py
+++ b/fastdeploy/model_executor/graph_optimization/decorator.py
@@ -114,14 +114,14 @@ def cuda_graph_buffers(buffer_meta):
                     cur = getattr(cur, p)
                 return cur
 
-            if not hasattr(self, "_mm_buffers"):
-                self._mm_buffers = {}
+            if not hasattr(self, "_cuda_graph_buffers"):
+                self._cuda_graph_buffers = {}
                 for name, meta in buffer_meta.items():
                     shape = [_resolve_path(fd_config, s) if isinstance(s, str) else s for s in meta["shape"]]
                     dtype = meta["dtype"]
                     if "." in meta["dtype"]:
                         dtype = _resolve_path(fd_config, meta["dtype"])
-                    self._mm_buffers[name] = paddle.full(
+                    self._cuda_graph_buffers[name] = paddle.full(
                         shape=shape,
                         dtype=dtype,
                         fill_value=meta.get("value", 0),
diff --git a/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py b/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py
index 875d97cdd..a36f9d4db 100644
--- a/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py
+++ b/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py
@@ -506,17 +506,17 @@ class Ernie4_5_VLModel(nn.Layer):
         text_token_num = paddle.maximum((token_num - image_token_num), paddle.ones([], dtype="int64"))
 
         # The scenario requiring padding is CUDA graph, thus we only need to pad the maximum capture size.
-        self._mm_buffers["token_type_ids"][: self.fd_config.graph_opt_config.max_capture_size].fill_(-1)
-        self._mm_buffers["token_type_ids"].copy_(token_type_ids, False)
-        self._mm_buffers["image_token_num"].copy_(image_token_num, False)
+        self._cuda_graph_buffers["token_type_ids"][: self.fd_config.graph_opt_config.max_capture_size].fill_(-1)
+        self._cuda_graph_buffers["token_type_ids"].copy_(token_type_ids, False)
+        self._cuda_graph_buffers["image_token_num"].copy_(image_token_num, False)
 
         return VLMoEMeta(
-            text_input=self._mm_buffers["text_input"][:text_token_num],
-            image_input=self._mm_buffers["image_input"][:image_token_num],
-            text_index=self._mm_buffers["text_index"][:token_num],
-            image_index=self._mm_buffers["image_index"][:token_num],
-            token_type_ids=self._mm_buffers["token_type_ids"][:token_num],
-            image_token_num=self._mm_buffers["image_token_num"],
+            text_input=self._cuda_graph_buffers["text_input"][:text_token_num],
+            image_input=self._cuda_graph_buffers["image_input"][:image_token_num],
+            text_index=self._cuda_graph_buffers["text_index"][:token_num],
+            image_index=self._cuda_graph_buffers["image_index"][:token_num],
+            token_type_ids=self._cuda_graph_buffers["token_type_ids"][:token_num],
+            image_token_num=self._cuda_graph_buffers["image_token_num"],
         )
 
     def get_input_embeddings(self, ids_remove_padding: paddle.Tensor) -> paddle.Tensor:
@@ -756,10 +756,11 @@ class Ernie4_5_VLMoeForConditionalGeneration(ModelForCasualLM):
     def get_input_embeddings(
         self,
         ids_remove_padding: paddle.Tensor,
+        image_token_num: int,
         image_features: Optional[paddle.Tensor] = None,
     ) -> paddle.Tensor:
         input_embeddings = self.ernie.get_input_embeddings(ids_remove_padding=ids_remove_padding)
-        if image_features is not None and len(image_features) > 0:
+        if image_token_num > 0:
             input_embeddings[ids_remove_padding == self.ernie.im_patch_id] = image_features.cast(self.ernie._dtype)
         return input_embeddings
 
@@ -769,11 +770,13 @@ class Ernie4_5_VLMoeForConditionalGeneration(ModelForCasualLM):
         image_features: Optional[paddle.Tensor],
         forward_meta: ForwardMeta,
     ):
+        vl_moe_meta = self.ernie.prepare_vl_moe_meta(ids_remove_padding=ids_remove_padding)
         input_embeddings = self.get_input_embeddings(
-            ids_remove_padding=ids_remove_padding, image_features=image_features
+            ids_remove_padding=ids_remove_padding,
+            image_features=image_features,
+            image_token_num=vl_moe_meta.image_token_num.item(),
         )
         self._input_embeddings.copy_(input_embeddings, False)
-        vl_moe_meta = self.ernie.prepare_vl_moe_meta(ids_remove_padding=ids_remove_padding)
 
         hidden_states = self.ernie(
             input_embeddings=self._input_embeddings,