mirror of
				https://github.com/PaddlePaddle/FastDeploy.git
				synced 2025-10-31 11:56:44 +08:00 
			
		
		
		
	[BugFix] Fix 0-size image_feature causing insert failure (#4042)
				
					
				
			* update * fix image_feature
This commit is contained in:
		| @@ -114,14 +114,14 @@ def cuda_graph_buffers(buffer_meta): | |||||||
|                     cur = getattr(cur, p) |                     cur = getattr(cur, p) | ||||||
|                 return cur |                 return cur | ||||||
|  |  | ||||||
|             if not hasattr(self, "_mm_buffers"): |             if not hasattr(self, "_cuda_graph_buffers"): | ||||||
|                 self._mm_buffers = {} |                 self._cuda_graph_buffers = {} | ||||||
|                 for name, meta in buffer_meta.items(): |                 for name, meta in buffer_meta.items(): | ||||||
|                     shape = [_resolve_path(fd_config, s) if isinstance(s, str) else s for s in meta["shape"]] |                     shape = [_resolve_path(fd_config, s) if isinstance(s, str) else s for s in meta["shape"]] | ||||||
|                     dtype = meta["dtype"] |                     dtype = meta["dtype"] | ||||||
|                     if "." in meta["dtype"]: |                     if "." in meta["dtype"]: | ||||||
|                         dtype = _resolve_path(fd_config, meta["dtype"]) |                         dtype = _resolve_path(fd_config, meta["dtype"]) | ||||||
|                     self._mm_buffers[name] = paddle.full( |                     self._cuda_graph_buffers[name] = paddle.full( | ||||||
|                         shape=shape, |                         shape=shape, | ||||||
|                         dtype=dtype, |                         dtype=dtype, | ||||||
|                         fill_value=meta.get("value", 0), |                         fill_value=meta.get("value", 0), | ||||||
|   | |||||||
| @@ -506,17 +506,17 @@ class Ernie4_5_VLModel(nn.Layer): | |||||||
|         text_token_num = paddle.maximum((token_num - image_token_num), paddle.ones([], dtype="int64")) |         text_token_num = paddle.maximum((token_num - image_token_num), paddle.ones([], dtype="int64")) | ||||||
|  |  | ||||||
|         # The scenario requiring padding is CUDA graph, thus we only need to pad the maximum capture size. |         # The scenario requiring padding is CUDA graph, thus we only need to pad the maximum capture size. | ||||||
|         self._mm_buffers["token_type_ids"][: self.fd_config.graph_opt_config.max_capture_size].fill_(-1) |         self._cuda_graph_buffers["token_type_ids"][: self.fd_config.graph_opt_config.max_capture_size].fill_(-1) | ||||||
|         self._mm_buffers["token_type_ids"].copy_(token_type_ids, False) |         self._cuda_graph_buffers["token_type_ids"].copy_(token_type_ids, False) | ||||||
|         self._mm_buffers["image_token_num"].copy_(image_token_num, False) |         self._cuda_graph_buffers["image_token_num"].copy_(image_token_num, False) | ||||||
|  |  | ||||||
|         return VLMoEMeta( |         return VLMoEMeta( | ||||||
|             text_input=self._mm_buffers["text_input"][:text_token_num], |             text_input=self._cuda_graph_buffers["text_input"][:text_token_num], | ||||||
|             image_input=self._mm_buffers["image_input"][:image_token_num], |             image_input=self._cuda_graph_buffers["image_input"][:image_token_num], | ||||||
|             text_index=self._mm_buffers["text_index"][:token_num], |             text_index=self._cuda_graph_buffers["text_index"][:token_num], | ||||||
|             image_index=self._mm_buffers["image_index"][:token_num], |             image_index=self._cuda_graph_buffers["image_index"][:token_num], | ||||||
|             token_type_ids=self._mm_buffers["token_type_ids"][:token_num], |             token_type_ids=self._cuda_graph_buffers["token_type_ids"][:token_num], | ||||||
|             image_token_num=self._mm_buffers["image_token_num"], |             image_token_num=self._cuda_graph_buffers["image_token_num"], | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
|     def get_input_embeddings(self, ids_remove_padding: paddle.Tensor) -> paddle.Tensor: |     def get_input_embeddings(self, ids_remove_padding: paddle.Tensor) -> paddle.Tensor: | ||||||
| @@ -756,10 +756,11 @@ class Ernie4_5_VLMoeForConditionalGeneration(ModelForCasualLM): | |||||||
|     def get_input_embeddings( |     def get_input_embeddings( | ||||||
|         self, |         self, | ||||||
|         ids_remove_padding: paddle.Tensor, |         ids_remove_padding: paddle.Tensor, | ||||||
|  |         image_token_num: int, | ||||||
|         image_features: Optional[paddle.Tensor] = None, |         image_features: Optional[paddle.Tensor] = None, | ||||||
|     ) -> paddle.Tensor: |     ) -> paddle.Tensor: | ||||||
|         input_embeddings = self.ernie.get_input_embeddings(ids_remove_padding=ids_remove_padding) |         input_embeddings = self.ernie.get_input_embeddings(ids_remove_padding=ids_remove_padding) | ||||||
|         if image_features is not None and len(image_features) > 0: |         if image_token_num > 0: | ||||||
|             input_embeddings[ids_remove_padding == self.ernie.im_patch_id] = image_features.cast(self.ernie._dtype) |             input_embeddings[ids_remove_padding == self.ernie.im_patch_id] = image_features.cast(self.ernie._dtype) | ||||||
|         return input_embeddings |         return input_embeddings | ||||||
|  |  | ||||||
| @@ -769,11 +770,13 @@ class Ernie4_5_VLMoeForConditionalGeneration(ModelForCasualLM): | |||||||
|         image_features: Optional[paddle.Tensor], |         image_features: Optional[paddle.Tensor], | ||||||
|         forward_meta: ForwardMeta, |         forward_meta: ForwardMeta, | ||||||
|     ): |     ): | ||||||
|  |         vl_moe_meta = self.ernie.prepare_vl_moe_meta(ids_remove_padding=ids_remove_padding) | ||||||
|         input_embeddings = self.get_input_embeddings( |         input_embeddings = self.get_input_embeddings( | ||||||
|             ids_remove_padding=ids_remove_padding, image_features=image_features |             ids_remove_padding=ids_remove_padding, | ||||||
|  |             image_features=image_features, | ||||||
|  |             image_token_num=vl_moe_meta.image_token_num.item(), | ||||||
|         ) |         ) | ||||||
|         self._input_embeddings.copy_(input_embeddings, False) |         self._input_embeddings.copy_(input_embeddings, False) | ||||||
|         vl_moe_meta = self.ernie.prepare_vl_moe_meta(ids_remove_padding=ids_remove_padding) |  | ||||||
|  |  | ||||||
|         hidden_states = self.ernie( |         hidden_states = self.ernie( | ||||||
|             input_embeddings=self._input_embeddings, |             input_embeddings=self._input_embeddings, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Ayakouji
					Ayakouji