mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
@@ -49,7 +49,6 @@ from fastdeploy.platforms import current_platform
|
||||
|
||||
if current_platform.is_cuda():
|
||||
from fastdeploy.model_executor.ops.gpu import (
|
||||
extract_text_token_output,
|
||||
text_image_gather_scatter,
|
||||
text_image_index_out,
|
||||
)
|
||||
@@ -544,17 +543,6 @@ class Ernie4_5_VLModel(nn.Layer):
|
||||
)
|
||||
|
||||
hidden_states = hidden_states + residual
|
||||
|
||||
max_seq_len, max_seq_len_index = paddle.topk(forward_meta.seq_lens_this_time, k=1)
|
||||
hidden_states = extract_text_token_output(
|
||||
max_seq_len,
|
||||
max_seq_len_index.cast("int32"),
|
||||
vl_moe_meta.image_token_num.cast("int32"),
|
||||
forward_meta.seq_lens_this_time,
|
||||
forward_meta.cu_seqlens_q,
|
||||
hidden_states.cast("float32"),
|
||||
).cast(self._dtype)
|
||||
|
||||
out = self.norm(hidden_states)
|
||||
|
||||
return out
|
||||
|
Reference in New Issue
Block a user