Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-04 16:22:57 +08:00
remove cum_offsets from ForwardMeta (#2925)
Some checks failed: Deploy GitHub Pages / deploy (push) has been cancelled
@@ -85,8 +85,6 @@ class ForwardMeta:
     # The sequence length processed in the current step
     seq_lens_this_time: Optional[paddle.Tensor] = None

-    # Accumulated offset
-    cum_offsets: Optional[paddle.Tensor] = None
     # batch_id_per_token tensor, used to indicate which token belongs which batch after padding removal to the original input_ids
     batch_id_per_token: Optional[paddle.Tensor] = None

     # Accumulated sequence length of query
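
For orientation, the two fields kept here already describe the token layout that cum_offsets (commented in the source only as "Accumulated offset") overlapped with: batch_id_per_token maps each flattened token back to its request, and cu_seqlens_q holds the cumulative query lengths. A minimal sketch of how both derive from seq_lens_this_time; the helper name and NumPy stand-in are assumptions, not FastDeploy code:

import numpy as np

def build_token_meta(seq_lens_this_time: np.ndarray):
    # batch_id_per_token: request index of every token after padding removal
    batch_id_per_token = np.repeat(
        np.arange(len(seq_lens_this_time)), seq_lens_this_time
    )
    # cu_seqlens_q: cumulative query lengths, [0, l0, l0+l1, ...]
    cu_seqlens_q = np.concatenate([[0], np.cumsum(seq_lens_this_time)])
    return batch_id_per_token, cu_seqlens_q

# Three requests contributing 2, 1, and 3 tokens this step:
bids, cu_q = build_token_meta(np.array([2, 1, 3]))
print(bids)  # [0 0 1 2 2 2]
print(cu_q)  # [0 2 3 6]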
@@ -112,7 +110,8 @@ class XPUForwardMeta(ForwardMeta):
     """
     XPUForwardMeta is used to store the global meta information of the forward, and some XPU specific meta info.
     """
-
+    # Accumulated offset
+    cum_offsets: Optional[paddle.Tensor] = None
     # TODO(wanghaitao): Supplementary notes
     #
     encoder_batch_map: Optional[paddle.Tensor] = None
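
The XPU-specific subclass keeps the field, so XPU kernels that still consume it are unaffected by the removal from the base class. A hedged sketch of one plausible reading of the "Accumulated offset" comment, assuming cum_offsets[i] counts the pad tokens that precede request i when sequences padded to max_len are flattened; the helper is hypothetical and the exact semantics are an assumption:

import numpy as np

def build_cum_offsets(seq_lens: np.ndarray, max_len: int) -> np.ndarray:
    padding = max_len - seq_lens  # pad tokens per request
    return np.concatenate([[0], np.cumsum(padding)[:-1]])

print(build_cum_offsets(np.array([2, 1, 3]), max_len=3))  # [0 1 3]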
@@ -397,7 +397,6 @@ class MTPProposer(Proposer):
             seq_lens_encoder=self.model_inputs["seq_lens_encoder"],
             seq_lens_decoder=self.model_inputs["seq_lens_decoder"],
             seq_lens_this_time=self.model_inputs["seq_lens_this_time"],
-            cum_offsets=self.model_inputs["cum_offsets"],
             batch_id_per_token=self.model_inputs["batch_id_per_token"],
             cu_seqlens_q=self.model_inputs["cu_seqlens_q"],
             cu_seqlens_k=self.model_inputs["cu_seqlens_k"],
@@ -680,7 +680,6 @@ class GPUModelRunner(ModelRunnerBase):
             seq_lens_encoder=self.share_inputs["seq_lens_encoder"],
             seq_lens_decoder=self.share_inputs["seq_lens_decoder"],
             seq_lens_this_time=self.share_inputs["seq_lens_this_time"],
-            cum_offsets=self.share_inputs["cum_offsets"],
             batch_id_per_token=self.share_inputs["batch_id_per_token"],
             cu_seqlens_q=self.share_inputs["cu_seqlens_q"],
             cu_seqlens_k=self.share_inputs["cu_seqlens_k"],
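
Both call sites change the same way: the cum_offsets keyword is dropped and every other argument is untouched. A hypothetical migration sketch for an out-of-tree caller, with a stub standing in for the real ForwardMeta constructor and runner tensors; only fields visible in this diff are used:

def make_forward_meta(**kwargs):
    assert "cum_offsets" not in kwargs  # removed by #2925
    return kwargs

share_inputs = {name: None for name in (
    "seq_lens_encoder", "seq_lens_decoder", "seq_lens_this_time",
    "batch_id_per_token", "cu_seqlens_q", "cu_seqlens_k",
)}

meta = make_forward_meta(
    seq_lens_encoder=share_inputs["seq_lens_encoder"],
    seq_lens_decoder=share_inputs["seq_lens_decoder"],
    seq_lens_this_time=share_inputs["seq_lens_this_time"],
    # cum_offsets=share_inputs["cum_offsets"],  # dropped by this commit
    batch_id_per_token=share_inputs["batch_id_per_token"],
    cu_seqlens_q=share_inputs["cu_seqlens_q"],
    cu_seqlens_k=share_inputs["cu_seqlens_k"],
)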