Mirror of https://github.com/PaddlePaddle/FastDeploy.git — synced 2025-10-05 16:48:03 +08:00.
[Executor] Increase buffer size to prevent address corruption; add forward metadata debug tool (#3404)
* 修复 buffer 申请不够大，增加打印 ForwardMeta 的工具 (fix undersized buffer allocation; add a tool to print ForwardMeta)
* fix mistake
* Make CPU tensor in CPUPlace
* Add test about forward_meta_str and Add unitest_requirement

Co-authored-by: RAM <gstian5555@outlook.com>
This commit is contained in:
@@ -681,9 +681,11 @@ class GPUModelRunner(ModelRunnerBase):
             dtype="int64",
         )
         self.share_inputs["cum_offsets"] = paddle.full([max_num_seqs, 1], 0, dtype="int32")
-        self.share_inputs["batch_id_per_token"] = paddle.full([max_num_seqs, 1], 0, dtype="int32")
-        self.share_inputs["cu_seqlens_q"] = paddle.full([max_num_seqs, 1], 0, dtype="int32")
-        self.share_inputs["cu_seqlens_k"] = paddle.full([max_num_seqs, 1], 0, dtype="int32")
+        self.share_inputs["batch_id_per_token"] = paddle.full(
+            [max_num_seqs * self.parallel_config.max_model_len, 1], 0, dtype="int32"
+        )
+        self.share_inputs["cu_seqlens_q"] = paddle.full([max_num_seqs + 1, 1], 0, dtype="int32")
+        self.share_inputs["cu_seqlens_k"] = paddle.full([max_num_seqs + 1, 1], 0, dtype="int32")

         # Declare AttentionBackend buffers
         self.share_inputs["decoder_batch_ids"] = None
Reference in New Issue
Block a user