[Executor] Increase buffer size to prevent address corruption; add forward metadata debug tool (#3404)

* 修复buffer申请不够大,增加打印forwardmetadata的工具

* fix mistake

* Create CPU tensors on CPUPlace

* Add test for forward_meta_str and add unittest_requirement

---------

Co-authored-by: RAM <gstian5555@outlook.com>
This commit is contained in:
Jundong Liu
2025-08-18 16:14:09 +08:00
committed by GitHub
parent 5585cf7aa5
commit ea4a3b479c
5 changed files with 146 additions and 4 deletions

View File

@@ -681,9 +681,11 @@ class GPUModelRunner(ModelRunnerBase):
dtype="int64",
)
self.share_inputs["cum_offsets"] = paddle.full([max_num_seqs, 1], 0, dtype="int32")
self.share_inputs["batch_id_per_token"] = paddle.full([max_num_seqs, 1], 0, dtype="int32")
self.share_inputs["cu_seqlens_q"] = paddle.full([max_num_seqs, 1], 0, dtype="int32")
self.share_inputs["cu_seqlens_k"] = paddle.full([max_num_seqs, 1], 0, dtype="int32")
self.share_inputs["batch_id_per_token"] = paddle.full(
[max_num_seqs * self.parallel_config.max_model_len, 1], 0, dtype="int32"
)
self.share_inputs["cu_seqlens_q"] = paddle.full([max_num_seqs + 1, 1], 0, dtype="int32")
self.share_inputs["cu_seqlens_k"] = paddle.full([max_num_seqs + 1, 1], 0, dtype="int32")
# Declare AttentionBackend buffers
self.share_inputs["decoder_batch_ids"] = None