[Bug fix] Fix cudagraph when using EP. (#3130)

* fix cudagraph when using ep

* fix typo

* reduce full length to accommodate large batch sizes such as 128/256
This commit is contained in:
Longzhi Wang
2025-08-04 18:06:18 +08:00
committed by GitHub
parent 2bd8a50649
commit 01d7586661

View File

@@ -525,6 +525,12 @@ class GPUModelRunner(ModelRunnerBase):
num_tokens // batch_size,
self.parallel_config.max_model_len - max_dec_len,
)
# NOTE(wanglongzhi): When the full length is too large, DeepEP's buffer is not large enough, which causes NaN values to appear in the result.
# TODO(wanglongzhi): Figure out the accurate buffer size of DeepEP.
if self.fd_config.parallel_config.enable_expert_parallel:
full_length = min(full_length, 32)
input_length = int(full_length * self.cache_config.kv_cache_ratio)
block_num = (
input_length + self.cache_config.block_size - 1