[OPs] Universal optimization and fix early_stop CUDA error 700 (#3375)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled

* delete nonzero

* delete setup_ops_base.py

* check if

* check gcp infer_seed.cpu()

* fix repetition_early_stopper_kernel CUDA error 700 (illegal memory access)
This commit is contained in:
chen
2025-08-14 22:40:44 +08:00
committed by GitHub
parent 09c979f3dd
commit f0f00a6025
15 changed files with 102 additions and 71 deletions

View File

@@ -90,10 +90,10 @@ class RepetitionEarlyStopper(EarlyStopper):
)
B, W = self.trunc_scores.shape
-V = probs.shape[1]
+real_bsz, V = probs.shape
 BLOCK_W = triton.next_power_of_2(W)
-grid = (B,)
+grid = (real_bsz,)
repetition_early_stopper_kernel[grid](
self.trunc_scores,
probs,