[OPs] Universal optimization and Fix early_stop cuda 700 (#3375)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled

* delete nonzero

* delete setup_ops_base.py

* check if

* check gcp infer_seed.cpu()

* fix repetition_early_stopper_kernel cuda 700
This commit is contained in:
chen
2025-08-14 22:40:44 +08:00
committed by GitHub
parent 09c979f3dd
commit f0f00a6025
15 changed files with 102 additions and 71 deletions

View File

@@ -281,10 +281,13 @@ class Sampler(nn.Layer):
probs = F.softmax(logits)
probs = min_p_sampling(probs, sampling_metadata.min_p)
probs = min_p_sampling(probs, sampling_metadata.min_p, sampling_metadata.min_p_list)
_, next_tokens = top_k_top_p_sampling(
probs, sampling_metadata.top_p, sampling_metadata.top_k, seed=sampling_metadata.seed[0, 0]
probs,
sampling_metadata.top_p,
sampling_metadata.top_k,
sampling_metadata.top_k_list,
seed=sampling_metadata.seed[0, 0],
)
logprobs_tensors = (