[OPs] Universal optimization and Fix early_stop cuda 700 (#3375)

* delete nonzero
* delete setup_ops_base.py
* check if
* check gcp infer_seed.cpu()
* fix repetition_early_stopper_kernel cuda 700
2025-10-05 16:48:03 +08:00 · 2025-08-14 22:40:44 +08:00
parent 09c979f3dd
commit f0f00a6025
15 changed files with 102 additions and 71 deletions
--- a/fastdeploy/model_executor/layers/sample/sampler.py
+++ b/fastdeploy/model_executor/layers/sample/sampler.py
@@ -281,10 +281,13 @@ class Sampler(nn.Layer):

        probs = F.softmax(logits)

-        probs = min_p_sampling(probs, sampling_metadata.min_p)
-
+        probs = min_p_sampling(probs, sampling_metadata.min_p, sampling_metadata.min_p_list)
        _, next_tokens = top_k_top_p_sampling(
-            probs, sampling_metadata.top_p, sampling_metadata.top_k, seed=sampling_metadata.seed[0, 0]
+            probs,
+            sampling_metadata.top_p,
+            sampling_metadata.top_k,
+            sampling_metadata.top_k_list,
+            seed=sampling_metadata.seed[0, 0],
        )

        logprobs_tensors = (