[OPs] Universal optimization and Fix early_stop cuda 700 (#3375)

* delete nonzero
* delete setup_ops_base.py
* check if
* check gcp infer_seed.cpu()
* fix repetition_early_stopper_kernel cuda 700
2025-10-04 16:22:57 +08:00 · 2025-08-14 22:40:44 +08:00
parent 09c979f3dd
commit f0f00a6025
15 changed files with 102 additions and 71 deletions
--- a/custom_ops/gpu_ops/sample_kernels/rejection_top_p_sampling.cu
+++ b/custom_ops/gpu_ops/sample_kernels/rejection_top_p_sampling.cu
@@ -19,7 +19,7 @@
 std::vector<paddle::Tensor> TopPSamplingReject(const paddle::Tensor &probs,
                                               const paddle::Tensor &top_p,
                                               const paddle::optional<paddle::Tensor> &top_k,
-                                               int seed) {
+                                               int64_t seed) {
  std::vector<int64_t> probs_shape = probs.shape();
  unsigned int batch_size = probs_shape[0];
  unsigned int vocab_size = probs_shape[1];
@@ -82,7 +82,7 @@ TopPSamplingRejectInferDtype(const paddle::DataType &probs_dtype,
 PD_BUILD_STATIC_OP(rejection_top_p_sampling)
    .Inputs({"probs", "top_p", paddle::Optional("top_k")})
    .Outputs({"samples"})
-    .Attrs({"seed: int"})
+    .Attrs({"seed: int64_t"})
    .SetKernelFn(PD_KERNEL(TopPSamplingReject))
    .SetInferShapeFn(PD_INFER_SHAPE(TopPSamplingRejectInferShape))
    .SetInferDtypeFn(PD_INFER_DTYPE(TopPSamplingRejectInferDtype));