[OPs] Universal optimization and Fix early_stop cuda 700 (#3375)

* delete nonzero * delete setup_ops_base.py * check if * check gcp infer_seed.cpu() * fix repetition_early_stopper_kernel cuda 700
2025-10-05 16:48:03 +08:00 · 2025-08-14 22:40:44 +08:00
parent 09c979f3dd
commit f0f00a6025
15 changed files with 102 additions and 71 deletions
--- a/custom_ops/gpu_ops/get_output_ep.cc
+++ b/custom_ops/gpu_ops/get_output_ep.cc
@@ -109,11 +109,11 @@ void GetOutputEp(const paddle::Tensor& x,
    return;
 }

-void GetOutputStatic(const paddle::Tensor& x, int64_t rank_id, bool wait_flag) {
+void GetOutputEPStatic(const paddle::Tensor& x, int64_t rank_id, bool wait_flag) {
    GetOutputEp(x, rank_id, wait_flag, 1);
 }

-void GetOutputDynamic(const paddle::Tensor& x,
+void GetOutputEPDynamic(const paddle::Tensor& x,
                      int64_t rank_id,
                      bool wait_flag,
                      int msg_queue_id) {
@@ -125,11 +125,11 @@ PD_BUILD_STATIC_OP(get_output_ep)
    .Attrs({"rank_id: int64_t", "wait_flag: bool"})
    .Outputs({"x_out"})
    .SetInplaceMap({{"x", "x_out"}})
-    .SetKernelFn(PD_KERNEL(GetOutputStatic));
+    .SetKernelFn(PD_KERNEL(GetOutputEPStatic));

 PD_BUILD_STATIC_OP(get_output_ep_dynamic)
    .Inputs({"x"})
    .Attrs({"rank_id: int64_t", "wait_flag: bool", "msg_queue_id: int"})
    .Outputs({"x_out"})
    .SetInplaceMap({{"x", "x_out"}})
-    .SetKernelFn(PD_KERNEL(GetOutputDynamic));
+    .SetKernelFn(PD_KERNEL(GetOutputEPDynamic));