[Feature] DeepseekV3 use pd_build_static_op (#2948)

Co-authored-by: K11OntheBoat <ruianmaidanglao@163.com>
This commit is contained in:
K11OntheBoat
2025-07-22 15:03:41 +08:00
committed by GitHub
parent 2a8a2c06de
commit e991777757
6 changed files with 13 additions and 8 deletions

View File

@@ -12,9 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "helper.h"
#include "paddle/extension.h"
// Integer ceiling division: computes (a + b - 1) / b.
// Every use of an argument is parenthesized so that compound expressions
// (e.g. CEILDIV(n, x + y)) expand with the intended precedence.
// NOTE: `b` appears twice in the expansion, so avoid arguments with side effects.
#define CEILDIV(a, b) (((a) + (b) - 1) / (b))
template <typename scalar_t>
@@ -189,7 +189,7 @@ std::vector<paddle::Tensor> tritonmoe_preprocess_kernel(const paddle::Tensor& to
return {sorted_ids, expert_ids, num_tokens_post_pad};
}
PD_BUILD_OP(tritonmoe_preprocess)
PD_BUILD_STATIC_OP(tritonmoe_preprocess)
.Inputs({"topk_ids"})
.Attrs({"num_experts: int64_t", "GEMM_BLOCK_SIZE_M: int64_t"})
.Outputs({"sorted_ids", "expert_ids", "num_tokens_post_pad"})