[OPs] Universal optimization and fix early_stop CUDA error 700 (illegal memory access) (#3375)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled

* delete nonzero

* delete setup_ops_base.py

* check if

* check gcp infer_seed.cpu()

* fix CUDA error 700 (illegal memory access) in repetition_early_stopper_kernel
This commit is contained in:
chen
2025-08-14 22:40:44 +08:00
committed by GitHub
parent 09c979f3dd
commit f0f00a6025
15 changed files with 102 additions and 71 deletions

View File

@@ -19,7 +19,6 @@ from fastdeploy.import_ops import import_custom_ops
PACKAGE = "fastdeploy.model_executor.ops.gpu"
import_custom_ops(PACKAGE, "..base.fastdeploy_base_ops", globals())
import_custom_ops(PACKAGE, ".fastdeploy_ops", globals())

View File

@@ -17,7 +17,6 @@ from fastdeploy.import_ops import import_custom_ops
PACKAGE = "fastdeploy.model_executor.ops.iluvatar"
import_custom_ops(PACKAGE, "..base.fastdeploy_base_ops", globals())
import_custom_ops(PACKAGE, ".fastdeploy_ops", globals())
from .moe_ops import iluvatar_moe_expert_ffn as moe_expert_ffn # noqa: F401