[Feature] Optim PaddleOCR-VL (#4873)

* [Feature] Optim PaddleOCR-VL
* fix bug
2025-12-24 13:28:13 +08:00 · 2025-11-07 14:56:44 +08:00
parent bbe0820555
commit cba185f1fe
12 changed files with 535 additions and 112 deletions
--- a/custom_ops/gpu_ops/cpp_extensions.cc
+++ b/custom_ops/gpu_ops/cpp_extensions.cc
@@ -1059,6 +1059,15 @@ std::vector<paddle::Tensor> UpdateAttnMaskOffsets(
    const paddle::Tensor& decode_states,
    const paddle::Tensor& mask_rollback);

+std::vector<paddle::Tensor> FusedNeoxRopeEmbedding(
+    const paddle::Tensor& qkv,
+    const paddle::Tensor& cos_emb,
+    const paddle::Tensor& sin_emb,
+    const int num_heads,
+    const int head_dim);
+
+std::vector<paddle::Tensor> GeluTanh(paddle::Tensor& input);
+
 PYBIND11_MODULE(fastdeploy_ops, m) {
  m.def("get_expert_token_num",
        &GetExpertTokenNum,
@@ -1648,4 +1657,10 @@ PYBIND11_MODULE(fastdeploy_ops, m) {
  m.def("update_attn_mask_offsets",
        &UpdateAttnMaskOffsets,
        "update attention mask");
+
+  m.def("fused_neox_rope_embedding",
+        &FusedNeoxRopeEmbedding,
+        "fused_neox_rope_embedding function");
+
+  m.def("gelu_tanh", &GeluTanh, "gelu_tanh function");
 }