【Infer】Improve the performance block_wise_fp8 of triton_moe_backend (#2942)

2025-10-05 16:48:03 +08:00 · 2025-07-23 13:02:50 +08:00
parent e51f018577
commit ad202272ed
2 changed files with 30 additions and 14 deletions
--- a/fastdeploy/model_executor/layers/utils.py
+++ b/fastdeploy/model_executor/layers/utils.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 """

+import functools
 from typing import Tuple, Union

 import numpy as np
@@ -375,3 +376,17 @@ def create_and_set_parameter(layer: nn.Layer, name: str, tensor: paddle.Tensor):
        ),
    )
    getattr(layer, name).set_value(tensor)
+
+@functools.cache
+def create_empty_tensor(shape: Tuple[int, ...], dtype: Union[paddle.dtype, str]) -> paddle.Tensor:
+    """
+    Return an uninitialized tensor of the given shape and dtype, memoized per (shape, dtype) by functools.cache.
+
+    Args:
+        shape (Tuple[int, ...]): Tensor dimensions; must be a hashable tuple because it is part of the cache key.
+        dtype (Union[paddle.dtype, str]): The data type for the tensor, such as 'bfloat16', 'float16', etc.
+
+    Returns:
+        paddle.Tensor: The same shared object for equal arguments, so writes are seen by all callers; cache is unbounded.
+    """
+    return paddle.empty(list(shape), dtype=dtype)