【Infer】Improve the performance of block_wise_fp8 in triton_moe_backend (#2942)

This commit is contained in:
chen
2025-07-23 13:02:50 +08:00
committed by GitHub
parent e51f018577
commit ad202272ed
2 changed files with 30 additions and 14 deletions

View File

@@ -14,6 +14,7 @@
# limitations under the License.
"""
import functools
from typing import Tuple, Union
import numpy as np
@@ -375,3 +376,17 @@ def create_and_set_parameter(layer: nn.Layer, name: str, tensor: paddle.Tensor):
),
)
getattr(layer, name).set_value(tensor)
@functools.cache
def create_empty_tensor(shape: Tuple[int, ...], dtype: Union[paddle.dtype, str]) -> paddle.Tensor:
    """
    Allocate a tensor with ``paddle.empty`` and memoize it per ``(shape, dtype)``.

    Because the function is wrapped in ``functools.cache``, a repeated call
    with equal arguments returns the very same tensor object instead of
    performing a new allocation, so all such callers share one buffer.

    Args:
        shape (Tuple[int, ...]): Dimensions of the tensor. It is part of the
            cache key, so it must be hashable (a tuple, not a list).
        dtype (Union[paddle.dtype, str]): Element type of the tensor, such as
            'bfloat16' or 'float16'.

    Returns:
        paddle.Tensor: The cached tensor created for these arguments.
    """
    # NOTE(review): the cache is unbounded, so each distinct (shape, dtype)
    # keeps its tensor alive for the lifetime of the process -- confirm that
    # callers only request a small, stable set of shapes.
    dims = list(shape)  # the tuple is only needed for hashing; pass a list on
    return paddle.empty(dims, dtype=dtype)