mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
【Infer】Improve the performance of block_wise_fp8 in triton_moe_backend (#2942)
This commit is contained in:
@@ -14,6 +14,7 @@
|
||||
# limitations under the License.
|
||||
"""
|
||||
|
||||
import functools
|
||||
from typing import Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
@@ -375,3 +376,17 @@ def create_and_set_parameter(layer: nn.Layer, name: str, tensor: paddle.Tensor):
|
||||
),
|
||||
)
|
||||
getattr(layer, name).set_value(tensor)
|
||||
|
||||
# lru_cache(maxsize=None) is the exact definition of functools.cache: an
# unbounded memo keyed on the call arguments.
@functools.lru_cache(maxsize=None)
def create_empty_tensor(shape: Tuple[int, ...], dtype: Union[paddle.dtype, str]) -> paddle.Tensor:
    """
    Return a memoized tensor allocated with ``paddle.empty`` for the given shape and dtype.

    The allocation happens only on the first call for a given ``(shape, dtype)``
    pair; every later call with equal arguments returns that same tensor
    object. Callers therefore share one buffer per argument pair, and the
    memo has no size limit, so each distinct pair stays cached until
    ``create_empty_tensor.cache_clear()`` is called.

    Args:
        shape (Tuple[int, ...]): Dimensions of the tensor. It is part of the
            cache key, so it must be hashable (pass a tuple, not a list).
        dtype (Union[paddle.dtype, str]): Element type of the tensor, such as
            'bfloat16' or 'float16'. It is also part of the cache key.

    Returns:
        paddle.Tensor: The cached tensor with the requested shape and data type.
    """
    # paddle.empty is handed a list, so convert the hashable tuple key here.
    dims = list(shape)
    return paddle.empty(dims, dtype=dtype)
|
||||
|
Reference in New Issue
Block a user