mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 00:33:03 +08:00
[GCU] Support gcu platform (#2702)
baseline: e7fa57ebae
Co-authored-by: yongqiangma <xing.wo@163.com>
This commit is contained in:
@@ -17,5 +17,6 @@ from . import cpu
|
||||
from . import xpu
|
||||
from . import npu
|
||||
from . import iluvatar
|
||||
from . import gcu
|
||||
|
||||
__all__ = ["gpu", "cpu", "xpu", "npu", "iluvatar"]
|
||||
__all__ = ["gpu", "cpu", "xpu", "npu", "iluvatar", "gcu"]
|
||||
|
116
fastdeploy/model_executor/ops/gcu/__init__.py
Normal file
116
fastdeploy/model_executor/ops/gcu/__init__.py
Normal file
@@ -0,0 +1,116 @@
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
""" fastdeploy gcu ops """
|
||||
from fastdeploy.platforms import current_platform
|
||||
|
||||
from fastdeploy.import_ops import import_custom_ops, rename_imported_op
|
||||
|
||||
# Dotted name of this package; it is passed to import_custom_ops below.
PACKAGE = "fastdeploy.model_executor.ops.gcu"

# import_custom_ops receives the package name, the relative module name
# ".fastdeploy_ops" and this module's namespace.
# NOTE(review): presumably it loads that module and publishes its ops into
# globals() -- confirm against fastdeploy.import_ops.
import_custom_ops(
    PACKAGE,
    ".fastdeploy_ops",
    globals(),
)

# Only when the active platform reports itself as GCU, re-export a handful of
# ops that live in paddle_custom_device. They are not used in this file, hence
# the noqa markers.
if current_platform.is_gcu():
    from paddle_custom_device.gcu.ops import (  # noqa: F401,E402
        invoke_fused_moe_kernel,  # noqa: F401
        moe_align_block_size,  # noqa: F401
        top_p_sampling,  # noqa: F401
        topk_softmax,  # noqa: F401
        weight_quantize_custom_rtn,  # noqa: F401
        weight_quantize_rtn,  # noqa: F401
    )
|
||||
|
||||
# ###################### Ops from PaddleCustomDevice ####################
# Each pair is (name passed as old_name, name passed as new_name). The calls
# are issued in the listed order against this module's globals().
# NOTE(review): presumably rename_imported_op rebinds the op under new_name --
# confirm against fastdeploy.import_ops.
# Note that "fused_add_rms_norm_op" carries an "_op" suffix, not "_gcu".
for _imported_name, _exposed_name in (
    ("fused_rotary_embedding_gcu", "fused_rotary_embedding"),
    ("reshape_and_cache_gcu", "reshape_and_cache"),
    ("paged_attention_gcu", "paged_attention"),
    ("mem_efficient_attention_gcu", "mem_efficient_attention"),
    ("flash_attn_var_len_gcu", "flash_attn_var_len"),
    ("rms_norm_gcu", "rms_norm"),
    ("fused_add_rms_norm_op", "fused_add_rms_norm"),
    ("linear_quant_gcu", "linear_quant"),
):
    rename_imported_op(
        old_name=_imported_name,
        new_name=_exposed_name,
        global_ns=globals(),
    )

# Drop the loop temporaries so they do not linger in the module namespace.
del _imported_name, _exposed_name
|
||||
|
||||
|
||||
# ###################### CPU OPS ####################
# Each pair is (name passed as old_name, name passed as new_name). The calls
# are issued in the listed order against this module's globals().
# NOTE(review): presumably rename_imported_op rebinds the op under new_name --
# confirm against fastdeploy.import_ops.
for _imported_name, _exposed_name in (
    ("get_padding_offset_gcu", "get_padding_offset"),
    ("update_inputs_gcu", "update_inputs"),
    ("rebuild_padding_gcu", "rebuild_padding"),
    ("get_token_penalty_multi_scores_gcu", "get_token_penalty_multi_scores"),
    ("set_stop_value_multi_ends_gcu", "set_stop_value_multi_ends"),
    ("set_value_by_flags_and_idx_gcu", "set_value_by_flags_and_idx"),
):
    rename_imported_op(
        old_name=_imported_name,
        new_name=_exposed_name,
        global_ns=globals(),
    )

# Drop the loop temporaries so they do not linger in the module namespace.
del _imported_name, _exposed_name
|
Reference in New Issue
Block a user