[Metax] optimize mla attention (#5258)

This commit is contained in:
xiaozude
2025-12-09 11:18:19 +08:00
committed by GitHub
parent 5d9b5e4a5b
commit c06a6234b9
6 changed files with 1026 additions and 377 deletions

View File

@@ -162,12 +162,11 @@ function copy_ops(){
is_maca=`$python -c "import paddle; print(paddle.device.is_compiled_with_custom_device('metax_gpu'))"`
if [ "$is_maca" = "True" ]; then
DEVICE_TYPE="metax_gpu"
mkdir -p ../fastdeploy/model_executor/ops/base
cp -r ${OPS_TMP_DIR_BASE}/${WHEEL_BASE_NAME}/* ../fastdeploy/model_executor/ops/base
cp -r ${TMP_PACKAGE_DIR}/* ../fastdeploy/model_executor/ops/gpu
echo -e "MACA ops have been copy to fastdeploy"
return
fi
is_intel_hpu=`$python -c "import paddle; print(paddle.is_compiled_with_custom_device('intel_hpu'))"`
if [ "$is_intel_hpu" = "True" ]; then
DEVICE_TYPE="intel-hpu"