diff --git a/build.sh b/build.sh index 0596d8f99..7c9e0ab63 100644 --- a/build.sh +++ b/build.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -22,7 +22,13 @@ FD_CPU_USE_BF16=${3:-"false"} # For SM90 (Hopper), use 90. For SM100 (Blackwell), use 100. # These will be translated to 90a / 100a in setup_ops.py for specific features. FD_BUILDING_ARCS=${4:-""} - +# FD_USE_PRECOMPILED: Specify whether to use precompiled custom ops. +# 0 = build ops from source (default) +# 1 = use precompiled ops +FD_USE_PRECOMPILED=${5:-0} +# FD_COMMIT_ID: Specify the commit ID for locating precompiled wheel packages. +# If not provided, the current git commit ID will be used automatically. +FD_COMMIT_ID=${6:-""} # paddle distributed use to set archs unset PADDLE_CUDA_ARCH_LIST @@ -31,6 +37,7 @@ unset PADDLE_CUDA_ARCH_LIST DIST_DIR="dist" BUILD_DIR="build" EGG_DIR="fastdeploy.egg-info" +PRE_WHEEL_DIR="pre_wheel" # custom_ops directory config OPS_SRC_DIR="custom_ops" @@ -40,6 +47,7 @@ OPS_TMP_DIR="tmp" RED='\033[0;31m' BLUE='\033[0;34m' GREEN='\033[1;32m' +YELLOW='\033[1;33m' BOLD='\033[1m' NONE='\033[0m' @@ -57,12 +65,11 @@ function python_version_check() { function init() { echo -e "${BLUE}[init]${NONE} removing building directory..." - rm -rf $DIST_DIR $BUILD_DIR $EGG_DIR + rm -rf $BUILD_DIR $EGG_DIR $PRE_WHEEL_DIR ${python} -m pip install setuptools_scm echo -e "${BLUE}[init]${NONE} ${GREEN}init success\n" } - function copy_ops(){ OPS_VERSION="0.0.0" PY_MAIN_VERSION=`${python} -V 2>&1 | awk '{print $2}' | awk -F '.' 
#######################################
# Print the standard fallback notice and switch the build back to source mode.
# Globals:   FD_USE_PRECOMPILED (written); YELLOW, NONE (read)
# Returns:   always 1
#######################################
function _fd_precompiled_fallback() {
    echo -e "${YELLOW}[fallback]${NONE} Falling back to source compilation..."
    FD_USE_PRECOMPILED=0
    return 1
}

#######################################
# Check that a file is a non-empty, readable zip archive.
# Globals:   python (read; only used when `unzip` is not installed)
# Arguments: $1 - path of the wheel to test
# Returns:   0 if the archive passes the integrity test, non-zero otherwise
#######################################
function _fd_wheel_is_valid() {
    local whl_path=$1
    # An empty file is what a failed download typically leaves behind.
    [ -s "$whl_path" ] || return 1
    if command -v unzip >/dev/null 2>&1; then
        unzip -t "$whl_path" >/dev/null 2>&1
    else
        ${python} -m zipfile -t "$whl_path" >/dev/null 2>&1
    fi
}

#######################################
# Detect the CUDA version / GPU architecture, build the remote URL and
# download the matching precompiled wheel.
# Globals:   FD_COMMIT_ID (read); FD_WGET_NO_CHECK_CERT (read, optional:
#            set to 1 to skip TLS certificate verification); colour variables
# Arguments: $1 - destination path of the wheel
#            $2 - wheel file name on the remote server
# Returns:   0 when a validated wheel is available at $1, 1 otherwise.
#            Nothing is left at $1 on failure.
#######################################
function _fd_download_precompiled_wheel() {
    local whl_path=$1
    local whl_name=$2

    local tool
    for tool in nvcc nvidia-smi wget; do
        if ! command -v "$tool" >/dev/null 2>&1; then
            echo -e "${YELLOW}[WARNING]${NONE} '${tool}' not found; cannot locate a matching precompiled wheel."
            return 1
        fi
    done

    # `sed -n ... p` prints only when the pattern matches, so unexpected nvcc
    # output yields an empty string instead of leaking into the URL.
    local cuda_version
    cuda_version=$(nvcc --version \
        | sed -nE 's/.*release ([0-9]+)\.([0-9]+).*/\1\2/p' | head -n 1)
    if [[ ! "$cuda_version" =~ ^[0-9]+$ ]]; then
        echo -e "${YELLOW}[WARNING]${NONE} Cannot detect CUDA version from nvcc."
        return 1
    fi
    echo -e "${BLUE}[info]${NONE} Detected CUDA version: ${GREEN}cu${cuda_version}${NONE}"

    # Compute capability "9.0" -> "90". The dot is dropped textually rather
    # than multiplying by 10, which avoids floating-point truncation.
    # `sort -u` is kept lexical on purpose: the result is part of the remote
    # path, so the ordering must not change.
    local gpu_arch_str
    gpu_arch_str=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader \
        | awk '{ gsub(/[^0-9]/, "", $0); if ($0 != "") print $0 }' \
        | sort -u | awk '{printf("SM_%s_",$1)}' | sed 's/_$//')
    if [ -z "$gpu_arch_str" ]; then
        echo -e "${YELLOW}[WARNING]${NONE} Cannot detect GPU architecture from nvidia-smi."
        return 1
    fi
    echo -e "${BLUE}[info]${NONE} Detected GPU arch: ${GREEN}${gpu_arch_str}${NONE}"

    local remote_url="https://paddle-qa.bj.bcebos.com/paddle-pipeline/FastDeploy_ActionCE/cu${cuda_version}/${gpu_arch_str}/develop/${FD_COMMIT_ID}/${whl_name}"
    echo -e "${BLUE}[precompiled]${NONE} Local wheel not found, downloading from: ${remote_url}"

    # Download to a side file and move it into place only after validation,
    # so an interrupted or failed download never looks like a cached wheel.
    local tmp_path="${whl_path}.part"
    rm -f "$tmp_path"
    local wget_args=(-O "$tmp_path")
    # TLS verification stays on by default because the wheel contains native
    # code that will be executed; opt out explicitly if the CA bundle is missing.
    if [ "${FD_WGET_NO_CHECK_CERT:-0}" = "1" ]; then
        wget_args+=(--no-check-certificate)
    fi
    if ! wget "${wget_args[@]}" "$remote_url"; then
        rm -f "$tmp_path"
        echo -e "${YELLOW}[WARNING]${NONE} Failed to download wheel."
        return 1
    fi
    if ! _fd_wheel_is_valid "$tmp_path"; then
        rm -f "$tmp_path"
        echo -e "${YELLOW}[WARNING]${NONE} Downloaded wheel seems invalid."
        return 1
    fi
    mv -f "$tmp_path" "$whl_path"
    echo -e "${GREEN}[SUCCESS]${NONE} Downloaded precompiled wheel to ${whl_path}"
}

#######################################
# Install precompiled GPU custom ops by extracting them from a wheel.
#
# Uses a wheel already present in PRE_WHEEL_DIR when it is valid; otherwise
# downloads the one matching the detected CUDA version, GPU architecture and
# FD_COMMIT_ID. The ops are copied into fastdeploy/model_executor/ops/gpu and
# PRE_WHEEL_DIR is removed on success.
#
# Globals:   FD_COMMIT_ID (read; written when empty), FD_USE_PRECOMPILED
#            (set to 0 on every fallback path), PRE_WHEEL_DIR, python,
#            colour variables (read)
# Arguments: none
# Returns:   0 on success, 1 when the caller should fall back to a source
#            build. Exits the script with status 1 when no commit ID was
#            given and none can be read from git.
#######################################
function extract_ops_from_precompiled_wheel() {
    local WHL_NAME="fastdeploy_gpu-0.0.0-py3-none-any.whl"
    if [ -z "$FD_COMMIT_ID" ]; then
        if git rev-parse HEAD >/dev/null 2>&1; then
            FD_COMMIT_ID=$(git rev-parse HEAD)
            echo -e "${BLUE}[init]${NONE} Using current repo commit ID: ${GREEN}${FD_COMMIT_ID}${NONE}"
        else
            echo -e "${RED}[ERROR]${NONE} Cannot determine commit ID (not a git repo). Please provide manually."
            exit 1
        fi
    fi

    local WHL_PATH="${PRE_WHEEL_DIR}/${WHL_NAME}"
    mkdir -p "${PRE_WHEEL_DIR}"

    if [ -f "$WHL_PATH" ]; then
        echo -e "${BLUE}[precompiled]${NONE} Found local wheel: ${WHL_PATH}"
        if ! _fd_wheel_is_valid "$WHL_PATH"; then
            # A broken cached wheel should not block a fresh download.
            echo -e "${YELLOW}[WARNING]${NONE} Local wheel seems invalid."
            rm -f "$WHL_PATH"
        fi
    fi

    if [ ! -f "$WHL_PATH" ]; then
        if ! _fd_download_precompiled_wheel "$WHL_PATH" "$WHL_NAME"; then
            _fd_precompiled_fallback
            return 1
        fi
    fi

    local TMP_DIR="${PRE_WHEEL_DIR}/tmp_whl_unpack"
    rm -rf "$TMP_DIR"
    mkdir -p "$TMP_DIR"

    echo -e "${BLUE}[precompiled]${NONE} Unpacking wheel..."
    if ! ${python} -m zipfile -e "$WHL_PATH" "$TMP_DIR"; then
        echo -e "${RED}[ERROR]${NONE} Failed to unpack wheel: $WHL_PATH"
        rm -rf "$TMP_DIR"
        _fd_precompiled_fallback
        return 1
    fi

    local DATA_DIR
    DATA_DIR=$(find "$TMP_DIR" -maxdepth 1 -type d -name "*.data" | head -n 1)
    if [ -z "$DATA_DIR" ]; then
        echo -e "${RED}[ERROR]${NONE} Cannot find *.data directory in unpacked wheel."
        rm -rf "$TMP_DIR"
        _fd_precompiled_fallback
        return 1
    fi

    local PLATLIB_DIR="${DATA_DIR}/platlib"
    local SRC_DIR="${PLATLIB_DIR}/fastdeploy/model_executor/ops/gpu"
    local DST_DIR="fastdeploy/model_executor/ops/gpu"

    if [ ! -d "$SRC_DIR" ]; then
        echo -e "${RED}[ERROR]${NONE} GPU ops directory not found in wheel: $SRC_DIR"
        rm -rf "$TMP_DIR"
        _fd_precompiled_fallback
        return 1
    fi

    # The compiled extension modules are the reason this path exists; without
    # them the "precompiled" install would succeed while shipping no ops.
    local so_files=("$SRC_DIR"/fastdeploy_ops_*.so)
    if [ ! -e "${so_files[0]}" ]; then
        echo -e "${RED}[ERROR]${NONE} No fastdeploy_ops_*.so found in wheel: $SRC_DIR"
        rm -rf "$TMP_DIR"
        _fd_precompiled_fallback
        return 1
    fi

    echo -e "${BLUE}[precompiled]${NONE} Copying GPU precompiled contents..."
    mkdir -p "$DST_DIR"
    if ! cp -f "${so_files[@]}" "$DST_DIR/"; then
        echo -e "${RED}[ERROR]${NONE} Failed to copy precompiled ops into $DST_DIR"
        rm -rf "$TMP_DIR"
        _fd_precompiled_fallback
        return 1
    fi

    # The remaining items are copied when the wheel ships them; their absence
    # is tolerated, but a copy error is reported instead of being hidden.
    local optional_item
    for optional_item in deep_gemm fastdeploy_ops.py version.txt; do
        if [ -e "$SRC_DIR/$optional_item" ]; then
            cp -rf "$SRC_DIR/$optional_item" "$DST_DIR/" \
                || echo -e "${YELLOW}[WARNING]${NONE} Failed to copy ${optional_item} from wheel."
        fi
    done

    echo -e "${GREEN}[SUCCESS]${NONE} Installed FastDeploy using precompiled wheel."
    # `:?` guards against an empty variable turning this into `rm -rf ""`.
    rm -rf -- "${PRE_WHEEL_DIR:?}"
}
+ FD_USE_PRECOMPILED=0 + fi + fi + + if [ "$FD_USE_PRECOMPILED" -eq 0 ]; then + echo -e "${BLUE}[MODE]${NONE} Building from source (ops)..." + build_and_install_ops + echo -e "${BLUE}[MODE]${NONE} Building full wheel from source..." + build_and_install + cleanup + fi # get Paddle version PADDLE_VERSION=`${python} -c "import paddle; print(paddle.version.full_version)"` @@ -274,6 +396,6 @@ else init build_and_install_ops version_info - rm -rf $BUILD_DIR $EGG_DIR $DIST_DIR + rm -rf $BUILD_DIR $EGG_DIR rm -rf $OPS_SRC_DIR/$BUILD_DIR $OPS_SRC_DIR/$EGG_DIR fi diff --git a/docs/get_started/installation/nvidia_gpu.md b/docs/get_started/installation/nvidia_gpu.md index 5076b9b2f..e2e25807a 100644 --- a/docs/get_started/installation/nvidia_gpu.md +++ b/docs/get_started/installation/nvidia_gpu.md @@ -80,6 +80,51 @@ bash build.sh 1 python false [80,90] ``` The built packages will be in the ```FastDeploy/dist``` directory. +## 5. Precompiled Operator Wheel Packages + +FastDeploy provides precompiled GPU operator wheel packages for quick setup without building the entire source code. +This method currently supports **SM90 architecture (e.g., H20/H100)** and **CUDA 12.6** environments only. + +> By default, `build.sh` compiles all custom operators from source.To use the precompiled package, enable it with the `FD_USE_PRECOMPILED` parameter. +> If the precompiled package cannot be downloaded or does not match the current environment, the system will automatically fall back to `4. Build Wheel from Source`. + +First, install paddlepaddle-gpu. +For detailed instructions, please refer to the [PaddlePaddle Installation Guide](https://www.paddlepaddle.org.cn/). 
+ +```shell +python -m pip install paddlepaddle-gpu==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/ +``` + +Then, clone the FastDeploy repository and build using the precompiled operator wheels: + +```shell +git clone https://github.com/PaddlePaddle/FastDeploy +cd FastDeploy + +# Argument 1: Whether to build wheel package (1 for yes) +# Argument 2: Python interpreter path +# Argument 3: Whether to compile CPU inference operators (false for GPU only) +# Argument 4: Target GPU architectures (currently supports [90]) +# Argument 5: Whether to use precompiled operators (1 to enable; the default 0 builds from source) +# Argument 6 (optional): Commit ID of the precompiled operators (defaults to the current commit ID) + +# Use precompiled operators for accelerated build +bash build.sh 1 python false [90] 1 + +# Use precompiled wheel from a specific commit +bash build.sh 1 python false [90] 1 7dbd9412b0de47aacad9011e8ace492af7247620 +``` + +The wheel package is downloaded into the `FastDeploy/pre_wheel` directory, which is removed automatically once the operators have been extracted successfully. +After the build completes, the operator binaries can be found in `FastDeploy/fastdeploy/model_executor/ops/gpu`. + +> **Notes:** +> +> - This mode prioritizes downloading precompiled GPU operator wheels to reduce build time. +> - Currently supports **GPU + SM90 + CUDA 12.6** only. +> - For custom architectures or modified operator logic, please use **source compilation (Section 4)**. +> - You can check whether the precompiled wheel for a specific commit has been successfully built on the [FastDeploy CI Build Status Page](https://github.com/PaddlePaddle/FastDeploy/actions/workflows/ci_image_update.yml). 
+ ## Environment Verification After installation, verify the environment with this Python code: diff --git a/docs/zh/get_started/installation/nvidia_gpu.md b/docs/zh/get_started/installation/nvidia_gpu.md index 25c2f4fd4..c32575389 100644 --- a/docs/zh/get_started/installation/nvidia_gpu.md +++ b/docs/zh/get_started/installation/nvidia_gpu.md @@ -10,7 +10,7 @@ - Python >= 3.10 - Linux X86_64 -可通过如下4种方式进行安装 +可通过如下5种方式进行安装 ## 1. 预编译Docker安装(推荐) @@ -88,6 +88,49 @@ bash build.sh 1 python false [80,90] 编译后的产物在```FastDeploy/dist```目录下。 +## 5. 算子预编译 Wheel 包 + +FastDeploy 提供了 GPU 算子预编译版 Wheel 包,可在无需完整源码编译的情况下快速构建。该方式当前仅支持 **SM90 架构(H20/H100等)** 和 **CUDA 12.6** 环境。 + +> 默认情况下,`build.sh` 会从源码编译;若希望使用预编译包,可使用 `FD_USE_PRECOMPILED` 参数; +> 若预编译包下载失败或与环境不匹配,系统会自动回退至 `4. wheel 包源码编译` 模式。 + +首先安装 paddlepaddle-gpu,详细安装方式参考 [PaddlePaddle安装](https://www.paddlepaddle.org.cn/) + +```shell +python -m pip install paddlepaddle-gpu==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/ +``` + +接着克隆源代码,拉取 whl 包并安装 + +```shell +git clone https://github.com/PaddlePaddle/FastDeploy +cd FastDeploy + +# 第1个参数: 是否打包成 wheel (1 表示打包) +# 第2个参数: Python 解释器路径 +# 第3个参数: 是否编译 CPU 推理算子 (false 表示仅 GPU) +# 第4个参数: GPU 架构 (当前仅支持 [90]) +# 第5个参数: 是否使用预编译算子 (1 表示启用预编译) +# 第6个参数(可选): 指定预编译算子的 commitID(默认使用当前的 commitID) + +# 使用预编译 whl 包加速构建 +bash build.sh 1 python false [90] 1 + +# 从指定 commitID 获取对应预编译算子 +bash build.sh 1 python false [90] 1 7dbd9412b0de47aacad9011e8ace492af7247620 +``` + +whl 包会下载到 `FastDeploy/pre_wheel` 目录,算子提取成功后该目录会被自动删除。 + +构建完成后,算子相关的产物位于 `FastDeploy/fastdeploy/model_executor/ops/gpu` 目录下。 + +> **说明:** +> - 该模式会优先下载预编译的 GPU 算子 whl 包,减少编译时间; +> - 目前仅支持 **GPU + SM90 + CUDA 12.6**; +> - 若希望自定义架构或修改算子逻辑,请使用 **源码编译方式(第4节)**。 +> - 您可以在 [FastDeploy CI 构建状态页面](https://github.com/PaddlePaddle/FastDeploy/actions/workflows/ci_image_update.yml) 查看对应 commit 的预编译 whl 是否已构建成功。 + ## 环境检查 在安装 FastDeploy 后,通过如下 Python 代码检查环境的可用性