[Feature] Enhance build script, add pre_wheel logic (#4729)

* Enhance build script, add pre_wheel logic Updated copyright year and added precompiled wheel installation logic. * update the nvidia_gpu.md, add pre_wheel description * fix zh .md * update the url, automatically detect CUDA and SM * Fix GPU architecture string formatting in build.sh * Change default for FD_USE_PRECOMPILED to 0 * fix build.sh * add ./dist, pre-wheel path * simplify the process,just save the whl * del pre_wheel dir * fix function name, extract_ops_from_precompiled_wheel * fix docs * add default commitID in docs --------- Co-authored-by: plusNew001 <95567040+plusNew001@users.noreply.github.com>
2025-12-24 13:28:13 +08:00 · 2025-11-13 19:03:52 +08:00
parent 05da8e34c0
commit a5e949d9d0
3 changed files with 220 additions and 10 deletions
--- a/build.sh
+++ b/build.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash

-# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -22,7 +22,13 @@ FD_CPU_USE_BF16=${3:-"false"}
 # For SM90 (Hopper), use 90. For SM100 (Blackwell), use 100.
 # These will be translated to 90a / 100a in setup_ops.py for specific features.
 FD_BUILDING_ARCS=${4:-""}
-
+# FD_USE_PRECOMPILED: Specify whether to use precompiled custom ops.
+# 0 = build ops from source (default)
+# 1 = use precompiled ops
+FD_USE_PRECOMPILED=${5:-0}
+# FD_COMMIT_ID: Specify the commit ID for locating precompiled wheel packages.
+# If not provided, the current git commit ID will be used automatically.
+FD_COMMIT_ID=${6:-""}

 # paddle distributed use to set archs
 unset PADDLE_CUDA_ARCH_LIST
@@ -31,6 +37,7 @@ unset PADDLE_CUDA_ARCH_LIST
 DIST_DIR="dist"
 BUILD_DIR="build"
 EGG_DIR="fastdeploy.egg-info"
+PRE_WHEEL_DIR="pre_wheel"

 # custom_ops directory config
 OPS_SRC_DIR="custom_ops"
@@ -40,6 +47,7 @@ OPS_TMP_DIR="tmp"
 RED='\033[0;31m'
 BLUE='\033[0;34m'
 GREEN='\033[1;32m'
+YELLOW='\033[1;33m'
 BOLD='\033[1m'
 NONE='\033[0m'

@@ -57,12 +65,11 @@ function python_version_check() {

 function init() {
    echo -e "${BLUE}[init]${NONE} removing building directory..."
-    rm -rf $DIST_DIR $BUILD_DIR $EGG_DIR
+    rm -rf $BUILD_DIR $EGG_DIR $PRE_WHEEL_DIR
    ${python} -m pip install setuptools_scm
    echo -e "${BLUE}[init]${NONE} ${GREEN}init success\n"
 }

-
 function copy_ops(){
    OPS_VERSION="0.0.0"
    PY_MAIN_VERSION=`${python} -V 2>&1 | awk '{print $2}' | awk -F '.' '{print $1}'`
@@ -142,6 +149,86 @@ function copy_ops(){
    return
 }

+# Fetch the precompiled wheel for the detected CUDA version, GPU arch and commit ID and copy
+# its GPU ops into the tree. Sets FD_COMMIT_ID (if empty), CUDA_VERSION, GPU_ARCH_STR, and
+# FD_USE_PRECOMPILED=0 on unpack/copy errors. Returns 0, or 1 to fall back to a source build (exits 1 if no commit ID can be found).
+function extract_ops_from_precompiled_wheel() {
+  local WHL_NAME="fastdeploy_gpu-0.0.0-py3-none-any.whl"
+  if [ -z "$FD_COMMIT_ID" ]; then
+    if git rev-parse HEAD >/dev/null 2>&1; then
+      FD_COMMIT_ID=$(git rev-parse HEAD)
+      echo -e "${BLUE}[init]${NONE} Using current repo commit ID: ${GREEN}${FD_COMMIT_ID}${NONE}"
+    else
+      echo -e "${RED}[ERROR]${NONE} Cannot determine commit ID (not a git repo). Please provide manually."
+      exit 1
+    fi
+  fi
+
+  CUDA_VERSION=$(nvcc --version | grep "release" | sed -E 's/.*release ([0-9]+)\.([0-9]+).*/\1\2/')
+  echo -e "${BLUE}[info]${NONE} Detected CUDA version: ${GREEN}cu${CUDA_VERSION}${NONE}"
+  # Unique compute capabilities: "9.0" becomes "SM_90"; several GPUs join as "SM_80_SM_90".
+  GPU_ARCH_STR=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader \
+    | awk '{printf("%d\n",$1*10)}' | sort -u | awk '{printf("SM_%s_",$1)}' | sed 's/_$//')
+  echo -e "${BLUE}[info]${NONE} Detected GPU arch: ${GREEN}${GPU_ARCH_STR}${NONE}"
+
+  local WHL_PATH="${PRE_WHEEL_DIR}/${WHL_NAME}"
+  local REMOTE_URL="https://paddle-qa.bj.bcebos.com/paddle-pipeline/FastDeploy_ActionCE/cu${CUDA_VERSION}/${GPU_ARCH_STR}/develop/${FD_COMMIT_ID}/${WHL_NAME}"
+  mkdir -p "${PRE_WHEEL_DIR}"
+  if [ ! -f "$WHL_PATH" ]; then
+    echo -e "${BLUE}[precompiled]${NONE} Local wheel not found, downloading from: ${REMOTE_URL}"
+    # NOTE(review): --no-check-certificate skips TLS verification of a binary download; consider dropping it.
+    wget --no-check-certificate -O "$WHL_PATH" "$REMOTE_URL" || {
+        echo -e "${YELLOW}[WARNING]${NONE} Failed to download wheel."
+        rm -f "$WHL_PATH"  # wget -O leaves an empty or partial file behind on failure
+        return 1
+    }
+    echo -e "${GREEN}[SUCCESS]${NONE} Downloaded precompiled wheel to ${WHL_PATH}"
+  else
+    echo -e "${BLUE}[precompiled]${NONE} Found local wheel: ${WHL_PATH}"
+  fi
+  # Validate downloaded wheels too; status is 1 for an unreadable archive or a corrupted member.
+  if ! ${python} -c 'import sys, zipfile; sys.exit(0 if zipfile.ZipFile(sys.argv[1]).testzip() is None else 1)' "$WHL_PATH" >/dev/null 2>&1; then
+    echo -e "${YELLOW}[WARNING]${NONE} Local wheel seems invalid."
+    echo -e "${BLUE}[fallback]${NONE} Falling back to source compilation..."
+    return 1
+  fi
+
+  local TMP_DIR="${PRE_WHEEL_DIR}/tmp_whl_unpack" DST_DIR="fastdeploy/model_executor/ops/gpu"
+  local ERR_MSG="" DATA_DIR="" SRC_DIR=""
+  rm -rf "$TMP_DIR"
+  mkdir -p "$TMP_DIR"
+  echo -e "${BLUE}[precompiled]${NONE} Unpacking wheel..."
+  if ! ${python} -m zipfile -e "$WHL_PATH" "$TMP_DIR"; then
+    ERR_MSG="Failed to unpack wheel: $WHL_PATH"
+  else
+    DATA_DIR=$(find "$TMP_DIR" -maxdepth 1 -type d -name "*.data" | head -n 1)
+    SRC_DIR="${DATA_DIR}/platlib/fastdeploy/model_executor/ops/gpu"
+    if [ -z "$DATA_DIR" ]; then
+      ERR_MSG="Cannot find *.data directory in unpacked wheel."
+    elif [ ! -d "$SRC_DIR" ]; then
+      ERR_MSG="GPU ops directory not found in wheel: $SRC_DIR"
+    fi
+  fi
+  if [ -z "$ERR_MSG" ]; then
+    echo -e "${BLUE}[precompiled]${NONE} Copying GPU precompiled contents..."
+    mkdir -p "$DST_DIR"
+    # The compiled ops library is mandatory; the remaining files stay best-effort.
+    cp -f "$SRC_DIR/"fastdeploy_ops_*.so "$DST_DIR/" 2>/dev/null || ERR_MSG="No fastdeploy_ops_*.so could be copied from wheel: $SRC_DIR"
+    cp -r "$SRC_DIR/deep_gemm" "$DST_DIR/" 2>/dev/null || true
+    cp -r "$SRC_DIR/fastdeploy_ops.py" "$DST_DIR/" 2>/dev/null || true
+    cp -f "$SRC_DIR/version.txt" "$DST_DIR/" 2>/dev/null || true
+  fi
+  if [ -n "$ERR_MSG" ]; then
+    echo -e "${RED}[ERROR]${NONE} ${ERR_MSG}"
+    rm -rf "$TMP_DIR"
+    echo -e "${YELLOW}[fallback]${NONE} Falling back to source compilation..."
+    FD_USE_PRECOMPILED=0
+    return 1
+  fi
+  echo -e "${GREEN}[SUCCESS]${NONE} Installed FastDeploy using precompiled wheel."
+  rm -rf "${PRE_WHEEL_DIR}"
+}
+
 function build_and_install_ops() {
  cd $OPS_SRC_DIR
  export no_proxy=bcebos.com,paddlepaddle.org.cn,${no_proxy}
@@ -229,7 +316,7 @@ function abort() {

  cur_dir=`basename "$pwd"`

-  rm -rf $BUILD_DIR $EGG_DIR $DIST_DIR
+  rm -rf $BUILD_DIR $EGG_DIR
  ${python} -m pip uninstall -y fastdeploy-${DEVICE_TYPE}

  rm -rf $OPS_SRC_DIR/$BUILD_DIR $OPS_SRC_DIR/$EGG_DIR
@@ -243,9 +330,44 @@ if [ "$BUILD_WHEEL" -eq 1 ]; then

  init
  version_info
-  build_and_install_ops
-  build_and_install
-  cleanup
+  # Whether to enable precompiled wheel
+  if [ "$FD_USE_PRECOMPILED" -eq 1 ]; then
+    echo -e "${BLUE}[MODE]${NONE} Using precompiled .whl"
+    if extract_ops_from_precompiled_wheel; then
+      echo -e "${GREEN}[DONE]${NONE} Precompiled wheel installed successfully."
+      echo -e "${BLUE}[MODE]${NONE} Building wheel package from installed files..."
+      build_and_install
+      echo -e "${BLUE}[MODE]${NONE} Installing newly built FastDeploy wheel..."
+      ${python} -m pip install ./dist/fastdeploy*.whl
+      # get Paddle version
+      PADDLE_VERSION=`${python} -c "import paddle; print(paddle.version.full_version)"`
+      PADDLE_COMMIT=`${python} -c "import paddle; print(paddle.version.commit)"`
+      # get FastDeploy info
+      EFFLLM_BRANCH=`git rev-parse --abbrev-ref HEAD`
+      EFFLLM_COMMIT=`git rev-parse --short HEAD`
+      # get Python version
+      PYTHON_VERSION=`${python} -c "import platform; print(platform.python_version())"`
+      echo -e "\n${GREEN}fastdeploy wheel packaged successfully${NONE}
+              ${BLUE}Python version:${NONE} $PYTHON_VERSION
+              ${BLUE}Paddle version:${NONE} $PADDLE_VERSION ($PADDLE_COMMIT)
+              ${BLUE}fastdeploy branch:${NONE} $EFFLLM_BRANCH ($EFFLLM_COMMIT)\n"
+      echo -e "${GREEN}wheel saved under${NONE} ${RED}${BOLD}./dist${NONE}"
+      cleanup
+      trap : 0
+      exit 0
+    else
+      echo -e "${BLUE}[fallback]${NONE} ${YELLOW}Precompiled .whl unavailable, switching to source build."
+      FD_USE_PRECOMPILED=0
+    fi
+  fi
+
+  if [ "$FD_USE_PRECOMPILED" -eq 0 ]; then
+    echo -e "${BLUE}[MODE]${NONE} Building from source (ops)..."
+    build_and_install_ops
+    echo -e "${BLUE}[MODE]${NONE} Building full wheel from source..."
+    build_and_install
+    cleanup
+  fi

  # get Paddle version
  PADDLE_VERSION=`${python} -c "import paddle; print(paddle.version.full_version)"`
@@ -274,6 +396,6 @@ else
  init
  build_and_install_ops
  version_info
-  rm -rf $BUILD_DIR $EGG_DIR $DIST_DIR
+  rm -rf $BUILD_DIR $EGG_DIR
  rm -rf $OPS_SRC_DIR/$BUILD_DIR $OPS_SRC_DIR/$EGG_DIR
 fi
--- a/docs/get_started/installation/nvidia_gpu.md
+++ b/docs/get_started/installation/nvidia_gpu.md
@@ -80,6 +80,51 @@ bash build.sh 1 python false [80,90]
 ```
 The built packages will be in the ```FastDeploy/dist``` directory.

+## 5. Precompiled Operator Wheel Packages
+
+FastDeploy provides precompiled GPU operator wheel packages for quick setup without building the entire source code.
+This method currently supports **SM90 architecture (e.g., H20/H100)** and **CUDA 12.6** environments only.
+
+> By default, `build.sh` compiles all custom operators from source. To use the precompiled package, enable it with the `FD_USE_PRECOMPILED` parameter (the fifth argument of `build.sh`).
+> If the precompiled package cannot be downloaded or does not match the current environment, the system will automatically fall back to `4. Build Wheel from Source`.
+
+First, install paddlepaddle-gpu.
+For detailed instructions, please refer to the [PaddlePaddle Installation Guide](https://www.paddlepaddle.org.cn/).
+
+```shell
+python -m pip install paddlepaddle-gpu==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
+```
+
+Then, clone the FastDeploy repository and build using the precompiled operator wheels:
+
+```shell
+git clone https://github.com/PaddlePaddle/FastDeploy
+cd FastDeploy
+
+# Argument 1: Whether to build wheel package (1 for yes)
+# Argument 2: Python interpreter path
+# Argument 3: Whether to compile CPU inference operators (false for GPU only)
+# Argument 4: Target GPU architectures (currently supports [90])
+# Argument 5: Whether to use precompiled operators (1 for enable)
+# Argument 6 (optional): Commit ID of the precompiled operators to download (defaults to the current commit ID)
+
+# Use precompiled operators for accelerated build
+bash build.sh 1 python false [90] 1
+
+# Use precompiled wheel from a specific commit
+bash build.sh 1 python false [90] 1 7dbd9412b0de47aacad9011e8ace492af7247620
+```
+
+The downloaded wheel package is stored temporarily in the `FastDeploy/pre_wheel` directory; this directory is removed automatically once the operators have been extracted successfully.
+After the build completes, the operator binaries can be found in `FastDeploy/fastdeploy/model_executor/ops/gpu`.
+
+> **Notes:**
+>
+> - This mode prioritizes downloading precompiled GPU operator wheels to reduce build time.
+> - Currently supports **GPU + SM90 + CUDA 12.6** only.
+> - For custom architectures or modified operator logic, please use **source compilation (Section 4)**.
+> - You can check whether the precompiled wheel for a specific commit has been successfully built on the [FastDeploy CI Build Status Page](https://github.com/PaddlePaddle/FastDeploy/actions/workflows/ci_image_update.yml).
+
 ## Environment Verification

 After installation, verify the environment with this Python code:
--- a/docs/zh/get_started/installation/nvidia_gpu.md
+++ b/docs/zh/get_started/installation/nvidia_gpu.md
@@ -10,7 +10,7 @@
 - Python >= 3.10
 - Linux X86_64

-可通过如下4种方式进行安装
+可通过如下5种方式进行安装

 ## 1. 预编译Docker安装(推荐)

@@ -88,6 +88,49 @@ bash build.sh 1 python false [80,90]

 编译后的产物在```FastDeploy/dist```目录下。

+## 5. 算子预编译 Wheel 包
+
+FastDeploy 提供了 GPU 算子预编译版 Wheel 包，可在无需完整源码编译的情况下快速构建。该方式当前仅支持 **SM90 架构（H20/H100等）** 和 **CUDA 12.6** 环境。
+
+>默认情况下，`build.sh` 会从源码编译；若希望使用预编译包，可使用`FD_USE_PRECOMPILED` 参数；
+>若预编译包下载失败或与环境不匹配，系统会自动回退至 `4. wheel 包源码编译` 模式。
+
+首先安装 paddlepaddle-gpu，详细安装方式参考 [PaddlePaddle安装](https://www.paddlepaddle.org.cn/)
+
+``` shell
+python -m pip install paddlepaddle-gpu==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
+```
+
+接着克隆源代码，拉取 whl 包并安装
+
+```shell
+git clone https://github.com/PaddlePaddle/FastDeploy
+cd FastDeploy
+
+# 第1个参数: 是否打包成 wheel (1 表示打包)
+# 第2个参数: Python 解释器路径
+# 第3个参数: 是否编译 CPU 推理算子 (false 表示仅 GPU)
+# 第4个参数: GPU 架构 (当前仅支持 [90])
+# 第5个参数: 是否使用预编译算子 (1 表示启用预编译)
+# 第6个参数(可选): 指定预编译算子的 commitID（默认使用当前的 commitID）
+
+# 使用预编译 whl 包加速构建
+bash build.sh 1 python false [90] 1
+
+# 从指定 commitID 获取对应预编译算子
+bash build.sh 1 python false [90] 1 7dbd9412b0de47aacad9011e8ace492af7247620
+```
+
+下载的 whl 包会临时存放在 `FastDeploy/pre_wheel` 目录下，算子提取成功后该目录会被自动删除。
+
+构建完成后，算子相关的产物位于 `FastDeploy/fastdeploy/model_executor/ops/gpu` 目录下。
+
+> **说明：**
+> - 该模式会优先下载预编译的 GPU 算子 whl 包，减少编译时间；
+> - 目前仅支持 **GPU + SM90 + CUDA 12.6**；
+> - 若希望自定义架构或修改算子逻辑，请使用 **源码编译方式（第4节）**。
+> - 您可以在 [FastDeploy CI 构建状态页面](https://github.com/PaddlePaddle/FastDeploy/actions/workflows/ci_image_update.yml) 查看对应 commit 的预编译 whl 是否已构建成功。
+
 ## 环境检查

 在安装 FastDeploy 后，通过如下 Python 代码检查环境的可用性