mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-04 00:06:38 +08:00
[LLM] First commit the llm deployment code
This commit is contained in:
170
custom_ops/setup_ops_cpu.py
Normal file
170
custom_ops/setup_ops_cpu.py
Normal file
@@ -0,0 +1,170 @@
|
||||
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
""" setup for FASTDEPLOY custom cpu ops """
|
||||
import os
|
||||
import subprocess
|
||||
from paddle.utils.cpp_extension import setup, CppExtension
|
||||
from setuptools import find_namespace_packages
|
||||
import glob
|
||||
import tarfile
|
||||
|
||||
# Placeholder list; nothing in the visible part of this script reads it.
BUILDING_ARCS = list()

# The bf16 build is selected only when CPU_USE_BF16 is exactly "True";
# any other value (or an unset variable) selects the non-bf16 build.
use_bf16 = os.environ.get("CPU_USE_BF16", "False") == "True"
|
||||
|
||||
|
||||
def download_and_extract(url, destination_directory):
    """
    Download a .tar.gz file using wget to the destination directory
    and extract its contents without renaming the downloaded file.

    This is best-effort: failures are reported on stdout and never raised,
    so the caller continues either way. The downloaded archive is removed
    afterwards whether or not the download/extraction succeeded, so a
    partial file is never left behind in the destination directory.

    :param url: The URL of the .tar.gz file to download.
    :param destination_directory: The directory where the file should be downloaded and extracted.
    """
    os.makedirs(destination_directory, exist_ok=True)

    filename = os.path.basename(url)
    file_path = os.path.join(destination_directory, filename)

    try:
        try:
            subprocess.run(
                ["wget", "-O", file_path, url],
                check=True,
            )
        except (OSError, subprocess.CalledProcessError) as e:
            # OSError covers a missing/non-executable wget binary, which is a
            # download problem and must not be reported as an extraction error.
            print(f"Error downloading file: {e}")
            return
        print(f"Downloaded: {file_path}")

        try:
            with tarfile.open(file_path, "r:gz") as tar:
                if hasattr(tarfile, "data_filter"):
                    # The archive comes from the network: where the running
                    # Python supports extraction filters, refuse members that
                    # would be written outside destination_directory.
                    tar.extractall(path=destination_directory, filter="data")
                else:
                    tar.extractall(path=destination_directory)
        except Exception as e:
            # Deliberately broad: keep the original report-and-continue
            # behaviour for any corrupt or unreadable archive.
            print(f"Error extracting file: {e}")
            return
        print(f"Extracted: {file_path} to {destination_directory}")
    finally:
        # wget -O creates the output file even when the transfer fails, so
        # clean up on every path, not only after a successful extraction.
        if os.path.isfile(file_path):
            try:
                os.remove(file_path)
                print(f"Deleted downloaded file: {file_path}")
            except OSError as e:
                print(f"Error deleting file: {e}")
|
||||
|
||||
|
||||
# Fetch the prebuilt x86-simd-sort bundle unless a non-empty copy is already
# present under third_party/.
x86_simd_sort_dir = "third_party/x86-simd-sort"
if not (os.path.exists(x86_simd_sort_dir) and os.listdir(x86_simd_sort_dir)):
    x86_simd_sort_url = "https://paddlepaddle-inference-banchmark.bj.bcebos.com/x86-simd-sort.tar.gz"
    download_and_extract(x86_simd_sort_url, "third_party")

# Same for xFasterTransformer; which archive is fetched depends on whether
# the bf16 build was requested.
xft_dir = "third_party/xFasterTransformer"
if not (os.path.exists(xft_dir) and os.listdir(xft_dir)):
    xft_url = (
        "https://paddlepaddle-inference-banchmark.bj.bcebos.com/xft.tar.gz"
        if use_bf16
        else "https://paddlepaddle-inference-banchmark.bj.bcebos.com/xft_no_bf16.tar.gz"
    )
    download_and_extract(xft_url, "third_party")
|
||||
|
||||
# Shared libraries the extension is linked against.
libs = ["xfastertransformer", "xft_comm_helper", "x86simdsortcpp"]

# Roots of the two third-party source trees.
xft_dir = "third_party/xFasterTransformer"
x86_simd_sort_dir = "third_party/x86-simd-sort"

# Header search paths: the xFasterTransformer sub-directories first, in the
# order listed, followed by the x86-simd-sort sources.
paddle_custom_kernel_include = [
    os.path.join(xft_dir, sub_path)
    for sub_path in (
        "include",
        "src/common",
        "src/kernels",
        "src/layers",
        "src/models",
        "src/utils",
        "3rdparty/onednn/include",
        "3rdparty/onednn/build/include",
        "3rdparty/xdnn",
        "3rdparty",
        "3rdparty/mkl/include",
    )
]
paddle_custom_kernel_include.append(os.path.join(x86_simd_sort_dir, "src"))
|
||||
|
||||
# cc flags
|
||||
# Compiler flags shared by both build variants.
paddle_extra_compile_args = [
    "-std=c++17",
    "-shared",
    "-fPIC",
    "-Wno-parentheses",
    "-DPADDLE_WITH_CUSTOM_KERNEL",
    "-mavx512f",
    "-mavx512vl",
    "-fopenmp",
    "-mavx512bw",
    "-mno-mmx",
    "-Wall",
    "-march=skylake-avx512",
    "-O3",
    "-g",
    "-lstdc++fs",
    "-D_GLIBCXX_USE_CXX11_ABI=1",
]

# Append the weight-only kernel defines matching the selected variant:
# the avx512-bf16 set when use_bf16 is true, the fp32 set otherwise.
paddle_extra_compile_args.extend(
    [
        "-DAVX512_BF16_WEIGHT_ONLY_BF16=true",
        "-DAVX512_FP16_WEIGHT_ONLY_INT8=true",
        "-DAVX512_FP16_WEIGHT_ONLY_FP16=true",
    ]
    if use_bf16
    else [
        "-DAVX512_FP32_WEIGHT_ONLY_INT8=true",
        "-DAVX512_FP32_WEIGHT_ONLY_FP16=true",
    ]
)
|
||||
# Directories searched for the prebuilt third-party shared libraries.
paddle_custom_kernel_library_dir = [
    "third_party/xFasterTransformer/build/",
    "third_party/x86-simd-sort/builddir",
]

# Every *.h file sitting directly inside one of the include directories.
include_files = [
    header
    for header_dir in paddle_custom_kernel_include
    for header in glob.glob(os.path.join(header_dir, "*.h"))
]

# Every lib<name>.so that actually exists in one of the library directories;
# directories that are missing are skipped.
so_files = []
for lib_dir in paddle_custom_kernel_library_dir:
    if not os.path.isdir(lib_dir):
        continue
    candidates = (os.path.join(lib_dir, f"lib{lib_name}.so") for lib_name in libs)
    so_files.extend(path for path in candidates if os.path.isfile(path))
|
||||
# C++ sources compiled into the custom CPU op extension.
cpu_op_sources = [
    "cpu_ops/simd_sort.cc",
    "cpu_ops/set_value_by_flags.cc",
    "cpu_ops/token_penalty_multi_scores.cc",
    "cpu_ops/stop_generation_multi_ends.cc",
    "cpu_ops/update_inputs.cc",
    "cpu_ops/get_padding_offset.cc",
    "cpu_ops/xft_all_layer.cc",
    "cpu_ops/xft_greedy_search.cc",
    "cpu_ops/avx_weight_only.cc",
]

# Runtime library search paths, expressed relative to $ORIGIN.
rpath_link_args = [
    "-Wl,-rpath,$ORIGIN/x86-simd-sort/builddir",
    "-Wl,-rpath,$ORIGIN/xFasterTransformer/build",
]

cpu_ops_extension = CppExtension(
    sources=cpu_op_sources,
    extra_link_args=rpath_link_args,
    include_dirs=paddle_custom_kernel_include,
    library_dirs=paddle_custom_kernel_library_dir,
    libraries=libs,
    extra_compile_args=paddle_extra_compile_args,
)

# Build and package the extension together with the collected headers and
# shared objects.
setup(
    name="fastdeploy_cpu_ops",
    ext_modules=cpu_ops_extension,
    packages=find_namespace_packages(where="third_party"),
    package_dir={"": "third_party"},
    package_data={"fastdeploy_cpu_ops": include_files + so_files},
    include_package_data=True,
)
|
Reference in New Issue
Block a user