mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
Sync v2.0 version of code to github repo
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -12,19 +12,40 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
""" setup for FastDeploy custom ops """
|
||||
import glob
|
||||
import importlib
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tarfile
|
||||
from pathlib import Path
|
||||
|
||||
import paddle
|
||||
from paddle.utils.cpp_extension import CppExtension, CUDAExtension, setup
|
||||
from setuptools import find_namespace_packages, find_packages
|
||||
|
||||
archs = json.loads(os.getenv("BUILDING_ARCS", "[]"))
|
||||
use_bf16 = os.getenv("CPU_USE_BF16", "False") == "True"
|
||||
|
||||
def load_module_from_path(module_name, path):
|
||||
"""
|
||||
load python module from path
|
||||
"""
|
||||
spec = importlib.util.spec_from_file_location(module_name, path)
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
sys.modules[module_name] = module
|
||||
spec.loader.exec_module(module)
|
||||
return module
|
||||
|
||||
|
||||
ROOT_DIR = Path(__file__).parent.parent
|
||||
|
||||
# cannot import envs directly because it depends on fastdeploy,
|
||||
# which is not installed yet
|
||||
envs = load_module_from_path('envs',
|
||||
os.path.join(ROOT_DIR, 'fastdeploy', 'envs.py'))
|
||||
|
||||
archs = json.loads(envs.FD_BUILDING_ARCS)
|
||||
use_bf16 = envs.FD_CPU_USE_BF16 == "True"
|
||||
|
||||
|
||||
def download_and_extract(url, destination_directory):
|
||||
@@ -80,6 +101,30 @@ def clone_git_repo(version, repo_url, destination_path):
|
||||
return False
|
||||
|
||||
|
||||
def process_git_repo(cur_path, dst_path, commit_id=None, patch=None):
|
||||
"""
|
||||
reset git repo to destination commit and apply patch.
|
||||
"""
|
||||
if commit_id is not None:
|
||||
reset_cmd = ["git", "reset", "--hard", commit_id]
|
||||
if patch is not None:
|
||||
patch_source = os.path.join(cur_path, patch)
|
||||
patch_destination = os.path.join(dst_path, patch)
|
||||
shutil.copy(patch_source, patch_destination)
|
||||
apply_cmd = ["git", "apply", patch]
|
||||
|
||||
try:
|
||||
os.chdir(dst_path)
|
||||
if commit_id is not None:
|
||||
subprocess.run(reset_cmd, check=True)
|
||||
if patch is not None:
|
||||
subprocess.run(apply_cmd, check=True)
|
||||
os.chdir(cur_path)
|
||||
return True
|
||||
except subprocess.CalledProcessError:
|
||||
return False
|
||||
|
||||
|
||||
def get_sm_version(archs):
|
||||
"""
|
||||
Get sm version of paddle.
|
||||
@@ -94,6 +139,18 @@ def get_sm_version(archs):
|
||||
return list(arch_set)
|
||||
|
||||
|
||||
def get_nvcc_version():
|
||||
"""
|
||||
Get cuda version of nvcc.
|
||||
"""
|
||||
nvcc_output = subprocess.check_output(["nvcc", "--version"],
|
||||
universal_newlines=True)
|
||||
output = nvcc_output.split()
|
||||
release_idx = output.index("release") + 1
|
||||
nvcc_cuda_version = float(output[release_idx].split(",")[0])
|
||||
return nvcc_cuda_version
|
||||
|
||||
|
||||
def get_gencode_flags(archs):
|
||||
"""
|
||||
Get gencode flags for current device or input.
|
||||
@@ -149,7 +206,6 @@ if paddle.is_compiled_with_rocm():
|
||||
"gpu_ops/save_with_output_msg.cc",
|
||||
"gpu_ops/get_output.cc",
|
||||
"gpu_ops/get_output_msg_with_topk.cc",
|
||||
"gpu_ops/reset_need_stop_value.cc",
|
||||
"gpu_ops/step.cu",
|
||||
"gpu_ops/step_reschedule.cu",
|
||||
"gpu_ops/set_data_ipc.cu",
|
||||
@@ -175,25 +231,42 @@ if paddle.is_compiled_with_rocm():
|
||||
)
|
||||
elif paddle.is_compiled_with_cuda():
|
||||
sources = [
|
||||
"gpu_ops/set_mask_value.cu", "gpu_ops/set_value_by_flags.cu",
|
||||
"gpu_ops/ngram_mask.cu", "gpu_ops/gather_idx.cu",
|
||||
"gpu_ops/get_output_ep.cc", "gpu_ops/get_mm_split_fuse.cc",
|
||||
"gpu_ops/set_mask_value.cu",
|
||||
"gpu_ops/set_value_by_flags.cu",
|
||||
"gpu_ops/ngram_mask.cu",
|
||||
"gpu_ops/gather_idx.cu",
|
||||
"gpu_ops/get_output_ep.cc",
|
||||
"gpu_ops/get_mm_split_fuse.cc",
|
||||
"gpu_ops/token_penalty_multi_scores.cu",
|
||||
"gpu_ops/token_penalty_only_once.cu", "gpu_ops/stop_generation.cu",
|
||||
"gpu_ops/token_penalty_only_once.cu",
|
||||
"gpu_ops/stop_generation.cu",
|
||||
"gpu_ops/stop_generation_multi_ends.cu",
|
||||
"gpu_ops/stop_generation_multi_stop_seqs.cu", "gpu_ops/set_flags.cu",
|
||||
"gpu_ops/step.cu", "gpu_ops/step_reschedule.cu",
|
||||
"gpu_ops/fused_get_rope.cu", "gpu_ops/get_padding_offset.cu",
|
||||
"gpu_ops/update_inputs.cu", "gpu_ops/update_inputs_beam.cu",
|
||||
"gpu_ops/beam_search_softmax.cu", "gpu_ops/rebuild_padding.cu",
|
||||
"gpu_ops/set_data_ipc.cu", "gpu_ops/read_data_ipc.cu",
|
||||
"gpu_ops/enforce_generation.cu", "gpu_ops/dequant_int8.cu",
|
||||
"gpu_ops/tune_cublaslt_gemm.cu", "gpu_ops/swap_cache_batch.cu",
|
||||
"gpu_ops/swap_cache.cu", "gpu_ops/step_system_cache.cu",
|
||||
"gpu_ops/cpp_extensions.cu", "gpu_ops/share_external_data.cu",
|
||||
"gpu_ops/stop_generation_multi_stop_seqs.cu",
|
||||
"gpu_ops/set_flags.cu",
|
||||
"gpu_ops/step.cu",
|
||||
"gpu_ops/step_reschedule.cu",
|
||||
"gpu_ops/fused_get_rope.cu",
|
||||
"gpu_ops/get_padding_offset.cu",
|
||||
"gpu_ops/update_inputs.cu",
|
||||
"gpu_ops/update_inputs_beam.cu",
|
||||
"gpu_ops/beam_search_softmax.cu",
|
||||
"gpu_ops/rebuild_padding.cu",
|
||||
"gpu_ops/set_data_ipc.cu",
|
||||
"gpu_ops/read_data_ipc.cu",
|
||||
"gpu_ops/enforce_generation.cu",
|
||||
"gpu_ops/dequant_int8.cu",
|
||||
"gpu_ops/tune_cublaslt_gemm.cu",
|
||||
"gpu_ops/swap_cache_batch.cu",
|
||||
"gpu_ops/swap_cache.cu",
|
||||
"gpu_ops/step_system_cache.cu",
|
||||
"gpu_ops/cpp_extensions.cc",
|
||||
"gpu_ops/share_external_data.cu",
|
||||
"gpu_ops/per_token_quant_fp8.cu",
|
||||
"gpu_ops/extract_text_token_output.cu",
|
||||
"gpu_ops/update_split_fuse_input.cu"
|
||||
"gpu_ops/update_split_fuse_input.cu",
|
||||
"gpu_ops/text_image_index_out.cu",
|
||||
"gpu_ops/text_image_gather_scatter.cu",
|
||||
"gpu_ops/sample_kernels/rejection_top_p_sampling.cu",
|
||||
]
|
||||
|
||||
# pd_disaggregation
|
||||
@@ -201,6 +274,8 @@ elif paddle.is_compiled_with_cuda():
|
||||
"gpu_ops/remote_cache_kv_ipc.cc",
|
||||
"gpu_ops/open_shm_and_get_meta_signal.cc",
|
||||
"gpu_ops/init_signal_layerwise.cc",
|
||||
"gpu_ops/get_data_ptr_ipc.cu",
|
||||
"gpu_ops/ipc_sent_key_value_cache_by_remote_ptr.cu",
|
||||
]
|
||||
|
||||
cutlass_dir = "third_party/cutlass"
|
||||
@@ -213,12 +288,26 @@ elif paddle.is_compiled_with_cuda():
|
||||
raise ValueError("Git clone cutlass failed!")
|
||||
|
||||
# deep gemm
|
||||
deep_gemm_dir = "third_party/DeepGEMM"
|
||||
if not os.path.exists(deep_gemm_dir) or not os.listdir(deep_gemm_dir):
|
||||
if not os.path.exists(deep_gemm_dir):
|
||||
os.makedirs(deep_gemm_dir)
|
||||
clone_git_repo("main", "https://github.com/deepseek-ai/DeepGEMM.git",
|
||||
deep_gemm_dir)
|
||||
if not os.listdir(deep_gemm_dir):
|
||||
raise ValueError("Git clone DeepGEMM failed!")
|
||||
cur_path = os.path.dirname(os.path.abspath(__file__))
|
||||
dst_path = os.path.join(cur_path, deep_gemm_dir)
|
||||
commit_id = "95e81b3dd6704e279e5f4757c5b94776ac988a8d"
|
||||
patch = "0001-DeepGEMM-95e81b3.patch"
|
||||
process_git_repo(cur_path, dst_path, commit_id, patch)
|
||||
|
||||
dg_third_party_include_dirs = (
|
||||
"third_party/cutlass/include/cute",
|
||||
"third_party/cutlass/include/cutlass",
|
||||
)
|
||||
|
||||
dg_include_dir = "gpu_ops/fp8_deep_gemm/deep_gemm/include"
|
||||
dg_include_dir = "third_party/DeepGEMM/deep_gemm/include"
|
||||
os.makedirs(dg_include_dir, exist_ok=True)
|
||||
|
||||
for d in dg_third_party_include_dirs:
|
||||
@@ -252,15 +341,37 @@ elif paddle.is_compiled_with_cuda():
|
||||
|
||||
nvcc_compile_args = get_gencode_flags(archs)
|
||||
nvcc_compile_args += ["-DPADDLE_DEV"]
|
||||
nvcc_compile_args += ["-DPADDLE_ON_INFERENCE"]
|
||||
nvcc_compile_args += ["-DPy_LIMITED_API=0x03090000"]
|
||||
nvcc_compile_args += [
|
||||
"-Igpu_ops/cutlass_kernels",
|
||||
"-Ithird_party/cutlass/include",
|
||||
"-Ithird_party/cutlass/tools/util/include",
|
||||
"-Igpu_ops/fp8_gemm_with_cutlass",
|
||||
"-Igpu_ops",
|
||||
"-Ithird_party/nlohmann_json/include",
|
||||
]
|
||||
nvcc_version = get_nvcc_version()
|
||||
print(f'nvcc_version = {nvcc_version}')
|
||||
if nvcc_version >= 12.0:
|
||||
sources += ["gpu_ops/sample_kernels/air_top_p_sampling.cu"]
|
||||
cc = max(get_sm_version(archs))
|
||||
print(f"cc = {cc}")
|
||||
fp8_auto_gen_directory = "gpu_ops/cutlass_kernels/fp8_gemm_fused/autogen"
|
||||
if os.path.isdir(fp8_auto_gen_directory):
|
||||
shutil.rmtree(fp8_auto_gen_directory)
|
||||
|
||||
if cc >= 75:
|
||||
nvcc_compile_args += [
|
||||
"-DENABLE_SCALED_MM_C2X=1",
|
||||
"-Igpu_ops/cutlass_kernels/w8a8",
|
||||
]
|
||||
sources += [
|
||||
"gpu_ops/cutlass_kernels/w8a8/scaled_mm_entry.cu",
|
||||
"gpu_ops/cutlass_kernels/w8a8/scaled_mm_c2x.cu",
|
||||
"gpu_ops/quantization/common.cu",
|
||||
]
|
||||
|
||||
if cc >= 80:
|
||||
# append_attention
|
||||
sources += ["gpu_ops/append_attention.cu"]
|
||||
@@ -280,19 +391,45 @@ elif paddle.is_compiled_with_cuda():
|
||||
if cc >= 89:
|
||||
# Running generate fp8 gemm codes.
|
||||
nvcc_compile_args += ["-DENABLE_FP8"]
|
||||
os.system("python auto_gen_fp8_fp8_gemm_fused_kernels.py")
|
||||
os.system("python auto_gen_fp8_fp8_dual_gemm_fused_kernels.py")
|
||||
os.system("python auto_gen_visitor_fp8_gemm_fused_kernels.py")
|
||||
|
||||
nvcc_compile_args += [
|
||||
"-Igpu_ops/cutlass_kernels/fp8_gemm_fused/autogen"
|
||||
]
|
||||
|
||||
os.system("python utils/auto_gen_visitor_fp8_gemm_fused_kernels.py")
|
||||
if cc < 90:
|
||||
os.system("python utils/auto_gen_fp8_fp8_gemm_fused_kernels.py")
|
||||
os.system(
|
||||
"python utils/auto_gen_fp8_fp8_dual_gemm_fused_kernels.py")
|
||||
else:
|
||||
nvcc_compile_args += [
|
||||
"-gencode",
|
||||
"arch=compute_90a,code=compute_90a",
|
||||
"-O3",
|
||||
"-DNDEBUG",
|
||||
]
|
||||
os.system(
|
||||
"python utils/auto_gen_fp8_fp8_gemm_fused_kernels_sm90.py")
|
||||
os.system(
|
||||
"python utils/auto_gen_fp8_fp8_dual_gemm_fused_kernels_sm90.py"
|
||||
)
|
||||
os.system(
|
||||
"python utils/auto_gen_fp8_fp8_block_gemm_fused_kernels_sm90.py"
|
||||
)
|
||||
|
||||
nvcc_compile_args += [
|
||||
"-DENABLE_SCALED_MM_SM90=1",
|
||||
]
|
||||
sources += [
|
||||
"gpu_ops/fp8_gemm_with_cutlass/fp8_fp8_half_block_gemm.cu",
|
||||
"gpu_ops/cutlass_kernels/w8a8/scaled_mm_c3x_sm90.cu",
|
||||
"gpu_ops/cutlass_kernels/w8a8/c3x/scaled_mm_sm90_fp8.cu",
|
||||
"gpu_ops/cutlass_kernels/w8a8/c3x/scaled_mm_sm90_int8.cu",
|
||||
"gpu_ops/cutlass_kernels/w8a8/c3x/scaled_mm_azp_sm90_int8.cu",
|
||||
]
|
||||
|
||||
sources += [
|
||||
"gpu_ops/fp8_gemm_with_cutlass/fp8_fp8_half_gemm.cu",
|
||||
"gpu_ops/cutlass_kernels/fp8_gemm_fused/fp8_fp8_gemm_scale_bias_act.cu",
|
||||
"gpu_ops/fp8_gemm_with_cutlass/fp8_fp8_fp8_dual_gemm.cu",
|
||||
"gpu_ops/cutlass_kernels/fp8_gemm_fused/fp8_fp8_dual_gemm_scale_bias_act.cu",
|
||||
"gpu_ops/fp8_gemm_with_cutlass/fp8_fp8_half_cuda_core_gemm.cu",
|
||||
"gpu_ops/fp8_gemm_with_cutlass/per_channel_fp8_fp8_half_gemm.cu",
|
||||
"gpu_ops/cutlass_kernels/fp8_gemm_fused/visitor_fp8_gemm_fused.cu",
|
||||
@@ -300,22 +437,9 @@ elif paddle.is_compiled_with_cuda():
|
||||
"gpu_ops/scaled_gemm_f8_i4_f16_weight_quantize.cu",
|
||||
"gpu_ops/cutlass_kernels/cutlass_heuristic.cu",
|
||||
"gpu_ops/cutlass_kernels/cutlass_preprocessors.cu",
|
||||
"gpu_ops/air_topp_sampling.cu",
|
||||
]
|
||||
if cc >= 90:
|
||||
nvcc_compile_args += [
|
||||
"-gencode",
|
||||
"arch=compute_90a,code=compute_90a",
|
||||
"-O3",
|
||||
"-DNDEBUG",
|
||||
]
|
||||
os.system("python auto_gen_fp8_fp8_block_gemm_fused_kernels_sm90.py")
|
||||
sources += ["gpu_ops/fp8_gemm_with_cutlass/fp8_fp8_half_block_gemm.cu"]
|
||||
|
||||
# for fp8 autogen *.cu
|
||||
if cc >= 89:
|
||||
sources += find_end_files(
|
||||
"gpu_ops/cutlass_kernels/fp8_gemm_fused/autogen", ".cu")
|
||||
sources += find_end_files(fp8_auto_gen_directory, ".cu")
|
||||
|
||||
setup(
|
||||
name="fastdeploy_ops",
|
||||
@@ -324,8 +448,8 @@ elif paddle.is_compiled_with_cuda():
|
||||
extra_compile_args={"nvcc": nvcc_compile_args},
|
||||
libraries=["cublasLt"],
|
||||
),
|
||||
packages=find_packages(where="gpu_ops/fp8_deep_gemm"),
|
||||
package_dir={"": "gpu_ops/fp8_deep_gemm"},
|
||||
packages=find_packages(where="third_party/DeepGEMM"),
|
||||
package_dir={"": "third_party/DeepGEMM"},
|
||||
package_data={
|
||||
"deep_gemm": [
|
||||
"include/deep_gemm/**/*",
|
||||
@@ -336,56 +460,9 @@ elif paddle.is_compiled_with_cuda():
|
||||
include_package_data=True,
|
||||
)
|
||||
elif paddle.is_compiled_with_xpu():
|
||||
# TODO zhangsishuai@baidu.com to add xpu ops
|
||||
setup(
|
||||
name="fastdeploy_ops",
|
||||
ext_modules=CUDAExtension(sources=[
|
||||
"xpu_ops/set_mask_value.cu",
|
||||
"xpu_ops/set_value_by_flags.cu",
|
||||
"xpu_ops/ngram_mask.cu",
|
||||
"xpu_ops/gather_idx.cu",
|
||||
"xpu_ops/token_penalty_multi_scores.cu",
|
||||
"xpu_ops/token_penalty_only_once.cu",
|
||||
]),
|
||||
)
|
||||
assert False, "In XPU, we should use setup_ops.py in xpu_ops/src, not this."
|
||||
else:
|
||||
use_bf16 = os.getenv("CPU_USE_BF16", "False") == "True"
|
||||
x86_simd_sort_dir = "third_party/x86-simd-sort"
|
||||
if not os.path.exists(x86_simd_sort_dir) or not os.listdir(
|
||||
x86_simd_sort_dir):
|
||||
x86_simd_sort_url = "https://paddlepaddle-inference-banchmark.bj.bcebos.com/x86-simd-sort.tar.gz"
|
||||
download_and_extract(x86_simd_sort_url, "third_party")
|
||||
xft_dir = "third_party/xFasterTransformer"
|
||||
if not os.path.exists(xft_dir) or not os.listdir(xft_dir):
|
||||
if use_bf16:
|
||||
xft_url = (
|
||||
"https://paddlepaddle-inference-banchmark.bj.bcebos.com/xft.tar.gz"
|
||||
)
|
||||
else:
|
||||
xft_url = "https://paddlepaddle-inference-banchmark.bj.bcebos.com/xft_no_bf16.tar.gz"
|
||||
download_and_extract(xft_url, "third_party")
|
||||
|
||||
libs = [
|
||||
"xfastertransformer",
|
||||
"xft_comm_helper",
|
||||
"x86simdsortcpp",
|
||||
]
|
||||
xft_dir = "third_party/xFasterTransformer"
|
||||
x86_simd_sort_dir = "third_party/x86-simd-sort"
|
||||
paddle_custom_kernel_include = [
|
||||
os.path.join(xft_dir, "include"),
|
||||
os.path.join(xft_dir, "src/common"), # src
|
||||
os.path.join(xft_dir, "src/kernels"), # src
|
||||
os.path.join(xft_dir, "src/layers"), # src
|
||||
os.path.join(xft_dir, "src/models"), # src
|
||||
os.path.join(xft_dir, "src/utils"), # src
|
||||
os.path.join(xft_dir, "3rdparty/onednn/include"), # src
|
||||
os.path.join(xft_dir, "3rdparty/onednn/build/include"), # src
|
||||
os.path.join(xft_dir, "3rdparty/xdnn"), # src
|
||||
os.path.join(xft_dir, "3rdparty"),
|
||||
os.path.join(xft_dir, "3rdparty/mkl/include"),
|
||||
os.path.join(x86_simd_sort_dir, "src"), # src
|
||||
]
|
||||
use_bf16 = envs.FD_CPU_USE_BF16 == "True"
|
||||
|
||||
# cc flags
|
||||
paddle_extra_compile_args = [
|
||||
@@ -394,46 +471,15 @@ else:
|
||||
"-fPIC",
|
||||
"-Wno-parentheses",
|
||||
"-DPADDLE_WITH_CUSTOM_KERNEL",
|
||||
"-mavx512f",
|
||||
"-mavx512vl",
|
||||
"-fopenmp",
|
||||
"-mavx512bw",
|
||||
"-mno-mmx",
|
||||
"-DPADDLE_ON_INFERENCE",
|
||||
"-Wall",
|
||||
"-march=skylake-avx512",
|
||||
"-O3",
|
||||
"-g",
|
||||
"-lstdc++fs",
|
||||
"-D_GLIBCXX_USE_CXX11_ABI=1",
|
||||
]
|
||||
if use_bf16:
|
||||
# avx512-bf16 flags
|
||||
paddle_extra_compile_args += [
|
||||
"-DAVX512_BF16_WEIGHT_ONLY_BF16=true",
|
||||
"-DAVX512_FP16_WEIGHT_ONLY_INT8=true",
|
||||
"-DAVX512_FP16_WEIGHT_ONLY_FP16=true",
|
||||
]
|
||||
else:
|
||||
# no avx512-bf16 flags
|
||||
paddle_extra_compile_args += [
|
||||
"-DAVX512_FP32_WEIGHT_ONLY_INT8=true",
|
||||
"-DAVX512_FP32_WEIGHT_ONLY_FP16=true",
|
||||
]
|
||||
paddle_custom_kernel_library_dir = [
|
||||
"third_party/xFasterTransformer/build/",
|
||||
"third_party/x86-simd-sort/builddir",
|
||||
"-DPy_LIMITED_API=0x03090000",
|
||||
]
|
||||
|
||||
include_files = []
|
||||
for include_dir in paddle_custom_kernel_include:
|
||||
include_files.extend(glob.glob(os.path.join(include_dir, "*.h")))
|
||||
so_files = []
|
||||
for library_dir in paddle_custom_kernel_library_dir:
|
||||
if os.path.isdir(library_dir):
|
||||
for lib in libs:
|
||||
lib_file = os.path.join(library_dir, f"lib{lib}.so")
|
||||
if os.path.isfile(lib_file):
|
||||
so_files.append(lib_file)
|
||||
setup(
|
||||
name="fastdeploy_cpu_ops",
|
||||
ext_modules=CppExtension(
|
||||
@@ -444,21 +490,14 @@ else:
|
||||
"cpu_ops/stop_generation_multi_ends.cc",
|
||||
"cpu_ops/update_inputs.cc",
|
||||
"cpu_ops/get_padding_offset.cc",
|
||||
"cpu_ops/xft_all_layer.cc",
|
||||
"cpu_ops/xft_greedy_search.cc",
|
||||
"cpu_ops/avx_weight_only.cc",
|
||||
],
|
||||
extra_link_args=[
|
||||
"-Wl,-rpath,$ORIGIN/x86-simd-sort/builddir",
|
||||
"-Wl,-rpath,$ORIGIN/xFasterTransformer/build",
|
||||
],
|
||||
include_dirs=paddle_custom_kernel_include,
|
||||
library_dirs=paddle_custom_kernel_library_dir,
|
||||
libraries=libs,
|
||||
extra_compile_args=paddle_extra_compile_args,
|
||||
),
|
||||
packages=find_namespace_packages(where="third_party"),
|
||||
package_dir={"": "third_party"},
|
||||
package_data={"fastdeploy_cpu_ops": include_files + so_files},
|
||||
include_package_data=True,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user