[XPU] Support XPU via Paddle Inference backend (#1987)
* [backend] Support XPU via Paddle Inference backend
* [XPU] Support XPU benchmark via Paddle Inference
* [benchmark] Add XPU Paddle h2d config files
benchmark/cpp/CMakeLists.txt:

@@ -7,6 +7,7 @@ include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
 include_directories(${FASTDEPLOY_INCS})
+add_executable(benchmark ${PROJECT_SOURCE_DIR}/benchmark.cc)
 add_executable(benchmark_yolov5 ${PROJECT_SOURCE_DIR}/benchmark_yolov5.cc)
 add_executable(benchmark_ppyolov5 ${PROJECT_SOURCE_DIR}/benchmark_ppyolov5.cc)
 add_executable(benchmark_ppyolov6 ${PROJECT_SOURCE_DIR}/benchmark_ppyolov6.cc)
@@ -37,13 +38,13 @@ add_executable(benchmark_gfl ${PROJECT_SOURCE_DIR}/benchmark_gfl.cc)
 add_executable(benchmark_retinanet ${PROJECT_SOURCE_DIR}/benchmark_retinanet.cc)
 add_executable(benchmark_tood ${PROJECT_SOURCE_DIR}/benchmark_tood.cc)
 add_executable(benchmark_ttfnet ${PROJECT_SOURCE_DIR}/benchmark_ttfnet.cc)
-add_executable(benchmark ${PROJECT_SOURCE_DIR}/benchmark.cc)
 add_executable(benchmark_ppdet ${PROJECT_SOURCE_DIR}/benchmark_ppdet.cc)
 add_executable(benchmark_dino ${PROJECT_SOURCE_DIR}/benchmark_dino.cc)
 add_executable(benchmark_ppshituv2_rec ${PROJECT_SOURCE_DIR}/benchmark_ppshituv2_rec.cc)
 add_executable(benchmark_ppshituv2_det ${PROJECT_SOURCE_DIR}/benchmark_ppshituv2_det.cc)

 if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
+  target_link_libraries(benchmark ${FASTDEPLOY_LIBS} gflags pthread)
   target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags pthread)
   target_link_libraries(benchmark_ppyolov5 ${FASTDEPLOY_LIBS} gflags pthread)
   target_link_libraries(benchmark_ppyolov6 ${FASTDEPLOY_LIBS} gflags pthread)
@@ -74,12 +75,12 @@ if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
   target_link_libraries(benchmark_retinanet ${FASTDEPLOY_LIBS} gflags pthread)
   target_link_libraries(benchmark_tood ${FASTDEPLOY_LIBS} gflags pthread)
   target_link_libraries(benchmark_ttfnet ${FASTDEPLOY_LIBS} gflags pthread)
-  target_link_libraries(benchmark ${FASTDEPLOY_LIBS} gflags pthread)
   target_link_libraries(benchmark_ppdet ${FASTDEPLOY_LIBS} gflags pthread)
   target_link_libraries(benchmark_dino ${FASTDEPLOY_LIBS} gflags pthread)
   target_link_libraries(benchmark_ppshituv2_rec ${FASTDEPLOY_LIBS} gflags pthread)
   target_link_libraries(benchmark_ppshituv2_det ${FASTDEPLOY_LIBS} gflags pthread)
 else()
+  target_link_libraries(benchmark ${FASTDEPLOY_LIBS} gflags)
   target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags)
   target_link_libraries(benchmark_ppyolov5 ${FASTDEPLOY_LIBS} gflags)
   target_link_libraries(benchmark_ppyolov6 ${FASTDEPLOY_LIBS} gflags)
@@ -110,7 +111,6 @@ else()
   target_link_libraries(benchmark_retinanet ${FASTDEPLOY_LIBS} gflags)
   target_link_libraries(benchmark_tood ${FASTDEPLOY_LIBS} gflags)
   target_link_libraries(benchmark_ttfnet ${FASTDEPLOY_LIBS} gflags)
-  target_link_libraries(benchmark ${FASTDEPLOY_LIBS} gflags)
   target_link_libraries(benchmark_ppdet ${FASTDEPLOY_LIBS} gflags)
   target_link_libraries(benchmark_dino ${FASTDEPLOY_LIBS} gflags)
   target_link_libraries(benchmark_ppshituv2_rec ${FASTDEPLOY_LIBS} gflags)
benchmark/cpp/config/config.xpu.paddle.fp32.h2d.l3.txt (new executable file, +14):

@@ -0,0 +1,14 @@
+device: xpu
+device_id: 0
+cpu_thread_nums: 1
+warmup: 200
+repeat: 1000
+backend: paddle
+profile_mode: runtime
+include_h2d_d2h: true
+use_fp16: false
+collect_memory_info: false
+sampling_interval: 1
+precision_compare: false
+xpu_l3_cache: 62914560
+result_path: benchmark_xpu_paddle_fp32_l3.txt
benchmark/cpp/config/config.xpu.paddle.fp32.h2d.txt (new executable file, +14):

@@ -0,0 +1,14 @@
+device: xpu
+device_id: 0
+cpu_thread_nums: 1
+warmup: 200
+repeat: 1000
+backend: paddle
+profile_mode: runtime
+include_h2d_d2h: true
+use_fp16: false
+collect_memory_info: false
+sampling_interval: 1
+precision_compare: false
+xpu_l3_cache: 0
+result_path: benchmark_xpu_paddle_fp32.txt
benchmark/cpp/config/config.xpu.paddle.fp32.l3.txt (new executable file, +14):

@@ -0,0 +1,14 @@
+device: xpu
+device_id: 0
+cpu_thread_nums: 1
+warmup: 200
+repeat: 1000
+backend: paddle
+profile_mode: runtime
+include_h2d_d2h: false
+use_fp16: false
+collect_memory_info: false
+sampling_interval: 1
+precision_compare: false
+xpu_l3_cache: 62914560
+result_path: benchmark_xpu_paddle_fp32_l3.txt
benchmark/cpp/config/config.xpu.paddle.fp32.txt (new executable file, +14):

@@ -0,0 +1,14 @@
+device: xpu
+device_id: 0
+cpu_thread_nums: 1
+warmup: 200
+repeat: 1000
+backend: paddle
+profile_mode: runtime
+include_h2d_d2h: false
+use_fp16: false
+collect_memory_info: false
+sampling_interval: 1
+precision_compare: false
+xpu_l3_cache: 0
+result_path: benchmark_xpu_paddle_fp32.txt
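The four files differ only in include_h2d_d2h and xpu_l3_cache; each is a flat list of key: value pairs. Below is a minimal sketch of how such a file could be loaded into the std::unordered_map<std::string, std::string> consumed by the benchmark helpers further down. This is a hypothetical stand-in, not the repository's actual parser:

    #include <fstream>
    #include <string>
    #include <unordered_map>

    // Parse "key: value" lines into the config_info map used by the benchmark.
    std::unordered_map<std::string, std::string> LoadConfig(const std::string& path) {
      std::unordered_map<std::string, std::string> config_info;
      std::ifstream file(path);
      std::string line;
      while (std::getline(file, line)) {
        auto pos = line.find(':');
        if (pos == std::string::npos) continue;  // skip malformed lines
        std::string key = line.substr(0, pos);
        std::string value = line.substr(pos + 1);
        value.erase(0, value.find_first_not_of(" \t"));  // trim leading blanks
        config_info[key] = value;
      }
      return config_info;
    }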
benchmark/cpp/flags.h:

@@ -60,7 +60,9 @@ DEFINE_int32(device_id, -1,
             "Optional, set specific device id for GPU/XPU, default -1."
             "will force to override the value in config file "
             "eg, 0/1/2/...");
+DEFINE_bool(enable_log_info, false,
+            "Optional, whether to enable log info for paddle backend,"
+            "default false.");

 static void PrintUsage() {
   std::cout << "Usage: infer_demo --model model_path --image img_path "
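The new flag is later copied into paddle_infer_option.enable_log_info (see the CreateRuntimeOption hunk below). A small sketch of setting it directly, assuming the usual fastdeploy/runtime.h header:

    #include "fastdeploy/runtime.h"

    void EnablePaddleGlog(fastdeploy::RuntimeOption& option) {
      option.UsePaddleInferBackend();
      // Keep Paddle Inference glog output; while this stays false (the default),
      // PaddleBackend::BuildOption() calls config_.DisableGlogInfo().
      option.paddle_infer_option.enable_log_info = true;
    }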
benchmark/cpp/option.h:

@@ -18,6 +18,7 @@

 static void UpdateBaseCustomFlags(
     std::unordered_map<std::string, std::string>& config_info) {
+  // see benchmark/cpp/flags.h
   if (FLAGS_warmup > -1) {
     config_info["warmup"] = std::to_string(FLAGS_warmup);
   }
@@ -30,6 +31,14 @@ static void UpdateBaseCustomFlags(
   if (FLAGS_use_fp16) {
     config_info["use_fp16"] = "true";
   }
+  if (FLAGS_xpu_l3_cache >= 0) {
+    config_info["xpu_l3_cache"] = std::to_string(FLAGS_xpu_l3_cache);
+  }
+  if (FLAGS_enable_log_info) {
+    config_info["enable_log_info"] = "true";
+  } else {
+    config_info["enable_log_info"] = "false";
+  }
 }

 static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option,
@@ -47,6 +56,9 @@ static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option,
     option->EnableProfiling(config_info["include_h2d_d2h"] == "true",
                             repeat, warmup);
   }
+  if (config_info["enable_log_info"] == "true") {
+    option->paddle_infer_option.enable_log_info = true;
+  }
   if (config_info["device"] == "gpu") {
     option->UseGpu(std::stoi(config_info["device_id"]));
     if (config_info["backend"] == "ort") {
@@ -104,16 +116,14 @@ static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option,
       return false;
     }
   } else if (config_info["device"] == "xpu") {
-    if (FLAGS_xpu_l3_cache >= 0) {
-      option->UseKunlunXin(std::stoi(config_info["device_id"]),
-                           FLAGS_xpu_l3_cache);
-    } else {
     option->UseKunlunXin(std::stoi(config_info["device_id"]),
                          std::stoi(config_info["xpu_l3_cache"]));
-    }
     if (config_info["backend"] == "ort") {
       option->UseOrtBackend();
     } else if (config_info["backend"] == "paddle") {
+      // Note: for Paddle Inference + XPU fp16, as long as the
+      // model is fp16, it can automatically run with
+      // fp16 precision.
       option->UsePaddleInferBackend();
     } else if (config_info["backend"] == "lite") {
       option->UsePaddleLiteBackend();
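Taken together, the rewritten XPU branch boils down to two RuntimeOption calls. A minimal sketch with illustrative values (device 0 and the 60 MB L3 cache used by the config files above), assuming the usual fastdeploy/runtime.h header:

    #include "fastdeploy/runtime.h"

    void UseXpuPaddleInfer(fastdeploy::RuntimeOption& option) {
      option.UseKunlunXin(0 /* device_id */, 62914560 /* xpu_l3_cache, bytes */);
      option.UsePaddleInferBackend();  // rather than the previous Lite-only path
    }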
cmake/kunlunxin.cmake:

@@ -1,20 +1,26 @@
+if(NOT ENABLE_PADDLE_BACKEND)
   if(NOT ENABLE_LITE_BACKEND)
-    message("Will force to set ENABLE_LITE_BACKEND when build with KunlunXin.")
+    message(WARNING "Will force to set ENABLE_LITE_BACKEND=ON if ENABLE_PADDLE_BACKEND=OFF when build with KunlunXin.")
     set(ENABLE_LITE_BACKEND ON)
   endif()
+else()
+  if(ENABLE_LITE_BACKEND)
+    message(WARNING "Will force to set ENABLE_LITE_BACKEND=OFF if ENABLE_PADDLE_BACKEND=ON when build with KunlunXin.")
+    set(ENABLE_LITE_BACKEND OFF)
+  endif()
+endif()

 option(WITH_LITE_XPU_LOG "" ON)
+if(NOT ENABLE_PADDLE_BACKEND)
   if(NOT PADDLELITE_URL)
     if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
       set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-aarch64-xpu-v213.tgz")
     else ()
       if (WITH_LITE_XPU_LOG)
-        # set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20221215.tgz")
-        # set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20230303.tgz")
         set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20230410.tgz")
       else()
         set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-without-log-20230303.tgz")
       endif()
     endif()
   endif()
+endif()
cmake/paddle_inference.cmake:

@@ -114,10 +114,11 @@ else()
   endif()
   set(PADDLEINFERENCE_VERSION "0.0.0.660f781b77")
 else()
-  # Linux with x86 CPU/Arm CPU/GPU/IPU ...
+  # Linux with x86/aarch64 CPU/Arm CPU/GPU/IPU ...
   if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
     message(FATAL_ERROR "Paddle Backend doesn't support linux aarch64 now.")
   else()
+    # x86_64
     if(WITH_GPU)
       set(PADDLEINFERENCE_FILE "paddle_inference-linux-x64-gpu-trt8.5.2.2-mkl-avx-0.0.0.660f781b77.tgz")
       set(PADDLEINFERENCE_VERSION "0.0.0.660f781b77")
@@ -130,9 +131,13 @@ else()
       # TODO(qiuyanjun): Should use the commit id to tag the version
       set(PADDLEINFERENCE_VERSION "2.4-dev1")
     endif()
+    if(WITH_KUNLUNXIN)
+      set(PADDLEINFERENCE_FILE "paddle_inference-linux-x64-xpu-openblas-0.0.0.021fd73536.tgz")
+      set(PADDLEINFERENCE_VERSION "0.0.0.021fd73536")
+    endif()

     if(NEED_ABI0)
-      if(WITH_GPU OR WITH_PU)
+      if(WITH_GPU OR WITH_IPU OR WITH_KUNLUNXIN)
         message(WARNING "While NEED_ABI0=ON, only support CPU now, will fallback to CPU.")
       endif()
       set(PADDLEINFERENCE_FILE "paddle_inference-linux-x64-2.4.0-abi0.tgz")
FastDeploy config header template (config.h.in):

@@ -41,6 +41,10 @@
 #cmakedefine WITH_GPU
 #endif

+#ifndef WITH_KUNLUNXIN
+#cmakedefine WITH_KUNLUNXIN
+#endif
+
 #ifndef WITH_DIRECTML
 #cmakedefine WITH_DIRECTML
 #endif
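Once CMake generates the header, backend sources can branch on the define in the usual way; the PaddleBackend hunks below use exactly this guard. A schematic fragment:

    #ifdef WITH_KUNLUNXIN
      // XPU-capable build, e.g. config_.EnableXpu(...) in PaddleBackend::BuildOption
    #else
      // non-XPU build: the backend falls back to CPU with a warning
    #endif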
fastdeploy/runtime/backends/paddle/option.h:

@@ -45,6 +45,33 @@ struct IpuOption {
   bool ipu_enable_half_partial;
 };

+/*! @brief Option object to configure KUNLUNXIN XPU
+ */
+struct XpuOption {
+  /// kunlunxin device id
+  int kunlunxin_device_id = 0;
+  /// EnableXpu
+  /// kunlunxin_l3_workspace_size
+  int kunlunxin_l3_workspace_size = 0xfffc00;
+  /// kunlunxin_locked
+  bool kunlunxin_locked = false;
+  /// kunlunxin_autotune
+  bool kunlunxin_autotune = true;
+  /// kunlunxin_autotune_file
+  std::string kunlunxin_autotune_file = "";
+  /// kunlunxin_precision
+  std::string kunlunxin_precision = "int16";
+  /// kunlunxin_adaptive_seqlen
+  bool kunlunxin_adaptive_seqlen = false;
+  /// kunlunxin_enable_multi_stream
+  bool kunlunxin_enable_multi_stream = false;
+  /// SetXpuConfig
+  /// quant post dynamic weight bits
+  int kunlunxin_quant_post_dynamic_weight_bits = -1;
+  /// quant post dynamic op types
+  std::vector<std::string> kunlunxin_quant_post_dynamic_op_types = {};
+};
+
 /*! @brief Option object to configure Paddle Inference backend
 */
 struct PaddleBackendOption {
@@ -63,6 +90,10 @@ struct PaddleBackendOption {
   * @brief IPU option, this will configure the IPU hardware, if inference model in IPU
   */
   IpuOption ipu_option;
+  /*
+   * @brief XPU option, this will configure the KUNLUNXIN XPU hardware, if inference model in XPU
+   */
+  XpuOption xpu_option;

   /// Collect shape for model while enable_trt is true
   bool collect_trt_shape = false;
@@ -93,12 +124,22 @@ struct PaddleBackendOption {
     ipu_option.ipu_enable_half_partial = enable_half_partial;
   }

+  void SetXpuConfig(
+      int quant_post_dynamic_weight_bits = -1,
+      const std::vector<std::string>& quant_post_dynamic_op_types = {}) {
+    xpu_option.kunlunxin_quant_post_dynamic_weight_bits =
+        quant_post_dynamic_weight_bits;
+    xpu_option.kunlunxin_quant_post_dynamic_op_types =
+        quant_post_dynamic_op_types;
+  }
+
   // The belowing parameters may be removed, please do not
   // read or write them directly
   TrtBackendOption trt_option;
   bool enable_pinned_memory = false;
   void* external_stream_ = nullptr;
   Device device = Device::CPU;
+  /// device id for CPU/GPU
   int device_id = 0;
   std::vector<std::string> trt_disabled_ops_{};
   int cpu_thread_num = 8;
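A short sketch of the new accessor; the 8-bit setting and the op-type list are illustrative assumptions, not recommendations from the commit:

    #include "fastdeploy/runtime.h"

    void TuneXpuQuant(fastdeploy::RuntimeOption& option) {
      option.UseKunlunXin(0, 0xfffc00);
      option.UsePaddleInferBackend();
      // Illustrative values: 8-bit post-dynamic weight quantization
      // applied to the listed op types.
      option.paddle_infer_option.SetXpuConfig(8, {"conv2d", "mul"});
    }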
fastdeploy/runtime/backends/paddle/paddle_backend.cc:

@@ -78,7 +78,26 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
                      option.ipu_option.ipu_available_memory_proportion,
                      option.ipu_option.ipu_enable_half_partial);
 #else
-    FDWARNING << "The FastDeploy is not compiled with IPU backend, so will "
+    FDWARNING << "The FastDeploy is not compiled with IPU device, so will "
+                 "fallback to CPU with Paddle Inference Backend."
+              << std::endl;
+#endif
+  } else if (option.device == Device::KUNLUNXIN) {
+#ifdef WITH_KUNLUNXIN
+    config_.EnableXpu(option.xpu_option.kunlunxin_l3_workspace_size,
+                      option.xpu_option.kunlunxin_locked,
+                      option.xpu_option.kunlunxin_autotune,
+                      option.xpu_option.kunlunxin_autotune_file,
+                      option.xpu_option.kunlunxin_precision,
+                      option.xpu_option.kunlunxin_adaptive_seqlen,
+                      option.xpu_option.kunlunxin_enable_multi_stream);
+    config_.SetXpuConfig(
+        option.xpu_option.kunlunxin_quant_post_dynamic_weight_bits,
+        option.xpu_option.kunlunxin_quant_post_dynamic_op_types);
+    config_.SetXpuDeviceId(option.xpu_option.kunlunxin_device_id);
+#else
+    FDWARNING
+        << "The FastDeploy is not compiled with KUNLUNXIN device, so will "
            "fallback to CPU with Paddle Inference Backend."
         << std::endl;
 #endif
@@ -89,6 +108,7 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
       config_.SetMkldnnCacheCapacity(option.mkldnn_cache_size);
     }
   }
+
   if (!option.enable_log_info) {
     config_.DisableGlogInfo();
   }
@@ -106,6 +126,7 @@ bool PaddleBackend::Init(const RuntimeOption& runtime_option) {
   }

   auto option = runtime_option;
+  // Collect basic paddle inference option and trt option.
   option.paddle_infer_option.model_file = runtime_option.model_file;
   option.paddle_infer_option.params_file = runtime_option.params_file;
   option.paddle_infer_option.model_from_memory_ =
@@ -117,6 +138,10 @@ bool PaddleBackend::Init(const RuntimeOption& runtime_option) {
   option.paddle_infer_option.external_stream_ = runtime_option.external_stream_;
   option.paddle_infer_option.trt_option = runtime_option.trt_option;
   option.paddle_infer_option.trt_option.gpu_id = runtime_option.device_id;
+  // Note(qiuyanjun): For Ipu option and XPU option, please check the
+  // details of RuntimeOption::UseIpu() and RuntimeOption::UseKunlunXin().
+  // Furthermore, please check paddle_infer_option.SetIpuConfig() and
+  // paddle_infer_option.SetXpuConfig() for more details of extra configs.
   return InitFromPaddle(option.model_file, option.params_file,
                         option.model_from_memory_, option.paddle_infer_option);
 }
fastdeploy/runtime/backends/paddle/util.cc:

@@ -19,6 +19,8 @@ namespace fastdeploy {
 paddle_infer::PlaceType ConvertFDDeviceToPlace(Device device) {
   if (device == Device::GPU) {
     return paddle_infer::PlaceType::kGPU;
+  } else if (device == Device::KUNLUNXIN) {
+    return paddle_infer::PlaceType::kXPU;
   }
   return paddle_infer::PlaceType::kCPU;
 }
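With this change a KunlunXin FDTensor maps to the XPU place instead of silently falling through to kCPU. A tiny check of the helper (an internal function of the paddle backend, shown only to illustrate the mapping):

    #include <cassert>

    void CheckXpuPlace() {
      auto place = fastdeploy::ConvertFDDeviceToPlace(fastdeploy::Device::KUNLUNXIN);
      assert(place == paddle_infer::PlaceType::kXPU);  // previously resolved to kCPU
    }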
@@ -52,9 +54,21 @@ void ShareTensorFromFDTensor(paddle_infer::Tensor* tensor,
       tensor->CopyFromCpu(static_cast<const int64_t*>(fd_tensor.Data()));
     }
     return;
+  } else if (fd_tensor.dtype == FDDataType::INT8) {
+    if (place == paddle_infer::PlaceType::kGPU) {
+      tensor->ShareExternalData(static_cast<const int8_t*>(fd_tensor.Data()),
+                                shape, place);
+    } else {
+      tensor->CopyFromCpu(static_cast<const int8_t*>(fd_tensor.Data()));
+    }
+    return;
   } else if (fd_tensor.dtype == FDDataType::UINT8) {
+    if (place == paddle_infer::PlaceType::kGPU) {
       tensor->ShareExternalData(static_cast<const uint8_t*>(fd_tensor.Data()),
-                                shape, paddle_infer::PlaceType::kCPU);
+                                shape, place);
+    } else {
+      tensor->CopyFromCpu(static_cast<const uint8_t*>(fd_tensor.Data()));
+    }
     return;
   }
   FDASSERT(false, "Unexpected data type(%s) while infer with PaddleBackend.",
@@ -89,9 +103,21 @@ void ShareOutTensorFromFDTensor(paddle_infer::Tensor* tensor,
       tensor->CopyToCpu(static_cast<int64_t*>(fd_tensor.MutableData()));
     }
     return;
+  } else if (fd_tensor.dtype == FDDataType::INT8) {
+    if (place == paddle_infer::PlaceType::kGPU) {
+      tensor->ShareExternalData(static_cast<const int8_t*>(fd_tensor.Data()),
+                                shape, place);
+    } else {
+      tensor->CopyFromCpu(static_cast<const int8_t*>(fd_tensor.Data()));
+    }
+    return;
   } else if (fd_tensor.dtype == FDDataType::UINT8) {
-    tensor->ShareExternalData(static_cast<uint8_t*>(fd_tensor.MutableData()),
-                              shape, paddle_infer::PlaceType::kCPU);
+    if (place == paddle_infer::PlaceType::kGPU) {
+      tensor->ShareExternalData(static_cast<const uint8_t*>(fd_tensor.Data()),
+                                shape, place);
+    } else {
+      tensor->CopyFromCpu(static_cast<const uint8_t*>(fd_tensor.Data()));
+    }
     return;
   }
   FDASSERT(false, "Unexpected data type(%s) while infer with PaddleBackend.",
@@ -149,6 +175,11 @@ void PaddleTensorToFDTensor(std::unique_ptr<paddle_infer::Tensor>& tensor,
     Device device = Device::CPU;
     if (place == paddle_infer::PlaceType::kGPU) {
       device = Device::GPU;
+    } else if (place == paddle_infer::PlaceType::kXPU) {
+      device = Device::KUNLUNXIN;
+      FDASSERT(false,
+               "Currently, copy_to_fd=false, FDTensor SetExternalData "
+               "is not support for Device::KUNLUNXIN now!")
     }
     fd_tensor->name = tensor->name();
     fd_tensor->SetExternalData(shape, fd_dtype, out_data, device);
Runtime default-backend map (Device -> Backend priority):

@@ -99,7 +99,7 @@ static std::map<Device, std::vector<Backend>>
     {Device::SUNRISENPU, {Backend::HORIZONNPU}},
     {Device::IPU, {Backend::PDINFER}},
     {Device::TIMVX, {Backend::LITE}},
-    {Device::KUNLUNXIN, {Backend::LITE}},
+    {Device::KUNLUNXIN, {Backend::LITE, Backend::PDINFER}},
     {Device::ASCEND, {Backend::LITE}},
     {Device::SOPHGOTPUD, {Backend::SOPHGOTPU}},
     {Device::DIRECTML, {Backend::ORT}}
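Backend::LITE stays first in the vector, so automatic selection for Device::KUNLUNXIN still resolves to Paddle Lite; Paddle Inference must be requested explicitly. A sketch, assuming the usual fastdeploy/runtime.h header:

    #include "fastdeploy/runtime.h"

    void PickXpuBackend(fastdeploy::RuntimeOption& option) {
      option.UseKunlunXin(0, 0xfffc00);  // auto-selection would still choose Backend::LITE
      option.UsePaddleInferBackend();    // opt in to the newly allowed Backend::PDINFER
    }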
fastdeploy/runtime/runtime_option.cc:

@@ -79,14 +79,18 @@ void RuntimeOption::UseTimVX() {
   paddle_lite_option.device = device;
 }

-void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
+void RuntimeOption::UseKunlunXin(int kunlunxin_id,
+                                 int l3_workspace_size,
                                  bool locked, bool autotune,
                                  const std::string& autotune_file,
                                  const std::string& precision,
                                  bool adaptive_seqlen,
                                  bool enable_multi_stream,
                                  int64_t gm_default_size) {
+#ifdef WITH_KUNLUNXIN
   device = Device::KUNLUNXIN;
+
+#ifdef ENABLE_LITE_BACKEND
   paddle_lite_option.device = device;
   paddle_lite_option.device_id = kunlunxin_id;
   paddle_lite_option.kunlunxin_l3_workspace_size = l3_workspace_size;
@@ -97,6 +101,42 @@ void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
   paddle_lite_option.kunlunxin_adaptive_seqlen = adaptive_seqlen;
   paddle_lite_option.kunlunxin_enable_multi_stream = enable_multi_stream;
   paddle_lite_option.kunlunxin_gm_default_size = gm_default_size;
+#endif
+#ifdef ENABLE_PADDLE_BACKEND
+  paddle_infer_option.device = device;
+  paddle_infer_option.xpu_option.kunlunxin_device_id = kunlunxin_id;
+  paddle_infer_option.xpu_option.kunlunxin_l3_workspace_size = l3_workspace_size;
+  paddle_infer_option.xpu_option.kunlunxin_locked = locked;
+  paddle_infer_option.xpu_option.kunlunxin_autotune = autotune;
+  paddle_infer_option.xpu_option.kunlunxin_autotune_file = autotune_file;
+  paddle_infer_option.xpu_option.kunlunxin_precision = precision;
+  paddle_infer_option.xpu_option.kunlunxin_adaptive_seqlen = adaptive_seqlen;
+  paddle_infer_option.xpu_option.kunlunxin_enable_multi_stream = enable_multi_stream;
+  // paddle_infer_option.xpu_option.kunlunxin_gm_default_size = gm_default_size;
+  // use paddle_infer_option.xpu_option.SetXpuConfig() for more options.
+#endif
+
+#else
+  FDWARNING << "The FastDeploy didn't compile with KUNLUNXIN, will force to use CPU."
+            << std::endl;
+  device = Device::CPU;
+#endif
+}
+
+void RuntimeOption::UseIpu(int device_num, int micro_batch_size,
+                           bool enable_pipelining, int batches_per_step) {
+#ifdef WITH_IPU
+  device = Device::IPU;
+  paddle_infer_option.ipu_option.ipu_device_num = device_num;
+  paddle_infer_option.ipu_option.ipu_micro_batch_size = micro_batch_size;
+  paddle_infer_option.ipu_option.ipu_enable_pipelining = enable_pipelining;
+  paddle_infer_option.ipu_option.ipu_batches_per_step = batches_per_step;
+  // use paddle_infer_option.ipu_option.SetIpuConfig() for more options.
+#else
+  FDWARNING << "The FastDeploy didn't compile with IPU, will force to use CPU."
+            << std::endl;
+  device = Device::CPU;
+#endif
 }

 void RuntimeOption::UseAscend() {
@@ -484,19 +524,4 @@ void RuntimeOption::DisablePaddleTrtOPs(const std::vector<std::string>& ops) {
   paddle_infer_option.DisableTrtOps(ops);
 }

-void RuntimeOption::UseIpu(int device_num, int micro_batch_size,
-                           bool enable_pipelining, int batches_per_step) {
-#ifdef WITH_IPU
-  device = Device::IPU;
-  ipu_device_num = device_num;
-  ipu_micro_batch_size = micro_batch_size;
-  ipu_enable_pipelining = enable_pipelining;
-  ipu_batches_per_step = batches_per_step;
-#else
-  FDWARNING << "The FastDeploy didn't compile with IPU, will force to use CPU."
-            << std::endl;
-  device = Device::CPU;
-#endif
-}
-
 } // namespace fastdeploy
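For completeness, a hedged sketch of the fully spelled-out call; the values mirror the XpuOption defaults above, and it relies on the header declaring a default for the trailing gm_default_size parameter (an assumption here, since only the .cc signature is shown):

    #include "fastdeploy/runtime.h"

    void UseXpuExplicit(fastdeploy::RuntimeOption& option) {
      option.UseKunlunXin(/*kunlunxin_id=*/0,
                          /*l3_workspace_size=*/0xfffc00,
                          /*locked=*/false,
                          /*autotune=*/true,
                          /*autotune_file=*/"",
                          /*precision=*/"int16",
                          /*adaptive_seqlen=*/false,
                          /*enable_multi_stream=*/false);
      // gm_default_size is left at its header default; it currently only
      // affects the Lite path (see the commented-out line in the hunk above).
      option.UsePaddleInferBackend();
    }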
XPU build script (__build_fastdeploy_linux_x86_64_xpu_shared):

@@ -52,11 +52,11 @@ __build_fastdeploy_linux_x86_64_xpu_shared() {
   local FASDEPLOY_INSTALL_DIR="${ROOT_PATH}/${BUILD_DIR}/install"
   cd "${BUILD_DIR}" && echo "-- [INFO] Working Dir: ${PWD}"

-  cmake -DWITH_KUNLUNXIN=ON \
-        -DCMAKE_BUILD_TYPE=Release \
-        -DWITH_GPU=OFF \
+  cmake -DCMAKE_BUILD_TYPE=Release \
+        -DWITH_KUNLUNXIN=ON \
         -DENABLE_ORT_BACKEND=OFF \
-        -DENABLE_PADDLE_BACKEND=OFF \
+        -DENABLE_PADDLE_BACKEND=ON \
+        -DENABLE_LITE_BACKEND=OFF \
         -DENABLE_VISION=ON \
         -DENABLE_BENCHMARK=ON \
         -DBUILD_EXAMPLES=OFF \