Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-05 16:48:03 +08:00
[XPU] Support XPU via Paddle Inference backend (#1987)
* [backend] Support XPU via Paddle Inference backend
* [XPU] support XPU benchmark via paddle inference
* [benchmark] add xpu paddle h2d config files
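At the API level, the change boils down to being able to pair UseKunlunXin with UsePaddleInferBackend. A minimal sketch of the resulting setup, assuming a FastDeploy build with XPU support (model paths are placeholders):

#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  // Select KunlunXin XPU device 0 with a ~60 MB L3 cache budget,
  // the same value the new config files use (62914560 bytes).
  option.UseKunlunXin(0, 62914560);
  // New in this commit: run through Paddle Inference instead of Paddle Lite.
  option.UsePaddleInferBackend();
  // Optional glog output from Paddle Inference, gated below by the new
  // --enable_log_info flag.
  option.paddle_infer_option.enable_log_info = true;
  option.SetModelPath("model.pdmodel", "model.pdiparams");

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) return -1;
  return 0;
}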
@@ -7,6 +7,7 @@ include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
include_directories(${FASTDEPLOY_INCS})
add_executable(benchmark ${PROJECT_SOURCE_DIR}/benchmark.cc)
add_executable(benchmark_yolov5 ${PROJECT_SOURCE_DIR}/benchmark_yolov5.cc)
add_executable(benchmark_ppyolov5 ${PROJECT_SOURCE_DIR}/benchmark_ppyolov5.cc)
add_executable(benchmark_ppyolov6 ${PROJECT_SOURCE_DIR}/benchmark_ppyolov6.cc)
@@ -37,13 +38,13 @@ add_executable(benchmark_gfl ${PROJECT_SOURCE_DIR}/benchmark_gfl.cc)
add_executable(benchmark_retinanet ${PROJECT_SOURCE_DIR}/benchmark_retinanet.cc)
add_executable(benchmark_tood ${PROJECT_SOURCE_DIR}/benchmark_tood.cc)
add_executable(benchmark_ttfnet ${PROJECT_SOURCE_DIR}/benchmark_ttfnet.cc)
add_executable(benchmark ${PROJECT_SOURCE_DIR}/benchmark.cc)
add_executable(benchmark_ppdet ${PROJECT_SOURCE_DIR}/benchmark_ppdet.cc)
add_executable(benchmark_dino ${PROJECT_SOURCE_DIR}/benchmark_dino.cc)
add_executable(benchmark_ppshituv2_rec ${PROJECT_SOURCE_DIR}/benchmark_ppshituv2_rec.cc)
add_executable(benchmark_ppshituv2_det ${PROJECT_SOURCE_DIR}/benchmark_ppshituv2_det.cc)
if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
  target_link_libraries(benchmark ${FASTDEPLOY_LIBS} gflags pthread)
  target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags pthread)
  target_link_libraries(benchmark_ppyolov5 ${FASTDEPLOY_LIBS} gflags pthread)
  target_link_libraries(benchmark_ppyolov6 ${FASTDEPLOY_LIBS} gflags pthread)
@@ -74,12 +75,12 @@ if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
  target_link_libraries(benchmark_retinanet ${FASTDEPLOY_LIBS} gflags pthread)
  target_link_libraries(benchmark_tood ${FASTDEPLOY_LIBS} gflags pthread)
  target_link_libraries(benchmark_ttfnet ${FASTDEPLOY_LIBS} gflags pthread)
  target_link_libraries(benchmark ${FASTDEPLOY_LIBS} gflags pthread)
  target_link_libraries(benchmark_ppdet ${FASTDEPLOY_LIBS} gflags pthread)
  target_link_libraries(benchmark_dino ${FASTDEPLOY_LIBS} gflags pthread)
  target_link_libraries(benchmark_ppshituv2_rec ${FASTDEPLOY_LIBS} gflags pthread)
  target_link_libraries(benchmark_ppshituv2_det ${FASTDEPLOY_LIBS} gflags pthread)
else()
  target_link_libraries(benchmark ${FASTDEPLOY_LIBS} gflags)
  target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags)
  target_link_libraries(benchmark_ppyolov5 ${FASTDEPLOY_LIBS} gflags)
  target_link_libraries(benchmark_ppyolov6 ${FASTDEPLOY_LIBS} gflags)
@@ -110,7 +111,6 @@ else()
  target_link_libraries(benchmark_retinanet ${FASTDEPLOY_LIBS} gflags)
  target_link_libraries(benchmark_tood ${FASTDEPLOY_LIBS} gflags)
  target_link_libraries(benchmark_ttfnet ${FASTDEPLOY_LIBS} gflags)
  target_link_libraries(benchmark ${FASTDEPLOY_LIBS} gflags)
  target_link_libraries(benchmark_ppdet ${FASTDEPLOY_LIBS} gflags)
  target_link_libraries(benchmark_dino ${FASTDEPLOY_LIBS} gflags)
  target_link_libraries(benchmark_ppshituv2_rec ${FASTDEPLOY_LIBS} gflags)
benchmark/cpp/config/config.xpu.paddle.fp32.h2d.l3.txt (new executable file, 14 lines)
@@ -0,0 +1,14 @@
device: xpu
device_id: 0
cpu_thread_nums: 1
warmup: 200
repeat: 1000
backend: paddle
profile_mode: runtime
include_h2d_d2h: true
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 62914560
result_path: benchmark_xpu_paddle_fp32_l3.txt
benchmark/cpp/config/config.xpu.paddle.fp32.h2d.txt (new executable file, 14 lines)
@@ -0,0 +1,14 @@
device: xpu
device_id: 0
cpu_thread_nums: 1
warmup: 200
repeat: 1000
backend: paddle
profile_mode: runtime
include_h2d_d2h: true
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_xpu_paddle_fp32.txt
benchmark/cpp/config/config.xpu.paddle.fp32.l3.txt (new executable file, 14 lines)
@@ -0,0 +1,14 @@
device: xpu
device_id: 0
cpu_thread_nums: 1
warmup: 200
repeat: 1000
backend: paddle
profile_mode: runtime
include_h2d_d2h: false
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 62914560
result_path: benchmark_xpu_paddle_fp32_l3.txt
benchmark/cpp/config/config.xpu.paddle.fp32.txt (new executable file, 14 lines)
@@ -0,0 +1,14 @@
device: xpu
device_id: 0
cpu_thread_nums: 1
warmup: 200
repeat: 1000
backend: paddle
profile_mode: runtime
include_h2d_d2h: false
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_xpu_paddle_fp32.txt
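The four config files above form a 2x2 matrix over two switches: include_h2d_d2h (whether host-to-device and device-to-host copies are included in the profiled time) and xpu_l3_cache (0, or a 62914560-byte L3 budget). A typical run would point one of the benchmark binaries at a config file, e.g. (model and image paths are illustrative):

./benchmark_yolov5 --model yolov5s_infer --image sample.jpg \
    --config_path config/config.xpu.paddle.fp32.h2d.l3.txt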
@@ -60,7 +60,9 @@ DEFINE_int32(device_id, -1,
             "Optional, set specific device id for GPU/XPU, default -1."
             "will force to override the value in config file "
             "eg, 0/1/2/...");

DEFINE_bool(enable_log_info, false,
            "Optional, whether to enable log info for paddle backend,"
            "default false.");

static void PrintUsage() {
  std::cout << "Usage: infer_demo --model model_path --image img_path "
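Since enable_log_info is a regular gflags boolean, it can also be toggled per run (e.g. --enable_log_info=true) without editing a config file; UpdateBaseCustomFlags in the next hunk copies it into config_info.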
@@ -18,6 +18,7 @@
static void UpdateBaseCustomFlags(
    std::unordered_map<std::string, std::string>& config_info) {
  // see benchmark/cpp/flags.h
  if (FLAGS_warmup > -1) {
    config_info["warmup"] = std::to_string(FLAGS_warmup);
  }
@@ -30,6 +31,14 @@ static void UpdateBaseCustomFlags(
  if (FLAGS_use_fp16) {
    config_info["use_fp16"] = "true";
  }
  if (FLAGS_xpu_l3_cache >= 0) {
    config_info["xpu_l3_cache"] = std::to_string(FLAGS_xpu_l3_cache);
  }
  if (FLAGS_enable_log_info) {
    config_info["enable_log_info"] = "true";
  } else {
    config_info["enable_log_info"] = "false";
  }
}

static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option,
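Taken together with the XPU branch in the final hunk, these changes give the command line precedence: a non-negative --xpu_l3_cache overwrites the config file's xpu_l3_cache entry, which CreateRuntimeOption otherwise falls back to. A self-contained sketch of that resolution order (ResolveL3Cache is a hypothetical name, not part of this diff):

#include <string>
#include <unordered_map>

// Hypothetical helper mirroring the precedence in this commit: a
// non-negative command-line value wins; otherwise the config value is used.
static int ResolveL3Cache(
    int flag_xpu_l3_cache,
    const std::unordered_map<std::string, std::string>& config_info) {
  if (flag_xpu_l3_cache >= 0) {
    return flag_xpu_l3_cache;  // e.g. --xpu_l3_cache=62914560
  }
  return std::stoi(config_info.at("xpu_l3_cache"));  // config file fallback
}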
@@ -47,6 +56,9 @@ static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option,
    option->EnableProfiling(config_info["include_h2d_d2h"] == "true",
                            repeat, warmup);
  }
  if (config_info["enable_log_info"] == "true") {
    option->paddle_infer_option.enable_log_info = true;
  }
  if (config_info["device"] == "gpu") {
    option->UseGpu(std::stoi(config_info["device_id"]));
    if (config_info["backend"] == "ort") {
@@ -104,16 +116,14 @@ static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option,
      return false;
    }
  } else if (config_info["device"] == "xpu") {
    if (FLAGS_xpu_l3_cache >= 0) {
      option->UseKunlunXin(std::stoi(config_info["device_id"]),
                           FLAGS_xpu_l3_cache);
    } else {
      option->UseKunlunXin(std::stoi(config_info["device_id"]),
                           std::stoi(config_info["xpu_l3_cache"]));
    }
    option->UseKunlunXin(std::stoi(config_info["device_id"]),
                         std::stoi(config_info["xpu_l3_cache"]));
    if (config_info["backend"] == "ort") {
      option->UseOrtBackend();
    } else if (config_info["backend"] == "paddle") {
      // Note: for Paddle Inference + XPU fp16, as long as
      // the model is fp16, it can automatically run at
      // fp16 precision.
      option->UsePaddleInferBackend();
    } else if (config_info["backend"] == "lite") {
      option->UsePaddleLiteBackend();