[XPU] Support XPU via Paddle Inference backend (#1987)

* [backend] Support XPU via Paddle Inference backend

* [XPU] Support XPU benchmark via Paddle Inference

* [benchmark] Add XPU Paddle h2d config files
Author: DefTruth
Date: 2023-05-25 14:13:40 +08:00 (committed by GitHub)
Parent: 24f32d10a7 · Commit: 49c033a828
16 changed files with 262 additions and 57 deletions
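For context, here is a minimal sketch of what this commit enables from the user side, built only from the `RuntimeOption` calls that appear in the diffs below (the include path and the surrounding `main` are assumptions, not part of the commit):

```cpp
#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  // Bind inference to XPU device 0 with a 62914560-byte (60 MB)
  // L3 cache, matching the new benchmark configs below.
  option.UseKunlunXin(0, 62914560);
  // Route execution through the Paddle Inference backend.
  option.UsePaddleInferBackend();
  // Surface Paddle Inference log output, as toggled by the new
  // --enable_log_info flag introduced in this commit.
  option.paddle_infer_option.enable_log_info = true;
  return 0;
}
```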

View File

@@ -7,6 +7,7 @@ include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
include_directories(${FASTDEPLOY_INCS})
add_executable(benchmark ${PROJECT_SOURCE_DIR}/benchmark.cc)
add_executable(benchmark_yolov5 ${PROJECT_SOURCE_DIR}/benchmark_yolov5.cc)
add_executable(benchmark_ppyolov5 ${PROJECT_SOURCE_DIR}/benchmark_ppyolov5.cc)
add_executable(benchmark_ppyolov6 ${PROJECT_SOURCE_DIR}/benchmark_ppyolov6.cc)
@@ -37,13 +38,13 @@ add_executable(benchmark_gfl ${PROJECT_SOURCE_DIR}/benchmark_gfl.cc)
add_executable(benchmark_retinanet ${PROJECT_SOURCE_DIR}/benchmark_retinanet.cc)
add_executable(benchmark_tood ${PROJECT_SOURCE_DIR}/benchmark_tood.cc)
add_executable(benchmark_ttfnet ${PROJECT_SOURCE_DIR}/benchmark_ttfnet.cc)
add_executable(benchmark ${PROJECT_SOURCE_DIR}/benchmark.cc)
add_executable(benchmark_ppdet ${PROJECT_SOURCE_DIR}/benchmark_ppdet.cc)
add_executable(benchmark_dino ${PROJECT_SOURCE_DIR}/benchmark_dino.cc)
add_executable(benchmark_ppshituv2_rec ${PROJECT_SOURCE_DIR}/benchmark_ppshituv2_rec.cc)
add_executable(benchmark_ppshituv2_det ${PROJECT_SOURCE_DIR}/benchmark_ppshituv2_det.cc)
if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
target_link_libraries(benchmark ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(benchmark_ppyolov5 ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(benchmark_ppyolov6 ${FASTDEPLOY_LIBS} gflags pthread)
@@ -74,12 +75,12 @@ if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
target_link_libraries(benchmark_retinanet ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(benchmark_tood ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(benchmark_ttfnet ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(benchmark ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(benchmark_ppdet ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(benchmark_dino ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(benchmark_ppshituv2_rec ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(benchmark_ppshituv2_det ${FASTDEPLOY_LIBS} gflags pthread)
else()
target_link_libraries(benchmark ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark_ppyolov5 ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark_ppyolov6 ${FASTDEPLOY_LIBS} gflags)
@@ -110,7 +111,6 @@ else()
target_link_libraries(benchmark_retinanet ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark_tood ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark_ttfnet ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark_ppdet ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark_dino ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark_ppshituv2_rec ${FASTDEPLOY_LIBS} gflags)
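Taken together, the four CMakeLists hunks appear to move the plain `benchmark` target (built from `benchmark.cc`, the driver for the new XPU configs) to the top of the executable list, and relocate its `target_link_libraries` line accordingly in both the pthread (Linux) branch and the non-pthread branch.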

View File

@@ -0,0 +1,14 @@
device: xpu
device_id: 0
cpu_thread_nums: 1
warmup: 200
repeat: 1000
backend: paddle
profile_mode: runtime
include_h2d_d2h: true
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 62914560
result_path: benchmark_xpu_paddle_fp32_l3.txt
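A note on the two XPU-specific keys, based on how the option-handling code later in this commit consumes them: `xpu_l3_cache` is the L3 workspace size in bytes (62914560 bytes = 60 MB) and is passed straight through to `UseKunlunXin`; `include_h2d_d2h: true` makes the runtime profiler include host-to-device/device-to-host transfer time, which is what the "h2d" in the commit message refers to.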

View File

@@ -0,0 +1,14 @@
device: xpu
device_id: 0
cpu_thread_nums: 1
warmup: 200
repeat: 1000
backend: paddle
profile_mode: runtime
include_h2d_d2h: true
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_xpu_paddle_fp32.txt

View File

@@ -0,0 +1,14 @@
device: xpu
device_id: 0
cpu_thread_nums: 1
warmup: 200
repeat: 1000
backend: paddle
profile_mode: runtime
include_h2d_d2h: false
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 62914560
result_path: benchmark_xpu_paddle_fp32_l3.txt

View File

@@ -0,0 +1,14 @@
device: xpu
device_id: 0
cpu_thread_nums: 1
warmup: 200
repeat: 1000
backend: paddle
profile_mode: runtime
include_h2d_d2h: false
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_xpu_paddle_fp32.txt
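The four new configs thus cover the 2×2 matrix of {h2d+d2h timing included, excluded} × {60 MB L3 cache, L3 cache disabled (0)}, writing results to `benchmark_xpu_paddle_fp32_l3.txt` or `benchmark_xpu_paddle_fp32.txt` accordingly.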

View File

@@ -60,7 +60,9 @@ DEFINE_int32(device_id, -1,
"Optional, set specific device id for GPU/XPU, default -1."
"will force to override the value in config file "
"eg, 0/1/2/...");
DEFINE_bool(enable_log_info, false,
"Optional, whether to enable log info for paddle backend,"
"default false.");
static void PrintUsage() {
std::cout << "Usage: infer_demo --model model_path --image img_path "

View File

@@ -18,6 +18,7 @@
static void UpdateBaseCustomFlags(
std::unordered_map<std::string, std::string>& config_info) {
// see benchmark/cpp/flags.h
if (FLAGS_warmup > -1) {
config_info["warmup"] = std::to_string(FLAGS_warmup);
}
@@ -30,6 +31,14 @@ static void UpdateBaseCustomFlags(
if (FLAGS_use_fp16) {
config_info["use_fp16"] = "true";
}
if (FLAGS_xpu_l3_cache >= 0) {
config_info["xpu_l3_cache"] = std::to_string(FLAGS_xpu_l3_cache);
}
if (FLAGS_enable_log_info) {
config_info["enable_log_info"] = "true";
} else {
config_info["enable_log_info"] = "false";
}
}
static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option,
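Design note: `UpdateBaseCustomFlags` normalizes every command-line flag into the `config_info` map, so flags, when set, override the config-file values and the rest of the benchmark only consults the map. Folding `FLAGS_xpu_l3_cache` and `FLAGS_enable_log_info` in at this point is what lets the XPU branch below drop its flag-specific special case.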
@@ -47,6 +56,9 @@ static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option,
option->EnableProfiling(config_info["include_h2d_d2h"] == "true",
repeat, warmup);
}
if (config_info["enable_log_info"] == "true") {
option->paddle_infer_option.enable_log_info = true;
}
if (config_info["device"] == "gpu") {
option->UseGpu(std::stoi(config_info["device_id"]));
if (config_info["backend"] == "ort") {
@@ -104,16 +116,14 @@ static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option,
return false;
}
} else if (config_info["device"] == "xpu") {
if (FLAGS_xpu_l3_cache >= 0) {
option->UseKunlunXin(std::stoi(config_info["device_id"]),
FLAGS_xpu_l3_cache);
} else {
option->UseKunlunXin(std::stoi(config_info["device_id"]),
std::stoi(config_info["xpu_l3_cache"]));
}
option->UseKunlunXin(std::stoi(config_info["device_id"]),
std::stoi(config_info["xpu_l3_cache"]));
if (config_info["backend"] == "ort") {
option->UseOrtBackend();
} else if (config_info["backend"] == "paddle") {
// Note: for Paddle Inference + XPU, as long as
// the model itself is fp16, it will automatically
// run at fp16 precision.
option->UsePaddleInferBackend();
} else if (config_info["backend"] == "lite") {
option->UsePaddleLiteBackend();
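With the override handled centrally in `UpdateBaseCustomFlags`, the previous if/else on `FLAGS_xpu_l3_cache` collapses into a single `UseKunlunXin(device_id, xpu_l3_cache)` call, and backend selection for XPU then proceeds exactly as for the other devices: `ort`, `paddle` (Paddle Inference, the subject of this PR), or `lite`.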