Mirror of https://github.com/PaddlePaddle/FastDeploy.git
[XPU] Update XPU L3 Cache setting docs (#2001)
* [patchelf] fix patchelf error for inference xpu
* [serving] add xpu dockerfile and support fd server
* [Serving] support XPU + Triton
* [Dockerfile] update xpu triton docker file -> paddle 0.0.0
* [Dockerfile] add comments for xpu triton dockerfile
* [Doruntime] fix xpu infer error
* [XPU] update xpu dockerfile
* add xpu triton server docs
* update xpu triton server docs
* [XPU] Update XPU L3 Cache setting docs
@@ -34,6 +34,7 @@ static void UpdateBaseCustomFlags(

```cpp
  if (FLAGS_xpu_l3_cache >= 0) {
    config_info["xpu_l3_cache"] = std::to_string(FLAGS_xpu_l3_cache);
  }
  // update custom options for paddle backend
  if (FLAGS_enable_log_info) {
    config_info["enable_log_info"] = "true";
  } else {
```
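For context, `FLAGS_xpu_l3_cache` above follows the usual gflags-to-config-map pattern used by the benchmark tool. A minimal, self-contained sketch of that pattern — the flag's default value and the surrounding `main` are assumptions for illustration, not necessarily the benchmark's exact definition:

```cpp
// Sketch: forward a gflags-defined flag into a string-keyed config map,
// mirroring the pattern in the hunk above. Assumed, not verbatim from the repo.
#include <gflags/gflags.h>
#include <map>
#include <string>

DEFINE_int32(xpu_l3_cache, -1, "XPU L3 cache size in bytes; <0 means unset.");

int main(int argc, char** argv) {
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  std::map<std::string, std::string> config_info;
  // Only propagate the option when the user set a non-negative value.
  if (FLAGS_xpu_l3_cache >= 0) {
    config_info["xpu_l3_cache"] = std::to_string(FLAGS_xpu_l3_cache);
  }
  return 0;
}
```

With a definition like this, the cache size can be passed on the command line, e.g. `--xpu_l3_cache=67104768`.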
@@ -85,6 +85,8 @@ struct PaddleBackendOption {

```cpp
  bool enable_memory_optimize = true;
  /// Whether enable ir debug, default false
  bool switch_ir_debug = false;
  /// Whether enable ir optimize, default true
  bool switch_ir_optimize = true;

  /*
   * @brief IPU option, this will configure the IPU hardware, if inference model in IPU
```
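These `PaddleBackendOption` fields are plain public members, so user code can set them directly. A hedged sketch, assuming this FastDeploy release exposes the struct through `RuntimeOption::paddle_infer_option` (verify against your version's headers before relying on it):

```cpp
// Sketch: toggle the Paddle Inference backend options from user code.
// The paddle_infer_option member is assumed to be available in this release.
#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.UsePaddleInferBackend();
  // Keep IR optimization on (the default) and enable IR debug dumps.
  option.paddle_infer_option.switch_ir_optimize = true;
  option.paddle_infer_option.switch_ir_debug = true;
  option.paddle_infer_option.enable_memory_optimize = true;
  return 0;
}
```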
@@ -84,6 +84,9 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {

```cpp
#endif
  } else if (option.device == Device::KUNLUNXIN) {
#ifdef WITH_KUNLUNXIN
    // Note(qiuyanjun): For Paddle XPU L3 Cache, please set
    // export XPU_PADDLE_L3_SIZE=67104768 (XPU R200)
    // export FLAGS_fuse_multi_transformer_quant_type="float"
    config_.EnableXpu(option.xpu_option.kunlunxin_l3_workspace_size,
                      option.xpu_option.kunlunxin_locked,
                      option.xpu_option.kunlunxin_autotune,
```
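The `EnableXpu` call receives its arguments from the runtime option's XPU settings. A minimal sketch of configuring them from user code, assuming `RuntimeOption::UseKunlunXin` takes the device id followed by the L3 workspace size in bytes as its leading parameters in this release:

```cpp
// Sketch: request the XPU L3 workspace from user code. Parameter order of
// UseKunlunXin is an assumption; check the RuntimeOption header.
#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  // 67104768 bytes (~64 MB) matches the XPU R200 value quoted in the diff.
  option.UseKunlunXin(/*kunlunxin_id=*/0, /*l3_workspace_size=*/67104768);
  option.UsePaddleInferBackend();
  return 0;
}
```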
@@ -117,6 +120,9 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {

```cpp
  } else {
    config_.SetCpuMathLibraryNumThreads(option.cpu_thread_num);
  }
  // Note: SwitchIrOptim is enabled by default for paddle inference
  // backend. So, we don't need to set it manually.
  // config_.SwitchIrOptim(option.switch_ir_optimize);
}

bool PaddleBackend::Init(const RuntimeOption& runtime_option) {
```
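On the non-XPU fallback path, the math-library thread count also comes from the runtime option. A short sketch, assuming `SetCpuThreadNum` is the corresponding public setter in this FastDeploy release:

```cpp
// Sketch: set the CPU thread count that BuildOption forwards to
// SetCpuMathLibraryNumThreads above. Setter name assumed from the public API.
#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.UseCpu();
  option.SetCpuThreadNum(8);
  option.UsePaddleInferBackend();
  return 0;
}
```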
@@ -36,7 +36,12 @@ docker run -itd --name fd_xpu_server -v `pwd`/:/serving --net=host --privileged

```bash
docker exec -it fd_xpu_server /bin/bash
cd /opt/fastdeploy/benchmark/cpp/build

# Set the XPU L3 cache size (63 MB on the R200)
export XPU_PADDLE_L3_SIZE=67104768
# Run the benchmark to verify
./benchmark --model ResNet50_infer --config_path ../config/config.xpu.paddle.fp32.txt --enable_log_info

cd /serving
```

The output is: