[XPU] Update XPU L3 Cache setting docs (#2001)

* [patchelf] fix patchelf error for inference xpu * [serving] add xpu dockerfile and support fd server * [serving] add xpu dockerfile and support fd server * [Serving] support XPU + Tritron * [Serving] support XPU + Tritron * [Dockerfile] update xpu tritron docker file -> paddle 0.0.0 * [Dockerfile] update xpu tritron docker file -> paddle 0.0.0 * [Dockerfile] update xpu tritron docker file -> paddle 0.0.0 * [Dockerfile] add comments for xpu tritron dockerfile * [Doruntime] fix xpu infer error * [Doruntime] fix xpu infer error * [XPU] update xpu dockerfile * add xpu triton server docs * add xpu triton server docs * add xpu triton server docs * add xpu triton server docs * update xpu triton server docs * update xpu triton server docs * update xpu triton server docs * update xpu triton server docs * update xpu triton server docs * update xpu triton server docs * update xpu triton server docs * update xpu triton server docs * [XPU] Update XPU L3 Cache setting docs
2025-10-10 02:50:19 +08:00 · 2023-05-30 11:21:04 +08:00
parent 434b48dda5
commit 387c5695b3
4 changed files with 14 additions and 0 deletions
--- a/fastdeploy/runtime/backends/paddle/paddle_backend.cc
+++ b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
@@ -84,6 +84,9 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
 #endif
  } else if (option.device == Device::KUNLUNXIN) {
 #ifdef WITH_KUNLUNXIN
+    // Note(qiuyanjun): For Paddle XPU L3 Cache, please set
+    // export XPU_PADDLE_L3_SIZE=67104768 (XPU R200)
+    // export FLAGS_fuse_multi_transformer_quant_type="float"
    config_.EnableXpu(option.xpu_option.kunlunxin_l3_workspace_size,
                      option.xpu_option.kunlunxin_locked,
                      option.xpu_option.kunlunxin_autotune,
@@ -117,6 +120,9 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
  } else {
    config_.SetCpuMathLibraryNumThreads(option.cpu_thread_num);
  }
+  // Note: SwitchIrOptim is enabled by default for paddle inference
+  // backend. So, we don't need to set it manually.
+  // config_.SwitchIrOptim(option.switch_ir_optimize);
 }

 bool PaddleBackend::Init(const RuntimeOption& runtime_option) {