diff --git a/serving/Dockerfile b/serving/Dockerfile
index 71921dc8a..c3b5f40dc 100644
--- a/serving/Dockerfile
+++ b/serving/Dockerfile
@@ -20,7 +20,7 @@
 COPY --from=full /opt/tritonserver/lib /opt/tritonserver/lib
 COPY --from=full /opt/tritonserver/include /opt/tritonserver/include
 COPY --from=full /opt/tritonserver/backends/python /opt/tritonserver/backends/python
-COPY TensorRT-8.4.1.5 /opt/
+COPY serving/TensorRT-8.4.1.5 /opt/
 
 ENV TZ=Asia/Shanghai \
     DEBIAN_FRONTEND=noninteractive \
diff --git a/serving/Dockfile_cpu b/serving/Dockerfile_cpu
similarity index 93%
rename from serving/Dockfile_cpu
rename to serving/Dockerfile_cpu
index 390272d30..e6270bb59 100644
--- a/serving/Dockfile_cpu
+++ b/serving/Dockerfile_cpu
@@ -19,9 +19,9 @@ ENV TZ=Asia/Shanghai \
 
 RUN apt-get update && apt-get install -y --no-install-recommends apt-utils libgomp1 \
     && python3 -m pip install -U pip \
-    && python3 -m pip install paddlepaddle faster_tokenizer
+    && python3 -m pip install paddlepaddle paddlenlp faster_tokenizer
 
-COPY python/dist/*.whl /opt/fastdeploy/
+COPY python/dist/*.whl *.whl /opt/fastdeploy/
 
 RUN python3 -m pip install /opt/fastdeploy/*.whl \
     && rm -rf /opt/fastdeploy/*.whl
diff --git a/serving/README_CN.md b/serving/README_CN.md
index 5849af03f..afd0d7131 100644
--- a/serving/README_CN.md
+++ b/serving/README_CN.md
@@ -6,14 +6,31 @@
 FastDeploy provides end-to-end serving deployment built on [Triton Inference Server](https://github.com/triton-inference-server/server). The backend uses the high-performance FastDeploy Runtime module and chains it with the FastDeploy pre/post-processing modules, so models are served end to end. It is quick to deploy, easy to use, and delivers excellent performance.
 
-## End-to-end deployment examples
+## Prepare the environment
 
-- [YOLOv5 detection](../examples/vision/detection/yolov5/README.md)
-- [OCR ]()
-- [ERNIE 3.0 text classification]()
-- [UIE ]()
-- [Speech ]()
+### Requirements
+- Linux
+- For the GPU image, NVIDIA Driver >= 470 is required (for older Tesla-architecture GPUs such as the T4, NVIDIA Driver 418.40+, 440.33+, 450.51+ or 460.27+ also works)
 
-## Advanced documentation
-- [Model repository](docs/zh_CN/model_repository.md)
-- [Model configuration](docs/zh_CN/model_configuration.md)
+### Get the image
+
+#### CPU image
+The CPU image only supports serving Paddle/ONNX models on CPU; the supported inference backends are OpenVINO, Paddle Inference and ONNX Runtime.
+``` shell
+docker pull paddlepaddle/fastdeploy:0.3.0-cpu-only
+```
+
+#### GPU image
+The GPU image supports serving Paddle/ONNX models on GPU/CPU; the supported inference backends are OpenVINO, TensorRT, Paddle Inference and ONNX Runtime.
+``` shell
+docker pull paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10
+```
+
+You can also build the image yourself, following
+- [How to build the FastDeploy serving image](docs/zh_CN/compile.md)
+
+## Other documents
+- [Serving model repository](docs/zh_CN/model_repository.md) (how to prepare the model directory)
+- [Serving runtime configuration](docs/zh_CN/model_configuration.md) (runtime configuration options)
+- [Serving deployment demos](docs/zh_CN/demo.md)
+  - [YOLOv5 detection](../examples/vision/detection/yolov5/serving/README.md)
diff --git a/serving/docs/zh_CN/compile.md b/serving/docs/zh_CN/compile.md
new file mode 100644
index 000000000..3beae64fb
--- /dev/null
+++ b/serving/docs/zh_CN/compile.md
@@ -0,0 +1 @@
+# Building the FastDeploy serving image
diff --git a/serving/docs/zh_CN/demo.md b/serving/docs/zh_CN/demo.md
new file mode 100644
index 000000000..8fdfa08e7
--- /dev/null
+++ b/serving/docs/zh_CN/demo.md
@@ -0,0 +1 @@
+# Serving deployment demos
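The `COPY serving/TensorRT-8.4.1.5 /opt/` change in serving/Dockerfile implies that the image is now built with the repository root as the Docker build context, with the TensorRT 8.4.1.5 directory unpacked under serving/ beforehand (the build scripts below download it there). A minimal sketch of such a build invocation; the image tag and directory names are illustrative assumptions, not part of this patch:

```shell
# Assumed layout: run from the FastDeploy repository root, with TensorRT
# already unpacked at serving/TensorRT-8.4.1.5. The tag is illustrative only.
cd FastDeploy
docker build -f serving/Dockerfile -t fastdeploy-serving-gpu:dev .
```

Building from the repository root keeps the COPY paths in the Dockerfiles consistent with the `-v`pwd`/..` mounts introduced in the build scripts below.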
diff --git a/serving/scripts/build_fd_backend.sh b/serving/scripts/build_fd_backend.sh
index 5d402c5d9..b7aaae7b4 100644
--- a/serving/scripts/build_fd_backend.sh
+++ b/serving/scripts/build_fd_backend.sh
@@ -24,17 +24,17 @@ if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
 fi
 
 docker run -it --rm --name build_fd_backend \
-           -v`pwd`:/workspace/fastdeploy \
+           -v`pwd`/..:/workspace/fastdeploy \
            nvcr.io/nvidia/tritonserver:21.10-py3 \
            bash -c \
            'cd /workspace/fastdeploy/serving;
            rm -rf build; mkdir build; cd build;
            apt-get update; apt-get install -y --no-install-recommends rapidjson-dev;
-           export PATH=/workspace/fastdeploy/cmake-3.18.6-Linux-x86_64/bin:$PATH;
+           export PATH=/workspace/fastdeploy/serving/cmake-3.18.6-Linux-x86_64/bin:$PATH;
            cmake .. -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 -DTRITON_COMMON_REPO_TAG=r21.10 -DTRITON_CORE_REPO_TAG=r21.10 -DTRITON_BACKEND_REPO_TAG=r21.10;
            make -j`nproc`'
 else
 docker run -it --rm --name build_fd_backend \
-           -v`pwd`:/workspace/fastdeploy \
+           -v`pwd`/..:/workspace/fastdeploy \
            paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
            bash -c \
            'cd /workspace/fastdeploy/serving;
diff --git a/serving/scripts/build_fd_runtime.sh b/serving/scripts/build_fd_runtime.sh
index 7525ca6a2..723eeb366 100644
--- a/serving/scripts/build_fd_runtime.sh
+++ b/serving/scripts/build_fd_runtime.sh
@@ -29,7 +29,7 @@ if [ ! -d "./TensorRT-8.4.1.5/" ]; then
 fi
 
 docker run -it --rm --name build_fd_runtime \
-           -v`pwd`:/workspace/fastdeploy \
+           -v`pwd`/..:/workspace/fastdeploy \
            nvcr.io/nvidia/tritonserver:21.10-py3-min \
            bash -c \
            'cd /workspace/fastdeploy;
@@ -37,15 +37,15 @@ docker run -it --rm --name build_fd_runtime \
            apt-get update;
            apt-get install -y --no-install-recommends python3-dev python3-pip;
            ln -s /usr/bin/python3 /usr/bin/python;
-           export PATH=/workspace/fastdeploy/cmake-3.18.6-Linux-x86_64/bin:$PATH;
-           cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=ON -DTRT_DIRECTORY=${PWD}/../TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
+           export PATH=/workspace/fastdeploy/serving/cmake-3.18.6-Linux-x86_64/bin:$PATH;
+           cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
            make -j`nproc`;
            make install'
 else
 docker run -it --rm --name build_fd_runtime \
-           -v`pwd`:/workspace/fastdeploy \
+           -v`pwd`/..:/workspace/fastdeploy \
            paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
            bash -c \
            'cd /workspace/fastdeploy;
diff --git a/serving/scripts/build_fd_vison.sh b/serving/scripts/build_fd_vison.sh
index e0beb6e7f..59cd923b1 100644
--- a/serving/scripts/build_fd_vison.sh
+++ b/serving/scripts/build_fd_vison.sh
@@ -23,7 +23,7 @@ if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
 fi
 
 docker run -it --rm --name build_fd_vison \
-           -v`pwd`:/workspace/fastdeploy \
+           -v`pwd`/..:/workspace/fastdeploy \
            nvcr.io/nvidia/tritonserver:21.10-py3-min \
            bash -c \
            'cd /workspace/fastdeploy/python;
@@ -31,7 +31,7 @@ docker run -it --rm --name build_fd_vison \
            apt-get update;
            apt-get install -y --no-install-recommends patchelf python3-dev python3-pip;
            ln -s /usr/bin/python3 /usr/bin/python;
-           export PATH=/workspace/fastdeploy/cmake-3.18.6-Linux-x86_64/bin:$PATH;
+           export PATH=/workspace/fastdeploy/serving/cmake-3.18.6-Linux-x86_64/bin:$PATH;
            export WITH_GPU=ON;
            export ENABLE_ORT_BACKEND=OFF;
            export ENABLE_VISION=ON;
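All three build scripts now mount `pwd`/.. rather than `pwd` at /workspace/fastdeploy, so they evidently expect to be launched from the serving/ directory, where cmake and TensorRT are downloaded, with the parent FastDeploy checkout becoming the container workspace. A hypothetical invocation under that assumption (directory names are illustrative, and any arguments or environment variables the scripts use to switch between GPU and CPU builds are omitted):

```shell
# Assumption: invoked from the serving/ directory of a FastDeploy checkout,
# so `pwd` is <repo>/serving and `pwd`/.. is the repository root that the
# scripts mount at /workspace/fastdeploy inside the build containers.
cd FastDeploy/serving
bash scripts/build_fd_runtime.sh    # builds the FastDeploy runtime library
bash scripts/build_fd_backend.sh    # builds the Triton backend against it
bash scripts/build_fd_vison.sh      # builds the vision Python wheel
```

Running the runtime script first matches the backend's cmake flag `-DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3`, which points at the install tree the runtime build produces.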
diff --git a/serving/src/fastdeploy_runtime.cc b/serving/src/fastdeploy_runtime.cc
index b1ed8b6b0..fd541d5f9 100644
--- a/serving/src/fastdeploy_runtime.cc
+++ b/serving/src/fastdeploy_runtime.cc
@@ -315,6 +315,8 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
           //     &runtime_options_->trt_max_workspace_size));
         } else if (param_key == "cache_file") {
           runtime_options_->SetTrtCacheFile(value_string);
+        } else if (param_key == "use_paddle") {
+          runtime_options_->EnablePaddleToTrt();
         }
       }
     }
@@ -1025,12 +1027,13 @@ TRITONSERVER_Error* ModelInstanceState::SetInputTensors(
         input, &input_name, &input_datatype, &input_shape, &input_dims_count,
         nullptr, nullptr));
 
-    if (input_tensors_[input_idx].name != std::string(input_name)) {
+    int index = GetInfoIndex(std::string(input_name), input_tensor_infos_);
+    if (index < 0) {
       auto err = TRITONSERVER_ErrorNew(
           TRITONSERVER_ERROR_INTERNAL,
           (std::string("Input name [") + input_name +
            std::string("] is not one of the FD predictor input: ") +
-           input_tensors_[input_idx].name)
+           input_tensors_[index].name)
              .c_str());
       // SendErrorForResponses(responses, request_count, err);
       return err;
@@ -1075,12 +1078,12 @@ TRITONSERVER_Error* ModelInstanceState::SetInputTensors(
        memory_type = TRITONSERVER_MEMORY_CPU;
        device = fastdeploy::Device::CPU;
      }
-     input_tensors_[input_idx].Resize(
+     input_tensors_[index].Resize(
          batchn_shape, ConvertDataTypeToFD(input_datatype), input_name,
          device);
      collector->ProcessTensor(
          input_name,
-         reinterpret_cast<char*>(input_tensors_[input_idx].MutableData()),
-         input_tensors_[input_idx].Nbytes(), memory_type, device_id);
+         reinterpret_cast<char*>(input_tensors_[index].MutableData()),
+         input_tensors_[index].Nbytes(), memory_type, device_id);
     }
 
     // Finalize...
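Once an image has been built (or pulled as described in the README above), serving is started inside the container against a Triton-style model repository. A rough launch sketch; the `fastdeployserver` command name, its flags, and the ./models repository path are assumptions based on the Triton-derived server, not something this patch defines:

```shell
# Assumptions: a Triton-style model repository at ./models on the host, and a
# server binary named fastdeployserver inside the image that mirrors
# tritonserver's CLI; both are illustrative, not taken from this patch.
docker run -it --rm --gpus all --net=host \
    -v `pwd`/models:/models \
    paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10 \
    fastdeployserver --model-repository=/models
```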