diff --git a/benchmark/cpp/CMakeLists.txt b/benchmark/cpp/CMakeLists.txt index 9706587d3..c79e679c3 100755 --- a/benchmark/cpp/CMakeLists.txt +++ b/benchmark/cpp/CMakeLists.txt @@ -9,9 +9,12 @@ include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) include_directories(${FASTDEPLOY_INCS}) add_executable(benchmark_yolov5 ${PROJECT_SOURCE_DIR}/benchmark_yolov5.cc) +add_executable(benchmark_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_ppyolov8.cc) if(UNIX AND (NOT APPLE) AND (NOT ANDROID)) target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags pthread) + target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags pthread) else() target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags) + target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags) endif() diff --git a/benchmark/cpp/benchmark_ppyolov8.cc b/benchmark/cpp/benchmark_ppyolov8.cc new file mode 100644 index 000000000..4bd6e0df4 --- /dev/null +++ b/benchmark/cpp/benchmark_ppyolov8.cc @@ -0,0 +1,125 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "fastdeploy/benchmark/utils.h" +#include "fastdeploy/vision.h" +#include "flags.h" + +#ifdef WIN32 +const char sep = '\\'; +#else +const char sep = '/'; +#endif + +bool RunModel(std::string model_dir, std::string image_file, size_t warmup, + size_t repeats, size_t dump_period, std::string cpu_mem_file_name, + std::string gpu_mem_file_name) { + // Initialization + auto option = fastdeploy::RuntimeOption(); + if (!CreateRuntimeOption(&option)) { + PrintUsage(); + return false; + } + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "infer_cfg.yml"; + + if (FLAGS_profile_mode == "runtime") { + option.EnableProfiling(FLAGS_include_h2d_d2h, repeats, warmup); + } + auto model = fastdeploy::vision::detection::PaddleYOLOv8( + model_file, params_file, config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return false; + } + auto im = cv::imread(image_file); + // For Runtime + if (FLAGS_profile_mode == "runtime") { + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return false; + } + double profile_time = model.GetProfileTime() * 1000; + std::cout << "Runtime(ms): " << profile_time << "ms." << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; + } else { + // For End2End + // Step1: warm up for warmup times + std::cout << "Warmup " << warmup << " times..." << std::endl; + for (int i = 0; i < warmup; i++) { + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return false; + } + } + std::vector<float> end2end_statis; + // Step2: repeat for repeats times + std::cout << "Counting time..." 
<< std::endl; + fastdeploy::TimeCounter tc; + fastdeploy::vision::DetectionResult res; + for (int i = 0; i < repeats; i++) { + if (FLAGS_collect_memory_info && i % dump_period == 0) { + fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name); +#if defined(WITH_GPU) + fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name, + FLAGS_device_id); +#endif + } + tc.Start(); + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return false; + } + tc.End(); + end2end_statis.push_back(tc.Duration() * 1000); + } + float end2end = std::accumulate(end2end_statis.end() - repeats, + end2end_statis.end(), 0.f) / + repeats; + std::cout << "End2End(ms): " << end2end << "ms." << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; + } + + return true; +} + +int main(int argc, char* argv[]) { + google::ParseCommandLineFlags(&argc, &argv, true); + int repeats = FLAGS_repeat; + int warmup = FLAGS_warmup; + int dump_period = FLAGS_dump_period; + std::string cpu_mem_file_name = "result_cpu.txt"; + std::string gpu_mem_file_name = "result_gpu.txt"; + // Run model + if (RunModel(FLAGS_model, FLAGS_image, warmup, repeats, dump_period, + cpu_mem_file_name, gpu_mem_file_name) != true) { + exit(1); + } + if (FLAGS_collect_memory_info) { + float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name); + std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl; +#if defined(WITH_GPU) + float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name); + std::cout << "gpu_pss_mb: " << gpu_mem << "MB." 
<< std::endl; +#endif + } + return 0; +} \ No newline at end of file diff --git a/benchmark/cpp/benchmark_yolov5.cc b/benchmark/cpp/benchmark_yolov5.cc old mode 100755 new mode 100644 index d84292536..ae16dd8d8 --- a/benchmark/cpp/benchmark_yolov5.cc +++ b/benchmark/cpp/benchmark_yolov5.cc @@ -65,8 +65,10 @@ bool RunModel(std::string model_file, std::string image_file, size_t warmup, for (int i = 0; i < repeats; i++) { if (FLAGS_collect_memory_info && i % dump_period == 0) { fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name); +#if defined(WITH_GPU) fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name, FLAGS_device_id); +#endif } tc.Start(); if (!model.Predict(im, &res)) { @@ -102,9 +104,11 @@ int main(int argc, char* argv[]) { } if (FLAGS_collect_memory_info) { float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name); - float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name); std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl; +#if defined(WITH_GPU) + float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name); std::cout << "gpu_pss_mb: " << gpu_mem << "MB." 
<< std::endl; +#endif } return 0; } \ No newline at end of file diff --git a/benchmark/cpp/flags.h b/benchmark/cpp/flags.h index 3d35eb313..c9a8e8d91 100755 --- a/benchmark/cpp/flags.h +++ b/benchmark/cpp/flags.h @@ -27,13 +27,14 @@ DEFINE_int32(repeat, 1000, "Number of repeats for profiling."); DEFINE_string(profile_mode, "runtime", "runtime or end2end."); DEFINE_string(backend, "default", "The inference runtime backend, support: ['default', 'ort', " - "'paddle', 'ov', 'trt', 'paddle_trt']"); + "'paddle', 'ov', 'trt', 'paddle_trt', 'lite']"); DEFINE_int32(cpu_thread_nums, 8, "Set numbers of cpu thread."); DEFINE_bool( include_h2d_d2h, false, "Whether run profiling with h2d and d2h."); DEFINE_bool( use_fp16, false, - "Whether to use FP16 mode, only support 'trt' and 'paddle_trt' backend"); + "Whether to use FP16 mode, only support 'trt', 'paddle_trt' " + "and 'lite' backend"); DEFINE_bool( collect_memory_info, false, "Whether to collect memory info"); DEFINE_int32(dump_period, 100, "How often to collect memory info."); @@ -58,7 +59,6 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) { option->UsePaddleInferBackend(); } else if (FLAGS_backend == "trt" || FLAGS_backend == "paddle_trt") { option->UseTrtBackend(); - option->SetTrtInputShape("input", {1, 3, 112, 112}); if (FLAGS_backend == "paddle_trt") { option->EnablePaddleToTrt(); } @@ -81,11 +81,16 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) { option->UseOpenVINOBackend(); } else if (FLAGS_backend == "paddle") { option->UsePaddleInferBackend(); + } else if (FLAGS_backend == "lite") { + option->UsePaddleLiteBackend(); + if (FLAGS_use_fp16) { + option->EnableLiteFP16(); + } } else if (FLAGS_backend == "default") { return true; } else { std::cout << "While inference with CPU, only support " - "default/ort/ov/paddle now, " + "default/ort/ov/paddle/lite now, " << FLAGS_backend << " is not supported." 
<< std::endl; return false; } diff --git a/fastdeploy/runtime/backends/lite/lite_backend.h b/fastdeploy/runtime/backends/lite/lite_backend.h index bd738545a..15e71b50a 100644 --- a/fastdeploy/runtime/backends/lite/lite_backend.h +++ b/fastdeploy/runtime/backends/lite/lite_backend.h @@ -32,7 +32,7 @@ class LiteBackend : public BaseBackend { LiteBackend() {} virtual ~LiteBackend() = default; - bool Init(const RuntimeOption& option); + bool Init(const RuntimeOption& option) override; bool Infer(std::vector& inputs, std::vector* outputs, diff --git a/scripts/android/build_android_cpp_with_benchmark.sh b/scripts/android/build_android_cpp_with_benchmark.sh new file mode 100755 index 000000000..4a2c4084c --- /dev/null +++ b/scripts/android/build_android_cpp_with_benchmark.sh @@ -0,0 +1,118 @@ +#!/bin/bash +set -e +set +x + +# ------------------------------------------------------------------------------- +# mutable global variables +# ------------------------------------------------------------------------------- +TOOLCHAIN=clang # gcc/clang toolchain + +# ------------------------------------------------------------------------------- +# readonly global variables +# ------------------------------------------------------------------------------- +readonly ROOT_PATH=$(pwd) +readonly ANDROID_ABI=$1 +readonly ANDROID_PLATFORM="android-$2" +readonly BUILD_ROOT=build/Android +readonly BUILD_DIR=${BUILD_ROOT}/${ANDROID_ABI}-api-$2 + +# ------------------------------------------------------------------------------- +# tasks +# ------------------------------------------------------------------------------- +__make_build_dir() { + if [ ! -d "${BUILD_DIR}" ]; then + echo "-- [INFO] BUILD_DIR: ${BUILD_DIR} not exists, setup manually ..." + if [ ! -d "${BUILD_ROOT}" ]; then + mkdir -p "${BUILD_ROOT}" && echo "-- [INFO] Created ${BUILD_ROOT} !" + fi + mkdir -p "${BUILD_DIR}" && echo "-- [INFO] Created ${BUILD_DIR} !" 
+ else + echo "-- [INFO] Found BUILD_DIR: ${BUILD_DIR}" + fi +} + +__check_cxx_envs() { + if [ $LDFLAGS ]; then + echo "-- [INFO] Found LDFLAGS: ${LDFLAGS}, \c" + echo "unset it before crossing compiling ${ANDROID_ABI}" + unset LDFLAGS + fi + if [ $CPPFLAGS ]; then + echo "-- [INFO] Found CPPFLAGS: ${CPPFLAGS}, \c" + echo "unset it before crossing compiling ${ANDROID_ABI}" + unset CPPFLAGS + fi + if [ $CPLUS_INCLUDE_PATH ]; then + echo "-- [INFO] Found CPLUS_INCLUDE_PATH: ${CPLUS_INCLUDE_PATH}, \c" + echo "unset it before crossing compiling ${ANDROID_ABI}" + unset CPLUS_INCLUDE_PATH + fi + if [ $C_INCLUDE_PATH ]; then + echo "-- [INFO] Found C_INCLUDE_PATH: ${C_INCLUDE_PATH}, \c" + echo "unset it before crossing compiling ${ANDROID_ABI}" + unset C_INCLUDE_PATH + fi +} + +__set_android_ndk() { + if [ -z $ANDROID_NDK ]; then + echo "-- [INFO] ANDROID_NDK not exists, please setup manually ..." + exit 0 + else + echo "-- [INFO] Found ANDROID_NDK: ${ANDROID_NDK}" + fi + if [ "$ANDROID_NDK" ]; then + NDK_VERSION=$(echo $ANDROID_NDK | egrep -o "[0-9]{2}" | head -n 1) + if [ "$NDK_VERSION" -gt 17 ]; then + TOOLCHAIN=clang + fi + echo "-- [INFO] Checked ndk version: ${NDK_VERSION}" + echo "-- [INFO] Selected toolchain: ${TOOLCHAIN}" + fi +} + +__build_fastdeploy_android_shared() { + + local ANDROID_STL=c++_shared # c++_static + local ANDROID_TOOLCHAIN=${TOOLCHAIN} + local TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake + local FASDEPLOY_INSTALL_DIR="${ROOT_PATH}/${BUILD_DIR}/install" + cd "${BUILD_DIR}" && echo "-- [INFO] Working Dir: ${PWD}" + + cmake -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \ + -DCMAKE_BUILD_TYPE=MinSizeRel \ + -DANDROID_ABI=${ANDROID_ABI} \ + -DANDROID_NDK=${ANDROID_NDK} \ + -DANDROID_PLATFORM=${ANDROID_PLATFORM} \ + -DANDROID_STL=${ANDROID_STL} \ + -DANDROID_TOOLCHAIN=${ANDROID_TOOLCHAIN} \ + -DENABLE_ORT_BACKEND=OFF \ + -DENABLE_LITE_BACKEND=ON \ + -DENABLE_PADDLE2ONNX=OFF \ + -DENABLE_FLYCV=ON \ + -DENABLE_TEXT=OFF \ + 
-DENABLE_VISION=ON \ + -DBUILD_EXAMPLES=ON \ + -DENABLE_BENCHMARK=ON \ + -DWITH_OPENCV_STATIC=OFF \ + -DWITH_LITE_STATIC=OFF \ + -DWITH_OPENMP=OFF \ + -DCMAKE_INSTALL_PREFIX=${FASDEPLOY_INSTALL_DIR} \ + -Wno-dev ../../.. && make -j8 && make install + + echo "-- [INFO][built][${ANDROID_ABI}][${BUILD_DIR}/install]" +} + +main() { + __make_build_dir + __check_cxx_envs + __set_android_ndk + __build_fastdeploy_android_shared + exit 0 +} + +main + +# Usage: +# ./scripts/android/build_android_cpp_with_benchmark.sh arm64-v8a 21 +# ./scripts/android/build_android_cpp_with_benchmark.sh armeabi-v7a 21 diff --git a/scripts/linux/build_linux_x86_64_cpp_gpu.sh b/scripts/linux/build_linux_x86_64_cpp_gpu.sh index 6f2b4ed7d..9ae91921e 100755 --- a/scripts/linux/build_linux_x86_64_cpp_gpu.sh +++ b/scripts/linux/build_linux_x86_64_cpp_gpu.sh @@ -62,7 +62,7 @@ __build_fastdeploy_linux_x86_64_gpu_shared() { -DENABLE_OPENVINO_BACKEND=ON \ -DENABLE_PADDLE2ONNX=ON \ -DENABLE_VISION=ON \ - -DENABLE_BENCHMARK=ON \ + -DENABLE_BENCHMARK=OFF \ -DBUILD_EXAMPLES=ON \ -DCMAKE_INSTALL_PREFIX=${FASDEPLOY_INSTALL_DIR} \ -Wno-dev ../../.. && make -j8 && make install diff --git a/scripts/linux/build_linux_x86_64_cpp_gpu_with_benchmark.sh b/scripts/linux/build_linux_x86_64_cpp_gpu_with_benchmark.sh new file mode 100755 index 000000000..6f2b4ed7d --- /dev/null +++ b/scripts/linux/build_linux_x86_64_cpp_gpu_with_benchmark.sh @@ -0,0 +1,83 @@ +#!/bin/bash +set -e +set +x + +# ------------------------------------------------------------------------------- +# readonly global variables +# ------------------------------------------------------------------------------- +readonly ROOT_PATH=$(pwd) +readonly BUILD_ROOT=build/Linux +readonly BUILD_DIR="${BUILD_ROOT}/x86_64_gpu" + +# ------------------------------------------------------------------------------- +# tasks +# ------------------------------------------------------------------------------- +__make_build_dir() { + if [ ! 
-d "${BUILD_DIR}" ]; then + echo "-- [INFO] BUILD_DIR: ${BUILD_DIR} not exists, setup manually ..." + if [ ! -d "${BUILD_ROOT}" ]; then + mkdir -p "${BUILD_ROOT}" && echo "-- [INFO] Created ${BUILD_ROOT} !" + fi + mkdir -p "${BUILD_DIR}" && echo "-- [INFO] Created ${BUILD_DIR} !" + else + echo "-- [INFO] Found BUILD_DIR: ${BUILD_DIR}" + fi +} + +__check_cxx_envs() { + if [ $LDFLAGS ]; then + echo "-- [INFO] Found LDFLAGS: ${LDFLAGS}, \c" + echo "unset it before crossing compiling ${BUILD_DIR}" + unset LDFLAGS + fi + if [ $CPPFLAGS ]; then + echo "-- [INFO] Found CPPFLAGS: ${CPPFLAGS}, \c" + echo "unset it before crossing compiling ${BUILD_DIR}" + unset CPPFLAGS + fi + if [ $CPLUS_INCLUDE_PATH ]; then + echo "-- [INFO] Found CPLUS_INCLUDE_PATH: ${CPLUS_INCLUDE_PATH}, \c" + echo "unset it before crossing compiling ${BUILD_DIR}" + unset CPLUS_INCLUDE_PATH + fi + if [ $C_INCLUDE_PATH ]; then + echo "-- [INFO] Found C_INCLUDE_PATH: ${C_INCLUDE_PATH}, \c" + echo "unset it before crossing compiling ${BUILD_DIR}" + unset C_INCLUDE_PATH + fi +} + +__build_fastdeploy_linux_x86_64_gpu_shared() { + + local FASDEPLOY_INSTALL_DIR="${ROOT_PATH}/${BUILD_DIR}/install" + cd "${BUILD_DIR}" && echo "-- [INFO] Working Dir: ${PWD}" + + cmake -DCMAKE_BUILD_TYPE=Release \ + -DWITH_GPU=ON \ + -DTRT_DIRECTORY=${TRT_DIRECTORY} \ + -DCUDA_DIRECTORY=${CUDA_DIRECTORY} \ + -DENABLE_ORT_BACKEND=ON \ + -DENABLE_TRT_BACKEND=ON \ + -DENABLE_PADDLE_BACKEND=ON \ + -DENABLE_OPENVINO_BACKEND=ON \ + -DENABLE_PADDLE2ONNX=ON \ + -DENABLE_VISION=ON \ + -DENABLE_BENCHMARK=ON \ + -DBUILD_EXAMPLES=ON \ + -DCMAKE_INSTALL_PREFIX=${FASDEPLOY_INSTALL_DIR} \ + -Wno-dev ../../.. && make -j8 && make install + + echo "-- [INFO][built][x86_64_gpu][${BUILD_DIR}/install]" +} + +main() { + __make_build_dir + __check_cxx_envs + __build_fastdeploy_linux_x86_64_gpu_shared + exit 0 +} + +main + +# Usage: +# ./scripts/linux/build_linux_x86_64_cpp_gpu_with_benchmark.sh