diff --git a/.github/workflows/_accuracy_test.yml b/.github/workflows/_accuracy_test.yml
index 8db47c6ef..a0ed22375 100644
--- a/.github/workflows/_accuracy_test.yml
+++ b/.github/workflows/_accuracy_test.yml
@@ -31,6 +31,7 @@ on:
 jobs:
   accuracy_tests:
     runs-on: [self-hosted, GPU-h20-1Cards]
+    timeout-minutes: 60
     steps:
       - name: Code Prepare
         shell: bash
@@ -147,7 +148,7 @@ jobs:
 
           git config --global --add safe.directory /workspace/FastDeploy
           cd FastDeploy
-          pushd test/ce/deploy
+          pushd tests/ce/deploy
           python3.10 deploy.py > dd.log 2>&1 &
           sleep 3
           curl -X POST http://0.0.0.0:${FLASK_PORT}/start \
@@ -157,7 +158,7 @@ jobs:
           curl -X POST http://localhost:${FLASK_PORT}/wait_for_infer?timeout=90
           popd
 
-          pushd test/ce/accuracy_cases
+          pushd tests/ce/accuracy_cases
           export URL=http://localhost:${FD_API_PORT}/v1/chat/completions
           export TEMPLATE=TOKEN_LOGPROB
           export MODEL_SIZE=0.3B
diff --git a/.github/workflows/_base_test.yml b/.github/workflows/_base_test.yml
index 4e6695b58..73a7c7dca 100644
--- a/.github/workflows/_base_test.yml
+++ b/.github/workflows/_base_test.yml
@@ -31,6 +31,7 @@ on:
 jobs:
   base_tests:
     runs-on: [self-hosted, GPU-h20-1Cards]
+    timeout-minutes: 60
     steps:
       - name: Code Prepare
         shell: bash
@@ -147,7 +148,7 @@ jobs:
 
           git config --global --add safe.directory /workspace/FastDeploy
           cd FastDeploy
-          pushd test/ce/deploy
+          pushd tests/ce/deploy
           python3.10 deploy.py > dd.log 2>&1 &
           sleep 3
           curl -X POST http://0.0.0.0:${FLASK_PORT}/start \
@@ -157,7 +158,7 @@ jobs:
           curl -X POST http://localhost:${FLASK_PORT}/wait_for_infer?timeout=90
           popd
 
-          pushd test/ce/server
+          pushd tests/ce/server
           export URL=http://localhost:${FD_API_PORT}/v1/chat/completions
           export TEMPLATE=TOKEN_LOGPROB
           TEST_EXIT_CODE=0
diff --git a/.github/workflows/_build_linux.yml b/.github/workflows/_build_linux.yml
index 583686289..28436e772 100644
--- a/.github/workflows/_build_linux.yml
+++ b/.github/workflows/_build_linux.yml
@@ -55,6 +55,7 @@ on:
 jobs:
   fd-build:
     runs-on: [self-hosted, GPU-Build]
+    timeout-minutes: 240
     outputs:
       wheel_path: ${{ steps.set_output.outputs.wheel_path }}
     steps:
diff --git a/.github/workflows/_pre_ce_test.yml b/.github/workflows/_pre_ce_test.yml
index b7415f9cb..f66098341 100644
--- a/.github/workflows/_pre_ce_test.yml
+++ b/.github/workflows/_pre_ce_test.yml
@@ -30,6 +30,7 @@ on:
 jobs:
   run_ce_cases:
     runs-on: [self-hosted, PRE_CE_RUN_2Card]
+    timeout-minutes: 60
     steps:
       - name: Print current runner name
         run: |
diff --git a/.github/workflows/_stable_test.yml b/.github/workflows/_stable_test.yml
new file mode 100644
index 000000000..05889805d
--- /dev/null
+++ b/.github/workflows/_stable_test.yml
@@ -0,0 +1,162 @@
+name: Stable Test
+description: "Run Stable Tests"
+
+on:
+  workflow_call:
+    inputs:
+      DOCKER_IMAGE:
+        description: "Build Images"
+        required: true
+        type: string
+        default: "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:cuda126-py310"
+      FASTDEPLOY_ARCHIVE_URL:
+        description: "URL of the compressed FastDeploy code archive."
+        required: true
+        type: string
+      FASTDEPLOY_WHEEL_URL:
+        description: "URL of the FastDeploy Wheel."
+        required: true
+        type: string
+      CACHE_DIR:
+        description: "Cache Dir Use"
+        required: false
+        type: string
+        default: ""
+      MODEL_CACHE_DIR:
+        description: "Cache Dir Use"
+        required: false
+        type: string
+        default: ""
+
+jobs:
+  stable_tests:
+    runs-on: [self-hosted, GPU-h1z1-2Cards]
+    timeout-minutes: 60
+    steps:
+      - name: Code Prepare
+        shell: bash
+        env:
+          docker_image: ${{ inputs.DOCKER_IMAGE }}
+          fd_archive_url: ${{ inputs.FASTDEPLOY_ARCHIVE_URL }}
+        run: |
+            set -x
+            REPO="https://github.com/${{ github.repository }}.git"
+            FULL_REPO="${{ github.repository }}"
+            REPO_NAME="${FULL_REPO##*/}"
+            BASE_BRANCH="${{ github.base_ref }}"
+
+            # Clean the repository directory before starting
+            docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
+            -e "REPO_NAME=${REPO_NAME}" \
+            ${docker_image} /bin/bash -c '
+              if [ -d ${REPO_NAME} ]; then
+                echo "Directory ${REPO_NAME} exists, removing it..."
+                rm -rf ${REPO_NAME}*
+              fi
+            '
+
+            wget -q ${fd_archive_url}
+            tar -xf FastDeploy.tar.gz
+            rm -rf FastDeploy.tar.gz
+            cd FastDeploy
+            git config --global user.name "FastDeployCI"
+            git config --global user.email "fastdeploy_ci@example.com"
+            git log -n 3 --oneline
+
+      - name: Run FastDeploy Stable Tests
+        shell: bash
+        env:
+          docker_image: ${{ inputs.DOCKER_IMAGE }}
+          fastdeploy_wheel_url: ${{ inputs.FASTDEPLOY_WHEEL_URL }}
+          CACHE_DIR: ${{ inputs.CACHE_DIR }}
+          MODEL_CACHE_DIR: ${{ inputs.MODEL_CACHE_DIR }}
+        run: |
+          runner_name="${{ runner.name }}"
+          CARD_ID=$(echo "${runner_name}" | awk -F'-' '{print $NF}')
+          DEVICES=$(echo "$CARD_ID" | fold -w1 | paste -sd,)
+          DEVICE_PORT=$(echo "$DEVICES" | cut -d',' -f1)
+
+          FLASK_PORT=$((42068 + DEVICE_PORT * 100))
+          FD_API_PORT=$((42088 + DEVICE_PORT * 100))
+          FD_ENGINE_QUEUE_PORT=$((42058 + DEVICE_PORT * 100))
+          FD_METRICS_PORT=$((42078 + DEVICE_PORT * 100))
+          FD_CACHE_QUEUE_PORT=$((42038 + DEVICE_PORT * 100))
+          FD_INFERENCE_MSG_QUEUE_ID=$(( 42048 + DEVICE_PORT * 100))
+          echo "Test ENV Parameter:"
+          echo "========================================================="
+          echo "FLASK_PORT=${FLASK_PORT}"
+          echo "FD_API_PORT=${FD_API_PORT}"
+          echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}"
+          echo "FD_METRICS_PORT=${FD_METRICS_PORT}"
+          echo "FD_INFERENCE_MSG_QUEUE_ID=${FD_INFERENCE_MSG_QUEUE_ID}"
+          echo "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}"
+          echo "DEVICES=${DEVICES}"
+          echo "========================================================="
+
+          CACHE_DIR="${CACHE_DIR:-$(dirname "$(dirname "${{ github.workspace }}")")}"
+          echo "CACHE_DIR is set to ${CACHE_DIR}"
+          if [ ! -f "${CACHE_DIR}/gitconfig" ]; then
+            touch "${CACHE_DIR}/gitconfig"
+          fi
+          if [ ! -d "${MODEL_CACHE_DIR}" ]; then
+            echo "Error: MODEL_CACHE_DIR '${MODEL_CACHE_DIR}' does not exist."
+            exit 1
+          fi
+
+          PORTS=($FLASK_PORT $FD_API_PORT $FD_ENGINE_QUEUE_PORT $FD_METRICS_PORT $FD_CACHE_QUEUE_PORT)
+          LOG_FILE="./port_cleanup_$(date +%Y%m%d_%H%M%S).log"
+          echo "==== LOG_FILE is ${LOG_FILE} ===="
+
+          echo "==== PORT CLEAN BEFORE TASK RUN ====" | tee -a $LOG_FILE
+
+          for port in "${PORTS[@]}"; do
+              PIDS=$(lsof -t -i :$port || true)
+              if [ -n "$PIDS" ]; then
+                  echo "Port $port is occupied by PID(s): $PIDS" | tee -a $LOG_FILE
+                  echo "$PIDS" | xargs -r kill -9
+                  echo "Port $port cleared" | tee -a $LOG_FILE
+              else
+                  echo "Port $port is free" | tee -a $LOG_FILE
+              fi
+          done
+
+          echo "==== PORT CLEAN COMPLETE ====" | tee -a $LOG_FILE
+
+          docker run --rm --ipc=host --pid=host --net=host \
+          -v $(pwd):/workspace \
+          -w /workspace \
+          -e fastdeploy_wheel_url=${fastdeploy_wheel_url} \
+          -e "FD_API_PORT=${FD_API_PORT}" \
+          -e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
+          -e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
+          -e "FLASK_PORT=${FLASK_PORT}" \
+          -e "FD_INFERENCE_MSG_QUEUE_ID=${FD_INFERENCE_MSG_QUEUE_ID}" \
+          -e "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}" \
+          -v "${MODEL_CACHE_DIR}:/MODELDATA" \
+          -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
+          -v "${CACHE_DIR}/.cache:/root/.cache" \
+          -v "${CACHE_DIR}/ConfigDir:/root/.config" \
+          -e TZ="Asia/Shanghai" \
+          --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
+          python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
+
+          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+
+          python -m pip install ${fastdeploy_wheel_url}
+          python -m pip install pytest
+
+          git config --global --add safe.directory /workspace/FastDeploy
+          cd FastDeploy
+          TEST_EXIT_CODE=0
+          pushd tests/ce/stable_cases
+          bash launch_model.sh /MODELDATA
+          bash run.sh || TEST_EXIT_CODE=1
+          popd
+          echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> /workspace/FastDeploy/exit_code.env
+          '
+          if [ -f ./FastDeploy/exit_code.env ]; then
+            source ./FastDeploy/exit_code.env
+            cat ./FastDeploy/exit_code.env >> $GITHUB_ENV
+          fi
+          echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}"
+          exit "${TEST_EXIT_CODE:-1}"  # a missing result file must not be reported as success
diff --git a/.github/workflows/_unit_test_coverage.yml b/.github/workflows/_unit_test_coverage.yml
index 7d02a1f9e..728c1d46c 100644
--- a/.github/workflows/_unit_test_coverage.yml
+++ b/.github/workflows/_unit_test_coverage.yml
@@ -31,6 +31,7 @@ on:
 jobs:
   run_tests_with_coverage:
     runs-on: [self-hosted, GPU-h1z1-2Cards]
+    timeout-minutes: 60
     outputs:
       diff_cov_file_url: ${{ steps.cov_upload.outputs.diff_cov_file_url }}
       unittest_failed_url: ${{ steps.cov_upload.outputs.unittest_failed_url }}
@@ -150,12 +151,12 @@ jobs:
           python -m pip install coverage
           python -m pip install diff-cover
           python -m pip install ${fd_wheel_url}
-          if [ -d "test/plugins" ]; then
-              cd test/plugins
+          if [ -d "tests/plugins" ]; then
+              cd tests/plugins
               python setup.py install
               cd ../..
           else
-              echo "Warning: test/plugins directory not found, skipping setup.py install"
+              echo "Warning: tests/plugins directory not found, skipping setup.py install"
           fi
           export COVERAGE_FILE=/workspace/FastDeploy/coveragedata/.coverage
           export COVERAGE_RCFILE=/workspace/FastDeploy/scripts/.coveragerc
@@ -204,7 +205,7 @@ jobs:
             echo "diff_cov_result_json_url=${DIFF_COV_JSON_URL}" >> $GITHUB_OUTPUT
             echo "diff_cov_result_json_url=${DIFF_COV_JSON_URL}" >> $GITHUB_ENV
           fi
-          unittest_result="test/failed_tests.log"
+          unittest_result="tests/failed_tests.log"
           if [ -s ${unittest_result} ];then
             python ${push_file} ${unittest_result} ${target_path}/UnitTestResult
             target_path_stripped="${target_path#paddle-github-action/}"
diff --git a/.github/workflows/pr_build_and_test.yml b/.github/workflows/pr_build_and_test.yml
index 8faa7436b..0d6383784 100644
--- a/.github/workflows/pr_build_and_test.yml
+++ b/.github/workflows/pr_build_and_test.yml
@@ -83,3 +83,13 @@ jobs:
       FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
       FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
       MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"
+
+  stable_test:
+    name: Run Stable Tests
+    needs: [clone,build]
+    uses: ./.github/workflows/_stable_test.yml
+    with:
+      DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate
+      FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
+      FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
+      MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"
diff --git a/scripts/coverage_run.sh b/scripts/coverage_run.sh
index 8f0e149c4..b5c6578a5 100644
--- a/scripts/coverage_run.sh
+++ b/scripts/coverage_run.sh
@@ -2,7 +2,7 @@
 DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 echo "$DIR"
 
-run_path="$DIR/../test/"
+run_path="$DIR/../tests/"
 cd ${run_path}
 ls
 
diff --git a/scripts/run_ci_gcu.sh b/scripts/run_ci_gcu.sh
index 172511216..46ceee8d7 100644
--- a/scripts/run_ci_gcu.sh
+++ b/scripts/run_ci_gcu.sh
@@ -97,7 +97,7 @@ cat server.log
 echo -e "\n"
 
 echo "Start inference..."
-python test/ci_use/GCU/run_ernie.py
+python tests/ci_use/GCU/run_ernie.py
 exit_code=$?
 echo -e "exit_code is ${exit_code}.\n"
 
diff --git a/scripts/run_ci_iluvatar.sh b/scripts/run_ci_iluvatar.sh
index dca1369bf..fe702be87 100644
--- a/scripts/run_ci_iluvatar.sh
+++ b/scripts/run_ci_iluvatar.sh
@@ -31,7 +31,7 @@ export INFERENCE_MSG_QUEUE_ID=232132
 export FD_DEBUG=1
 export PADDLE_XCCL_BACKEND=iluvatar_gpu
 export FD_SAMPLING_CLASS=rejection
-python test/ci_use/iluvatar_UT/run_ernie300B_4layer.py
+python tests/ci_use/iluvatar_UT/run_ernie300B_4layer.py
 exit_code=$?
 echo exit_code is ${exit_code}
 
diff --git a/scripts/run_ci_xpu.sh b/scripts/run_ci_xpu.sh
index b39f9482a..04d4b8699 100644
--- a/scripts/run_ci_xpu.sh
+++ b/scripts/run_ci_xpu.sh
@@ -77,7 +77,7 @@ done
 cat server.log
 
 # 执行服务化推理
-python test/ci_use/XPU_45T/run_45T.py
+python tests/ci_use/XPU_45T/run_45T.py
 exit_code=$?
 echo exit_code is ${exit_code}
 
@@ -143,7 +143,7 @@ done
 cat server.log
 
 # 执行服务化推理
-python test/ci_use/XPU_45T/run_45T.py
+python tests/ci_use/XPU_45T/run_45T.py
 kv_block_test_exit_code=$?
 echo kv_block_test_exit_code is ${kv_block_test_exit_code}
 
diff --git a/scripts/run_pre_ce.sh b/scripts/run_pre_ce.sh
index 726b91e85..67b06736e 100644
--- a/scripts/run_pre_ce.sh
+++ b/scripts/run_pre_ce.sh
@@ -9,7 +9,7 @@ python -m pip install -r requirements.txt
 python -m pip install jsonschema aistudio_sdk==0.3.5
 
 failed_files=()
-run_path="$DIR/../test/ci_use/"
+run_path="$DIR/../tests/ci_use/"
 
 # load all test files
 for subdir in "$run_path"*/; do
diff --git a/scripts/run_unittest.sh b/scripts/run_unittest.sh
index 576a27016..8d14790de 100644
--- a/scripts/run_unittest.sh
+++ b/scripts/run_unittest.sh
@@ -46,7 +46,7 @@ done <<< "$gpu_info"
 export CUDA_VISIBLE_DEVICES=${min_gpu}
 
 # 使用 find 命令查找 test 目录下的 .py 文件
-test_files=$(find test -type f -name "test*.py")
+test_files=$(find tests -type f -name "test*.py")
 
 # 遍历每个找到的测试文件
 for test_file in $test_files; do
diff --git a/test/ce/accuracy_cases/gsm8k.parquet b/tests/ce/accuracy_cases/gsm8k.parquet
similarity index 100%
rename from test/ce/accuracy_cases/gsm8k.parquet
rename to tests/ce/accuracy_cases/gsm8k.parquet
diff --git a/test/ce/accuracy_cases/gsm8k.py b/tests/ce/accuracy_cases/gsm8k.py
similarity index 100%
rename from test/ce/accuracy_cases/gsm8k.py
rename to tests/ce/accuracy_cases/gsm8k.py
diff --git a/test/ce/deploy/deploy.py b/tests/ce/deploy/deploy.py
similarity index 100%
rename from test/ce/deploy/deploy.py
rename to tests/ce/deploy/deploy.py
diff --git a/test/ce/performance/stress_tools.py b/tests/ce/performance/stress_tools.py
similarity index 100%
rename from test/ce/performance/stress_tools.py
rename to tests/ce/performance/stress_tools.py
diff --git a/test/ce/server/core/__init__.py b/tests/ce/server/core/__init__.py
similarity index 100%
rename from test/ce/server/core/__init__.py
rename to tests/ce/server/core/__init__.py
diff --git a/test/ce/server/core/logger.py b/tests/ce/server/core/logger.py
similarity index 100%
rename from test/ce/server/core/logger.py
rename to tests/ce/server/core/logger.py
diff --git a/test/ce/server/core/request_template.py b/tests/ce/server/core/request_template.py
similarity index 100%
rename from test/ce/server/core/request_template.py
rename to tests/ce/server/core/request_template.py
diff --git a/test/ce/server/core/utils.py b/tests/ce/server/core/utils.py
similarity index 100%
rename from test/ce/server/core/utils.py
rename to tests/ce/server/core/utils.py
diff --git a/test/ce/server/demo.py b/tests/ce/server/demo.py
similarity index 100%
rename from test/ce/server/demo.py
rename to tests/ce/server/demo.py
diff --git a/test/ce/server/requirements.txt b/tests/ce/server/requirements.txt
similarity index 100%
rename from test/ce/server/requirements.txt
rename to tests/ce/server/requirements.txt
diff --git a/test/ce/server/test_DDoS.py b/tests/ce/server/test_DDoS.py
similarity index 100%
rename from test/ce/server/test_DDoS.py
rename to tests/ce/server/test_DDoS.py
diff --git a/test/ce/server/test_base_chat.py b/tests/ce/server/test_base_chat.py
similarity index 100%
rename from test/ce/server/test_base_chat.py
rename to tests/ce/server/test_base_chat.py
diff --git a/test/ce/server/test_compare_top_logprobs.py b/tests/ce/server/test_compare_top_logprobs.py
similarity index 100%
rename from test/ce/server/test_compare_top_logprobs.py
rename to tests/ce/server/test_compare_top_logprobs.py
diff --git a/test/ce/server/test_completions.py b/tests/ce/server/test_completions.py
similarity index 81%
rename from test/ce/server/test_completions.py
rename to tests/ce/server/test_completions.py
index 12532c5f6..1ee7cbaa8 100644
--- a/test/ce/server/test_completions.py
+++ b/tests/ce/server/test_completions.py
@@ -9,22 +9,18 @@ Checking for /v1/completions parameters
 
 import json
 
-from core import (
-    TEMPLATE,
-    URL,
-    build_request_payload,
-    send_request,
-)
+from core import TEMPLATE, URL, build_request_payload, send_request
 
 URL = URL.replace("/v1/chat/completions", "/v1/completions")
 
+
 def test_completion_total_tokens():
     data = {
         "prompt": "你是谁",
         "stream": True,
         "stream_options": {"include_usage": True, "continuous_usage_stats": True},
     }
-    
+
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload, stream=True)
     last_data = None
@@ -33,10 +29,9 @@ def test_completion_total_tokens():
             break
         if line.strip() == "" or not line.startswith("data: "):
             continue
-        line = line[len("data: "):]
+        line = line[len("data: ") :]
         last_data = json.loads(line)
     usage = last_data["usage"]
     total_tokens = usage["completion_tokens"] + usage["prompt_tokens"]
     assert "total_tokens" in usage, "total_tokens 不存在"
-    assert usage["total_tokens"]== total_tokens, "total_tokens计数不正确"
-    
\ No newline at end of file
+    assert usage["total_tokens"] == total_tokens, "total_tokens计数不正确"
diff --git a/test/ce/server/test_evil_cases.py b/tests/ce/server/test_evil_cases.py
similarity index 83%
rename from test/ce/server/test_evil_cases.py
rename to tests/ce/server/test_evil_cases.py
index 508bfeabd..874b520b8 100644
--- a/test/ce/server/test_evil_cases.py
+++ b/tests/ce/server/test_evil_cases.py
@@ -130,15 +130,14 @@ def test_multilingual_input():
         "messages": [
             {
                 "role": "user",
-                "content": "这是一个包含多种语言的输入：Hello, 世界！Bonjour, le monde! Hola, el mundo! こんにちは、世界！"
+                "content": "这是一个包含多种语言的输入：Hello, 世界！Bonjour, le monde! Hola, el mundo! こんにちは、世界！",
             }
         ],
         "stream": False,
-
     }
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
-    
+
     # 验证响应是否包含有效的回复
     assert "choices" in resp, "未收到有效的回复"
     assert len(resp["choices"]) > 0, "回复为空"
@@ -150,18 +149,9 @@ def test_multilingual_input():
     print("多语言混合输入测试通过！")
 
 
-
 def test_too_long_input():
     """测试超长输入是否被正确处理"""
-    data = {
-        "messages": [
-            {
-                "role": "user",
-                "content": "a，" * 200000  # 超过最大输入长度
-            }
-        ],
-        "stream": False
-    }
+    data = {"messages": [{"role": "user", "content": "a，" * 200000}], "stream": False}  # 超过最大输入长度
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
     assert resp["detail"].get("object") == "error", "超长输入未被识别为错误"
@@ -170,15 +160,7 @@ def test_too_long_input():
 
 def test_empty_input():
     """测试空输入是否被正确处理"""
-    data = {
-        "messages": [
-            {
-                "role": "user",
-                "content": ""  # 空输入
-            }
-        ],
-        "stream": False
-    }
+    data = {"messages": [{"role": "user", "content": ""}], "stream": False}  # 空输入
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
     assert "error" not in resp.get("object"), "空输入被识别为错误"
@@ -191,7 +173,7 @@ def test_prompt_only_spaces():
         "messages": [
             {
                 "role": "user",
-                "content":  "    ",  # 纯空格
+                "content": "    ",  # 纯空格
             }
         ],
         "stream": False,
@@ -202,19 +184,11 @@ def test_prompt_only_spaces():
     assert resp.get("object") == "chat.completion", "应返回 chat.completion 对象"
     response_content = resp["choices"][0]["message"]["content"]
     assert len(response_content) > 0, "messages content为空，未正常生成回复"
-    
+
 
 def test_illegal_characters():
     """测试非法字符输入是否被正确处理"""
-    data = {
-        "messages": [
-            {
-                "role": "user",
-                "content": "非洲的首都是？：\x00\x01\x02"  # 非法字符
-            }
-        ],
-        "stream": False
-    }
+    data = {"messages": [{"role": "user", "content": "非洲的首都是？：\x00\x01\x02"}], "stream": False}  # 非法字符
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
     assert len(resp["choices"][0]["message"]["content"]) > 0, "非法字符输入影响模型回复"
@@ -224,10 +198,7 @@ def test_sql_injection():
     """测试 SQL 注入攻击是否被正确处理"""
     data = {
         "messages": [
-            {
-                "role": "user",
-                "content": "SELECT * FROM users WHERE username = 'admin' OR '1'='1';"  # SQL 注入攻击
-            }
+            {"role": "user", "content": "SELECT * FROM users WHERE username = 'admin' OR '1'='1';"}  # SQL 注入攻击
         ],
         "stream": False,
         "max_tokens": 50,
@@ -243,12 +214,7 @@ def test_sql_injection():
 def test_xss_attack():
     """测试 XSS 攻击是否被正确处理"""
     data = {
-        "messages": [
-            {
-                "role": "user",
-                "content": "<script>alert('XSS');</script>"  # XSS 攻击
-            }
-        ],
+        "messages": [{"role": "user", "content": "<script>alert('XSS');</script>"}],  # XSS 攻击
         "stream": False,
         "max_tokens": 50,
     }
@@ -264,14 +230,9 @@ def test_stop_empty_string():
     """测试 stop 参数为空字符串时的行为"""
     data = {
         "stream": False,
-        "messages": [
-            {
-                "role": "user",
-                "content": "非洲的首都是？"
-            }
-        ],
+        "messages": [{"role": "user", "content": "非洲的首都是？"}],
         "max_tokens": 10,
-        "stop": ""  # 空字符串
+        "stop": "",  # 空字符串
     }
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
@@ -283,14 +244,9 @@ def test_stop_multiple_strings():
     """测试 stop 参数为多个字符串时的行为"""
     data = {
         "stream": False,
-        "messages": [
-            {
-                "role": "user",
-                "content": "非洲的首都是？"
-            }
-        ],
+        "messages": [{"role": "user", "content": "非洲的首都是？"}],
         "max_tokens": 50,
-        "stop": ["。", "！", "？"]  # 多个停止条件
+        "stop": ["。", "！", "？"],  # 多个停止条件
     }
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
@@ -303,14 +259,9 @@ def test_stop_with_special_characters():
     """测试 stop 参数为包含特殊字符的字符串时的行为"""
     data = {
         "stream": False,
-        "messages": [
-            {
-                "role": "user",
-                "content": "非洲的首都是？"
-            }
-        ],
+        "messages": [{"role": "user", "content": "非洲的首都是？"}],
         "max_tokens": 50,
-        "stop": "!@#$%^&*()"  # 包含特殊字符
+        "stop": "!@#$%^&*()",  # 包含特殊字符
     }
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
@@ -323,14 +274,9 @@ def test_stop_with_newlines():
     """测试 stop 参数为包含换行符的字符串时的行为"""
     data = {
         "stream": False,
-        "messages": [
-            {
-                "role": "user",
-                "content": "非洲的首都是？"
-            }
-        ],
+        "messages": [{"role": "user", "content": "非洲的首都是？"}],
         "max_tokens": 50,
-        "stop": "\n\n"  # 包含换行符
+        "stop": "\n\n",  # 包含换行符
     }
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
@@ -345,12 +291,12 @@ def test_model_empty():
         "messages": [
             {
                 "role": "user",
-                "content":  "非洲的首都是？",  
+                "content": "非洲的首都是？",
             }
         ],
         "stream": False,
         "max_tokens": 10,
-        "model": ""  # 空模型
+        "model": "",  # 空模型
     }
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
@@ -365,12 +311,12 @@ def test_model_invalid():
         "messages": [
             {
                 "role": "user",
-                "content":  "非洲的首都是？",  
+                "content": "非洲的首都是？",
             }
         ],
         "stream": False,
         "max_tokens": 10,
-        "model": "non-existent-model"  # 不存在的模型
+        "model": "non-existent-model",  # 不存在的模型
     }
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
@@ -385,18 +331,20 @@ def test_model_with_special_characters():
         "messages": [
             {
                 "role": "user",
-                "content":  "非洲的首都是？",
+                "content": "非洲的首都是？",
             }
         ],
         "stream": False,
         "max_tokens": 10,
-        "model": "!@#"  # 包含特殊字符
+        "model": "!@#",  # 包含特殊字符
     }
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
     assert resp.get("object") == "chat.completion", "不存在的 model 应触发校验异常"
     assert "!@#" in resp.get("model"), "未返回预期的 model 信息"
-    assert len(resp.get("choices")[0].get("message").get("content")) > 0, "模型名为model 参数为非法格式，未正常生成回复"
+    assert (
+        len(resp.get("choices")[0].get("message").get("content")) > 0
+    ), "模型名为model 参数为非法格式，未正常生成回复"
 
 
 def test_max_tokens_negative():
@@ -405,7 +353,7 @@ def test_max_tokens_negative():
         "messages": [
             {
                 "role": "user",
-                "content":  "非洲的首都是？",  
+                "content": "非洲的首都是？",
             }
         ],
         "stream": False,
@@ -414,7 +362,7 @@ def test_max_tokens_negative():
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
     assert resp.get("detail").get("object") == "error", "max_tokens < 0 未触发校验异常"
-    assert 'max_tokens can be defined [1,' in resp.get("detail").get("message"), "未返回预期的 max_tokens 错误信息"
+    assert "max_tokens can be defined [1," in resp.get("detail").get("message"), "未返回预期的 max_tokens 错误信息"
 
 
 def test_max_tokens_min():
@@ -423,7 +371,7 @@ def test_max_tokens_min():
         "messages": [
             {
                 "role": "user",
-                "content":  "非洲的首都是？",  
+                "content": "非洲的首都是？",
             }
         ],
         "stream": False,
@@ -431,8 +379,10 @@ def test_max_tokens_min():
     }
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
-    assert resp.get('detail').get("object") == "error", "max_tokens未0时API未拦截住"
-    assert "reasoning_max_tokens must be between max_tokens and 1" in resp.get('detail').get("message", ""), "未返回预期的 max_tokens 达到异常值0 的 错误信息"
+    assert resp.get("detail").get("object") == "error", "max_tokens未0时API未拦截住"
+    assert "reasoning_max_tokens must be between max_tokens and 1" in resp.get("detail").get(
+        "message", ""
+    ), "未返回预期的 max_tokens 达到异常值0 的 错误信息"
 
 
 def test_max_tokens_non_integer():
@@ -441,7 +391,7 @@ def test_max_tokens_non_integer():
         "messages": [
             {
                 "role": "user",
-                "content":  "非洲的首都是？",
+                "content": "非洲的首都是？",
             }
         ],
         "stream": False,
@@ -449,5 +399,6 @@ def test_max_tokens_non_integer():
     }
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
-    assert resp.get('detail')[0].get("msg") == "Input should be a valid integer, got a number with a fractional part", "未返回预期的 max_tokens 为非整数的错误信息"
-
+    assert (
+        resp.get("detail")[0].get("msg") == "Input should be a valid integer, got a number with a fractional part"
+    ), "未返回预期的 max_tokens 为非整数的错误信息"
diff --git a/test/ce/server/test_logprobs.py b/tests/ce/server/test_logprobs.py
similarity index 100%
rename from test/ce/server/test_logprobs.py
rename to tests/ce/server/test_logprobs.py
diff --git a/test/ce/server/test_params_boundary.py b/tests/ce/server/test_params_boundary.py
similarity index 100%
rename from test/ce/server/test_params_boundary.py
rename to tests/ce/server/test_params_boundary.py
diff --git a/test/ce/server/test_repetition_early_stop.py b/tests/ce/server/test_repetition_early_stop.py
similarity index 100%
rename from test/ce/server/test_repetition_early_stop.py
rename to tests/ce/server/test_repetition_early_stop.py
diff --git a/test/ce/server/test_seed_usage.py b/tests/ce/server/test_seed_usage.py
similarity index 100%
rename from test/ce/server/test_seed_usage.py
rename to tests/ce/server/test_seed_usage.py
diff --git a/test/ce/server/test_stream.py b/tests/ce/server/test_stream.py
similarity index 100%
rename from test/ce/server/test_stream.py
rename to tests/ce/server/test_stream.py
diff --git a/tests/ce/stable_cases/launch_model.sh b/tests/ce/stable_cases/launch_model.sh
new file mode 100644
index 000000000..1850dc944
--- /dev/null
+++ b/tests/ce/stable_cases/launch_model.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+MODEL_PATH="${1}/TP2"
+FD_API_PORT=${FD_API_PORT:-8000}
+FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT:-8001}
+FD_METRICS_PORT=${FD_METRICS_PORT:-8002}
+FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT:-8003}
+
+
+# MODEL_PATH always ends in "/TP2" and is never empty, so validate the raw argument.
+if [ -z "$1" ]; then
+  echo "❌ 用法: $0 <模型路径>"
+  exit 1
+fi
+
+if [ ! -d "$MODEL_PATH" ]; then
+  echo "❌ 错误：模型目录不存在: $MODEL_PATH"
+  exit 1
+fi
+
+echo "使用模型: $MODEL_PATH"
+
+
+# 清理日志
+rm -rf log/*
+mkdir -p log
+
+# 环境变量
+export CUDA_VISIBLE_DEVICES=0,1
+export INFERENCE_MSG_QUEUE_ID=${FD_INFERENCE_MSG_QUEUE_ID:-7679}
+export ENABLE_V1_KVCACHE_SCHEDULER=1
+
+
+python -m fastdeploy.entrypoints.openai.api_server \
+       --tensor-parallel-size 2 \
+       --port ${FD_API_PORT} \
+       --engine-worker-queue-port ${FD_ENGINE_QUEUE_PORT} \
+       --metrics-port ${FD_METRICS_PORT} \
+       --cache-queue-port ${FD_CACHE_QUEUE_PORT} \
+       --quantization wint8 \
+       --max-model-len 32768 \
+       --max-num-seqs 256 \
+       --gpu-memory-utilization 0.9 \
+       --model "$MODEL_PATH" \
+       --load-strategy ipc_snapshot \
+       --dynamic-load-weight &
+
+success=0
+for i in $(seq 1 300); do
+    if (echo > /dev/tcp/127.0.0.1/$FD_API_PORT) >/dev/null 2>&1; then
+        echo "API server is up on port $FD_API_PORT on iteration $i"
+        success=1
+        break
+    fi
+    sleep 1
+done
+if [ $success -eq 0 ]; then
+    echo "超时: API 服务在 300 秒内未启动 (端口 $FD_API_PORT)"
+    exit 1  # report the startup timeout to the caller instead of exiting 0
+fi
diff --git a/tests/ce/stable_cases/run.sh b/tests/ce/stable_cases/run.sh
new file mode 100644
index 000000000..6b7f939bb
--- /dev/null
+++ b/tests/ce/stable_cases/run.sh
@@ -0,0 +1,160 @@
+#!/bin/bash
+
+# ================== Configuration Parameters ==================
+FD_API_PORT=${FD_API_PORT:-8000}
+FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT:-8001}
+FD_METRICS_PORT=${FD_METRICS_PORT:-8002}
+FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT:-8003}
+
+
+HOST="0.0.0.0"
+PORT="${FD_API_PORT}"  # Must match the API port used by the launch script
+BASE_URL="http://$HOST:$PORT"
+
+TOTAL_ROUNDS=30  # Number of clear / update / chat rounds executed by the main loop
+CHAT_REQUESTS_PER_ROUND=5  # Chat completion requests sent in each round
+export CUDA_VISIBLE_DEVICES=0,1  # GPU IDs whose memory usage is checked each round
+MAX_MEMORY_MB=10240  # 10GB; per-GPU upper bound enforced by check_gpu_memory
+
+# ====================================================
+# assert_eq ACTUAL EXPECTED MESSAGE
+# Exit with status 1 unless ACTUAL and EXPECTED are identical strings.
+assert_eq() {
+    local got="$1" want="$2" description="$3"
+    if [[ "$got" == "$want" ]]; then
+        return 0
+    fi
+    echo "Assertion failed: $description" >&2
+    exit 1
+}
+
+# assert_true CONDITION MESSAGE: exit 1 unless CONDITION is the string "1" or "true".
+assert_true() {
+    local flag="$1" description="$2"
+    case "$flag" in
+        1|true) return 0 ;;
+    esac
+    echo "Assertion failed: $description" >&2
+    exit 1
+}
+
+# assert_success EXIT_CODE MESSAGE
+# Exit with status 1 when EXIT_CODE is a non-zero integer.
+assert_success() {
+    local rc="$1" description="$2"
+    # Only a non-zero integer falls through to the failure report.
+    [ "$rc" -ne 0 ] || return 0
+    echo "Assertion failed: $description" >&2
+    exit 1
+}
+
+# curl_get_status CURL_ARGS...
+# Sets globals http_code (status appended by -w) and response_body (all output before it).
+curl_get_status() {
+    local raw
+    raw=$(curl -s -w "%{http_code}" "$@")
+    response_body="${raw%???}" http_code="${raw: -3}"
+}
+
+# ====================================================
+# Get visible GPU IDs from CUDA_VISIBLE_DEVICES
+# ====================================================
+
+# Print the purely numeric entries of CUDA_VISIBLE_DEVICES, space-separated, on one line.
+get_visible_gpu_ids() {
+    # Locals only: a direct (non-subshell) call must not clobber the caller's variables.
+    local ids=() entries entry
+    IFS=',' read -ra entries <<< "$CUDA_VISIBLE_DEVICES"
+    for entry in "${entries[@]}"; do
+        [[ "$entry" =~ ^[0-9]+$ ]] && ids+=("$entry")
+    done
+    echo "${ids[@]}"
+}
+
+# ====================================================
+# Check GPU memory usage (must not exceed MAX_MEMORY_MB)
+# ====================================================
+
+# Fail the run (exit 1) if any GPU in CUDA_VISIBLE_DEVICES uses more than MAX_MEMORY_MB.
+check_gpu_memory() {
+    local gpu_ids gpu_id memory_used
+    gpu_ids=($(get_visible_gpu_ids))
+    if [ ${#gpu_ids[@]} -eq 0 ]; then
+        echo "Assertion failed: No valid GPU IDs in CUDA_VISIBLE_DEVICES='$CUDA_VISIBLE_DEVICES'" >&2
+        exit 1
+    fi
+
+    for gpu_id in "${gpu_ids[@]}"; do
+        memory_used=$(nvidia-smi -i "$gpu_id" --query-gpu=memory.used --format=csv,noheader,nounits 2>/dev/null) || \
+            assert_success $? "Failed to query GPU $gpu_id memory usage"
+
+        # Anchor both ends: a partial match (e.g. "12abc") would pass validation and
+        # then trigger an arithmetic error below instead of a clean assertion failure.
+        if ! [[ "$memory_used" =~ ^[0-9]+$ ]]; then
+            echo "Assertion failed: Invalid memory value for GPU $gpu_id: $memory_used" >&2
+            exit 1
+        fi
+        assert_true "$(( memory_used <= MAX_MEMORY_MB ))" \
+            "GPU $gpu_id memory $memory_used MB > $MAX_MEMORY_MB MB"
+    done
+}
+
+# ====================================================
+
+# Main loop: per round, clear weights -> check GPU memory -> reload weights -> chat requests.
+for round in $(seq 1 $TOTAL_ROUNDS); do
+    echo "=== Round $round / $TOTAL_ROUNDS ==="
+
+    # Step 1: Clear loaded weights
+    echo "[Step 1] Clearing load weight..."
+    curl_get_status -i "$BASE_URL/clear_load_weight"
+    assert_eq "$http_code" "200" "/clear_load_weight failed with HTTP $http_code"
+
+    # Step 2: Check GPU memory usage
+    echo "[Step 2] Checking GPU memory..."
+    check_gpu_memory
+
+    # Step 3: Update model weights
+    echo "[Step 3] Updating model weight..."
+    curl_get_status -i "$BASE_URL/update_model_weight"
+    assert_eq "$http_code" "200" "/update_model_weight failed with HTTP $http_code"
+
+    # Step 4: Send chat completion requests
+    echo "[Step 4] Sending $CHAT_REQUESTS_PER_ROUND chat completions..."
+    for i in $(seq 1 $CHAT_REQUESTS_PER_ROUND); do
+        echo "  Request $i / $CHAT_REQUESTS_PER_ROUND"
+        response=$(curl -s -X POST "$BASE_URL/v1/chat/completions" \
+            -H "Content-Type: application/json" \
+            -d '{"messages": [{"role": "user", "content": "Hello!"}]}')
+
+        # Extract the first "content" string (whitespace after the colon is tolerated)
+        content=$(echo "$response" | \
+            grep -o '"content":[[:space:]]*"[^"]*"' | \
+            head -1 | \
+            sed 's/^"content":[[:space:]]*"//' | \
+            sed 's/"$//')
+        if [ -z "$content" ]; then
+            # Fallback: greedy sed match, i.e. the last "content" string on a line
+            content=$(echo "$response" | \
+                sed -n 's/.*"content":[[:space:]]*"\([^"]*\)".*/\1/p' | \
+                head -1)
+        fi
+
+        # Check if content is empty or null
+        if [ -z "$content" ] || [ "$content" = "null" ]; then
+            echo "Failed: Empty or null 'content' in response" >&2
+            echo "Raw response:" >&2
+            echo "$response" >&2
+            exit 1
+        fi
+
+        echo "Received non-empty response"
+        echo -e "\n---\n"
+    done
+
+    echo "Round $round completed."
+    # -e so that the trailing \n is printed as a newline instead of literally
+    echo -e "==================================\n"
+done
+
+echo "All $TOTAL_ROUNDS rounds completed successfully."
diff --git a/test/ci_use/EB_Lite/test_EB_Lite_serving.py b/tests/ci_use/EB_Lite/test_EB_Lite_serving.py
similarity index 100%
rename from test/ci_use/EB_Lite/test_EB_Lite_serving.py
rename to tests/ci_use/EB_Lite/test_EB_Lite_serving.py
diff --git a/test/ci_use/EB_Lite_mtp/test_EB_Lite_serving_mtp.py b/tests/ci_use/EB_Lite_mtp/test_EB_Lite_serving_mtp.py
similarity index 100%
rename from test/ci_use/EB_Lite_mtp/test_EB_Lite_serving_mtp.py
rename to tests/ci_use/EB_Lite_mtp/test_EB_Lite_serving_mtp.py
diff --git a/test/ci_use/EB_VL_Lite/baseline.txt b/tests/ci_use/EB_VL_Lite/baseline.txt
similarity index 100%
rename from test/ci_use/EB_VL_Lite/baseline.txt
rename to tests/ci_use/EB_VL_Lite/baseline.txt
diff --git a/test/ci_use/EB_VL_Lite/rollout_model.py b/tests/ci_use/EB_VL_Lite/rollout_model.py
similarity index 100%
rename from test/ci_use/EB_VL_Lite/rollout_model.py
rename to tests/ci_use/EB_VL_Lite/rollout_model.py
diff --git a/test/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py b/tests/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py
similarity index 100%
rename from test/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py
rename to tests/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py
diff --git a/test/ci_use/EB_VL_Lite/test_rollout_model.py b/tests/ci_use/EB_VL_Lite/test_rollout_model.py
similarity index 100%
rename from test/ci_use/EB_VL_Lite/test_rollout_model.py
rename to tests/ci_use/EB_VL_Lite/test_rollout_model.py
diff --git a/test/ci_use/GCU/run_ernie.py b/tests/ci_use/GCU/run_ernie.py
similarity index 100%
rename from test/ci_use/GCU/run_ernie.py
rename to tests/ci_use/GCU/run_ernie.py
diff --git a/test/ci_use/Qwen2-7B-Instruct_offline/test_Qwen2-7B-Instruct_offline.py b/tests/ci_use/Qwen2-7B-Instruct_offline/test_Qwen2-7B-Instruct_offline.py
similarity index 100%
rename from test/ci_use/Qwen2-7B-Instruct_offline/test_Qwen2-7B-Instruct_offline.py
rename to tests/ci_use/Qwen2-7B-Instruct_offline/test_Qwen2-7B-Instruct_offline.py
diff --git a/test/ci_use/Qwen2-7B-Instruct_serving/test_Qwen2-7B-Instruct_serving.py b/tests/ci_use/Qwen2-7B-Instruct_serving/test_Qwen2-7B-Instruct_serving.py
similarity index 100%
rename from test/ci_use/Qwen2-7B-Instruct_serving/test_Qwen2-7B-Instruct_serving.py
rename to tests/ci_use/Qwen2-7B-Instruct_serving/test_Qwen2-7B-Instruct_serving.py
diff --git a/test/ci_use/Qwen3-MoE/test_Qwen3-MoE_serving.py b/tests/ci_use/Qwen3-MoE/test_Qwen3-MoE_serving.py
similarity index 100%
rename from test/ci_use/Qwen3-MoE/test_Qwen3-MoE_serving.py
rename to tests/ci_use/Qwen3-MoE/test_Qwen3-MoE_serving.py
diff --git a/test/ci_use/XPU_45T/run_45T.py b/tests/ci_use/XPU_45T/run_45T.py
similarity index 100%
rename from test/ci_use/XPU_45T/run_45T.py
rename to tests/ci_use/XPU_45T/run_45T.py
diff --git a/test/ci_use/iluvatar_UT/run_ernie300B_4layer.py b/tests/ci_use/iluvatar_UT/run_ernie300B_4layer.py
similarity index 100%
rename from test/ci_use/iluvatar_UT/run_ernie300B_4layer.py
rename to tests/ci_use/iluvatar_UT/run_ernie300B_4layer.py
diff --git a/test/distributed/custom_all_reduce.py b/tests/distributed/custom_all_reduce.py
similarity index 100%
rename from test/distributed/custom_all_reduce.py
rename to tests/distributed/custom_all_reduce.py
diff --git a/test/distributed/test_custom_all_reduce.py b/tests/distributed/test_custom_all_reduce.py
similarity index 100%
rename from test/distributed/test_custom_all_reduce.py
rename to tests/distributed/test_custom_all_reduce.py
diff --git a/test/entrypoints/openai/test_build_sample_logprobs.py b/tests/entrypoints/openai/test_build_sample_logprobs.py
similarity index 100%
rename from test/entrypoints/openai/test_build_sample_logprobs.py
rename to tests/entrypoints/openai/test_build_sample_logprobs.py
diff --git a/test/entrypoints/openai/test_completion_echo.py b/tests/entrypoints/openai/test_completion_echo.py
similarity index 100%
rename from test/entrypoints/openai/test_completion_echo.py
rename to tests/entrypoints/openai/test_completion_echo.py
diff --git a/test/entrypoints/openai/test_serving_completion.py b/tests/entrypoints/openai/test_serving_completion.py
similarity index 100%
rename from test/entrypoints/openai/test_serving_completion.py
rename to tests/entrypoints/openai/test_serving_completion.py
diff --git a/test/entrypoints/test_generation.py b/tests/entrypoints/test_generation.py
similarity index 100%
rename from test/entrypoints/test_generation.py
rename to tests/entrypoints/test_generation.py
diff --git a/test/graph_optimization/test_cuda_graph_dynamic_subgraph.py b/tests/graph_optimization/test_cuda_graph_dynamic_subgraph.py
similarity index 100%
rename from test/graph_optimization/test_cuda_graph_dynamic_subgraph.py
rename to tests/graph_optimization/test_cuda_graph_dynamic_subgraph.py
diff --git a/test/graph_optimization/test_cuda_graph_spec_decode.py b/tests/graph_optimization/test_cuda_graph_spec_decode.py
similarity index 100%
rename from test/graph_optimization/test_cuda_graph_spec_decode.py
rename to tests/graph_optimization/test_cuda_graph_spec_decode.py
diff --git a/test/input/test_ernie_processor.py b/tests/input/test_ernie_processor.py
similarity index 100%
rename from test/input/test_ernie_processor.py
rename to tests/input/test_ernie_processor.py
diff --git a/test/layers/test_append_attention.py b/tests/layers/test_append_attention.py
similarity index 100%
rename from test/layers/test_append_attention.py
rename to tests/layers/test_append_attention.py
diff --git a/test/layers/test_min_sampling.py b/tests/layers/test_min_sampling.py
similarity index 100%
rename from test/layers/test_min_sampling.py
rename to tests/layers/test_min_sampling.py
diff --git a/test/layers/test_quant_layer.py b/tests/layers/test_quant_layer.py
similarity index 100%
rename from test/layers/test_quant_layer.py
rename to tests/layers/test_quant_layer.py
diff --git a/test/layers/test_repetition_early_stopper.py b/tests/layers/test_repetition_early_stopper.py
similarity index 100%
rename from test/layers/test_repetition_early_stopper.py
rename to tests/layers/test_repetition_early_stopper.py
diff --git a/test/layers/test_sampler.py b/tests/layers/test_sampler.py
similarity index 100%
rename from test/layers/test_sampler.py
rename to tests/layers/test_sampler.py
diff --git a/test/model_executor/test_forward_meta_str.py b/tests/model_executor/test_forward_meta_str.py
similarity index 100%
rename from test/model_executor/test_forward_meta_str.py
rename to tests/model_executor/test_forward_meta_str.py
diff --git a/test/operators/test_air_top_p_sampling.py b/tests/operators/test_air_top_p_sampling.py
similarity index 100%
rename from test/operators/test_air_top_p_sampling.py
rename to tests/operators/test_air_top_p_sampling.py
diff --git a/test/operators/test_cutlass_scaled_mm.py b/tests/operators/test_cutlass_scaled_mm.py
similarity index 100%
rename from test/operators/test_cutlass_scaled_mm.py
rename to tests/operators/test_cutlass_scaled_mm.py
diff --git a/test/operators/test_deqant_int8_cpp_extension.py b/tests/operators/test_deqant_int8_cpp_extension.py
similarity index 100%
rename from test/operators/test_deqant_int8_cpp_extension.py
rename to tests/operators/test_deqant_int8_cpp_extension.py
diff --git a/test/operators/test_dequant.py b/tests/operators/test_dequant.py
similarity index 100%
rename from test/operators/test_dequant.py
rename to tests/operators/test_dequant.py
diff --git a/test/operators/test_flash_mask_attn.py b/tests/operators/test_flash_mask_attn.py
similarity index 100%
rename from test/operators/test_flash_mask_attn.py
rename to tests/operators/test_flash_mask_attn.py
diff --git a/test/operators/test_fp8_fp8_half_cuda_core_gemm.py b/tests/operators/test_fp8_fp8_half_cuda_core_gemm.py
similarity index 100%
rename from test/operators/test_fp8_fp8_half_cuda_core_gemm.py
rename to tests/operators/test_fp8_fp8_half_cuda_core_gemm.py
diff --git a/test/operators/test_fused_moe.py b/tests/operators/test_fused_moe.py
similarity index 100%
rename from test/operators/test_fused_moe.py
rename to tests/operators/test_fused_moe.py
diff --git a/test/operators/test_get_token_penalty_multi_scores.py b/tests/operators/test_get_token_penalty_multi_scores.py
similarity index 100%
rename from test/operators/test_get_token_penalty_multi_scores.py
rename to tests/operators/test_get_token_penalty_multi_scores.py
diff --git a/test/operators/test_moe_top_k_select.py b/tests/operators/test_moe_top_k_select.py
similarity index 100%
rename from test/operators/test_moe_top_k_select.py
rename to tests/operators/test_moe_top_k_select.py
diff --git a/test/operators/test_noaux_tc.py b/tests/operators/test_noaux_tc.py
similarity index 100%
rename from test/operators/test_noaux_tc.py
rename to tests/operators/test_noaux_tc.py
diff --git a/test/operators/test_perchannel_gemm.py b/tests/operators/test_perchannel_gemm.py
similarity index 100%
rename from test/operators/test_perchannel_gemm.py
rename to tests/operators/test_perchannel_gemm.py
diff --git a/test/operators/test_rejection_top_p_sampling.py b/tests/operators/test_rejection_top_p_sampling.py
similarity index 100%
rename from test/operators/test_rejection_top_p_sampling.py
rename to tests/operators/test_rejection_top_p_sampling.py
diff --git a/test/operators/test_scaled_gemm_f8_i4_f16.py b/tests/operators/test_scaled_gemm_f8_i4_f16.py
similarity index 100%
rename from test/operators/test_scaled_gemm_f8_i4_f16.py
rename to tests/operators/test_scaled_gemm_f8_i4_f16.py
diff --git a/test/operators/test_split_fuse.py b/tests/operators/test_split_fuse.py
similarity index 100%
rename from test/operators/test_split_fuse.py
rename to tests/operators/test_split_fuse.py
diff --git a/test/operators/test_stop_generation_multi_ends.py b/tests/operators/test_stop_generation_multi_ends.py
similarity index 100%
rename from test/operators/test_stop_generation_multi_ends.py
rename to tests/operators/test_stop_generation_multi_ends.py
diff --git a/test/operators/test_token_penalty.py b/tests/operators/test_token_penalty.py
similarity index 100%
rename from test/operators/test_token_penalty.py
rename to tests/operators/test_token_penalty.py
diff --git a/test/operators/test_w4afp8_gemm.py b/tests/operators/test_w4afp8_gemm.py
similarity index 100%
rename from test/operators/test_w4afp8_gemm.py
rename to tests/operators/test_w4afp8_gemm.py
diff --git a/test/operators/test_wfp8afp8_sparse_gemm.py b/tests/operators/test_wfp8afp8_sparse_gemm.py
similarity index 100%
rename from test/operators/test_wfp8afp8_sparse_gemm.py
rename to tests/operators/test_wfp8afp8_sparse_gemm.py
diff --git a/test/plugins/fd_add_dummy_model/__init__.py b/tests/plugins/fd_add_dummy_model/__init__.py
similarity index 100%
rename from test/plugins/fd_add_dummy_model/__init__.py
rename to tests/plugins/fd_add_dummy_model/__init__.py
diff --git a/test/plugins/fd_add_dummy_model_runner/__init__.py b/tests/plugins/fd_add_dummy_model_runner/__init__.py
similarity index 100%
rename from test/plugins/fd_add_dummy_model_runner/__init__.py
rename to tests/plugins/fd_add_dummy_model_runner/__init__.py
diff --git a/test/plugins/setup.py b/tests/plugins/setup.py
similarity index 100%
rename from test/plugins/setup.py
rename to tests/plugins/setup.py
diff --git a/test/plugins/test_model_registry.py b/tests/plugins/test_model_registry.py
similarity index 100%
rename from test/plugins/test_model_registry.py
rename to tests/plugins/test_model_registry.py
diff --git a/test/utils/test_custom_chat_template.py b/tests/utils/test_custom_chat_template.py
similarity index 100%
rename from test/utils/test_custom_chat_template.py
rename to tests/utils/test_custom_chat_template.py
diff --git a/test/utils/test_download.py b/tests/utils/test_download.py
similarity index 100%
rename from test/utils/test_download.py
rename to tests/utils/test_download.py
diff --git a/test/utils/test_version.py b/tests/utils/test_version.py
similarity index 100%
rename from test/utils/test_version.py
rename to tests/utils/test_version.py