diff --git a/.github/workflows/_accuracy_test.yml b/.github/workflows/_accuracy_test.yml index 8db47c6ef..a0ed22375 100644 --- a/.github/workflows/_accuracy_test.yml +++ b/.github/workflows/_accuracy_test.yml @@ -31,6 +31,7 @@ on: jobs: accuracy_tests: runs-on: [self-hosted, GPU-h20-1Cards] + timeout-minutes: 60 steps: - name: Code Prepare shell: bash @@ -147,7 +148,7 @@ jobs: git config --global --add safe.directory /workspace/FastDeploy cd FastDeploy - pushd test/ce/deploy + pushd tests/ce/deploy python3.10 deploy.py > dd.log 2>&1 & sleep 3 curl -X POST http://0.0.0.0:${FLASK_PORT}/start \ @@ -157,7 +158,7 @@ jobs: curl -X POST http://localhost:${FLASK_PORT}/wait_for_infer?timeout=90 popd - pushd test/ce/accuracy_cases + pushd tests/ce/accuracy_cases export URL=http://localhost:${FD_API_PORT}/v1/chat/completions export TEMPLATE=TOKEN_LOGPROB export MODEL_SIZE=0.3B diff --git a/.github/workflows/_base_test.yml b/.github/workflows/_base_test.yml index 4e6695b58..73a7c7dca 100644 --- a/.github/workflows/_base_test.yml +++ b/.github/workflows/_base_test.yml @@ -31,6 +31,7 @@ on: jobs: base_tests: runs-on: [self-hosted, GPU-h20-1Cards] + timeout-minutes: 60 steps: - name: Code Prepare shell: bash @@ -147,7 +148,7 @@ jobs: git config --global --add safe.directory /workspace/FastDeploy cd FastDeploy - pushd test/ce/deploy + pushd tests/ce/deploy python3.10 deploy.py > dd.log 2>&1 & sleep 3 curl -X POST http://0.0.0.0:${FLASK_PORT}/start \ @@ -157,7 +158,7 @@ jobs: curl -X POST http://localhost:${FLASK_PORT}/wait_for_infer?timeout=90 popd - pushd test/ce/server + pushd tests/ce/server export URL=http://localhost:${FD_API_PORT}/v1/chat/completions export TEMPLATE=TOKEN_LOGPROB TEST_EXIT_CODE=0 diff --git a/.github/workflows/_build_linux.yml b/.github/workflows/_build_linux.yml index 583686289..28436e772 100644 --- a/.github/workflows/_build_linux.yml +++ b/.github/workflows/_build_linux.yml @@ -55,6 +55,7 @@ on: jobs: fd-build: runs-on: [self-hosted, GPU-Build] + 
timeout-minutes: 240 outputs: wheel_path: ${{ steps.set_output.outputs.wheel_path }} steps: diff --git a/.github/workflows/_pre_ce_test.yml b/.github/workflows/_pre_ce_test.yml index b7415f9cb..f66098341 100644 --- a/.github/workflows/_pre_ce_test.yml +++ b/.github/workflows/_pre_ce_test.yml @@ -30,6 +30,7 @@ on: jobs: run_ce_cases: runs-on: [self-hosted, PRE_CE_RUN_2Card] + timeout-minutes: 60 steps: - name: Print current runner name run: | diff --git a/.github/workflows/_stable_test.yml b/.github/workflows/_stable_test.yml new file mode 100644 index 000000000..05889805d --- /dev/null +++ b/.github/workflows/_stable_test.yml @@ -0,0 +1,162 @@ +name: Stable Test +description: "Run Stable Tests" + +on: + workflow_call: + inputs: + DOCKER_IMAGE: + description: "Build Images" + required: true + type: string + default: "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:cuda126-py310" + FASTDEPLOY_ARCHIVE_URL: + description: "URL of the compressed FastDeploy code archive." + required: true + type: string + FASTDEPLOY_WHEEL_URL: + description: "URL of the FastDeploy Wheel." 
+ required: true + type: string + CACHE_DIR: + description: "Cache Dir Use" + required: false + type: string + default: "" + MODEL_CACHE_DIR: + description: "Cache Dir Use" + required: false + type: string + default: "" + +jobs: + stable_tests: + runs-on: [self-hosted, GPU-h1z1-2Cards] + timeout-minutes: 60 + steps: + - name: Code Prepare + shell: bash + env: + docker_image: ${{ inputs.DOCKER_IMAGE }} + fd_archive_url: ${{ inputs.FASTDEPLOY_ARCHIVE_URL }} + run: | + set -x + REPO="https://github.com/${{ github.repository }}.git" + FULL_REPO="${{ github.repository }}" + REPO_NAME="${FULL_REPO##*/}" + BASE_BRANCH="${{ github.base_ref }}" + + # Clean the repository directory before starting + docker run --rm --net=host -v $(pwd):/workspace -w /workspace \ + -e "REPO_NAME=${REPO_NAME}" \ + ${docker_image} /bin/bash -c ' + if [ -d ${REPO_NAME} ]; then + echo "Directory ${REPO_NAME} exists, removing it..." + rm -rf ${REPO_NAME}* + fi + ' + + wget -q ${fd_archive_url} + tar -xf FastDeploy.tar.gz + rm -rf FastDeploy.tar.gz + cd FastDeploy + git config --global user.name "FastDeployCI" + git config --global user.email "fastdeploy_ci@example.com" + git log -n 3 --oneline + + - name: Run FastDeploy Stable Tests + shell: bash + env: + docker_image: ${{ inputs.DOCKER_IMAGE }} + fastdeploy_wheel_url: ${{ inputs.FASTDEPLOY_WHEEL_URL }} + CACHE_DIR: ${{ inputs.CACHE_DIR }} + MODEL_CACHE_DIR: ${{ inputs.MODEL_CACHE_DIR }} + run: | + runner_name="${{ runner.name }}" + CARD_ID=$(echo "${runner_name}" | awk -F'-' '{print $NF}') + DEVICES=$(echo "$CARD_ID" | fold -w1 | paste -sd,) + DEVICE_PORT=$(echo "$DEVICES" | cut -d',' -f1) + + FLASK_PORT=$((42068 + DEVICE_PORT * 100)) + FD_API_PORT=$((42088 + DEVICE_PORT * 100)) + FD_ENGINE_QUEUE_PORT=$((42058 + DEVICE_PORT * 100)) + FD_METRICS_PORT=$((42078 + DEVICE_PORT * 100)) + FD_CACHE_QUEUE_PORT=$((42038 + DEVICE_PORT * 100)) + FD_INFERENCE_MSG_QUEUE_ID=$(( 42048 + DEVICE_PORT * 100)) + echo "Test ENV Parameter:" + echo
"=========================================================" + echo "FLASK_PORT=${FLASK_PORT}" + echo "FD_API_PORT=${FD_API_PORT}" + echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" + echo "FD_METRICS_PORT=${FD_METRICS_PORT}" + echo "FD_INFERENCE_MSG_QUEUE_ID=${FD_INFERENCE_MSG_QUEUE_ID}" + echo "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}" + echo "DEVICES=${DEVICES}" + echo "=========================================================" + + CACHE_DIR="${CACHE_DIR:-$(dirname "$(dirname "${{ github.workspace }}")")}" + echo "CACHE_DIR is set to ${CACHE_DIR}" + if [ ! -f "${CACHE_DIR}/gitconfig" ]; then + touch "${CACHE_DIR}/gitconfig" + fi + if [ ! -d "${MODEL_CACHE_DIR}" ]; then + echo "Error: MODEL_CACHE_DIR '${MODEL_CACHE_DIR}' does not exist." + exit 1 + fi + + PORTS=($FLASK_PORT $FD_API_PORT $FD_ENGINE_QUEUE_PORT $FD_METRICS_PORT $FD_CACHE_QUEUE_PORT) # every *_PORT value passed to the container below + LOG_FILE="./port_cleanup_$(date +%Y%m%d_%H%M%S).log" + echo "==== LOG_FILE is ${LOG_FILE} ====" + + echo "==== PORT CLEAN BEFORE TASK RUN ====" | tee -a $LOG_FILE + + for port in "${PORTS[@]}"; do + PIDS=$(lsof -t -i :$port || true) + if [ -n "$PIDS" ]; then + echo "Port $port is occupied by PID(s): $PIDS" | tee -a $LOG_FILE + echo "$PIDS" | xargs -r kill -9 + echo "Port $port cleared" | tee -a $LOG_FILE + else + echo "Port $port is free" | tee -a $LOG_FILE + fi + done + + echo "==== PORT CLEAN COMPLETE ====" | tee -a $LOG_FILE + + docker run --rm --ipc=host --pid=host --net=host \ + -v $(pwd):/workspace \ + -w /workspace \ + -e fastdeploy_wheel_url=${fastdeploy_wheel_url} \ + -e "FD_API_PORT=${FD_API_PORT}" \ + -e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \ + -e "FD_METRICS_PORT=${FD_METRICS_PORT}" \ + -e "FLASK_PORT=${FLASK_PORT}" \ + -e "FD_INFERENCE_MSG_QUEUE_ID=${FD_INFERENCE_MSG_QUEUE_ID}" \ + -e "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}" \ + -v "${MODEL_CACHE_DIR}:/MODELDATA" \ + -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \ + -v "${CACHE_DIR}/.cache:/root/.cache" \ + -v "${CACHE_DIR}/ConfigDir:/root/.config" \ + -e
TZ="Asia/Shanghai" \ + --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc ' + python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/ + + pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple + + python -m pip install ${fastdeploy_wheel_url} + python -m pip install pytest + + git config --global --add safe.directory /workspace/FastDeploy + cd FastDeploy + TEST_EXIT_CODE=0 + pushd tests/ce/stable_cases + bash launch_model.sh /MODELDATA + bash run.sh || TEST_EXIT_CODE=1 + popd + echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> /workspace/FastDeploy/exit_code.env + ' + if [ -f ./FastDeploy/exit_code.env ]; then + source ./FastDeploy/exit_code.env + cat ./FastDeploy/exit_code.env >> $GITHUB_ENV + fi + echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" + exit ${TEST_EXIT_CODE} diff --git a/.github/workflows/_unit_test_coverage.yml b/.github/workflows/_unit_test_coverage.yml index 7d02a1f9e..728c1d46c 100644 --- a/.github/workflows/_unit_test_coverage.yml +++ b/.github/workflows/_unit_test_coverage.yml @@ -31,6 +31,7 @@ on: jobs: run_tests_with_coverage: runs-on: [self-hosted, GPU-h1z1-2Cards] + timeout-minutes: 60 outputs: diff_cov_file_url: ${{ steps.cov_upload.outputs.diff_cov_file_url }} unittest_failed_url: ${{ steps.cov_upload.outputs.unittest_failed_url }} @@ -150,12 +151,12 @@ jobs: python -m pip install coverage python -m pip install diff-cover python -m pip install ${fd_wheel_url} - if [ -d "test/plugins" ]; then - cd test/plugins + if [ -d "tests/plugins" ]; then + cd tests/plugins python setup.py install cd ../.. 
else - echo "Warning: test/plugins directory not found, skipping setup.py install" + echo "Warning: tests/plugins directory not found, skipping setup.py install" fi export COVERAGE_FILE=/workspace/FastDeploy/coveragedata/.coverage export COVERAGE_RCFILE=/workspace/FastDeploy/scripts/.coveragerc @@ -204,7 +205,7 @@ jobs: echo "diff_cov_result_json_url=${DIFF_COV_JSON_URL}" >> $GITHUB_OUTPUT echo "diff_cov_result_json_url=${DIFF_COV_JSON_URL}" >> $GITHUB_ENV fi - unittest_result="test/failed_tests.log" + unittest_result="tests/failed_tests.log" if [ -s ${unittest_result} ];then python ${push_file} ${unittest_result} ${target_path}/UnitTestResult target_path_stripped="${target_path#paddle-github-action/}" diff --git a/.github/workflows/pr_build_and_test.yml b/.github/workflows/pr_build_and_test.yml index 8faa7436b..0d6383784 100644 --- a/.github/workflows/pr_build_and_test.yml +++ b/.github/workflows/pr_build_and_test.yml @@ -83,3 +83,13 @@ jobs: FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" + + stable_test: + name: Run Stable Tests + needs: [clone,build] + uses: ./.github/workflows/_stable_test.yml + with: + DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate + FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} + FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} + MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" diff --git a/scripts/coverage_run.sh b/scripts/coverage_run.sh index 8f0e149c4..b5c6578a5 100644 --- a/scripts/coverage_run.sh +++ b/scripts/coverage_run.sh @@ -2,7 +2,7 @@ DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" echo "$DIR" -run_path="$DIR/../test/" +run_path="$DIR/../tests/" cd ${run_path} ls diff --git a/scripts/run_ci_gcu.sh b/scripts/run_ci_gcu.sh index 172511216..46ceee8d7 100644 --- a/scripts/run_ci_gcu.sh +++ 
b/scripts/run_ci_gcu.sh @@ -97,7 +97,7 @@ cat server.log echo -e "\n" echo "Start inference..." -python test/ci_use/GCU/run_ernie.py +python tests/ci_use/GCU/run_ernie.py exit_code=$? echo -e "exit_code is ${exit_code}.\n" diff --git a/scripts/run_ci_iluvatar.sh b/scripts/run_ci_iluvatar.sh index dca1369bf..fe702be87 100644 --- a/scripts/run_ci_iluvatar.sh +++ b/scripts/run_ci_iluvatar.sh @@ -31,7 +31,7 @@ export INFERENCE_MSG_QUEUE_ID=232132 export FD_DEBUG=1 export PADDLE_XCCL_BACKEND=iluvatar_gpu export FD_SAMPLING_CLASS=rejection -python test/ci_use/iluvatar_UT/run_ernie300B_4layer.py +python tests/ci_use/iluvatar_UT/run_ernie300B_4layer.py exit_code=$? echo exit_code is ${exit_code} diff --git a/scripts/run_ci_xpu.sh b/scripts/run_ci_xpu.sh index b39f9482a..04d4b8699 100644 --- a/scripts/run_ci_xpu.sh +++ b/scripts/run_ci_xpu.sh @@ -77,7 +77,7 @@ done cat server.log # 执行服务化推理 -python test/ci_use/XPU_45T/run_45T.py +python tests/ci_use/XPU_45T/run_45T.py exit_code=$? echo exit_code is ${exit_code} @@ -143,7 +143,7 @@ done cat server.log # 执行服务化推理 -python test/ci_use/XPU_45T/run_45T.py +python tests/ci_use/XPU_45T/run_45T.py kv_block_test_exit_code=$? 
echo kv_block_test_exit_code is ${kv_block_test_exit_code} diff --git a/scripts/run_pre_ce.sh b/scripts/run_pre_ce.sh index 726b91e85..67b06736e 100644 --- a/scripts/run_pre_ce.sh +++ b/scripts/run_pre_ce.sh @@ -9,7 +9,7 @@ python -m pip install -r requirements.txt python -m pip install jsonschema aistudio_sdk==0.3.5 failed_files=() -run_path="$DIR/../test/ci_use/" +run_path="$DIR/../tests/ci_use/" # load all test files for subdir in "$run_path"*/; do diff --git a/scripts/run_unittest.sh b/scripts/run_unittest.sh index 576a27016..8d14790de 100644 --- a/scripts/run_unittest.sh +++ b/scripts/run_unittest.sh @@ -46,7 +46,7 @@ done <<< "$gpu_info" export CUDA_VISIBLE_DEVICES=${min_gpu} # 使用 find 命令查找 test 目录下的 .py 文件 -test_files=$(find test -type f -name "test*.py") +test_files=$(find tests -type f -name "test*.py") # 遍历每个找到的测试文件 for test_file in $test_files; do diff --git a/test/ce/accuracy_cases/gsm8k.parquet b/tests/ce/accuracy_cases/gsm8k.parquet similarity index 100% rename from test/ce/accuracy_cases/gsm8k.parquet rename to tests/ce/accuracy_cases/gsm8k.parquet diff --git a/test/ce/accuracy_cases/gsm8k.py b/tests/ce/accuracy_cases/gsm8k.py similarity index 100% rename from test/ce/accuracy_cases/gsm8k.py rename to tests/ce/accuracy_cases/gsm8k.py diff --git a/test/ce/deploy/deploy.py b/tests/ce/deploy/deploy.py similarity index 100% rename from test/ce/deploy/deploy.py rename to tests/ce/deploy/deploy.py diff --git a/test/ce/performance/stress_tools.py b/tests/ce/performance/stress_tools.py similarity index 100% rename from test/ce/performance/stress_tools.py rename to tests/ce/performance/stress_tools.py diff --git a/test/ce/server/core/__init__.py b/tests/ce/server/core/__init__.py similarity index 100% rename from test/ce/server/core/__init__.py rename to tests/ce/server/core/__init__.py diff --git a/test/ce/server/core/logger.py b/tests/ce/server/core/logger.py similarity index 100% rename from test/ce/server/core/logger.py rename to 
tests/ce/server/core/logger.py diff --git a/test/ce/server/core/request_template.py b/tests/ce/server/core/request_template.py similarity index 100% rename from test/ce/server/core/request_template.py rename to tests/ce/server/core/request_template.py diff --git a/test/ce/server/core/utils.py b/tests/ce/server/core/utils.py similarity index 100% rename from test/ce/server/core/utils.py rename to tests/ce/server/core/utils.py diff --git a/test/ce/server/demo.py b/tests/ce/server/demo.py similarity index 100% rename from test/ce/server/demo.py rename to tests/ce/server/demo.py diff --git a/test/ce/server/requirements.txt b/tests/ce/server/requirements.txt similarity index 100% rename from test/ce/server/requirements.txt rename to tests/ce/server/requirements.txt diff --git a/test/ce/server/test_DDoS.py b/tests/ce/server/test_DDoS.py similarity index 100% rename from test/ce/server/test_DDoS.py rename to tests/ce/server/test_DDoS.py diff --git a/test/ce/server/test_base_chat.py b/tests/ce/server/test_base_chat.py similarity index 100% rename from test/ce/server/test_base_chat.py rename to tests/ce/server/test_base_chat.py diff --git a/test/ce/server/test_compare_top_logprobs.py b/tests/ce/server/test_compare_top_logprobs.py similarity index 100% rename from test/ce/server/test_compare_top_logprobs.py rename to tests/ce/server/test_compare_top_logprobs.py diff --git a/test/ce/server/test_completions.py b/tests/ce/server/test_completions.py similarity index 81% rename from test/ce/server/test_completions.py rename to tests/ce/server/test_completions.py index 12532c5f6..1ee7cbaa8 100644 --- a/test/ce/server/test_completions.py +++ b/tests/ce/server/test_completions.py @@ -9,22 +9,18 @@ Checking for /v1/completions parameters import json -from core import ( - TEMPLATE, - URL, - build_request_payload, - send_request, -) +from core import TEMPLATE, URL, build_request_payload, send_request URL = URL.replace("/v1/chat/completions", "/v1/completions") + def 
test_completion_total_tokens(): data = { "prompt": "你是谁", "stream": True, "stream_options": {"include_usage": True, "continuous_usage_stats": True}, } - + payload = build_request_payload(TEMPLATE, data) resp = send_request(URL, payload, stream=True) last_data = None @@ -33,10 +29,9 @@ def test_completion_total_tokens(): break if line.strip() == "" or not line.startswith("data: "): continue - line = line[len("data: "):] + line = line[len("data: ") :] last_data = json.loads(line) usage = last_data["usage"] total_tokens = usage["completion_tokens"] + usage["prompt_tokens"] assert "total_tokens" in usage, "total_tokens 不存在" - assert usage["total_tokens"]== total_tokens, "total_tokens计数不正确" - \ No newline at end of file + assert usage["total_tokens"] == total_tokens, "total_tokens计数不正确" diff --git a/test/ce/server/test_evil_cases.py b/tests/ce/server/test_evil_cases.py similarity index 83% rename from test/ce/server/test_evil_cases.py rename to tests/ce/server/test_evil_cases.py index 508bfeabd..874b520b8 100644 --- a/test/ce/server/test_evil_cases.py +++ b/tests/ce/server/test_evil_cases.py @@ -130,15 +130,14 @@ def test_multilingual_input(): "messages": [ { "role": "user", - "content": "这是一个包含多种语言的输入:Hello, 世界!Bonjour, le monde! Hola, el mundo! こんにちは、世界!" + "content": "这是一个包含多种语言的输入:Hello, 世界!Bonjour, le monde! Hola, el mundo! 
こんにちは、世界!", } ], "stream": False, - } payload = build_request_payload(TEMPLATE, data) resp = send_request(URL, payload).json() - + # 验证响应是否包含有效的回复 assert "choices" in resp, "未收到有效的回复" assert len(resp["choices"]) > 0, "回复为空" @@ -150,18 +149,9 @@ def test_multilingual_input(): print("多语言混合输入测试通过!") - def test_too_long_input(): """测试超长输入是否被正确处理""" - data = { - "messages": [ - { - "role": "user", - "content": "a," * 200000 # 超过最大输入长度 - } - ], - "stream": False - } + data = {"messages": [{"role": "user", "content": "a," * 200000}], "stream": False} # 超过最大输入长度 payload = build_request_payload(TEMPLATE, data) resp = send_request(URL, payload).json() assert resp["detail"].get("object") == "error", "超长输入未被识别为错误" @@ -170,15 +160,7 @@ def test_too_long_input(): def test_empty_input(): """测试空输入是否被正确处理""" - data = { - "messages": [ - { - "role": "user", - "content": "" # 空输入 - } - ], - "stream": False - } + data = {"messages": [{"role": "user", "content": ""}], "stream": False} # 空输入 payload = build_request_payload(TEMPLATE, data) resp = send_request(URL, payload).json() assert "error" not in resp.get("object"), "空输入被识别为错误" @@ -191,7 +173,7 @@ def test_prompt_only_spaces(): "messages": [ { "role": "user", - "content": " ", # 纯空格 + "content": " ", # 纯空格 } ], "stream": False, @@ -202,19 +184,11 @@ def test_prompt_only_spaces(): assert resp.get("object") == "chat.completion", "应返回 chat.completion 对象" response_content = resp["choices"][0]["message"]["content"] assert len(response_content) > 0, "messages content为空,未正常生成回复" - + def test_illegal_characters(): """测试非法字符输入是否被正确处理""" - data = { - "messages": [ - { - "role": "user", - "content": "非洲的首都是?:\x00\x01\x02" # 非法字符 - } - ], - "stream": False - } + data = {"messages": [{"role": "user", "content": "非洲的首都是?:\x00\x01\x02"}], "stream": False} # 非法字符 payload = build_request_payload(TEMPLATE, data) resp = send_request(URL, payload).json() assert len(resp["choices"][0]["message"]["content"]) > 0, "非法字符输入影响模型回复" @@ -224,10 +198,7 @@ def 
test_sql_injection(): """测试 SQL 注入攻击是否被正确处理""" data = { "messages": [ - { - "role": "user", - "content": "SELECT * FROM users WHERE username = 'admin' OR '1'='1';" # SQL 注入攻击 - } + {"role": "user", "content": "SELECT * FROM users WHERE username = 'admin' OR '1'='1';"} # SQL 注入攻击 ], "stream": False, "max_tokens": 50, @@ -243,12 +214,7 @@ def test_sql_injection(): def test_xss_attack(): """测试 XSS 攻击是否被正确处理""" data = { - "messages": [ - { - "role": "user", - "content": "" # XSS 攻击 - } - ], + "messages": [{"role": "user", "content": ""}], # XSS 攻击 "stream": False, "max_tokens": 50, } @@ -264,14 +230,9 @@ def test_stop_empty_string(): """测试 stop 参数为空字符串时的行为""" data = { "stream": False, - "messages": [ - { - "role": "user", - "content": "非洲的首都是?" - } - ], + "messages": [{"role": "user", "content": "非洲的首都是?"}], "max_tokens": 10, - "stop": "" # 空字符串 + "stop": "", # 空字符串 } payload = build_request_payload(TEMPLATE, data) resp = send_request(URL, payload).json() @@ -283,14 +244,9 @@ def test_stop_multiple_strings(): """测试 stop 参数为多个字符串时的行为""" data = { "stream": False, - "messages": [ - { - "role": "user", - "content": "非洲的首都是?" - } - ], + "messages": [{"role": "user", "content": "非洲的首都是?"}], "max_tokens": 50, - "stop": ["。", "!", "?"] # 多个停止条件 + "stop": ["。", "!", "?"], # 多个停止条件 } payload = build_request_payload(TEMPLATE, data) resp = send_request(URL, payload).json() @@ -303,14 +259,9 @@ def test_stop_with_special_characters(): """测试 stop 参数为包含特殊字符的字符串时的行为""" data = { "stream": False, - "messages": [ - { - "role": "user", - "content": "非洲的首都是?" - } - ], + "messages": [{"role": "user", "content": "非洲的首都是?"}], "max_tokens": 50, - "stop": "!@#$%^&*()" # 包含特殊字符 + "stop": "!@#$%^&*()", # 包含特殊字符 } payload = build_request_payload(TEMPLATE, data) resp = send_request(URL, payload).json() @@ -323,14 +274,9 @@ def test_stop_with_newlines(): """测试 stop 参数为包含换行符的字符串时的行为""" data = { "stream": False, - "messages": [ - { - "role": "user", - "content": "非洲的首都是?" 
- } - ], + "messages": [{"role": "user", "content": "非洲的首都是?"}], "max_tokens": 50, - "stop": "\n\n" # 包含换行符 + "stop": "\n\n", # 包含换行符 } payload = build_request_payload(TEMPLATE, data) resp = send_request(URL, payload).json() @@ -345,12 +291,12 @@ def test_model_empty(): "messages": [ { "role": "user", - "content": "非洲的首都是?", + "content": "非洲的首都是?", } ], "stream": False, "max_tokens": 10, - "model": "" # 空模型 + "model": "", # 空模型 } payload = build_request_payload(TEMPLATE, data) resp = send_request(URL, payload).json() @@ -365,12 +311,12 @@ def test_model_invalid(): "messages": [ { "role": "user", - "content": "非洲的首都是?", + "content": "非洲的首都是?", } ], "stream": False, "max_tokens": 10, - "model": "non-existent-model" # 不存在的模型 + "model": "non-existent-model", # 不存在的模型 } payload = build_request_payload(TEMPLATE, data) resp = send_request(URL, payload).json() @@ -385,18 +331,20 @@ def test_model_with_special_characters(): "messages": [ { "role": "user", - "content": "非洲的首都是?", + "content": "非洲的首都是?", } ], "stream": False, "max_tokens": 10, - "model": "!@#" # 包含特殊字符 + "model": "!@#", # 包含特殊字符 } payload = build_request_payload(TEMPLATE, data) resp = send_request(URL, payload).json() assert resp.get("object") == "chat.completion", "不存在的 model 应触发校验异常" assert "!@#" in resp.get("model"), "未返回预期的 model 信息" - assert len(resp.get("choices")[0].get("message").get("content")) > 0, "模型名为model 参数为非法格式,未正常生成回复" + assert ( + len(resp.get("choices")[0].get("message").get("content")) > 0 + ), "模型名为model 参数为非法格式,未正常生成回复" def test_max_tokens_negative(): @@ -405,7 +353,7 @@ def test_max_tokens_negative(): "messages": [ { "role": "user", - "content": "非洲的首都是?", + "content": "非洲的首都是?", } ], "stream": False, @@ -414,7 +362,7 @@ def test_max_tokens_negative(): payload = build_request_payload(TEMPLATE, data) resp = send_request(URL, payload).json() assert resp.get("detail").get("object") == "error", "max_tokens < 0 未触发校验异常" - assert 'max_tokens can be defined [1,' in 
resp.get("detail").get("message"), "未返回预期的 max_tokens 错误信息" + assert "max_tokens can be defined [1," in resp.get("detail").get("message"), "未返回预期的 max_tokens 错误信息" def test_max_tokens_min(): @@ -423,7 +371,7 @@ def test_max_tokens_min(): "messages": [ { "role": "user", - "content": "非洲的首都是?", + "content": "非洲的首都是?", } ], "stream": False, @@ -431,8 +379,10 @@ def test_max_tokens_min(): } payload = build_request_payload(TEMPLATE, data) resp = send_request(URL, payload).json() - assert resp.get('detail').get("object") == "error", "max_tokens未0时API未拦截住" - assert "reasoning_max_tokens must be between max_tokens and 1" in resp.get('detail').get("message", ""), "未返回预期的 max_tokens 达到异常值0 的 错误信息" + assert resp.get("detail").get("object") == "error", "max_tokens未0时API未拦截住" + assert "reasoning_max_tokens must be between max_tokens and 1" in resp.get("detail").get( + "message", "" + ), "未返回预期的 max_tokens 达到异常值0 的 错误信息" def test_max_tokens_non_integer(): @@ -441,7 +391,7 @@ def test_max_tokens_non_integer(): "messages": [ { "role": "user", - "content": "非洲的首都是?", + "content": "非洲的首都是?", } ], "stream": False, @@ -449,5 +399,6 @@ def test_max_tokens_non_integer(): } payload = build_request_payload(TEMPLATE, data) resp = send_request(URL, payload).json() - assert resp.get('detail')[0].get("msg") == "Input should be a valid integer, got a number with a fractional part", "未返回预期的 max_tokens 为非整数的错误信息" - + assert ( + resp.get("detail")[0].get("msg") == "Input should be a valid integer, got a number with a fractional part" + ), "未返回预期的 max_tokens 为非整数的错误信息" diff --git a/test/ce/server/test_logprobs.py b/tests/ce/server/test_logprobs.py similarity index 100% rename from test/ce/server/test_logprobs.py rename to tests/ce/server/test_logprobs.py diff --git a/test/ce/server/test_params_boundary.py b/tests/ce/server/test_params_boundary.py similarity index 100% rename from test/ce/server/test_params_boundary.py rename to tests/ce/server/test_params_boundary.py diff --git 
a/test/ce/server/test_repetition_early_stop.py b/tests/ce/server/test_repetition_early_stop.py similarity index 100% rename from test/ce/server/test_repetition_early_stop.py rename to tests/ce/server/test_repetition_early_stop.py diff --git a/test/ce/server/test_seed_usage.py b/tests/ce/server/test_seed_usage.py similarity index 100% rename from test/ce/server/test_seed_usage.py rename to tests/ce/server/test_seed_usage.py diff --git a/test/ce/server/test_stream.py b/tests/ce/server/test_stream.py similarity index 100% rename from test/ce/server/test_stream.py rename to tests/ce/server/test_stream.py diff --git a/tests/ce/stable_cases/launch_model.sh b/tests/ce/stable_cases/launch_model.sh new file mode 100644 index 000000000..1850dc944 --- /dev/null +++ b/tests/ce/stable_cases/launch_model.sh @@ -0,0 +1,59 @@ +#!/bin/bash +MODEL_PATH="${1}/TP2" +FD_API_PORT=${FD_API_PORT:-8000} +FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT:-8001} +FD_METRICS_PORT=${FD_METRICS_PORT:-8002} +FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT:-8003} + + + +if [ -z "$MODEL_PATH" ]; then + echo "❌ 用法: $0 <模型路径>" + exit 1 +fi + +if [ ! 
-d "$MODEL_PATH" ]; then + echo "❌ 错误:模型目录不存在: $MODEL_PATH" + exit 1 +fi + +echo "使用模型: $MODEL_PATH" + + +# Clean up logs from previous runs +rm -rf log/* +mkdir -p log + +# Environment variables +export CUDA_VISIBLE_DEVICES=0,1 +export INFERENCE_MSG_QUEUE_ID=${FD_INFERENCE_MSG_QUEUE_ID:-7679} +export ENABLE_V1_KVCACHE_SCHEDULER=1 + +python -m fastdeploy.entrypoints.openai.api_server \ + --tensor-parallel-size 2 \ + --port ${FD_API_PORT} \ + --engine-worker-queue-port ${FD_ENGINE_QUEUE_PORT} \ + --metrics-port ${FD_METRICS_PORT} \ + --cache-queue-port ${FD_CACHE_QUEUE_PORT} \ + --quantization wint8 \ + --max-model-len 32768 \ + --max-num-seqs 256 \ + --gpu-memory-utilization 0.9 \ + --model "$MODEL_PATH" \ + --load-strategy ipc_snapshot \ + --dynamic-load-weight & + +success=0 + +for i in $(seq 1 300); do + if (echo > /dev/tcp/127.0.0.1/$FD_API_PORT) >/dev/null 2>&1; then + echo "API server is up on port $FD_API_PORT on iteration $i" + success=1 + break + fi + sleep 1 +done +if [ $success -eq 0 ]; then + echo "超时: API 服务在 300 秒内未启动 (端口 $FD_API_PORT)" + exit 1 # report the failed launch instead of exiting 0 +fi diff --git a/tests/ce/stable_cases/run.sh b/tests/ce/stable_cases/run.sh new file mode 100644 index 000000000..6b7f939bb --- /dev/null +++ b/tests/ce/stable_cases/run.sh @@ -0,0 +1,160 @@ +#!/bin/bash + +# ================== Configuration Parameters ================== +FD_API_PORT=${FD_API_PORT:-8000} +FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT:-8001} +FD_METRICS_PORT=${FD_METRICS_PORT:-8002} +FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT:-8003} + + +HOST="0.0.0.0" +PORT="${FD_API_PORT}" # 这里需要配合启动脚本那个URL PORT +BASE_URL="http://$HOST:$PORT" + +TOTAL_ROUNDS=30 +CHAT_REQUESTS_PER_ROUND=5 +export CUDA_VISIBLE_DEVICES=0,1 +MAX_MEMORY_MB=10240 # 10GB + +# ==================================================== +# assert_eq actual expected message +assert_eq() { + local actual="$1" + local expected="$2" + local msg="$3" + if [ "$actual" != "$expected" ]; then + echo "Assertion failed: $msg" >&2 + exit 1 + fi +} + +# assert_true condition message +assert_true() { +
local condition="$1" + local msg="$2" + if [ "$condition" != "1" ] && [ "$condition" != "true" ]; then + echo "Assertion failed: $msg" >&2 + exit 1 + fi +} + +# assert_success exit_code message +assert_success() { + local code="$1" + local msg="$2" + if [ "$code" -ne 0 ]; then + echo "Assertion failed: $msg" >&2 + exit 1 + fi +} + +# curl_get_status(url, options...) → returns via global variables http_code and response_body +curl_get_status() { + local result + result=$(curl -s -w "%{http_code}" "$@") + http_code="${result: -3}" + response_body="${result%???}" +} + +# ==================================================== +# Get visible GPU IDs from CUDA_VISIBLE_DEVICES +# ==================================================== + +get_visible_gpu_ids() { + local ids=() + IFS=',' read -ra ADDR <<< "$CUDA_VISIBLE_DEVICES" + for i in "${ADDR[@]}"; do + if [[ "$i" =~ ^[0-9]+$ ]]; then + ids+=("$i") + fi + done + echo "${ids[@]}" +} + +# ==================================================== +# Check GPU memory usage (must not exceed MAX_MEMORY_MB) +# ==================================================== + +check_gpu_memory() { + local gpu_ids + gpu_ids=($(get_visible_gpu_ids)) + + if [ ${#gpu_ids[@]} -eq 0 ]; then + echo "Assertion failed: No valid GPU IDs in CUDA_VISIBLE_DEVICES='$CUDA_VISIBLE_DEVICES'" >&2 + exit 1 + fi + + for gpu_id in "${gpu_ids[@]}"; do + local memory_used + memory_used=$(nvidia-smi -i "$gpu_id" --query-gpu=memory.used --format=csv,noheader,nounits 2>/dev/null) || \ + assert_success $? "Failed to query GPU $gpu_id memory usage" + + if ! 
[[ "$memory_used" =~ ^[0-9]+ ]]; then + echo "Assertion failed: Invalid memory value for GPU $gpu_id: $memory_used" >&2 + exit 1 + fi + + assert_true "$(( memory_used <= MAX_MEMORY_MB ))" \ + "GPU $gpu_id memory $memory_used MB > $MAX_MEMORY_MB MB" + done +} + +# ==================================================== + +for round in $(seq 1 $TOTAL_ROUNDS); do + echo "=== Round $round / $TOTAL_ROUNDS ===" + + # Step 1: Clear loaded weights + echo "[Step 1] Clearing load weight..." + curl_get_status -i "$BASE_URL/clear_load_weight" + assert_eq "$http_code" "200" "/clear_load_weight failed with HTTP $http_code" + + # Step 2: Check GPU memory usage + echo "[Step 2] Checking GPU memory..." + check_gpu_memory + + # Step 3: Update model weights + echo "[Step 3] Updating model weight..." + curl_get_status -i "$BASE_URL/update_model_weight" + assert_eq "$http_code" "200" "/update_model_weight failed with HTTP $http_code" + + # Step 4: Send chat completion requests + echo "[Step 4] Sending $CHAT_REQUESTS_PER_ROUND chat completions..." 
+ for i in $(seq 1 $CHAT_REQUESTS_PER_ROUND); do + echo " Request $i / $CHAT_REQUESTS_PER_ROUND" + # Send request and capture response + response=$(curl -s -X POST "$BASE_URL/v1/chat/completions" \ + -H "Content-Type: application/json" \ + -d '{"messages": [{"role": "user", "content": "Hello!"}]}') + + # Extract the 'content' field from the response + content=$(echo "$response" | \ + grep -o '"content":"[^"]*"' | \ + head -1 | \ + sed 's/^"content":"//' | \ + sed 's/"$//') + + if [ -z "$content" ]; then + # Fallback: try extracting content using sed more robustly + content=$(echo "$response" | \ + sed -n 's/.*"content":"\([^"]*\)".*/\1/p' | \ + head -1) + fi + + # Check if content is empty or null + if [ -z "$content" ] || [ "$content" = "null" ]; then + echo "Failed: Empty or null 'content' in response" >&2 + echo "Raw response:" >&2 + echo "$response" >&2 + exit 1 + fi + + echo "Received non-empty response" + echo -e "\n---\n" + done + + echo "Round $round completed." + echo -e "==================================\n" # -e so the trailing \n is a newline, not literal text +done + +echo "All $TOTAL_ROUNDS rounds completed successfully." 
diff --git a/test/ci_use/EB_Lite/test_EB_Lite_serving.py b/tests/ci_use/EB_Lite/test_EB_Lite_serving.py similarity index 100% rename from test/ci_use/EB_Lite/test_EB_Lite_serving.py rename to tests/ci_use/EB_Lite/test_EB_Lite_serving.py diff --git a/test/ci_use/EB_Lite_mtp/test_EB_Lite_serving_mtp.py b/tests/ci_use/EB_Lite_mtp/test_EB_Lite_serving_mtp.py similarity index 100% rename from test/ci_use/EB_Lite_mtp/test_EB_Lite_serving_mtp.py rename to tests/ci_use/EB_Lite_mtp/test_EB_Lite_serving_mtp.py diff --git a/test/ci_use/EB_VL_Lite/baseline.txt b/tests/ci_use/EB_VL_Lite/baseline.txt similarity index 100% rename from test/ci_use/EB_VL_Lite/baseline.txt rename to tests/ci_use/EB_VL_Lite/baseline.txt diff --git a/test/ci_use/EB_VL_Lite/rollout_model.py b/tests/ci_use/EB_VL_Lite/rollout_model.py similarity index 100% rename from test/ci_use/EB_VL_Lite/rollout_model.py rename to tests/ci_use/EB_VL_Lite/rollout_model.py diff --git a/test/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py b/tests/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py similarity index 100% rename from test/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py rename to tests/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py diff --git a/test/ci_use/EB_VL_Lite/test_rollout_model.py b/tests/ci_use/EB_VL_Lite/test_rollout_model.py similarity index 100% rename from test/ci_use/EB_VL_Lite/test_rollout_model.py rename to tests/ci_use/EB_VL_Lite/test_rollout_model.py diff --git a/test/ci_use/GCU/run_ernie.py b/tests/ci_use/GCU/run_ernie.py similarity index 100% rename from test/ci_use/GCU/run_ernie.py rename to tests/ci_use/GCU/run_ernie.py diff --git a/test/ci_use/Qwen2-7B-Instruct_offline/test_Qwen2-7B-Instruct_offline.py b/tests/ci_use/Qwen2-7B-Instruct_offline/test_Qwen2-7B-Instruct_offline.py similarity index 100% rename from test/ci_use/Qwen2-7B-Instruct_offline/test_Qwen2-7B-Instruct_offline.py rename to tests/ci_use/Qwen2-7B-Instruct_offline/test_Qwen2-7B-Instruct_offline.py diff --git 
a/test/ci_use/Qwen2-7B-Instruct_serving/test_Qwen2-7B-Instruct_serving.py b/tests/ci_use/Qwen2-7B-Instruct_serving/test_Qwen2-7B-Instruct_serving.py similarity index 100% rename from test/ci_use/Qwen2-7B-Instruct_serving/test_Qwen2-7B-Instruct_serving.py rename to tests/ci_use/Qwen2-7B-Instruct_serving/test_Qwen2-7B-Instruct_serving.py diff --git a/test/ci_use/Qwen3-MoE/test_Qwen3-MoE_serving.py b/tests/ci_use/Qwen3-MoE/test_Qwen3-MoE_serving.py similarity index 100% rename from test/ci_use/Qwen3-MoE/test_Qwen3-MoE_serving.py rename to tests/ci_use/Qwen3-MoE/test_Qwen3-MoE_serving.py diff --git a/test/ci_use/XPU_45T/run_45T.py b/tests/ci_use/XPU_45T/run_45T.py similarity index 100% rename from test/ci_use/XPU_45T/run_45T.py rename to tests/ci_use/XPU_45T/run_45T.py diff --git a/test/ci_use/iluvatar_UT/run_ernie300B_4layer.py b/tests/ci_use/iluvatar_UT/run_ernie300B_4layer.py similarity index 100% rename from test/ci_use/iluvatar_UT/run_ernie300B_4layer.py rename to tests/ci_use/iluvatar_UT/run_ernie300B_4layer.py diff --git a/test/distributed/custom_all_reduce.py b/tests/distributed/custom_all_reduce.py similarity index 100% rename from test/distributed/custom_all_reduce.py rename to tests/distributed/custom_all_reduce.py diff --git a/test/distributed/test_custom_all_reduce.py b/tests/distributed/test_custom_all_reduce.py similarity index 100% rename from test/distributed/test_custom_all_reduce.py rename to tests/distributed/test_custom_all_reduce.py diff --git a/test/entrypoints/openai/test_build_sample_logprobs.py b/tests/entrypoints/openai/test_build_sample_logprobs.py similarity index 100% rename from test/entrypoints/openai/test_build_sample_logprobs.py rename to tests/entrypoints/openai/test_build_sample_logprobs.py diff --git a/test/entrypoints/openai/test_completion_echo.py b/tests/entrypoints/openai/test_completion_echo.py similarity index 100% rename from test/entrypoints/openai/test_completion_echo.py rename to 
tests/entrypoints/openai/test_completion_echo.py diff --git a/test/entrypoints/openai/test_serving_completion.py b/tests/entrypoints/openai/test_serving_completion.py similarity index 100% rename from test/entrypoints/openai/test_serving_completion.py rename to tests/entrypoints/openai/test_serving_completion.py diff --git a/test/entrypoints/test_generation.py b/tests/entrypoints/test_generation.py similarity index 100% rename from test/entrypoints/test_generation.py rename to tests/entrypoints/test_generation.py diff --git a/test/graph_optimization/test_cuda_graph_dynamic_subgraph.py b/tests/graph_optimization/test_cuda_graph_dynamic_subgraph.py similarity index 100% rename from test/graph_optimization/test_cuda_graph_dynamic_subgraph.py rename to tests/graph_optimization/test_cuda_graph_dynamic_subgraph.py diff --git a/test/graph_optimization/test_cuda_graph_spec_decode.py b/tests/graph_optimization/test_cuda_graph_spec_decode.py similarity index 100% rename from test/graph_optimization/test_cuda_graph_spec_decode.py rename to tests/graph_optimization/test_cuda_graph_spec_decode.py diff --git a/test/input/test_ernie_processor.py b/tests/input/test_ernie_processor.py similarity index 100% rename from test/input/test_ernie_processor.py rename to tests/input/test_ernie_processor.py diff --git a/test/layers/test_append_attention.py b/tests/layers/test_append_attention.py similarity index 100% rename from test/layers/test_append_attention.py rename to tests/layers/test_append_attention.py diff --git a/test/layers/test_min_sampling.py b/tests/layers/test_min_sampling.py similarity index 100% rename from test/layers/test_min_sampling.py rename to tests/layers/test_min_sampling.py diff --git a/test/layers/test_quant_layer.py b/tests/layers/test_quant_layer.py similarity index 100% rename from test/layers/test_quant_layer.py rename to tests/layers/test_quant_layer.py diff --git a/test/layers/test_repetition_early_stopper.py b/tests/layers/test_repetition_early_stopper.py 
similarity index 100% rename from test/layers/test_repetition_early_stopper.py rename to tests/layers/test_repetition_early_stopper.py diff --git a/test/layers/test_sampler.py b/tests/layers/test_sampler.py similarity index 100% rename from test/layers/test_sampler.py rename to tests/layers/test_sampler.py diff --git a/test/model_executor/test_forward_meta_str.py b/tests/model_executor/test_forward_meta_str.py similarity index 100% rename from test/model_executor/test_forward_meta_str.py rename to tests/model_executor/test_forward_meta_str.py diff --git a/test/operators/test_air_top_p_sampling.py b/tests/operators/test_air_top_p_sampling.py similarity index 100% rename from test/operators/test_air_top_p_sampling.py rename to tests/operators/test_air_top_p_sampling.py diff --git a/test/operators/test_cutlass_scaled_mm.py b/tests/operators/test_cutlass_scaled_mm.py similarity index 100% rename from test/operators/test_cutlass_scaled_mm.py rename to tests/operators/test_cutlass_scaled_mm.py diff --git a/test/operators/test_deqant_int8_cpp_extension.py b/tests/operators/test_deqant_int8_cpp_extension.py similarity index 100% rename from test/operators/test_deqant_int8_cpp_extension.py rename to tests/operators/test_deqant_int8_cpp_extension.py diff --git a/test/operators/test_dequant.py b/tests/operators/test_dequant.py similarity index 100% rename from test/operators/test_dequant.py rename to tests/operators/test_dequant.py diff --git a/test/operators/test_flash_mask_attn.py b/tests/operators/test_flash_mask_attn.py similarity index 100% rename from test/operators/test_flash_mask_attn.py rename to tests/operators/test_flash_mask_attn.py diff --git a/test/operators/test_fp8_fp8_half_cuda_core_gemm.py b/tests/operators/test_fp8_fp8_half_cuda_core_gemm.py similarity index 100% rename from test/operators/test_fp8_fp8_half_cuda_core_gemm.py rename to tests/operators/test_fp8_fp8_half_cuda_core_gemm.py diff --git a/test/operators/test_fused_moe.py 
b/tests/operators/test_fused_moe.py similarity index 100% rename from test/operators/test_fused_moe.py rename to tests/operators/test_fused_moe.py diff --git a/test/operators/test_get_token_penalty_multi_scores.py b/tests/operators/test_get_token_penalty_multi_scores.py similarity index 100% rename from test/operators/test_get_token_penalty_multi_scores.py rename to tests/operators/test_get_token_penalty_multi_scores.py diff --git a/test/operators/test_moe_top_k_select.py b/tests/operators/test_moe_top_k_select.py similarity index 100% rename from test/operators/test_moe_top_k_select.py rename to tests/operators/test_moe_top_k_select.py diff --git a/test/operators/test_noaux_tc.py b/tests/operators/test_noaux_tc.py similarity index 100% rename from test/operators/test_noaux_tc.py rename to tests/operators/test_noaux_tc.py diff --git a/test/operators/test_perchannel_gemm.py b/tests/operators/test_perchannel_gemm.py similarity index 100% rename from test/operators/test_perchannel_gemm.py rename to tests/operators/test_perchannel_gemm.py diff --git a/test/operators/test_rejection_top_p_sampling.py b/tests/operators/test_rejection_top_p_sampling.py similarity index 100% rename from test/operators/test_rejection_top_p_sampling.py rename to tests/operators/test_rejection_top_p_sampling.py diff --git a/test/operators/test_scaled_gemm_f8_i4_f16.py b/tests/operators/test_scaled_gemm_f8_i4_f16.py similarity index 100% rename from test/operators/test_scaled_gemm_f8_i4_f16.py rename to tests/operators/test_scaled_gemm_f8_i4_f16.py diff --git a/test/operators/test_split_fuse.py b/tests/operators/test_split_fuse.py similarity index 100% rename from test/operators/test_split_fuse.py rename to tests/operators/test_split_fuse.py diff --git a/test/operators/test_stop_generation_multi_ends.py b/tests/operators/test_stop_generation_multi_ends.py similarity index 100% rename from test/operators/test_stop_generation_multi_ends.py rename to 
tests/operators/test_stop_generation_multi_ends.py diff --git a/test/operators/test_token_penalty.py b/tests/operators/test_token_penalty.py similarity index 100% rename from test/operators/test_token_penalty.py rename to tests/operators/test_token_penalty.py diff --git a/test/operators/test_w4afp8_gemm.py b/tests/operators/test_w4afp8_gemm.py similarity index 100% rename from test/operators/test_w4afp8_gemm.py rename to tests/operators/test_w4afp8_gemm.py diff --git a/test/operators/test_wfp8afp8_sparse_gemm.py b/tests/operators/test_wfp8afp8_sparse_gemm.py similarity index 100% rename from test/operators/test_wfp8afp8_sparse_gemm.py rename to tests/operators/test_wfp8afp8_sparse_gemm.py diff --git a/test/plugins/fd_add_dummy_model/__init__.py b/tests/plugins/fd_add_dummy_model/__init__.py similarity index 100% rename from test/plugins/fd_add_dummy_model/__init__.py rename to tests/plugins/fd_add_dummy_model/__init__.py diff --git a/test/plugins/fd_add_dummy_model_runner/__init__.py b/tests/plugins/fd_add_dummy_model_runner/__init__.py similarity index 100% rename from test/plugins/fd_add_dummy_model_runner/__init__.py rename to tests/plugins/fd_add_dummy_model_runner/__init__.py diff --git a/test/plugins/setup.py b/tests/plugins/setup.py similarity index 100% rename from test/plugins/setup.py rename to tests/plugins/setup.py diff --git a/test/plugins/test_model_registry.py b/tests/plugins/test_model_registry.py similarity index 100% rename from test/plugins/test_model_registry.py rename to tests/plugins/test_model_registry.py diff --git a/test/utils/test_custom_chat_template.py b/tests/utils/test_custom_chat_template.py similarity index 100% rename from test/utils/test_custom_chat_template.py rename to tests/utils/test_custom_chat_template.py diff --git a/test/utils/test_download.py b/tests/utils/test_download.py similarity index 100% rename from test/utils/test_download.py rename to tests/utils/test_download.py diff --git a/test/utils/test_version.py 
b/tests/utils/test_version.py similarity index 100% rename from test/utils/test_version.py rename to tests/utils/test_version.py