Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-09-27 04:46:16 +08:00)
Add stable ci (#3460)
* add stable ci
* fix
* update
* fix
* rename tests dir; fix stable ci bug
* add timeout limit
* update
.github/workflows/_accuracy_test.yml (vendored, 5 lines changed)

@@ -31,6 +31,7 @@ on:
 jobs:
   accuracy_tests:
     runs-on: [self-hosted, GPU-h20-1Cards]
+    timeout-minutes: 60
     steps:
       - name: Code Prepare
         shell: bash
@@ -147,7 +148,7 @@ jobs:

           git config --global --add safe.directory /workspace/FastDeploy
           cd FastDeploy
-          pushd test/ce/deploy
+          pushd tests/ce/deploy
           python3.10 deploy.py > dd.log 2>&1 &
           sleep 3
           curl -X POST http://0.0.0.0:${FLASK_PORT}/start \
@@ -157,7 +158,7 @@ jobs:
           curl -X POST http://localhost:${FLASK_PORT}/wait_for_infer?timeout=90
           popd

-          pushd test/ce/accuracy_cases
+          pushd tests/ce/accuracy_cases
           export URL=http://localhost:${FD_API_PORT}/v1/chat/completions
           export TEMPLATE=TOKEN_LOGPROB
           export MODEL_SIZE=0.3B
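The two hunks above, and the matching ones in _base_test.yml below, exercise the same deploy-then-test flow: deploy.py exposes a small control service on FLASK_PORT, the workflow asks it to start an inference server, polls /wait_for_infer until the server answers, and then runs the cases with the endpoint exported through environment variables. The sketch below is a minimal local rendition of that flow; the concrete port numbers, the empty /start body, and the pytest invocation are assumptions for illustration, not taken from this diff.

    # Hypothetical local reproduction of the CI deploy/test flow
    export FLASK_PORT=8080 FD_API_PORT=8188          # assumed ports
    pushd tests/ce/deploy
    python3.10 deploy.py > dd.log 2>&1 &             # start the deploy controller
    sleep 3
    curl -X POST "http://0.0.0.0:${FLASK_PORT}/start"                         # request body omitted here
    curl -X POST "http://localhost:${FLASK_PORT}/wait_for_infer?timeout=90"   # block until the server answers
    popd

    pushd tests/ce/accuracy_cases
    export URL="http://localhost:${FD_API_PORT}/v1/chat/completions"
    export TEMPLATE=TOKEN_LOGPROB
    export MODEL_SIZE=0.3B
    python -m pytest -sv .                           # assumed test runner invocation
    popd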
.github/workflows/_base_test.yml (vendored, 5 lines changed)

@@ -31,6 +31,7 @@ on:
 jobs:
   base_tests:
     runs-on: [self-hosted, GPU-h20-1Cards]
+    timeout-minutes: 60
     steps:
       - name: Code Prepare
         shell: bash
@@ -147,7 +148,7 @@ jobs:

           git config --global --add safe.directory /workspace/FastDeploy
           cd FastDeploy
-          pushd test/ce/deploy
+          pushd tests/ce/deploy
           python3.10 deploy.py > dd.log 2>&1 &
           sleep 3
           curl -X POST http://0.0.0.0:${FLASK_PORT}/start \
@@ -157,7 +158,7 @@ jobs:
           curl -X POST http://localhost:${FLASK_PORT}/wait_for_infer?timeout=90
           popd

-          pushd test/ce/server
+          pushd tests/ce/server
           export URL=http://localhost:${FD_API_PORT}/v1/chat/completions
           export TEMPLATE=TOKEN_LOGPROB
           TEST_EXIT_CODE=0
.github/workflows/_build_linux.yml (vendored, 1 line changed)

@@ -55,6 +55,7 @@ on:
 jobs:
   fd-build:
     runs-on: [self-hosted, GPU-Build]
+    timeout-minutes: 240
     outputs:
       wheel_path: ${{ steps.set_output.outputs.wheel_path }}
     steps:
.github/workflows/_pre_ce_test.yml (vendored, 1 line changed)

@@ -30,6 +30,7 @@ on:
 jobs:
   run_ce_cases:
     runs-on: [self-hosted, PRE_CE_RUN_2Card]
+    timeout-minutes: 60
     steps:
       - name: Print current runner name
        run: |
.github/workflows/_stable_test.yml (vendored, new file, 162 lines)

@@ -0,0 +1,162 @@
name: Stable Test
description: "Run Stable Tests"

on:
  workflow_call:
    inputs:
      DOCKER_IMAGE:
        description: "Build Images"
        required: true
        type: string
        default: "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:cuda126-py310"
      FASTDEPLOY_ARCHIVE_URL:
        description: "URL of the compressed FastDeploy code archive."
        required: true
        type: string
      FASTDEPLOY_WHEEL_URL:
        description: "URL of the FastDeploy Wheel."
        required: true
        type: string
      CACHE_DIR:
        description: "Cache Dir Use"
        required: false
        type: string
        default: ""
      MODEL_CACHE_DIR:
        description: "Cache Dir Use"
        required: false
        type: string
        default: ""

jobs:
  stable_tests:
    runs-on: [self-hosted, GPU-h1z1-2Cards]
    timeout-minutes: 60
    steps:
      - name: Code Prepare
        shell: bash
        env:
          docker_image: ${{ inputs.DOCKER_IMAGE }}
          fd_archive_url: ${{ inputs.FASTDEPLOY_ARCHIVE_URL }}
        run: |
          set -x
          REPO="https://github.com/${{ github.repository }}.git"
          FULL_REPO="${{ github.repository }}"
          REPO_NAME="${FULL_REPO##*/}"
          BASE_BRANCH="${{ github.base_ref }}"

          # Clean the repository directory before starting
          docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
            -e "REPO_NAME=${REPO_NAME}" \
            ${docker_image} /bin/bash -c '
            if [ -d ${REPO_NAME} ]; then
              echo "Directory ${REPO_NAME} exists, removing it..."
              rm -rf ${REPO_NAME}*
            fi
          '

          wget -q ${fd_archive_url}
          tar -xf FastDeploy.tar.gz
          rm -rf FastDeploy.tar.gz
          cd FastDeploy
          git config --global user.name "FastDeployCI"
          git config --global user.email "fastdeploy_ci@example.com"
          git log -n 3 --oneline

      - name: Run FastDeploy Stable Tests
        shell: bash
        env:
          docker_image: ${{ inputs.DOCKER_IMAGE }}
          fastdeploy_wheel_url: ${{ inputs.FASTDEPLOY_WHEEL_URL }}
          CACHE_DIR: ${{ inputs.CACHE_DIR }}
          MODEL_CACHE_DIR: ${{ inputs.MODEL_CACHE_DIR }}
        run: |
          runner_name="${{ runner.name }}"
          CARD_ID=$(echo "${runner_name}" | awk -F'-' '{print $NF}')
          DEVICES=$(echo "$CARD_ID" | fold -w1 | paste -sd,)
          DEVICE_PORT=$(echo "$DEVICES" | cut -d',' -f1)

          FLASK_PORT=$((42068 + DEVICE_PORT * 100))
          FD_API_PORT=$((42088 + DEVICE_PORT * 100))
          FD_ENGINE_QUEUE_PORT=$((42058 + DEVICE_PORT * 100))
          FD_METRICS_PORT=$((42078 + DEVICE_PORT * 100))
          FD_CACHE_QUEUE_PORT=$((42038 + DEVICE_PORT * 100))
          FD_INFERENCE_MSG_QUEUE_ID=$((42048 + DEVICE_PORT * 100))
          echo "Test ENV Parameter:"
          echo "========================================================="
          echo "FLASK_PORT=${FLASK_PORT}"
          echo "FD_API_PORT=${FD_API_PORT}"
          echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}"
          echo "FD_METRICS_PORT=${FD_METRICS_PORT}"
          echo "FD_INFERENCE_MSG_QUEUE_ID=${INFERENCE_MSG_QUEUE_ID}"
          echo "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}"
          echo "DEVICES=${DEVICES}"
          echo "========================================================="

          CACHE_DIR="${CACHE_DIR:-$(dirname "$(dirname "${{ github.workspace }}")")}"
          echo "CACHE_DIR is set to ${CACHE_DIR}"
          if [ ! -f "${CACHE_DIR}/gitconfig" ]; then
            touch "${CACHE_DIR}/gitconfig"
          fi
          if [ ! -d "${MODEL_CACHE_DIR}" ]; then
            echo "Error: MODEL_CACHE_DIR '${MODEL_CACHE_DIR}' does not exist."
            exit 1
          fi

          PORTS=($FLASK_PORT $FD_API_PORT $FD_ENGINE_QUEUE_PORT $FD_METRICS_PORT)
          LOG_FILE="./port_cleanup_$(date +%Y%m%d_%H%M%S).log"
          echo "==== LOG_FILE is ${LOG_FILE} ===="

          echo "==== PORT CLEAN BEFORE TASK RUN ====" | tee -a $LOG_FILE

          for port in "${PORTS[@]}"; do
            PIDS=$(lsof -t -i :$port || true)
            if [ -n "$PIDS" ]; then
              echo "Port $port is occupied by PID(s): $PIDS" | tee -a $LOG_FILE
              echo "$PIDS" | xargs -r kill -9
              echo "Port $port cleared" | tee -a $LOG_FILE
            else
              echo "Port $port is free" | tee -a $LOG_FILE
            fi
          done

          echo "==== PORT CLEAN COMPLETE ====" | tee -a $LOG_FILE

          docker run --rm --ipc=host --pid=host --net=host \
            -v $(pwd):/workspace \
            -w /workspace \
            -e fastdeploy_wheel_url=${fastdeploy_wheel_url} \
            -e "FD_API_PORT=${FD_API_PORT}" \
            -e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
            -e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
            -e "FLASK_PORT=${FLASK_PORT}" \
            -e "FD_INFERENCE_MSG_QUEUE_ID=${FD_INFERENCE_MSG_QUEUE_ID}" \
            -e "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}" \
            -v "${MODEL_CACHE_DIR}:/MODELDATA" \
            -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
            -v "${CACHE_DIR}/.cache:/root/.cache" \
            -v "${CACHE_DIR}/ConfigDir:/root/.config" \
            -e TZ="Asia/Shanghai" \
            --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
            python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/

            pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

            python -m pip install ${fastdeploy_wheel_url}
            python -m pip install pytest

            git config --global --add safe.directory /workspace/FastDeploy
            cd FastDeploy
            TEST_EXIT_CODE=0
            pushd tests/ce/stable_cases
            bash launch_model.sh /MODELDATA
            bash run.sh || TEST_EXIT_CODE=1
            popd
            echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> /workspace/FastDeploy/exit_code.env
          '
          if [ -f ./FastDeploy/exit_code.env ]; then
            source ./FastDeploy/exit_code.env
            cat ./FastDeploy/exit_code.env >> $GITHUB_ENV
          fi
          echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}"
          exit ${TEST_EXIT_CODE}
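The stable-test job derives every port from the runner name so that jobs sharing a host do not collide: the trailing token of the name is split into per-character GPU ids, the first id becomes DEVICE_PORT, and each port is a fixed base plus DEVICE_PORT * 100. A worked example, assuming a hypothetical runner named GPU-h1z1-2Cards-23 (the name is an assumption; the arithmetic is taken from the workflow above):

    runner_name="GPU-h1z1-2Cards-23"                              # assumed example name
    CARD_ID=$(echo "${runner_name}" | awk -F'-' '{print $NF}')    # -> "23"
    DEVICES=$(echo "$CARD_ID" | fold -w1 | paste -sd,)            # -> "2,3" (one GPU id per character)
    DEVICE_PORT=$(echo "$DEVICES" | cut -d',' -f1)                # -> "2"
    echo $((42068 + DEVICE_PORT * 100))   # FLASK_PORT            -> 42268
    echo $((42088 + DEVICE_PORT * 100))   # FD_API_PORT           -> 42288
    echo $((42058 + DEVICE_PORT * 100))   # FD_ENGINE_QUEUE_PORT  -> 42258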
.github/workflows/_unit_test_coverage.yml (vendored, 9 lines changed)

@@ -31,6 +31,7 @@ on:
 jobs:
   run_tests_with_coverage:
     runs-on: [self-hosted, GPU-h1z1-2Cards]
+    timeout-minutes: 60
     outputs:
       diff_cov_file_url: ${{ steps.cov_upload.outputs.diff_cov_file_url }}
       unittest_failed_url: ${{ steps.cov_upload.outputs.unittest_failed_url }}
@@ -150,12 +151,12 @@ jobs:
           python -m pip install coverage
           python -m pip install diff-cover
           python -m pip install ${fd_wheel_url}
-          if [ -d "test/plugins" ]; then
-            cd test/plugins
+          if [ -d "tests/plugins" ]; then
+            cd tests/plugins
             python setup.py install
             cd ../..
           else
-            echo "Warning: test/plugins directory not found, skipping setup.py install"
+            echo "Warning: tests/plugins directory not found, skipping setup.py install"
           fi
           export COVERAGE_FILE=/workspace/FastDeploy/coveragedata/.coverage
           export COVERAGE_RCFILE=/workspace/FastDeploy/scripts/.coveragerc
@@ -204,7 +205,7 @@ jobs:
             echo "diff_cov_result_json_url=${DIFF_COV_JSON_URL}" >> $GITHUB_OUTPUT
             echo "diff_cov_result_json_url=${DIFF_COV_JSON_URL}" >> $GITHUB_ENV
           fi
-          unittest_result="test/failed_tests.log"
+          unittest_result="tests/failed_tests.log"
           if [ -s ${unittest_result} ];then
             python ${push_file} ${unittest_result} ${target_path}/UnitTestResult
             target_path_stripped="${target_path#paddle-github-action/}"
.github/workflows/pr_build_and_test.yml (vendored, 10 lines changed)

@@ -83,3 +83,13 @@ jobs:
       FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
       FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
       MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"
+
+  stable_test:
+    name: Run Stable Tests
+    needs: [clone,build]
+    uses: ./.github/workflows/_stable_test.yml
+    with:
+      DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate
+      FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
+      FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
+      MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"
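With this job added, the stable suite runs after the clone and build jobs on every pull request. The same commands can be exercised outside of Actions; the sketch below mirrors the container section of _stable_test.yml, with the wheel path and the model directory as placeholders to substitute for a local environment:

    # Assumes a built FastDeploy wheel and a model cache laid out like /MODELDATA in CI
    python -m pip install /path/to/fastdeploy-*.whl pytest      # placeholder wheel path
    cd FastDeploy
    pushd tests/ce/stable_cases
    bash launch_model.sh /MODELDATA          # /MODELDATA is the mounted MODEL_CACHE_DIR in CI
    bash run.sh || echo "stable cases failed"
    popd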
@@ -2,7 +2,7 @@
 DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 echo "$DIR"
 
-run_path="$DIR/../test/"
+run_path="$DIR/../tests/"
 cd ${run_path}
 ls
 
@@ -97,7 +97,7 @@ cat server.log
 echo -e "\n"
 
 echo "Start inference..."
-python test/ci_use/GCU/run_ernie.py
+python tests/ci_use/GCU/run_ernie.py
 exit_code=$?
 echo -e "exit_code is ${exit_code}.\n"
 
@@ -31,7 +31,7 @@ export INFERENCE_MSG_QUEUE_ID=232132
 export FD_DEBUG=1
 export PADDLE_XCCL_BACKEND=iluvatar_gpu
 export FD_SAMPLING_CLASS=rejection
-python test/ci_use/iluvatar_UT/run_ernie300B_4layer.py
+python tests/ci_use/iluvatar_UT/run_ernie300B_4layer.py
 exit_code=$?
 echo exit_code is ${exit_code}
 
@@ -77,7 +77,7 @@ done
 cat server.log
 
 # 执行服务化推理
-python test/ci_use/XPU_45T/run_45T.py
+python tests/ci_use/XPU_45T/run_45T.py
 exit_code=$?
 echo exit_code is ${exit_code}
 
@@ -143,7 +143,7 @@ done
 cat server.log
 
 # 执行服务化推理
-python test/ci_use/XPU_45T/run_45T.py
+python tests/ci_use/XPU_45T/run_45T.py
 kv_block_test_exit_code=$?
 echo kv_block_test_exit_code is ${kv_block_test_exit_code}
 
@@ -9,7 +9,7 @@ python -m pip install -r requirements.txt
 python -m pip install jsonschema aistudio_sdk==0.3.5
 
 failed_files=()
-run_path="$DIR/../test/ci_use/"
+run_path="$DIR/../tests/ci_use/"
 
 # load all test files
 for subdir in "$run_path"*/; do
 
@@ -46,7 +46,7 @@ done <<< "$gpu_info"
 export CUDA_VISIBLE_DEVICES=${min_gpu}
 
 # 使用 find 命令查找 test 目录下的 .py 文件
-test_files=$(find test -type f -name "test*.py")
+test_files=$(find tests -type f -name "test*.py")
 
 # 遍历每个找到的测试文件
 for test_file in $test_files; do
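The hunks above are the mechanical part of the "rename tests dir" item from the commit message: every hard-coded test/... path in the CI scripts becomes tests/.... A quick, illustrative way to confirm that no stale references survive such a rename (not part of the commit):

    # List any leftover references to the old directory name (illustrative only)
    grep -rn --include='*.sh' --include='*.yml' -E '\btest/(ce|ci_use|plugins)\b' . || echo "no stale test/ paths found"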
@@ -9,22 +9,18 @@ Checking for /v1/completions parameters
 
 import json
 
-from core import (
-    TEMPLATE,
-    URL,
-    build_request_payload,
-    send_request,
-)
+from core import TEMPLATE, URL, build_request_payload, send_request
 
 URL = URL.replace("/v1/chat/completions", "/v1/completions")
 
 
 def test_completion_total_tokens():
     data = {
         "prompt": "你是谁",
         "stream": True,
         "stream_options": {"include_usage": True, "continuous_usage_stats": True},
     }
 
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload, stream=True)
     last_data = None
@@ -33,10 +29,9 @@ def test_completion_total_tokens():
             break
         if line.strip() == "" or not line.startswith("data: "):
             continue
-        line = line[len("data: "):]
+        line = line[len("data: ") :]
         last_data = json.loads(line)
     usage = last_data["usage"]
     total_tokens = usage["completion_tokens"] + usage["prompt_tokens"]
     assert "total_tokens" in usage, "total_tokens 不存在"
-    assert usage["total_tokens"]== total_tokens, "total_tokens计数不正确"
+    assert usage["total_tokens"] == total_tokens, "total_tokens计数不正确"
@@ -130,15 +130,14 @@ def test_multilingual_input():
         "messages": [
             {
                 "role": "user",
-                "content": "这是一个包含多种语言的输入:Hello, 世界!Bonjour, le monde! Hola, el mundo! こんにちは、世界!"
+                "content": "这是一个包含多种语言的输入:Hello, 世界!Bonjour, le monde! Hola, el mundo! こんにちは、世界!",
             }
         ],
         "stream": False,
-
     }
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
 
     # 验证响应是否包含有效的回复
     assert "choices" in resp, "未收到有效的回复"
     assert len(resp["choices"]) > 0, "回复为空"
@@ -150,18 +149,9 @@ def test_multilingual_input():
     print("多语言混合输入测试通过!")
 
 
-
 def test_too_long_input():
     """测试超长输入是否被正确处理"""
-    data = {
-        "messages": [
-            {
-                "role": "user",
-                "content": "a," * 200000  # 超过最大输入长度
-            }
-        ],
-        "stream": False
-    }
+    data = {"messages": [{"role": "user", "content": "a," * 200000}], "stream": False}  # 超过最大输入长度
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
     assert resp["detail"].get("object") == "error", "超长输入未被识别为错误"
@@ -170,15 +160,7 @@ def test_too_long_input():
 
 def test_empty_input():
     """测试空输入是否被正确处理"""
-    data = {
-        "messages": [
-            {
-                "role": "user",
-                "content": ""  # 空输入
-            }
-        ],
-        "stream": False
-    }
+    data = {"messages": [{"role": "user", "content": ""}], "stream": False}  # 空输入
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
     assert "error" not in resp.get("object"), "空输入被识别为错误"
@@ -191,7 +173,7 @@ def test_prompt_only_spaces():
         "messages": [
             {
                 "role": "user",
                 "content": " ",  # 纯空格
             }
         ],
         "stream": False,
@@ -202,19 +184,11 @@ def test_prompt_only_spaces():
     assert resp.get("object") == "chat.completion", "应返回 chat.completion 对象"
     response_content = resp["choices"][0]["message"]["content"]
     assert len(response_content) > 0, "messages content为空,未正常生成回复"
 
 
 def test_illegal_characters():
     """测试非法字符输入是否被正确处理"""
-    data = {
-        "messages": [
-            {
-                "role": "user",
-                "content": "非洲的首都是?:\x00\x01\x02"  # 非法字符
-            }
-        ],
-        "stream": False
-    }
+    data = {"messages": [{"role": "user", "content": "非洲的首都是?:\x00\x01\x02"}], "stream": False}  # 非法字符
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
     assert len(resp["choices"][0]["message"]["content"]) > 0, "非法字符输入影响模型回复"
@@ -224,10 +198,7 @@ def test_sql_injection():
     """测试 SQL 注入攻击是否被正确处理"""
     data = {
         "messages": [
-            {
-                "role": "user",
-                "content": "SELECT * FROM users WHERE username = 'admin' OR '1'='1';"  # SQL 注入攻击
-            }
+            {"role": "user", "content": "SELECT * FROM users WHERE username = 'admin' OR '1'='1';"}  # SQL 注入攻击
         ],
         "stream": False,
         "max_tokens": 50,
@@ -243,12 +214,7 @@
 def test_xss_attack():
     """测试 XSS 攻击是否被正确处理"""
     data = {
-        "messages": [
-            {
-                "role": "user",
-                "content": "<script>alert('XSS');</script>"  # XSS 攻击
-            }
-        ],
+        "messages": [{"role": "user", "content": "<script>alert('XSS');</script>"}],  # XSS 攻击
         "stream": False,
         "max_tokens": 50,
     }
@@ -264,14 +230,9 @@ def test_stop_empty_string():
     """测试 stop 参数为空字符串时的行为"""
     data = {
         "stream": False,
-        "messages": [
-            {
-                "role": "user",
-                "content": "非洲的首都是?"
-            }
-        ],
+        "messages": [{"role": "user", "content": "非洲的首都是?"}],
         "max_tokens": 10,
-        "stop": ""  # 空字符串
+        "stop": "",  # 空字符串
     }
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
@@ -283,14 +244,9 @@ def test_stop_multiple_strings():
     """测试 stop 参数为多个字符串时的行为"""
     data = {
         "stream": False,
-        "messages": [
-            {
-                "role": "user",
-                "content": "非洲的首都是?"
-            }
-        ],
+        "messages": [{"role": "user", "content": "非洲的首都是?"}],
         "max_tokens": 50,
-        "stop": ["。", "!", "?"]  # 多个停止条件
+        "stop": ["。", "!", "?"],  # 多个停止条件
     }
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
@@ -303,14 +259,9 @@ def test_stop_with_special_characters():
     """测试 stop 参数为包含特殊字符的字符串时的行为"""
     data = {
         "stream": False,
-        "messages": [
-            {
-                "role": "user",
-                "content": "非洲的首都是?"
-            }
-        ],
+        "messages": [{"role": "user", "content": "非洲的首都是?"}],
         "max_tokens": 50,
-        "stop": "!@#$%^&*()"  # 包含特殊字符
+        "stop": "!@#$%^&*()",  # 包含特殊字符
     }
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
@@ -323,14 +274,9 @@ def test_stop_with_newlines():
     """测试 stop 参数为包含换行符的字符串时的行为"""
     data = {
         "stream": False,
-        "messages": [
-            {
-                "role": "user",
-                "content": "非洲的首都是?"
-            }
-        ],
+        "messages": [{"role": "user", "content": "非洲的首都是?"}],
         "max_tokens": 50,
-        "stop": "\n\n"  # 包含换行符
+        "stop": "\n\n",  # 包含换行符
     }
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
@@ -345,12 +291,12 @@ def test_model_empty():
         "messages": [
             {
                 "role": "user",
                 "content": "非洲的首都是?",
             }
         ],
         "stream": False,
         "max_tokens": 10,
-        "model": ""  # 空模型
+        "model": "",  # 空模型
     }
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
@@ -365,12 +311,12 @@ def test_model_invalid():
         "messages": [
             {
                 "role": "user",
                 "content": "非洲的首都是?",
             }
         ],
         "stream": False,
         "max_tokens": 10,
-        "model": "non-existent-model"  # 不存在的模型
+        "model": "non-existent-model",  # 不存在的模型
     }
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
@@ -385,18 +331,20 @@ def test_model_with_special_characters():
         "messages": [
             {
                 "role": "user",
                 "content": "非洲的首都是?",
             }
         ],
         "stream": False,
         "max_tokens": 10,
-        "model": "!@#"  # 包含特殊字符
+        "model": "!@#",  # 包含特殊字符
     }
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
     assert resp.get("object") == "chat.completion", "不存在的 model 应触发校验异常"
     assert "!@#" in resp.get("model"), "未返回预期的 model 信息"
-    assert len(resp.get("choices")[0].get("message").get("content")) > 0, "模型名为model 参数为非法格式,未正常生成回复"
+    assert (
+        len(resp.get("choices")[0].get("message").get("content")) > 0
+    ), "模型名为model 参数为非法格式,未正常生成回复"
 
 
 def test_max_tokens_negative():
@@ -405,7 +353,7 @@ def test_max_tokens_negative():
         "messages": [
             {
                 "role": "user",
                 "content": "非洲的首都是?",
             }
         ],
         "stream": False,
@@ -414,7 +362,7 @@
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
     assert resp.get("detail").get("object") == "error", "max_tokens < 0 未触发校验异常"
-    assert 'max_tokens can be defined [1,' in resp.get("detail").get("message"), "未返回预期的 max_tokens 错误信息"
+    assert "max_tokens can be defined [1," in resp.get("detail").get("message"), "未返回预期的 max_tokens 错误信息"
 
 
 def test_max_tokens_min():
@@ -423,7 +371,7 @@ def test_max_tokens_min():
         "messages": [
             {
                 "role": "user",
                 "content": "非洲的首都是?",
             }
         ],
         "stream": False,
@@ -431,8 +379,10 @@
     }
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
-    assert resp.get('detail').get("object") == "error", "max_tokens未0时API未拦截住"
-    assert "reasoning_max_tokens must be between max_tokens and 1" in resp.get('detail').get("message", ""), "未返回预期的 max_tokens 达到异常值0 的 错误信息"
+    assert resp.get("detail").get("object") == "error", "max_tokens未0时API未拦截住"
+    assert "reasoning_max_tokens must be between max_tokens and 1" in resp.get("detail").get(
+        "message", ""
+    ), "未返回预期的 max_tokens 达到异常值0 的 错误信息"
 
 
 def test_max_tokens_non_integer():
@@ -441,7 +391,7 @@ def test_max_tokens_non_integer():
         "messages": [
            {
                "role": "user",
                "content": "非洲的首都是?",
            }
        ],
        "stream": False,
@@ -449,5 +399,6 @@
     }
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
-    assert resp.get('detail')[0].get("msg") == "Input should be a valid integer, got a number with a fractional part", "未返回预期的 max_tokens 为非整数的错误信息"
+    assert (
+        resp.get("detail")[0].get("msg") == "Input should be a valid integer, got a number with a fractional part"
+    ), "未返回预期的 max_tokens 为非整数的错误信息"
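The hunks above change no test logic; they collapse message dicts onto single lines, normalize quotes, and wrap long assert statements, which is typical output of an opinionated Python formatter. Assuming the repository formats with black at a 120-character line length (an assumption, not stated anywhere in this diff), the equivalent local command would be:

    # Reformat the CE test suite in place (assumed formatter and line length)
    python -m pip install black
    python -m black --line-length 120 tests/ce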
tests/ce/stable_cases/launch_model.sh (new file, 59 lines)

@@ -0,0 +1,59 @@
#!/bin/bash
MODEL_PATH="${1}/TP2"
FD_API_PORT=${FD_API_PORT:-8000}
FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT:-8001}
FD_METRICS_PORT=${FD_METRICS_PORT:-8002}
FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT:-8003}


if [ -z "$MODEL_PATH" ]; then
    echo "❌ 用法: $0 <模型路径>"
    exit 1
fi

if [ ! -d "$MODEL_PATH" ]; then
    echo "❌ 错误:模型目录不存在: $MODEL_PATH"
    exit 1
fi

echo "使用模型: $MODEL_PATH"

# Clean up old logs
rm -rf log/*
mkdir -p log

# Environment variables
export CUDA_VISIBLE_DEVICES=0,1
export INFERENCE_MSG_QUEUE_ID=${FD_INFERENCE_MSG_QUEUE_ID:-7679}
export ENABLE_V1_KVCACHE_SCHEDULER=1

python -m fastdeploy.entrypoints.openai.api_server \
    --tensor-parallel-size 2 \
    --port ${FD_API_PORT} \
    --engine-worker-queue-port ${FD_ENGINE_QUEUE_PORT} \
    --metrics-port ${FD_METRICS_PORT} \
    --cache-queue-port ${FD_CACHE_QUEUE_PORT} \
    --quantization wint8 \
    --max-model-len 32768 \
    --max-num-seqs 256 \
    --gpu-memory-utilization 0.9 \
    --model "$MODEL_PATH" \
    --load-strategy ipc_snapshot \
    --dynamic-load-weight &

success=0

for i in $(seq 1 300); do
    if (echo > /dev/tcp/127.0.0.1/$FD_API_PORT) >/dev/null 2>&1; then
        echo "API server is up on port $FD_API_PORT on iteration $i"
        success=1
        break
    fi
    sleep 1
done
if [ $success -eq 0 ]; then
    echo "超时: API 服务在 300 秒内未启动 (端口 $FD_API_PORT)"
fi
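In CI this script is invoked from the stable-test container as bash launch_model.sh /MODELDATA, so the served model resolves to /MODELDATA/TP2 and the ports fall back to 8000-8003 unless the FD_* variables are exported first. A hedged local usage sketch, with the model directory as a placeholder:

    # Launch against a local model cache; /path/to/ModelData/TP2 must exist
    export FD_API_PORT=8188 FD_ENGINE_QUEUE_PORT=8158 FD_METRICS_PORT=8178 FD_CACHE_QUEUE_PORT=8138   # optional overrides
    bash launch_model.sh /path/to/ModelData
    # The script probes /dev/tcp/127.0.0.1/$FD_API_PORT for up to 300 seconds before reporting a timeout.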
tests/ce/stable_cases/run.sh (new file, 160 lines)

@@ -0,0 +1,160 @@
#!/bin/bash

# ================== Configuration Parameters ==================
FD_API_PORT=${FD_API_PORT:-8000}
FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT:-8001}
FD_METRICS_PORT=${FD_METRICS_PORT:-8002}
FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT:-8003}

HOST="0.0.0.0"
PORT="${FD_API_PORT}"  # must match the URL port used by the launch script
BASE_URL="http://$HOST:$PORT"

TOTAL_ROUNDS=30
CHAT_REQUESTS_PER_ROUND=5
export CUDA_VISIBLE_DEVICES=0,1
MAX_MEMORY_MB=10240  # 10GB

# ====================================================
# assert_eq actual expected message
assert_eq() {
    local actual="$1"
    local expected="$2"
    local msg="$3"
    if [ "$actual" != "$expected" ]; then
        echo "Assertion failed: $msg" >&2
        exit 1
    fi
}

# assert_true condition message
assert_true() {
    local condition="$1"
    local msg="$2"
    if [ "$condition" != "1" ] && [ "$condition" != "true" ]; then
        echo "Assertion failed: $msg" >&2
        exit 1
    fi
}

# assert_success exit_code message
assert_success() {
    local code="$1"
    local msg="$2"
    if [ "$code" -ne 0 ]; then
        echo "Assertion failed: $msg" >&2
        exit 1
    fi
}

# curl_get_status(url, options...) → returns via global variables http_code and response_body
curl_get_status() {
    local result
    result=$(curl -s -w "%{http_code}" "$@")
    http_code="${result: -3}"
    response_body="${result%???}"
}

# ====================================================
# Get visible GPU IDs from CUDA_VISIBLE_DEVICES
# ====================================================

get_visible_gpu_ids() {
    local ids=()
    IFS=',' read -ra ADDR <<< "$CUDA_VISIBLE_DEVICES"
    for i in "${ADDR[@]}"; do
        if [[ "$i" =~ ^[0-9]+$ ]]; then
            ids+=("$i")
        fi
    done
    echo "${ids[@]}"
}

# ====================================================
# Check GPU memory usage (must not exceed MAX_MEMORY_MB)
# ====================================================

check_gpu_memory() {
    local gpu_ids
    gpu_ids=($(get_visible_gpu_ids))

    if [ ${#gpu_ids[@]} -eq 0 ]; then
        echo "Assertion failed: No valid GPU IDs in CUDA_VISIBLE_DEVICES='$CUDA_VISIBLE_DEVICES'" >&2
        exit 1
    fi

    for gpu_id in "${gpu_ids[@]}"; do
        local memory_used
        memory_used=$(nvidia-smi -i "$gpu_id" --query-gpu=memory.used --format=csv,noheader,nounits 2>/dev/null) || \
            assert_success $? "Failed to query GPU $gpu_id memory usage"

        if ! [[ "$memory_used" =~ ^[0-9]+ ]]; then
            echo "Assertion failed: Invalid memory value for GPU $gpu_id: $memory_used" >&2
            exit 1
        fi

        assert_true "$(( memory_used <= MAX_MEMORY_MB ))" \
            "GPU $gpu_id memory $memory_used MB > $MAX_MEMORY_MB MB"
    done
}

# ====================================================

for round in $(seq 1 $TOTAL_ROUNDS); do
    echo "=== Round $round / $TOTAL_ROUNDS ==="

    # Step 1: Clear loaded weights
    echo "[Step 1] Clearing load weight..."
    curl_get_status -i "$BASE_URL/clear_load_weight"
    assert_eq "$http_code" "200" "/clear_load_weight failed with HTTP $http_code"

    # Step 2: Check GPU memory usage
    echo "[Step 2] Checking GPU memory..."
    check_gpu_memory

    # Step 3: Update model weights
    echo "[Step 3] Updating model weight..."
    curl_get_status -i "$BASE_URL/update_model_weight"
    assert_eq "$http_code" "200" "/update_model_weight failed with HTTP $http_code"

    # Step 4: Send chat completion requests
    echo "[Step 4] Sending $CHAT_REQUESTS_PER_ROUND chat completions..."
    for i in $(seq 1 $CHAT_REQUESTS_PER_ROUND); do
        echo "  Request $i / $CHAT_REQUESTS_PER_ROUND"
        # Send request and capture response
        response=$(curl -s -X POST "$BASE_URL/v1/chat/completions" \
            -H "Content-Type: application/json" \
            -d '{"messages": [{"role": "user", "content": "Hello!"}]}')

        # Extract the 'content' field from the response
        content=$(echo "$response" | \
            grep -o '"content":"[^"]*"' | \
            head -1 | \
            sed 's/^"content":"//' | \
            sed 's/"$//')

        if [ -z "$content" ]; then
            # Fallback: try extracting content using sed more robustly
            content=$(echo "$response" | \
                sed -n 's/.*"content":"\([^"]*\)".*/\1/p' | \
                head -1)
        fi

        # Check if content is empty or null
        if [ -z "$content" ] || [ "$content" = "null" ]; then
            echo "Failed: Empty or null 'content' in response" >&2
            echo "Raw response:" >&2
            echo "$response" >&2
            exit 1
        fi

        echo "Received non-empty response"
        echo -e "\n---\n"
    done

    echo "Round $round completed."
    echo "==================================\n"
done

echo "All $TOTAL_ROUNDS rounds completed successfully."
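The curl_get_status helper in run.sh leans on curl -s -w "%{http_code}" appending the three-digit status code to the captured output, then splits it apart again with bash substring operations: the last three characters become http_code and the rest becomes response_body. A small standalone sketch of that convention, with a placeholder endpoint:

    #!/bin/bash
    curl_get_status() {
        local result
        result=$(curl -s -w "%{http_code}" "$@")
        http_code="${result: -3}"        # last 3 characters: the HTTP status
        response_body="${result%???}"    # everything before them: the body
    }

    curl_get_status "http://localhost:8188/v1/models"   # placeholder endpoint
    echo "status=${http_code}"
    echo "body=${response_body}"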