Merge pull request #2624 from XieYunshen/develop

Add scripts and GitHub Actions workflows for CI
This commit is contained in:
YUNSHEN XIE
2025-06-16 11:38:21 +08:00
committed by GitHub
5 changed files with 781 additions and 0 deletions

.github/workflows/ci.yml (new file)

@@ -0,0 +1,81 @@
name: CI

on:
  pull_request:
    branches: [ develop ]
  workflow_dispatch:

concurrency:
  group: ${{ github.event.pull_request.number }}
  cancel-in-progress: true

jobs:
  build:
    runs-on: [self-hosted, GPU-L20-4Card]
    steps:
      - name: Print current runner name
        run: |
          echo "Current runner name: ${{ runner.name }}"

      # actions/checkout cannot be used because the system's git version is lower than 2.23.
      # - name: Checkout code
      #   uses: actions/checkout@v4

      - name: Code Checkout
        env:
          docker_image: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:fastdeploy-ciuse-cuda126
        run: |
          REPO="https://github.com/${{ github.repository }}.git"
          FULL_REPO="${{ github.repository }}"
          REPO_NAME="${FULL_REPO##*/}"
          # Clean the repository directory before starting
          docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
            -e "REPO_NAME=${REPO_NAME}" \
            ${docker_image} /bin/bash -c '
            if [ -d ${REPO_NAME} ]; then
              echo "Directory ${REPO_NAME} exists, removing it..."
              rm -rf ${REPO_NAME}
            fi
            '
          git clone ${REPO} ${REPO_NAME}
          cd FastDeploy
          if [ "${{ github.event_name }}" = "pull_request" ]; then
            git fetch origin pull/${{ github.event.pull_request.number }}/head:pr/${{ github.event.pull_request.number }}
            git merge pr/${{ github.event.pull_request.number }}
            git log -n 3 --oneline
          else
            git checkout ${{ github.sha }}
            git log -n 3 --oneline
          fi

      - name: Run CI unittest
        env:
          docker_image: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:fastdeploy-ciuse-cuda126
        run: |
          runner_name="${{ runner.name }}"
          last_char="${runner_name: -1}"
          if [[ "$last_char" =~ [0-3] ]]; then
            gpu_id="$last_char"
          else
            gpu_id="0"
          fi
          FD_API_PORT=$((9180 + gpu_id * 100))
          FD_ENGINE_QUEUE_PORT=$((9150 + gpu_id * 100))
          FD_METRICS_PORT=$((9170 + gpu_id * 100))
          PARENT_DIR=$(dirname "$WORKSPACE")
          echo "PARENT_DIR:$PARENT_DIR"
          docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
            -v "/ssd4/GithubActions/gitconfig:/etc/gitconfig:ro" \
            -v "/ssd4/GithubActions/ModelData:/ModelData:ro" \
            -v "/ssd4/GithubActions/CacheDir:/root/.cache" \
            -v "/ssd4/GithubActions/ConfigDir:/root/.config" \
            -e "MODEL_PATH=/ModelData" \
            -e "FD_API_PORT=${FD_API_PORT}" \
            -e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
            -e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
            --gpus device=${gpu_id} ${docker_image} /bin/bash -c "
              git config --global --add safe.directory /workspace/FastDeploy
              cd FastDeploy
              bash scripts/run_ci.sh
            "

scripts/run_ci.sh (new file)

@@ -0,0 +1,37 @@
#!/bin/bash
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
echo "$DIR"

python -m pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
python -m pip install -r requirements.txt
python -m pip install jsonschema aistudio_sdk==0.2.6
bash build.sh || exit 1

failed_files=()
run_path="$DIR/../test/ci_use"
pushd "$run_path" || exit 1  # exit if the directory does not exist

for file in test_*; do
    if [ -f "$file" ]; then
        abs_path=$(realpath "$file")
        echo "Running pytest on $abs_path"
        if ! python -m pytest -sv "$abs_path"; then
            echo "Test failed: $file"
            failed_files+=("$file")
        fi
    fi
done
popd

if [ ${#failed_files[@]} -gt 0 ]; then
    echo "The following tests failed:"
    for f in "${failed_files[@]}"; do
        echo "$f"
    done
    exit 1
else
    echo "All tests passed!"
    exit 0
fi
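The failure handling here is simply: run pytest once per test_* file, remember which files failed, and exit nonzero if any did. A rough Python equivalent of that loop, under the same path assumptions the script makes (it lives in scripts/ next to run_ci.sh); a sketch, not a drop-in replacement:

# Sketch of run_ci.sh's per-file loop, assuming this file sits in scripts/.
import pathlib
import subprocess
import sys

run_path = pathlib.Path(__file__).resolve().parent / ".." / "test" / "ci_use"
failed = []
for test_file in sorted(run_path.glob("test_*")):
    if test_file.is_file():
        print(f"Running pytest on {test_file}")
        if subprocess.run([sys.executable, "-m", "pytest", "-sv", str(test_file)]).returncode != 0:
            failed.append(test_file.name)

if failed:
    print("The following tests failed:", *failed, sep="\n")
    sys.exit(1)
print("All tests passed!")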

(new file)

@@ -0,0 +1,167 @@
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
import traceback
from fastdeploy import LLM, SamplingParams
import os
import subprocess
import signal

FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8313))


def format_chat_prompt(messages):
    """
    Format multi-turn conversation into prompt string, suitable for chat models.
    Uses Qwen2 style with <|im_start|> / <|im_end|> tokens.
    """
    prompt = ""
    for msg in messages:
        role, content = msg["role"], msg["content"]
        if role == "user":
            prompt += "<|im_start|>user\n{content}<|im_end|>\n".format(content=content)
        elif role == "assistant":
            prompt += "<|im_start|>assistant\n{content}<|im_end|>\n".format(content=content)
    prompt += "<|im_start|>assistant\n"
    return prompt


@pytest.fixture(scope="module")
def model_path():
    """
    Get model path from environment variable MODEL_PATH,
    default to "./Qwen2-7B-Instruct" if not set.
    """
    base_path = os.getenv("MODEL_PATH")
    if base_path:
        return os.path.join(base_path, "Qwen2-7B-Instruct")
    else:
        return "./Qwen2-7B-Instruct"


@pytest.fixture(scope="module")
def llm(model_path):
    """
    Fixture to initialize the LLM model with a given model path
    """
    try:
        output = subprocess.check_output(f"lsof -i:{FD_ENGINE_QUEUE_PORT} -t", shell=True).decode().strip()
        for pid in output.splitlines():
            os.kill(int(pid), signal.SIGKILL)
            print(f"Killed process on port {FD_ENGINE_QUEUE_PORT}, pid={pid}")
    except subprocess.CalledProcessError:
        pass

    try:
        llm = LLM(
            model=model_path,
            tensor_parallel_size=1,
            engine_worker_queue_port=FD_ENGINE_QUEUE_PORT,
            max_model_len=4096
        )
        print("Model loaded successfully from {}.".format(model_path))
        yield llm
    except Exception:
        print("Failed to load model from {}.".format(model_path))
        traceback.print_exc()
        pytest.fail("Failed to initialize LLM model from {}".format(model_path))


def test_generate_prompts(llm):
    """
    Test basic prompt generation
    """
    # Only one prompt enabled for testing currently
    prompts = [
        "请介绍一下中国的四大发明。",
        # "太阳和地球之间的距离是多少?",
        # "写一首关于春天的古风诗。",
    ]

    sampling_params = SamplingParams(
        temperature=0.8,
        top_p=0.95,
    )

    try:
        outputs = llm.generate(prompts, sampling_params)

        # Verify basic properties of the outputs
        assert len(outputs) == len(prompts), "Number of outputs should match number of prompts"

        for i, output in enumerate(outputs):
            assert output.prompt == prompts[i], "Prompt mismatch for case {}".format(i + 1)
            assert isinstance(output.outputs.text, str), "Output text should be string for case {}".format(i + 1)
            assert len(output.outputs.text) > 0, "Generated text should not be empty for case {}".format(i + 1)
            assert isinstance(output.finished, bool), "'finished' should be boolean for case {}".format(i + 1)
            assert output.metrics.model_execute_time > 0, "Execution time should be positive for case {}".format(i + 1)
            print("=== Prompt generation Case {} Passed ===".format(i + 1))
    except Exception:
        print("Failed during prompt generation.")
        traceback.print_exc()
        pytest.fail("Prompt generation test failed")


def test_chat_completion(llm):
    """
    Test chat completion with multiple turns
    """
    chat_cases = [
        [
            {"role": "user", "content": "你好,请介绍一下你自己。"},
        ],
        [
            {"role": "user", "content": "你知道地球到月球的距离是多少吗?"},
            {"role": "assistant", "content": "大约是38万公里左右。"},
            {"role": "user", "content": "那太阳到地球的距离是多少?"},
        ],
        [
            {"role": "user", "content": "请给我起一个中文名。"},
            {"role": "assistant", "content": "好的,你可以叫“星辰”。"},
            {"role": "user", "content": "再起一个。"},
            {"role": "assistant", "content": "那就叫“大海”吧。"},
            {"role": "user", "content": "再来三个。"},
        ],
    ]

    sampling_params = SamplingParams(
        temperature=0.8,
        top_p=0.95,
    )

    for i, case in enumerate(chat_cases):
        prompt = format_chat_prompt(case)
        try:
            outputs = llm.generate(prompt, sampling_params)

            # Verify chat completion properties
            assert len(outputs) == 1, "Should return one output per prompt"
            assert isinstance(outputs[0].outputs.text, str), "Output text should be string"
            assert len(outputs[0].outputs.text) > 0, "Generated text should not be empty"
            assert outputs[0].metrics.model_execute_time > 0, "Execution time should be positive"
            print("=== Chat Case {} Passed ===".format(i + 1))
        except Exception:
            print("[ERROR] Chat Case {} failed.".format(i + 1))
            traceback.print_exc()
            pytest.fail("Chat case {} failed".format(i + 1))


if __name__ == "__main__":
    """
    Main entry point for the test script.
    """
    pytest.main(["-sv", __file__])
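To make the prompt format concrete: for the second chat case above, format_chat_prompt emits one <|im_start|>…<|im_end|> block per turn and leaves a trailing assistant header open for the model to complete. A standalone rendering of that output (the helper is re-implemented inline so the snippet runs on its own):

# Standalone illustration of the Qwen2-style prompt built by format_chat_prompt above.
messages = [
    {"role": "user", "content": "你知道地球到月球的距离是多少吗?"},
    {"role": "assistant", "content": "大约是38万公里左右。"},
    {"role": "user", "content": "那太阳到地球的距离是多少?"},
]
prompt = ""
for msg in messages:
    prompt += "<|im_start|>{}\n{}<|im_end|>\n".format(msg["role"], msg["content"])
prompt += "<|im_start|>assistant\n"
print(prompt)
# Three delimited turns, followed by an unterminated "<|im_start|>assistant\n"
# header that the model is expected to continue.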

(new file)

@@ -0,0 +1,491 @@
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
import requests
import time
import json
from jsonschema import validate
import concurrent.futures
import numpy as np
import subprocess
import socket
import os
import signal
import sys
# Read ports from environment variables; use default values if not set
FD_API_PORT = int(os.getenv("FD_API_PORT", 8189))
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8013))
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8333))
# List of ports to clean before and after tests
PORTS_TO_CLEAN = [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT]
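# Note: in the CI workflow these three variables are exported per GPU card
# (card N uses 9180/9150/9170 + N * 100, e.g. card 1 -> 9280/9250/9270);
# the literals above are only local fallbacks when the env vars are unset.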
def is_port_open(host: str, port: int, timeout=1.0):
"""
Check if a TCP port is open on the given host.
Returns True if connection succeeds, False otherwise.
"""
try:
with socket.create_connection((host, port), timeout):
return True
except Exception:
return False
def kill_process_on_port(port: int):
"""
Kill processes that are listening on the given port.
Uses `lsof` to find process ids and sends SIGKILL.
"""
try:
output = subprocess.check_output(f"lsof -i:{port} -t", shell=True).decode().strip()
for pid in output.splitlines():
os.kill(int(pid), signal.SIGKILL)
print(f"Killed process on port {port}, pid={pid}")
except subprocess.CalledProcessError:
pass
def clean_ports():
"""
Kill all processes occupying the ports listed in PORTS_TO_CLEAN.
"""
for port in PORTS_TO_CLEAN:
kill_process_on_port(port)
@pytest.fixture(scope="session", autouse=True)
def setup_and_run_server():
"""
Pytest fixture that runs once per test session:
- Cleans ports before tests
- Starts the API server as a subprocess
- Waits for server port to open (up to 120 seconds)
- Tears down server after all tests finish
"""
print("Pre-test port cleanup...")
clean_ports()
base_path = os.getenv("MODEL_PATH")
if base_path:
model_path=os.path.join(base_path, "Qwen2-7B-Instruct")
else:
model_path="./Qwen2-7B-Instruct"
log_path = "api_server.log"
cmd = [
sys.executable, "-m", "fastdeploy.entrypoints.openai.api_server",
"--model", model_path,
"--port", str(FD_API_PORT),
"--tensor-parallel-size", "1",
"--engine-worker-queue-port", str(FD_ENGINE_QUEUE_PORT),
"--metrics-port", str(FD_METRICS_PORT)
]
with open(log_path, "w") as logfile:
process = subprocess.Popen(cmd, stdout=logfile, stderr=subprocess.STDOUT)
# Wait up to 120 seconds for API server port to become available
for _ in range(120):
if is_port_open("127.0.0.1", FD_API_PORT):
print(f"API server is up on port {FD_API_PORT}")
break
time.sleep(1)
else:
process.terminate()
raise RuntimeError(f"API server did not start on port {FD_API_PORT}")
yield
print("Post-test server cleanup...")
try:
os.kill(process.pid, signal.SIGTERM)
print("API server terminated")
except Exception as e:
print(f"Failed to kill server: {e}")
clean_ports()
@pytest.fixture(scope="session")
def api_url(request):
"""
Returns the API endpoint URL for chat completions.
"""
return f"http://0.0.0.0:{FD_API_PORT}" + "/v1/chat/completions"
@pytest.fixture(scope="session")
def metrics_url(request):
"""
Returns the metrics endpoint URL.
"""
return f"http://0.0.0.0:{FD_METRICS_PORT}/metrics"
@pytest.fixture
def headers():
"""
Returns common HTTP request headers.
"""
return {"Content-Type": "application/json"}
@pytest.fixture
def consistent_payload():
"""
Returns a fixed payload for consistency testing,
including a fixed random seed and temperature.
"""
return {
"messages": [{"role": "user", "content": "用一句话介绍 PaddlePaddle"}],
"temperature": 0.9,
"top_p": 0, # fix top_p to reduce randomness
"seed": 13 # fixed random seed
}
# ==========================
# JSON Schema for validating chat API responses
# ==========================
chat_response_schema = {
"type": "object",
"properties": {
"id": {"type": "string"},
"object": {"type": "string"},
"created": {"type": "number"},
"model": {"type": "string"},
"choices": {
"type": "array",
"items": {
"type": "object",
"properties": {
"message": {
"type": "object",
"properties": {
"role": {"type": "string"},
"content": {"type": "string"},
},
"required": ["role", "content"]
},
"index": {"type": "number"},
"finish_reason": {"type": "string"}
},
"required": ["message", "index", "finish_reason"]
}
}
},
"required": ["id", "object", "created", "model", "choices"]
}
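# Illustrative example (not produced by the server) of a minimal body that
# satisfies chat_response_schema:
#   {"id": "cmpl-0", "object": "chat.completion", "created": 0, "model": "Qwen2-7B-Instruct",
#    "choices": [{"index": 0, "finish_reason": "stop",
#                 "message": {"role": "assistant", "content": "你好!"}}]}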
# ==========================
# Helper function to calculate difference rate between two texts
# ==========================
def calculate_diff_rate(text1, text2):
"""
Calculate the difference rate between two strings
based on the normalized Levenshtein edit distance.
Returns a float in [0,1], where 0 means identical.
"""
if text1 == text2:
return 0.0
len1, len2 = len(text1), len(text2)
dp = [[0] * (len2 + 1) for _ in range(len1 + 1)]
for i in range(len1 + 1):
for j in range(len2 + 1):
if i == 0 or j == 0:
dp[i][j] = i + j
elif text1[i - 1] == text2[j - 1]:
dp[i][j] = dp[i - 1][j - 1]
else:
dp[i][j] = 1 + min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1])
edit_distance = dp[len1][len2]
max_len = max(len1, len2)
return edit_distance / max_len if max_len > 0 else 0.0
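# Worked example: calculate_diff_rate("abc", "abd") has edit distance 1 over a
# maximum length of 3, so it returns 1 / 3 ≈ 0.33; identical strings return 0.0.
# The consistency test below treats any rate under 0.05 as "the same answer".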
# ==========================
# Valid prompt test cases for parameterized testing
# ==========================
valid_prompts = [
[{"role": "user", "content": "你好"}],
[{"role": "user", "content": "用一句话介绍 FastDeploy"}],
[{"role": "user", "content": "今天天气怎么样?"}],
]
@pytest.mark.parametrize("messages", valid_prompts)
def test_valid_chat(messages, api_url, headers):
"""
Test valid chat requests.
"""
start = time.time()
resp = requests.post(api_url, headers=headers, json={"messages": messages})
duration = time.time() - start
assert resp.status_code == 200
validate(instance=resp.json(), schema=chat_response_schema)
assert duration < 5, "Response too slow: {:.2f}s".format(duration)
# ==========================
# Consistency test for repeated runs with fixed payload
# ==========================
def test_consistency_between_runs(api_url, headers, consistent_payload):
"""
Test that two runs with the same fixed input produce similar outputs.
"""
# First request
resp1 = requests.post(api_url, headers=headers, json=consistent_payload)
assert resp1.status_code == 200
result1 = resp1.json()
content1 = result1["choices"][0]["message"]["content"]
# Second request
resp2 = requests.post(api_url, headers=headers, json=consistent_payload)
assert resp2.status_code == 200
result2 = resp2.json()
content2 = result2["choices"][0]["message"]["content"]
# Calculate difference rate
diff_rate = calculate_diff_rate(content1, content2)
# Verify that the difference rate is below the threshold
assert diff_rate < 0.05, f"Output difference too large ({diff_rate:.4%})"
# ==========================
# Invalid prompt tests
# ==========================
invalid_prompts = [
[], # Empty array
[{}], # Empty object
[{"role": "user"}], # Missing content
[{"content": "hello"}], # Missing role
]
@pytest.mark.parametrize("messages", invalid_prompts)
def test_invalid_chat(messages, api_url, headers):
"""
Test invalid chat inputs
"""
resp = requests.post(api_url, headers=headers, json={"messages": messages})
assert resp.status_code >= 400, "Invalid request should return an error status code"
# ==========================
# Test for input exceeding context length
# ==========================
def test_exceed_context_length(api_url, headers):
"""
Test case for inputs that exceed the model's maximum context length.
"""
# Construct an overly long message
long_content = "你好," * 20000
messages = [
{"role": "user", "content": long_content}
]
resp = requests.post(api_url, headers=headers, json={"messages": messages})
# Check if the response indicates a token limit error or server error (500)
try:
response_json = resp.json()
print("Response JSON content:", json.dumps(response_json, ensure_ascii=False)[:1000])
except Exception:
response_json = {}
# Check status code and response content
assert resp.status_code != 200 or "token" in json.dumps(response_json).lower(), \
"Expected token limit error or similar, but got a normal response: {}".format(response_json)
# ==========================
# ChatTemplate Valid Structure Test
# ==========================
chat_template_cases = [
{"template": "chatml", "messages": [{"role": "user", "content": "你是谁?"}]},
{"template": "llama", "messages": [{"role": "user", "content": "请自我介绍"}]},
{"template": "alpaca", "messages": [{"role": "user", "content": "介绍一下 FastDeploy"}]},
]
@pytest.mark.parametrize("payload", chat_template_cases)
def test_chattemplate_valid(payload, api_url, headers):
"""
Test valid ChatTemplate structures.
"""
resp = requests.post(api_url, headers=headers, json=payload)
assert resp.status_code == 200, "Request failed for template={}".format(payload['template'])
validate(instance=resp.json(), schema=chat_response_schema)
# ==========================
# ChatTemplate Invalid Structure Test
# ==========================
invalid_template_cases = [
{"template": "nonexist", "messages": [{"role": "user", "content": "你好"}]},
{"template": 123, "messages": [{"role": "user", "content": "你好"}]},
{"template": "", "messages": [{"role": "user", "content": "你好"}]},
]
@pytest.mark.parametrize("payload", invalid_template_cases)
@pytest.mark.skip(reason="Validation not yet supported; assertion temporarily disabled")
def test_chattemplate_invalid(payload, api_url, headers):
"""
Test invalid ChatTemplate structures.
"""
resp = requests.post(api_url, headers=headers, json=payload)
assert resp.status_code >= 400, "Invalid template should return an error status code"
# ==========================
# System Role Test
# ==========================
def test_system_role(api_url, headers):
"""
Test whether the system role can correctly guide model behavior.
"""
messages = [
{"role": "system", "content": "You are an English translation assistant."},
{"role": "user", "content": "Please translate: 你好"},
]
resp = requests.post(api_url, headers=headers, json={"messages": messages})
assert resp.status_code == 200
validate(instance=resp.json(), schema=chat_response_schema)
result = resp.json()["choices"][0]["message"]["content"]
assert "hello" in result.lower()
# ==========================
# Multi-turn Conversation Test
# ==========================
def test_multi_turn_conversation(api_url, headers):
"""
Test whether multi-turn conversation context is effective.
"""
messages = [
{"role": "user", "content": "你是谁?"},
{"role": "assistant", "content": "我是AI助手"},
{"role": "user", "content": "你能做什么?"}
]
resp = requests.post(api_url, headers=headers, json={"messages": messages})
assert resp.status_code == 200
validate(instance=resp.json(), schema=chat_response_schema)
# ==========================
# Simple Performance Test
# ==========================
def test_simple_perf(api_url, headers):
"""
Send 10 requests to check response stability.
"""
prompts = [{"role": "user", "content": "Introduce FastDeploy."}]
for _ in range(10):
resp = requests.post(api_url, headers=headers, json={"messages": prompts})
assert resp.status_code == 200
# ==========================
# Concurrent Performance Test
# ==========================
@pytest.mark.skip(reason="concurrent is unavailable")
def test_concurrent_perf(api_url, headers):
"""
Send concurrent requests to test stability and response time.
"""
prompts = [{"role": "user", "content": "Introduce FastDeploy."}]
def send_request():
"""
Send a single request
"""
resp = requests.post(api_url, headers=headers, json={"messages": prompts})
assert resp.status_code == 200
return resp.elapsed.total_seconds()
with concurrent.futures.ThreadPoolExecutor(max_workers=33) as executor:
futures = [executor.submit(send_request) for _ in range(33)]
durations = [f.result() for f in futures]
print("Response time for each request:", durations)
# ==========================
# Metrics Endpoint Test
# ==========================
def test_metrics_endpoint(metrics_url):
"""
Test the metrics monitoring endpoint.
"""
resp = requests.get(metrics_url, timeout=5)
assert resp.status_code == 200, "Unexpected status code: {}".format(resp.status_code)
assert "text/plain" in resp.headers["Content-Type"], "Content-Type is not text/plain"
# Parse Prometheus metrics data
metrics_data = resp.text
# print(metrics_data)
lines = metrics_data.split("\n")
metric_lines = [line for line in lines if not line.startswith("#") and line.strip() != ""]
assert len(metric_lines) > 0, "No valid Prometheus metrics found"
# Assert specific metric values
num_requests_running_found = False
num_requests_waiting_found = False
time_to_first_token_seconds_sum_found = False
time_per_output_token_seconds_sum_found = False
e2e_request_latency_seconds_sum_found = False
request_inference_time_seconds_sum_found = False
request_queue_time_seconds_sum_found = False
for line in metric_lines:
if line.startswith("fastdeploy:num_requests_running"):
_, value = line.rsplit(" ", 1)
assert float(value) >= 0, "Invalid value for num_requests_running"
num_requests_running_found = True
elif line.startswith("fastdeploy:num_requests_waiting"):
_, value = line.rsplit(" ", 1)
assert float(value) >= 0, "Invalid value for num_requests_waiting"
num_requests_waiting_found = True
elif line.startswith("fastdeploy:time_to_first_token_seconds_sum"):
_, value = line.rsplit(" ", 1)
assert float(value) >= 0, "Invalid value for time_to_first_token_seconds_sum"
time_to_first_token_seconds_sum_found = True
elif line.startswith("fastdeploy:time_per_output_token_seconds_sum"):
_, value = line.rsplit(" ", 1)
assert float(value) >= 0, "Invalid value for time_per_output_token_seconds_sum"
time_per_output_token_seconds_sum_found = True
elif line.startswith("fastdeploy:e2e_request_latency_seconds_sum"):
_, value = line.rsplit(" ", 1)
assert float(value) >= 0, "Invalid value for e2e_request_latency_seconds_sum"
e2e_request_latency_seconds_sum_found = True
elif line.startswith("fastdeploy:request_inference_time_seconds_sum"):
_, value = line.rsplit(" ", 1)
assert float(value) >= 0, "Invalid value for request_inference_time_seconds_sum"
request_inference_time_seconds_sum_found = True
elif line.startswith("fastdeploy:request_queue_time_seconds_sum"):
_, value = line.rsplit(" ", 1)
assert float(value) >= 0, "Invalid value for request_queue_time_seconds_sum"
request_queue_time_seconds_sum_found = True
assert num_requests_running_found, "Missing metric: fastdeploy:num_requests_running"
assert num_requests_waiting_found, "Missing metric: fastdeploy:num_requests_waiting"
assert time_to_first_token_seconds_sum_found, "Missing metric: fastdeploy:time_to_first_token_seconds_sum"
assert time_per_output_token_seconds_sum_found, "Missing metric: fastdeploy:time_per_output_token_seconds_sum"
assert e2e_request_latency_seconds_sum_found, "Missing metric: fastdeploy:e2e_request_latency_seconds_sum"
assert request_inference_time_seconds_sum_found, "Missing metric: fastdeploy:request_inference_time_seconds_sum"
assert request_queue_time_seconds_sum_found, "Missing metric: fastdeploy:request_queue_time_seconds_sum"
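The metrics assertions above lean on the Prometheus text exposition format: lines starting with '#' are comments, and each sample line ends with its numeric value, which is why the test extracts it with rsplit(" ", 1). A minimal sketch of that parsing, using made-up sample lines rather than real /metrics output:

# Minimal illustration of the value parsing used by test_metrics_endpoint above.
sample = (
    "# HELP fastdeploy:num_requests_running Number of running requests\n"
    "fastdeploy:num_requests_running 0.0\n"
    "fastdeploy:time_to_first_token_seconds_sum 1.25\n"
)
for line in sample.splitlines():
    if not line.strip() or line.startswith("#"):
        continue  # skip comments and blank lines, as the test does
    name, value = line.rsplit(" ", 1)  # the value is the last space-separated field
    print(name, float(value))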

(new file)

@@ -0,0 +1,5 @@
FROM ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:cuda126-dev
RUN apt update && apt install -y lsof
RUN wget https://raw.githubusercontent.com/PaddlePaddle/FastDeploy/refs/heads/develop/requirements.txt
RUN python -m pip install -r requirements.txt -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple && python -m pip install pytest
RUN apt update && apt install -y python3.10-venv
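For local experiments, an image like this can be built from the directory containing the Dockerfile with, for example, `docker build -t fastdeploy-ci-base .` (the tag name is illustrative); the workflow above pulls a prebuilt fastdeploy-ciuse-cuda126 image rather than building one in CI.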