Merge pull request #2624 from XieYunshen/develop

Add scripts and GitHub Actions workflows for CI
This commit is contained in:
YUNSHEN XIE
2025-06-16 11:38:21 +08:00
committed by GitHub
5 changed files with 781 additions and 0 deletions

.github/workflows/ci.yml (new file)

@@ -0,0 +1,81 @@
name: CI

on:
  pull_request:
    branches: [ develop ]
  workflow_dispatch:

concurrency:
  group: ${{ github.event.pull_request.number }}
  cancel-in-progress: true

jobs:
  build:
    runs-on: [self-hosted, GPU-L20-4Card]
    steps:
      - name: Print current runner name
        run: |
          echo "Current runner name: ${{ runner.name }}"

      # actions/checkout cannot be used because the system's git version is lower than 2.23.
      # - name: Checkout code
      #   uses: actions/checkout@v4

      - name: Code Checkout
        env:
          docker_image: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:fastdeploy-ciuse-cuda126
        run: |
          REPO="https://github.com/${{ github.repository }}.git"
          FULL_REPO="${{ github.repository }}"
          REPO_NAME="${FULL_REPO##*/}"
          # Clean the repository directory before starting
          docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
            -e "REPO_NAME=${REPO_NAME}" \
            ${docker_image} /bin/bash -c '
            if [ -d ${REPO_NAME} ]; then
              echo "Directory ${REPO_NAME} exists, removing it..."
              rm -rf ${REPO_NAME}
            fi
            '
          git clone ${REPO} ${REPO_NAME}
          cd FastDeploy
          if [ "${{ github.event_name }}" = "pull_request" ]; then
            git fetch origin pull/${{ github.event.pull_request.number }}/head:pr/${{ github.event.pull_request.number }}
            git merge pr/${{ github.event.pull_request.number }}
            git log -n 3 --oneline
          else
            git checkout ${{ github.sha }}
            git log -n 3 --oneline
          fi

      - name: Run CI unittest
        env:
          docker_image: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:fastdeploy-ciuse-cuda126
        run: |
          runner_name="${{ runner.name }}"
          last_char="${runner_name: -1}"
          if [[ "$last_char" =~ [0-3] ]]; then
            gpu_id="$last_char"
          else
            gpu_id="0"
          fi
          FD_API_PORT=$((9180 + gpu_id * 100))
          FD_ENGINE_QUEUE_PORT=$((9150 + gpu_id * 100))
          FD_METRICS_PORT=$((9170 + gpu_id * 100))
          PARENT_DIR=$(dirname "$WORKSPACE")
          echo "PARENT_DIR:$PARENT_DIR"
          docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
            -v "/ssd4/GithubActions/gitconfig:/etc/gitconfig:ro" \
            -v "/ssd4/GithubActions/ModelData:/ModelData:ro" \
            -v "/ssd4/GithubActions/CacheDir:/root/.cache" \
            -v "/ssd4/GithubActions/ConfigDir:/root/.config" \
            -e "MODEL_PATH=/ModelData" \
            -e "FD_API_PORT=${FD_API_PORT}" \
            -e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
            -e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
            --gpus device=${gpu_id} ${docker_image} /bin/bash -c "
              git config --global --add safe.directory /workspace/FastDeploy
              cd FastDeploy
              bash scripts/run_ci.sh
            "

scripts/run_ci.sh (new file)

@@ -0,0 +1,37 @@
#!/bin/bash
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
echo "$DIR"

python -m pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
python -m pip install -r requirements.txt
python -m pip install jsonschema aistudio_sdk==0.2.6
bash build.sh || exit 1

failed_files=()
run_path="$DIR/../test/ci_use"
pushd "$run_path" || exit 1  # exit if the directory does not exist

for file in test_*; do
    if [ -f "$file" ]; then
        abs_path=$(realpath "$file")
        echo "Running pytest on $abs_path"
        if ! python -m pytest -sv "$abs_path"; then
            echo "Test failed: $file"
            failed_files+=("$file")
        fi
    fi
done
popd

if [ ${#failed_files[@]} -gt 0 ]; then
    echo "The following tests failed:"
    for f in "${failed_files[@]}"; do
        echo "$f"
    done
    exit 1
else
    echo "All tests passed!"
    exit 0
fi
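The failure handling here is simply: run pytest once per test_* file, remember which files failed, and exit nonzero if any did. A rough Python equivalent of that loop, under the same path assumptions the script makes (it lives in scripts/ next to run_ci.sh); a sketch, not a drop-in replacement:

# Sketch of run_ci.sh's per-file loop, assuming this file sits in scripts/.
import pathlib
import subprocess
import sys

run_path = pathlib.Path(__file__).resolve().parent / ".." / "test" / "ci_use"
failed = []
for test_file in sorted(run_path.glob("test_*")):
    if test_file.is_file():
        print(f"Running pytest on {test_file}")
        if subprocess.run([sys.executable, "-m", "pytest", "-sv", str(test_file)]).returncode != 0:
            failed.append(test_file.name)

if failed:
    print("The following tests failed:", *failed, sep="\n")
    sys.exit(1)
print("All tests passed!")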

(new file)

@@ -0,0 +1,167 @@
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
import traceback
from fastdeploy import LLM, SamplingParams
import os
import subprocess
import signal

FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8313))


def format_chat_prompt(messages):
    """
    Format multi-turn conversation into prompt string, suitable for chat models.
    Uses Qwen2 style with <|im_start|> / <|im_end|> tokens.
    """
    prompt = ""
    for msg in messages:
        role, content = msg["role"], msg["content"]
        if role == "user":
            prompt += "<|im_start|>user\n{content}<|im_end|>\n".format(content=content)
        elif role == "assistant":
            prompt += "<|im_start|>assistant\n{content}<|im_end|>\n".format(content=content)
    prompt += "<|im_start|>assistant\n"
    return prompt


@pytest.fixture(scope="module")
def model_path():
    """
    Get model path from environment variable MODEL_PATH,
    default to "./Qwen2-7B-Instruct" if not set.
    """
    base_path = os.getenv("MODEL_PATH")
    if base_path:
        return os.path.join(base_path, "Qwen2-7B-Instruct")
    else:
        return "./Qwen2-7B-Instruct"


@pytest.fixture(scope="module")
def llm(model_path):
    """
    Fixture to initialize the LLM model with a given model path
    """
    try:
        output = subprocess.check_output(f"lsof -i:{FD_ENGINE_QUEUE_PORT} -t", shell=True).decode().strip()
        for pid in output.splitlines():
            os.kill(int(pid), signal.SIGKILL)
            print(f"Killed process on port {FD_ENGINE_QUEUE_PORT}, pid={pid}")
    except subprocess.CalledProcessError:
        pass

    try:
        llm = LLM(
            model=model_path,
            tensor_parallel_size=1,
            engine_worker_queue_port=FD_ENGINE_QUEUE_PORT,
            max_model_len=4096
        )
        print("Model loaded successfully from {}.".format(model_path))
        yield llm
    except Exception:
        print("Failed to load model from {}.".format(model_path))
        traceback.print_exc()
        pytest.fail("Failed to initialize LLM model from {}".format(model_path))


def test_generate_prompts(llm):
    """
    Test basic prompt generation
    """
    # Only one prompt enabled for testing currently
    prompts = [
        "请介绍一下中国的四大发明。",
        # "太阳和地球之间的距离是多少?",
        # "写一首关于春天的古风诗。",
    ]

    sampling_params = SamplingParams(
        temperature=0.8,
        top_p=0.95,
    )

    try:
        outputs = llm.generate(prompts, sampling_params)

        # Verify basic properties of the outputs
        assert len(outputs) == len(prompts), "Number of outputs should match number of prompts"

        for i, output in enumerate(outputs):
            assert output.prompt == prompts[i], "Prompt mismatch for case {}".format(i + 1)
            assert isinstance(output.outputs.text, str), "Output text should be string for case {}".format(i + 1)
            assert len(output.outputs.text) > 0, "Generated text should not be empty for case {}".format(i + 1)
            assert isinstance(output.finished, bool), "'finished' should be boolean for case {}".format(i + 1)
            assert output.metrics.model_execute_time > 0, "Execution time should be positive for case {}".format(i + 1)
            print("=== Prompt generation Case {} Passed ===".format(i + 1))
    except Exception:
        print("Failed during prompt generation.")
        traceback.print_exc()
        pytest.fail("Prompt generation test failed")


def test_chat_completion(llm):
    """
    Test chat completion with multiple turns
    """
    chat_cases = [
        [
            {"role": "user", "content": "你好,请介绍一下你自己。"},
        ],
        [
            {"role": "user", "content": "你知道地球到月球的距离是多少吗?"},
            {"role": "assistant", "content": "大约是38万公里左右。"},
            {"role": "user", "content": "那太阳到地球的距离是多少?"},
        ],
        [
            {"role": "user", "content": "请给我起一个中文名。"},
            {"role": "assistant", "content": "好的,你可以叫“星辰”。"},
            {"role": "user", "content": "再起一个。"},
            {"role": "assistant", "content": "那就叫“大海”吧。"},
            {"role": "user", "content": "再来三个。"},
        ],
    ]

    sampling_params = SamplingParams(
        temperature=0.8,
        top_p=0.95,
    )

    for i, case in enumerate(chat_cases):
        prompt = format_chat_prompt(case)
        try:
            outputs = llm.generate(prompt, sampling_params)

            # Verify chat completion properties
            assert len(outputs) == 1, "Should return one output per prompt"
            assert isinstance(outputs[0].outputs.text, str), "Output text should be string"
            assert len(outputs[0].outputs.text) > 0, "Generated text should not be empty"
            assert outputs[0].metrics.model_execute_time > 0, "Execution time should be positive"
            print("=== Chat Case {} Passed ===".format(i + 1))
        except Exception:
            print("[ERROR] Chat Case {} failed.".format(i + 1))
            traceback.print_exc()
            pytest.fail("Chat case {} failed".format(i + 1))


if __name__ == "__main__":
    """
    Main entry point for the test script.
    """
    pytest.main(["-sv", __file__])
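To make the prompt format concrete: for the second chat case above, format_chat_prompt emits one <|im_start|>…<|im_end|> block per turn and leaves a trailing assistant header open for the model to complete. A standalone rendering of that output (the helper is re-implemented inline so the snippet runs on its own):

# Standalone illustration of the Qwen2-style prompt built by format_chat_prompt above.
messages = [
    {"role": "user", "content": "你知道地球到月球的距离是多少吗?"},
    {"role": "assistant", "content": "大约是38万公里左右。"},
    {"role": "user", "content": "那太阳到地球的距离是多少?"},
]
prompt = ""
for msg in messages:
    prompt += "<|im_start|>{}\n{}<|im_end|>\n".format(msg["role"], msg["content"])
prompt += "<|im_start|>assistant\n"
print(prompt)
# Three delimited turns, followed by an unterminated "<|im_start|>assistant\n"
# header that the model is expected to continue.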

(new file)

@@ -0,0 +1,491 @@
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
import requests
import time
import json
from jsonschema import validate
import concurrent.futures
import numpy as np
import subprocess
import socket
import os
import signal
import sys
# Read ports from environment variables; use default values if not set
FD_API_PORT = int(os.getenv("FD_API_PORT", 8189))
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8013))
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8333))
# List of ports to clean before and after tests
PORTS_TO_CLEAN = [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT]
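# Note: in the CI workflow these three variables are exported per GPU card
# (card N uses 9180/9150/9170 + N * 100, e.g. card 1 -> 9280/9250/9270);
# the literals above are only local fallbacks when the env vars are unset.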
def is_port_open(host: str, port: int, timeout=1.0):
"""
Check if a TCP port is open on the given host.
Returns True if connection succeeds, False otherwise.
"""
try:
with socket.create_connection((host, port), timeout):
return True
except Exception:
return False
def kill_process_on_port(port: int):
"""
Kill processes that are listening on the given port.
Uses `lsof` to find process ids and sends SIGKILL.
"""
try:
output = subprocess.check_output(f"lsof -i:{port} -t", shell=True).decode().strip()
for pid in output.splitlines():
os.kill(int(pid), signal.SIGKILL)
print(f"Killed process on port {port}, pid={pid}")
except subprocess.CalledProcessError:
pass
def clean_ports():
"""
Kill all processes occupying the ports listed in PORTS_TO_CLEAN.
"""
for port in PORTS_TO_CLEAN:
kill_process_on_port(port)
@pytest.fixture(scope="session", autouse=True)
def setup_and_run_server():
"""
Pytest fixture that runs once per test session:
- Cleans ports before tests
- Starts the API server as a subprocess
- Waits for server port to open (up to 120 seconds)
- Tears down server after all tests finish
"""
print("Pre-test port cleanup...")
clean_ports()
base_path = os.getenv("MODEL_PATH")
if base_path:
model_path=os.path.join(base_path, "Qwen2-7B-Instruct")
else:
model_path="./Qwen2-7B-Instruct"
log_path = "api_server.log"
cmd = [
sys.executable, "-m", "fastdeploy.entrypoints.openai.api_server",
"--model", model_path,
"--port", str(FD_API_PORT),
"--tensor-parallel-size", "1",
"--engine-worker-queue-port", str(FD_ENGINE_QUEUE_PORT),
"--metrics-port", str(FD_METRICS_PORT)
]
with open(log_path, "w") as logfile:
process = subprocess.Popen(cmd, stdout=logfile, stderr=subprocess.STDOUT)
# Wait up to 120 seconds for API server port to become available
for _ in range(120):
if is_port_open("127.0.0.1", FD_API_PORT):
print(f"API server is up on port {FD_API_PORT}")
break
time.sleep(1)
else:
process.terminate()
raise RuntimeError(f"API server did not start on port {FD_API_PORT}")
yield
print("Post-test server cleanup...")
try:
os.kill(process.pid, signal.SIGTERM)
print("API server terminated")
except Exception as e:
print(f"Failed to kill server: {e}")
clean_ports()
@pytest.fixture(scope="session")
def api_url(request):
"""
Returns the API endpoint URL for chat completions.
"""
return f"http://0.0.0.0:{FD_API_PORT}" + "/v1/chat/completions"
@pytest.fixture(scope="session")
def metrics_url(request):
"""
Returns the metrics endpoint URL.
"""
return f"http://0.0.0.0:{FD_METRICS_PORT}/metrics"
@pytest.fixture
def headers():
"""
Returns common HTTP request headers.
"""
return {"Content-Type": "application/json"}
@pytest.fixture
def consistent_payload():
"""
Returns a fixed payload for consistency testing,
including a fixed random seed and temperature.
"""
return {
"messages": [{"role": "user", "content": "用一句话介绍 PaddlePaddle"}],
"temperature": 0.9,
"top_p": 0, # fix top_p to reduce randomness
"seed": 13 # fixed random seed
}
# ==========================
# JSON Schema for validating chat API responses
# ==========================
chat_response_schema = {
"type": "object",
"properties": {
"id": {"type": "string"},
"object": {"type": "string"},
"created": {"type": "number"},
"model": {"type": "string"},
"choices": {
"type": "array",
"items": {
"type": "object",
"properties": {
"message": {
"type": "object",
"properties": {
"role": {"type": "string"},
"content": {"type": "string"},
},
"required": ["role", "content"]
},
"index": {"type": "number"},
"finish_reason": {"type": "string"}
},
"required": ["message", "index", "finish_reason"]
}
}
},
"required": ["id", "object", "created", "model", "choices"]
}
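# Illustrative example (not produced by the server) of a minimal body that
# satisfies chat_response_schema:
#   {"id": "cmpl-0", "object": "chat.completion", "created": 0, "model": "Qwen2-7B-Instruct",
#    "choices": [{"index": 0, "finish_reason": "stop",
#                 "message": {"role": "assistant", "content": "你好!"}}]}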
# ==========================
# Helper function to calculate difference rate between two texts
# ==========================
def calculate_diff_rate(text1, text2):
"""
Calculate the difference rate between two strings
based on the normalized Levenshtein edit distance.
Returns a float in [0,1], where 0 means identical.
"""
if text1 == text2:
return 0.0
len1, len2 = len(text1), len(text2)
dp = [[0] * (len2 + 1) for _ in range(len1 + 1)]
for i in range(len1 + 1):
for j in range(len2 + 1):
if i == 0 or j == 0:
dp[i][j] = i + j
elif text1[i - 1] == text2[j - 1]:
dp[i][j] = dp[i - 1][j - 1]
else:
dp[i][j] = 1 + min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1])
edit_distance = dp[len1][len2]
max_len = max(len1, len2)
return edit_distance / max_len if max_len > 0 else 0.0
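# Worked example: calculate_diff_rate("abc", "abd") has edit distance 1 over a
# maximum length of 3, so it returns 1 / 3 ≈ 0.33; identical strings return 0.0.
# The consistency test below treats any rate under 0.05 as "the same answer".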
# ==========================
# Valid prompt test cases for parameterized testing
# ==========================
valid_prompts = [
[{"role": "user", "content": "你好"}],
[{"role": "user", "content": "用一句话介绍 FastDeploy"}],
[{"role": "user", "content": "今天天气怎么样?"}],
]
@pytest.mark.parametrize("messages", valid_prompts)
def test_valid_chat(messages, api_url, headers):
"""
Test valid chat requests.
"""
start = time.time()
resp = requests.post(api_url, headers=headers, json={"messages": messages})
duration = time.time() - start
assert resp.status_code == 200
validate(instance=resp.json(), schema=chat_response_schema)
assert duration < 5, "Response too slow: {:.2f}s".format(duration)
# ==========================
# Consistency test for repeated runs with fixed payload
# ==========================
def test_consistency_between_runs(api_url, headers, consistent_payload):
"""
Test that two runs with the same fixed input produce similar outputs.
"""
# First request
resp1 = requests.post(api_url, headers=headers, json=consistent_payload)
assert resp1.status_code == 200
result1 = resp1.json()
content1 = result1["choices"][0]["message"]["content"]
# Second request
resp2 = requests.post(api_url, headers=headers, json=consistent_payload)
assert resp2.status_code == 200
result2 = resp2.json()
content2 = result2["choices"][0]["message"]["content"]
# Calculate difference rate
diff_rate = calculate_diff_rate(content1, content2)
# Verify that the difference rate is below the threshold
assert diff_rate < 0.05, f"Output difference too large ({diff_rate:.4%})"
# ==========================
# Invalid prompt tests
# ==========================
invalid_prompts = [
[], # Empty array
[{}], # Empty object
[{"role": "user"}], # Missing content
[{"content": "hello"}], # Missing role
]
@pytest.mark.parametrize("messages", invalid_prompts)
def test_invalid_chat(messages, api_url, headers):
"""
Test invalid chat inputs
"""
resp = requests.post(api_url, headers=headers, json={"messages": messages})
assert resp.status_code >= 400, "Invalid request should return an error status code"
# ==========================
# Test for input exceeding context length
# ==========================
def test_exceed_context_length(api_url, headers):
"""
Test case for inputs that exceed the model's maximum context length.
"""
# Construct an overly long message
long_content = "你好," * 20000
messages = [
{"role": "user", "content": long_content}
]
resp = requests.post(api_url, headers=headers, json={"messages": messages})
# Check if the response indicates a token limit error or server error (500)
try:
response_json = resp.json()
print("Response JSON content:", json.dumps(response_json, ensure_ascii=False)[:1000])
except Exception:
response_json = {}
# Check status code and response content
assert resp.status_code != 200 or "token" in json.dumps(response_json).lower(), \
"Expected token limit error or similar, but got a normal response: {}".format(response_json)
# ==========================
# ChatTemplate Valid Structure Test
# ==========================
chat_template_cases = [
{"template": "chatml", "messages": [{"role": "user", "content": "你是谁?"}]},
{"template": "llama", "messages": [{"role": "user", "content": "请自我介绍"}]},
{"template": "alpaca", "messages": [{"role": "user", "content": "介绍一下 FastDeploy"}]},
]
@pytest.mark.parametrize("payload", chat_template_cases)
def test_chattemplate_valid(payload, api_url, headers):
"""
Test valid ChatTemplate structures.
"""
resp = requests.post(api_url, headers=headers, json=payload)
assert resp.status_code == 200, "Request failed for template={}".format(payload['template'])
validate(instance=resp.json(), schema=chat_response_schema)
# ==========================
# ChatTemplate Invalid Structure Test
# ==========================
invalid_template_cases = [
{"template": "nonexist", "messages": [{"role": "user", "content": "你好"}]},
{"template": 123, "messages": [{"role": "user", "content": "你好"}]},
{"template": "", "messages": [{"role": "user", "content": "你好"}]},
]
@pytest.mark.parametrize("payload", invalid_template_cases)
@pytest.mark.skip(reason="Validation not yet supported; assertion temporarily disabled")
def test_chattemplate_invalid(payload, api_url, headers):
"""
Test invalid ChatTemplate structures.
"""
resp = requests.post(api_url, headers=headers, json=payload)
assert resp.status_code >= 400, "Invalid template should return an error status code"
# ==========================
# System Role Test
# ==========================
def test_system_role(api_url, headers):
"""
Test whether the system role can correctly guide model behavior.
"""
messages = [
{"role": "system", "content": "You are an English translation assistant."},
{"role": "user", "content": "Please translate: 你好"},
]
resp = requests.post(api_url, headers=headers, json={"messages": messages})
assert resp.status_code == 200
validate(instance=resp.json(), schema=chat_response_schema)
result = resp.json()["choices"][0]["message"]["content"]
assert "hello" in result.lower()
# ==========================
# Multi-turn Conversation Test
# ==========================
def test_multi_turn_conversation(api_url, headers):
"""
Test whether multi-turn conversation context is effective.
"""
messages = [
{"role": "user", "content": "你是谁?"},
{"role": "assistant", "content": "我是AI助手"},
{"role": "user", "content": "你能做什么?"}
]
resp = requests.post(api_url, headers=headers, json={"messages": messages})
assert resp.status_code == 200
validate(instance=resp.json(), schema=chat_response_schema)
# ==========================
# Simple Performance Test
# ==========================
def test_simple_perf(api_url, headers):
"""
Send 10 requests to check response stability.
"""
prompts = [{"role": "user", "content": "Introduce FastDeploy."}]
for _ in range(10):
resp = requests.post(api_url, headers=headers, json={"messages": prompts})
assert resp.status_code == 200
# ==========================
# Concurrent Performance Test
# ==========================
@pytest.mark.skip(reason="concurrent is unavailable")
def test_concurrent_perf(api_url, headers):
"""
Send concurrent requests to test stability and response time.
"""
prompts = [{"role": "user", "content": "Introduce FastDeploy."}]
def send_request():
"""
Send a single request
"""
resp = requests.post(api_url, headers=headers, json={"messages": prompts})
assert resp.status_code == 200
return resp.elapsed.total_seconds()
with concurrent.futures.ThreadPoolExecutor(max_workers=33) as executor:
futures = [executor.submit(send_request) for _ in range(33)]
durations = [f.result() for f in futures]
print("Response time for each request:", durations)
# ==========================
# Metrics Endpoint Test
# ==========================
def test_metrics_endpoint(metrics_url):
"""
Test the metrics monitoring endpoint.
"""
resp = requests.get(metrics_url, timeout=5)
assert resp.status_code == 200, "Unexpected status code: {}".format(resp.status_code)
assert "text/plain" in resp.headers["Content-Type"], "Content-Type is not text/plain"
# Parse Prometheus metrics data
metrics_data = resp.text
# print(metrics_data)
lines = metrics_data.split("\n")
metric_lines = [line for line in lines if not line.startswith("#") and line.strip() != ""]
assert len(metric_lines) > 0, "No valid Prometheus metrics found"
# Assert specific metric values
num_requests_running_found = False
num_requests_waiting_found = False
time_to_first_token_seconds_sum_found = False
time_per_output_token_seconds_sum_found = False
e2e_request_latency_seconds_sum_found = False
request_inference_time_seconds_sum_found = False
request_queue_time_seconds_sum_found = False
for line in metric_lines:
if line.startswith("fastdeploy:num_requests_running"):
_, value = line.rsplit(" ", 1)
assert float(value) >= 0, "Invalid value for num_requests_running"
num_requests_running_found = True
elif line.startswith("fastdeploy:num_requests_waiting"):
_, value = line.rsplit(" ", 1)
assert float(value) >= 0, "Invalid value for num_requests_waiting"
num_requests_waiting_found = True
elif line.startswith("fastdeploy:time_to_first_token_seconds_sum"):
_, value = line.rsplit(" ", 1)
assert float(value) >= 0, "Invalid value for time_to_first_token_seconds_sum"
time_to_first_token_seconds_sum_found = True
elif line.startswith("fastdeploy:time_per_output_token_seconds_sum"):
_, value = line.rsplit(" ", 1)
assert float(value) >= 0, "Invalid value for time_per_output_token_seconds_sum"
time_per_output_token_seconds_sum_found = True
elif line.startswith("fastdeploy:e2e_request_latency_seconds_sum"):
_, value = line.rsplit(" ", 1)
assert float(value) >= 0, "Invalid value for e2e_request_latency_seconds_sum"
e2e_request_latency_seconds_sum_found = True
elif line.startswith("fastdeploy:request_inference_time_seconds_sum"):
_, value = line.rsplit(" ", 1)
assert float(value) >= 0, "Invalid value for request_inference_time_seconds_sum"
request_inference_time_seconds_sum_found = True
elif line.startswith("fastdeploy:request_queue_time_seconds_sum"):
_, value = line.rsplit(" ", 1)
assert float(value) >= 0, "Invalid value for request_queue_time_seconds_sum"
request_queue_time_seconds_sum_found = True
assert num_requests_running_found, "Missing metric: fastdeploy:num_requests_running"
assert num_requests_waiting_found, "Missing metric: fastdeploy:num_requests_waiting"
assert time_to_first_token_seconds_sum_found, "Missing metric: fastdeploy:time_to_first_token_seconds_sum"
assert time_per_output_token_seconds_sum_found, "Missing metric: fastdeploy:time_per_output_token_seconds_sum"
assert e2e_request_latency_seconds_sum_found, "Missing metric: fastdeploy:e2e_request_latency_seconds_sum"
assert request_inference_time_seconds_sum_found, "Missing metric: fastdeploy:request_inference_time_seconds_sum"
assert request_queue_time_seconds_sum_found, "Missing metric: fastdeploy:request_queue_time_seconds_sum"
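The metrics assertions above lean on the Prometheus text exposition format: lines starting with '#' are comments, and each sample line ends with its numeric value, which is why the test extracts it with rsplit(" ", 1). A minimal sketch of that parsing, using made-up sample lines rather than real /metrics output:

# Minimal illustration of the value parsing used by test_metrics_endpoint above.
sample = (
    "# HELP fastdeploy:num_requests_running Number of running requests\n"
    "fastdeploy:num_requests_running 0.0\n"
    "fastdeploy:time_to_first_token_seconds_sum 1.25\n"
)
for line in sample.splitlines():
    if not line.strip() or line.startswith("#"):
        continue  # skip comments and blank lines, as the test does
    name, value = line.rsplit(" ", 1)  # the value is the last space-separated field
    print(name, float(value))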

(new file)

@@ -0,0 +1,5 @@
FROM ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:cuda126-dev
RUN apt update && apt install -y lsof
RUN wget https://raw.githubusercontent.com/PaddlePaddle/FastDeploy/refs/heads/develop/requirements.txt
RUN python -m pip install -r requirements.txt -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple && python -m pip install pytest
RUN apt update && apt install -y python3.10-venv
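For local experiments, an image like this can be built from the directory containing the Dockerfile with, for example, `docker build -t fastdeploy-ci-base .` (the tag name is illustrative); the workflow above pulls a prebuilt fastdeploy-ciuse-cuda126 image rather than building one in CI.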