mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-08 01:50:27 +08:00
add cache queue port (#3904)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Publish Job / publish_pre_check (push) Has been cancelled
Publish Job / print_publish_pre_check_outputs (push) Has been cancelled
Publish Job / FD-Clone-Linux (push) Has been cancelled
Publish Job / Show Code Archive Output (push) Has been cancelled
Publish Job / BUILD_SM8090 (push) Has been cancelled
Publish Job / BUILD_SM8689 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8090 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8689 (push) Has been cancelled
Publish Job / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
Publish Job / Run FastDeploy LogProb Tests (push) Has been cancelled
Publish Job / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
Publish Job / Run Base Tests (push) Has been cancelled
Publish Job / Run Accuracy Tests (push) Has been cancelled
Publish Job / Run Stable Tests (push) Has been cancelled
CI Images Build / FD-Clone-Linux (push) Has been cancelled
CI Images Build / Show Code Archive Output (push) Has been cancelled
CI Images Build / CI Images Build (push) Has been cancelled
CI Images Build / BUILD_SM8090 (push) Has been cancelled
CI Images Build / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
CI Images Build / Run FastDeploy LogProb Tests (push) Has been cancelled
CI Images Build / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
CI Images Build / Run Base Tests (push) Has been cancelled
CI Images Build / Run Accuracy Tests (push) Has been cancelled
CI Images Build / Run Stable Tests (push) Has been cancelled
CI Images Build / Publish Docker Images Pre Check (push) Has been cancelled
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Publish Job / publish_pre_check (push) Has been cancelled
Publish Job / print_publish_pre_check_outputs (push) Has been cancelled
Publish Job / FD-Clone-Linux (push) Has been cancelled
Publish Job / Show Code Archive Output (push) Has been cancelled
Publish Job / BUILD_SM8090 (push) Has been cancelled
Publish Job / BUILD_SM8689 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8090 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8689 (push) Has been cancelled
Publish Job / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
Publish Job / Run FastDeploy LogProb Tests (push) Has been cancelled
Publish Job / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
Publish Job / Run Base Tests (push) Has been cancelled
Publish Job / Run Accuracy Tests (push) Has been cancelled
Publish Job / Run Stable Tests (push) Has been cancelled
CI Images Build / FD-Clone-Linux (push) Has been cancelled
CI Images Build / Show Code Archive Output (push) Has been cancelled
CI Images Build / CI Images Build (push) Has been cancelled
CI Images Build / BUILD_SM8090 (push) Has been cancelled
CI Images Build / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
CI Images Build / Run FastDeploy LogProb Tests (push) Has been cancelled
CI Images Build / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
CI Images Build / Run Base Tests (push) Has been cancelled
CI Images Build / Run Accuracy Tests (push) Has been cancelled
CI Images Build / Run Stable Tests (push) Has been cancelled
CI Images Build / Publish Docker Images Pre Check (push) Has been cancelled
* add cache queue port * add cache queue port * add cache queue port
This commit is contained in:
5
.github/workflows/_accuracy_test.yml
vendored
5
.github/workflows/_accuracy_test.yml
vendored
@@ -80,12 +80,14 @@ jobs:
|
|||||||
FD_API_PORT=$((42088 + DEVICE_PORT * 100))
|
FD_API_PORT=$((42088 + DEVICE_PORT * 100))
|
||||||
FD_ENGINE_QUEUE_PORT=$((42058 + DEVICE_PORT * 100))
|
FD_ENGINE_QUEUE_PORT=$((42058 + DEVICE_PORT * 100))
|
||||||
FD_METRICS_PORT=$((42078 + DEVICE_PORT * 100))
|
FD_METRICS_PORT=$((42078 + DEVICE_PORT * 100))
|
||||||
|
FD_CACHE_QUEUE_PORT=$((42098 + DEVICE_PORT * 100))
|
||||||
echo "Test ENV Parameter:"
|
echo "Test ENV Parameter:"
|
||||||
echo "========================================================="
|
echo "========================================================="
|
||||||
echo "FLASK_PORT=${FLASK_PORT}"
|
echo "FLASK_PORT=${FLASK_PORT}"
|
||||||
echo "FD_API_PORT=${FD_API_PORT}"
|
echo "FD_API_PORT=${FD_API_PORT}"
|
||||||
echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}"
|
echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}"
|
||||||
echo "FD_METRICS_PORT=${FD_METRICS_PORT}"
|
echo "FD_METRICS_PORT=${FD_METRICS_PORT}"
|
||||||
|
echo "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}"
|
||||||
echo "DEVICES=${DEVICES}"
|
echo "DEVICES=${DEVICES}"
|
||||||
echo "========================================================="
|
echo "========================================================="
|
||||||
|
|
||||||
@@ -99,7 +101,7 @@ jobs:
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
PORTS=($FLASK_PORT $FD_API_PORT $FD_ENGINE_QUEUE_PORT $FD_METRICS_PORT)
|
PORTS=($FLASK_PORT $FD_API_PORT $FD_ENGINE_QUEUE_PORT $FD_METRICS_PORT $FD_CACHE_QUEUE_PORT)
|
||||||
LOG_FILE="./port_cleanup_$(date +%Y%m%d_%H%M%S).log"
|
LOG_FILE="./port_cleanup_$(date +%Y%m%d_%H%M%S).log"
|
||||||
echo "==== LOG_FILE is ${LOG_FILE} ===="
|
echo "==== LOG_FILE is ${LOG_FILE} ===="
|
||||||
|
|
||||||
@@ -133,6 +135,7 @@ jobs:
|
|||||||
-e "FD_API_PORT=${FD_API_PORT}" \
|
-e "FD_API_PORT=${FD_API_PORT}" \
|
||||||
-e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
|
-e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
|
||||||
-e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
|
-e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
|
||||||
|
-e "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}" \
|
||||||
-e "FLASK_PORT=${FLASK_PORT}" \
|
-e "FLASK_PORT=${FLASK_PORT}" \
|
||||||
-v "${MODEL_CACHE_DIR}:/MODELDATA" \
|
-v "${MODEL_CACHE_DIR}:/MODELDATA" \
|
||||||
-v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
|
-v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
|
||||||
|
5
.github/workflows/_base_test.yml
vendored
5
.github/workflows/_base_test.yml
vendored
@@ -80,12 +80,14 @@ jobs:
|
|||||||
FD_API_PORT=$((42088 + DEVICE_PORT * 100))
|
FD_API_PORT=$((42088 + DEVICE_PORT * 100))
|
||||||
FD_ENGINE_QUEUE_PORT=$((42058 + DEVICE_PORT * 100))
|
FD_ENGINE_QUEUE_PORT=$((42058 + DEVICE_PORT * 100))
|
||||||
FD_METRICS_PORT=$((42078 + DEVICE_PORT * 100))
|
FD_METRICS_PORT=$((42078 + DEVICE_PORT * 100))
|
||||||
|
FD_CACHE_QUEUE_PORT=$((42098 + DEVICE_PORT * 100))
|
||||||
echo "Test ENV Parameter:"
|
echo "Test ENV Parameter:"
|
||||||
echo "========================================================="
|
echo "========================================================="
|
||||||
echo "FLASK_PORT=${FLASK_PORT}"
|
echo "FLASK_PORT=${FLASK_PORT}"
|
||||||
echo "FD_API_PORT=${FD_API_PORT}"
|
echo "FD_API_PORT=${FD_API_PORT}"
|
||||||
echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}"
|
echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}"
|
||||||
echo "FD_METRICS_PORT=${FD_METRICS_PORT}"
|
echo "FD_METRICS_PORT=${FD_METRICS_PORT}"
|
||||||
|
echo "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}"
|
||||||
echo "DEVICES=${DEVICES}"
|
echo "DEVICES=${DEVICES}"
|
||||||
echo "========================================================="
|
echo "========================================================="
|
||||||
|
|
||||||
@@ -99,7 +101,7 @@ jobs:
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
PORTS=($FLASK_PORT $FD_API_PORT $FD_ENGINE_QUEUE_PORT $FD_METRICS_PORT)
|
PORTS=($FLASK_PORT $FD_API_PORT $FD_ENGINE_QUEUE_PORT $FD_METRICS_PORT $FD_CACHE_QUEUE_PORT)
|
||||||
LOG_FILE="./port_cleanup_$(date +%Y%m%d_%H%M%S).log"
|
LOG_FILE="./port_cleanup_$(date +%Y%m%d_%H%M%S).log"
|
||||||
echo "==== LOG_FILE is ${LOG_FILE} ===="
|
echo "==== LOG_FILE is ${LOG_FILE} ===="
|
||||||
|
|
||||||
@@ -134,6 +136,7 @@ jobs:
|
|||||||
-e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
|
-e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
|
||||||
-e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
|
-e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
|
||||||
-e "FLASK_PORT=${FLASK_PORT}" \
|
-e "FLASK_PORT=${FLASK_PORT}" \
|
||||||
|
-e "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}" \
|
||||||
-v "${MODEL_CACHE_DIR}:/MODELDATA" \
|
-v "${MODEL_CACHE_DIR}:/MODELDATA" \
|
||||||
-v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
|
-v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
|
||||||
-v "${CACHE_DIR}/.cache:/root/.cache" \
|
-v "${CACHE_DIR}/.cache:/root/.cache" \
|
||||||
|
5
.github/workflows/_logprob_test_linux.yml
vendored
5
.github/workflows/_logprob_test_linux.yml
vendored
@@ -71,12 +71,14 @@ jobs:
|
|||||||
FD_API_PORT=$((42088 + DEVICE_PORT * 100))
|
FD_API_PORT=$((42088 + DEVICE_PORT * 100))
|
||||||
FD_ENGINE_QUEUE_PORT=$((42058 + DEVICE_PORT * 100))
|
FD_ENGINE_QUEUE_PORT=$((42058 + DEVICE_PORT * 100))
|
||||||
FD_METRICS_PORT=$((42078 + DEVICE_PORT * 100))
|
FD_METRICS_PORT=$((42078 + DEVICE_PORT * 100))
|
||||||
|
FD_CACHE_QUEUE_PORT=$((42098 + DEVICE_PORT * 100))
|
||||||
echo "Test ENV Parameter:"
|
echo "Test ENV Parameter:"
|
||||||
echo "========================================================="
|
echo "========================================================="
|
||||||
echo "FLASK_PORT=${FLASK_PORT}"
|
echo "FLASK_PORT=${FLASK_PORT}"
|
||||||
echo "FD_API_PORT=${FD_API_PORT}"
|
echo "FD_API_PORT=${FD_API_PORT}"
|
||||||
echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}"
|
echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}"
|
||||||
echo "FD_METRICS_PORT=${FD_METRICS_PORT}"
|
echo "FD_METRICS_PORT=${FD_METRICS_PORT}"
|
||||||
|
echo "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}"
|
||||||
echo "DEVICES=${DEVICES}"
|
echo "DEVICES=${DEVICES}"
|
||||||
echo "========================================================="
|
echo "========================================================="
|
||||||
|
|
||||||
@@ -90,7 +92,7 @@ jobs:
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
PORTS=($FLASK_PORT $FD_API_PORT $FD_ENGINE_QUEUE_PORT $FD_METRICS_PORT)
|
PORTS=($FLASK_PORT $FD_API_PORT $FD_ENGINE_QUEUE_PORT $FD_METRICS_PORT $FD_CACHE_QUEUE_PORT)
|
||||||
LOG_FILE="./port_cleanup_$(date +%Y%m%d_%H%M%S).log"
|
LOG_FILE="./port_cleanup_$(date +%Y%m%d_%H%M%S).log"
|
||||||
echo "==== LOG_FILE is ${LOG_FILE} ===="
|
echo "==== LOG_FILE is ${LOG_FILE} ===="
|
||||||
|
|
||||||
@@ -123,6 +125,7 @@ jobs:
|
|||||||
-e "FD_API_PORT=${FD_API_PORT}" \
|
-e "FD_API_PORT=${FD_API_PORT}" \
|
||||||
-e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
|
-e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
|
||||||
-e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
|
-e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
|
||||||
|
-e "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}" \
|
||||||
-e "FLASK_PORT=${FLASK_PORT}" \
|
-e "FLASK_PORT=${FLASK_PORT}" \
|
||||||
-v "${MODEL_CACHE_DIR}:/MODELDATA" \
|
-v "${MODEL_CACHE_DIR}:/MODELDATA" \
|
||||||
-v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
|
-v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
|
||||||
|
5
.github/workflows/_pre_ce_test.yml
vendored
5
.github/workflows/_pre_ce_test.yml
vendored
@@ -81,12 +81,14 @@ jobs:
|
|||||||
FD_API_PORT=$((42088 + DEVICE_PORT * 100))
|
FD_API_PORT=$((42088 + DEVICE_PORT * 100))
|
||||||
FD_ENGINE_QUEUE_PORT=$((42058 + DEVICE_PORT * 100))
|
FD_ENGINE_QUEUE_PORT=$((42058 + DEVICE_PORT * 100))
|
||||||
FD_METRICS_PORT=$((42078 + DEVICE_PORT * 100))
|
FD_METRICS_PORT=$((42078 + DEVICE_PORT * 100))
|
||||||
|
FD_CACHE_QUEUE_PORT=$((42098 + DEVICE_PORT * 100))
|
||||||
echo "Test ENV Parameter:"
|
echo "Test ENV Parameter:"
|
||||||
echo "========================================================="
|
echo "========================================================="
|
||||||
echo "FLASK_PORT=${FLASK_PORT}"
|
echo "FLASK_PORT=${FLASK_PORT}"
|
||||||
echo "FD_API_PORT=${FD_API_PORT}"
|
echo "FD_API_PORT=${FD_API_PORT}"
|
||||||
echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}"
|
echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}"
|
||||||
echo "FD_METRICS_PORT=${FD_METRICS_PORT}"
|
echo "FD_METRICS_PORT=${FD_METRICS_PORT}"
|
||||||
|
echo "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}"
|
||||||
echo "DEVICES=${DEVICES}"
|
echo "DEVICES=${DEVICES}"
|
||||||
echo "========================================================="
|
echo "========================================================="
|
||||||
|
|
||||||
@@ -96,7 +98,7 @@ jobs:
|
|||||||
touch "${CACHE_DIR}/gitconfig"
|
touch "${CACHE_DIR}/gitconfig"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
PORTS=($FLASK_PORT $FD_API_PORT $FD_ENGINE_QUEUE_PORT $FD_METRICS_PORT)
|
PORTS=($FLASK_PORT $FD_API_PORT $FD_ENGINE_QUEUE_PORT $FD_METRICS_PORT $FD_CACHE_QUEUE_PORT)
|
||||||
LOG_FILE="./port_cleanup_$(date +%Y%m%d_%H%M%S).log"
|
LOG_FILE="./port_cleanup_$(date +%Y%m%d_%H%M%S).log"
|
||||||
echo "==== LOG_FILE is ${LOG_FILE} ===="
|
echo "==== LOG_FILE is ${LOG_FILE} ===="
|
||||||
|
|
||||||
@@ -134,6 +136,7 @@ jobs:
|
|||||||
-e "FD_API_PORT=${FD_API_PORT}" \
|
-e "FD_API_PORT=${FD_API_PORT}" \
|
||||||
-e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
|
-e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
|
||||||
-e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
|
-e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
|
||||||
|
-e "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}" \
|
||||||
-e "FLASK_PORT=${FLASK_PORT}" \
|
-e "FLASK_PORT=${FLASK_PORT}" \
|
||||||
-e "fd_wheel_url=${fd_wheel_url}" \
|
-e "fd_wheel_url=${fd_wheel_url}" \
|
||||||
--gpus "\"device=${DEVICES}\"" ${docker_image} /bin/bash -c '
|
--gpus "\"device=${DEVICES}\"" ${docker_image} /bin/bash -c '
|
||||||
|
5
.github/workflows/_unit_test_coverage.yml
vendored
5
.github/workflows/_unit_test_coverage.yml
vendored
@@ -102,12 +102,14 @@ jobs:
|
|||||||
FD_API_PORT=$((42088 + DEVICE_PORT * 100))
|
FD_API_PORT=$((42088 + DEVICE_PORT * 100))
|
||||||
FD_ENGINE_QUEUE_PORT=$((42058 + DEVICE_PORT * 100))
|
FD_ENGINE_QUEUE_PORT=$((42058 + DEVICE_PORT * 100))
|
||||||
FD_METRICS_PORT=$((42078 + DEVICE_PORT * 100))
|
FD_METRICS_PORT=$((42078 + DEVICE_PORT * 100))
|
||||||
|
FD_CACHE_QUEUE_PORT=$((42098 + DEVICE_PORT * 100))
|
||||||
echo "Test ENV Parameter:"
|
echo "Test ENV Parameter:"
|
||||||
echo "========================================================="
|
echo "========================================================="
|
||||||
echo "FLASK_PORT=${FLASK_PORT}"
|
echo "FLASK_PORT=${FLASK_PORT}"
|
||||||
echo "FD_API_PORT=${FD_API_PORT}"
|
echo "FD_API_PORT=${FD_API_PORT}"
|
||||||
echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}"
|
echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}"
|
||||||
echo "FD_METRICS_PORT=${FD_METRICS_PORT}"
|
echo "FD_METRICS_PORT=${FD_METRICS_PORT}"
|
||||||
|
echo "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}"
|
||||||
echo "DEVICES=${DEVICES}"
|
echo "DEVICES=${DEVICES}"
|
||||||
echo "========================================================="
|
echo "========================================================="
|
||||||
|
|
||||||
@@ -117,7 +119,7 @@ jobs:
|
|||||||
touch "${CACHE_DIR}/gitconfig"
|
touch "${CACHE_DIR}/gitconfig"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
PORTS=($FLASK_PORT $FD_API_PORT $FD_ENGINE_QUEUE_PORT $FD_METRICS_PORT)
|
PORTS=($FLASK_PORT $FD_API_PORT $FD_ENGINE_QUEUE_PORT $FD_METRICS_PORT $FD_CACHE_QUEUE_PORT)
|
||||||
LOG_FILE="./port_cleanup_$(date +%Y%m%d_%H%M%S).log"
|
LOG_FILE="./port_cleanup_$(date +%Y%m%d_%H%M%S).log"
|
||||||
echo "==== LOG_FILE is ${LOG_FILE} ===="
|
echo "==== LOG_FILE is ${LOG_FILE} ===="
|
||||||
|
|
||||||
@@ -156,6 +158,7 @@ jobs:
|
|||||||
-e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
|
-e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
|
||||||
-e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
|
-e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
|
||||||
-e "FLASK_PORT=${FLASK_PORT}" \
|
-e "FLASK_PORT=${FLASK_PORT}" \
|
||||||
|
-e "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}" \
|
||||||
-e TZ="Asia/Shanghai" \
|
-e TZ="Asia/Shanghai" \
|
||||||
-e "fd_wheel_url=${fd_wheel_url}" \
|
-e "fd_wheel_url=${fd_wheel_url}" \
|
||||||
-e "BASE_REF=${BASE_REF}" \
|
-e "BASE_REF=${BASE_REF}" \
|
||||||
|
@@ -59,10 +59,12 @@ FLASK_PORT = get_available_port("FLASK_PORT", base_port + 1)
|
|||||||
FD_API_PORT = get_available_port("FD_API_PORT", FLASK_PORT + 1)
|
FD_API_PORT = get_available_port("FD_API_PORT", FLASK_PORT + 1)
|
||||||
FD_ENGINE_QUEUE_PORT = get_available_port("FD_ENGINE_QUEUE_PORT", FD_API_PORT + 1)
|
FD_ENGINE_QUEUE_PORT = get_available_port("FD_ENGINE_QUEUE_PORT", FD_API_PORT + 1)
|
||||||
FD_METRICS_PORT = get_available_port("FD_METRICS_PORT", FD_ENGINE_QUEUE_PORT + 1)
|
FD_METRICS_PORT = get_available_port("FD_METRICS_PORT", FD_ENGINE_QUEUE_PORT + 1)
|
||||||
|
FD_CACHE_QUEUE_PORT = get_available_port("FD_CACHE_QUEUE_PORT", FD_METRICS_PORT + 1)
|
||||||
DEFAULT_PARAMS = {
|
DEFAULT_PARAMS = {
|
||||||
"--port": FD_API_PORT,
|
"--port": FD_API_PORT,
|
||||||
"--engine-worker-queue-port": FD_ENGINE_QUEUE_PORT,
|
"--engine-worker-queue-port": FD_ENGINE_QUEUE_PORT,
|
||||||
"--metrics-port": FD_METRICS_PORT,
|
"--metrics-port": FD_METRICS_PORT,
|
||||||
|
"--cache-queue-port": FD_CACHE_QUEUE_PORT,
|
||||||
"--enable-logprob": True,
|
"--enable-logprob": True,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -179,7 +181,7 @@ def stop_server(signum=None, frame=None):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Failed to stop server: {e}, {str(traceback.format_exc())}")
|
print(f"Failed to stop server: {e}, {str(traceback.format_exc())}")
|
||||||
|
|
||||||
for port in [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT]:
|
for port in [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT, FD_CACHE_QUEUE_PORT]:
|
||||||
try:
|
try:
|
||||||
output = subprocess.check_output(f"lsof -i:{port} -t", shell=True).decode().strip()
|
output = subprocess.check_output(f"lsof -i:{port} -t", shell=True).decode().strip()
|
||||||
for pid in output.splitlines():
|
for pid in output.splitlines():
|
||||||
|
@@ -30,9 +30,10 @@ import requests
|
|||||||
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
|
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
|
||||||
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
|
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
|
||||||
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
|
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
|
||||||
|
FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8234))
|
||||||
|
|
||||||
# List of ports to clean before and after tests
|
# List of ports to clean before and after tests
|
||||||
PORTS_TO_CLEAN = [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT]
|
PORTS_TO_CLEAN = [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT, FD_CACHE_QUEUE_PORT]
|
||||||
|
|
||||||
|
|
||||||
def is_port_open(host: str, port: int, timeout=1.0):
|
def is_port_open(host: str, port: int, timeout=1.0):
|
||||||
@@ -102,6 +103,8 @@ def setup_and_run_server():
|
|||||||
str(FD_ENGINE_QUEUE_PORT),
|
str(FD_ENGINE_QUEUE_PORT),
|
||||||
"--metrics-port",
|
"--metrics-port",
|
||||||
str(FD_METRICS_PORT),
|
str(FD_METRICS_PORT),
|
||||||
|
"--cache-queue-port",
|
||||||
|
str(FD_CACHE_QUEUE_PORT),
|
||||||
"--max-model-len",
|
"--max-model-len",
|
||||||
"32768",
|
"32768",
|
||||||
"--max-num-seqs",
|
"--max-num-seqs",
|
||||||
|
@@ -28,12 +28,14 @@ import requests
|
|||||||
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
|
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
|
||||||
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
|
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
|
||||||
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
|
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
|
||||||
|
FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8234))
|
||||||
|
|
||||||
# List of ports to clean before and after tests
|
# List of ports to clean before and after tests
|
||||||
PORTS_TO_CLEAN = [
|
PORTS_TO_CLEAN = [
|
||||||
FD_API_PORT,
|
FD_API_PORT,
|
||||||
FD_ENGINE_QUEUE_PORT,
|
FD_ENGINE_QUEUE_PORT,
|
||||||
FD_METRICS_PORT,
|
FD_METRICS_PORT,
|
||||||
|
FD_CACHE_QUEUE_PORT,
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@@ -112,6 +114,8 @@ def setup_and_run_server():
|
|||||||
str(FD_ENGINE_QUEUE_PORT),
|
str(FD_ENGINE_QUEUE_PORT),
|
||||||
"--metrics-port",
|
"--metrics-port",
|
||||||
str(FD_METRICS_PORT),
|
str(FD_METRICS_PORT),
|
||||||
|
"--cache-queue-port",
|
||||||
|
str(FD_CACHE_QUEUE_PORT),
|
||||||
"--max-model-len",
|
"--max-model-len",
|
||||||
"32768",
|
"32768",
|
||||||
"--max-num-seqs",
|
"--max-num-seqs",
|
||||||
|
@@ -29,9 +29,10 @@ import requests
|
|||||||
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
|
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
|
||||||
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
|
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
|
||||||
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
|
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
|
||||||
|
FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8234))
|
||||||
|
|
||||||
# List of ports to clean before and after tests
|
# List of ports to clean before and after tests
|
||||||
PORTS_TO_CLEAN = [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT]
|
PORTS_TO_CLEAN = [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT, FD_CACHE_QUEUE_PORT]
|
||||||
|
|
||||||
|
|
||||||
def is_port_open(host: str, port: int, timeout=1.0):
|
def is_port_open(host: str, port: int, timeout=1.0):
|
||||||
@@ -103,6 +104,8 @@ def setup_and_run_server():
|
|||||||
str(FD_ENGINE_QUEUE_PORT),
|
str(FD_ENGINE_QUEUE_PORT),
|
||||||
"--metrics-port",
|
"--metrics-port",
|
||||||
str(FD_METRICS_PORT),
|
str(FD_METRICS_PORT),
|
||||||
|
"--cache-queue-port",
|
||||||
|
str(FD_CACHE_QUEUE_PORT),
|
||||||
"--enable-mm",
|
"--enable-mm",
|
||||||
"--max-model-len",
|
"--max-model-len",
|
||||||
"32768",
|
"32768",
|
||||||
|
@@ -27,15 +27,18 @@ import requests
|
|||||||
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
|
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
|
||||||
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
|
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
|
||||||
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
|
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
|
||||||
|
FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8333))
|
||||||
|
|
||||||
# List of ports to clean before and after tests
|
# List of ports to clean before and after tests
|
||||||
PORTS_TO_CLEAN = [
|
PORTS_TO_CLEAN = [
|
||||||
FD_API_PORT,
|
FD_API_PORT,
|
||||||
FD_ENGINE_QUEUE_PORT,
|
FD_ENGINE_QUEUE_PORT,
|
||||||
FD_METRICS_PORT,
|
FD_METRICS_PORT,
|
||||||
|
FD_CACHE_QUEUE_PORT,
|
||||||
FD_API_PORT + 1,
|
FD_API_PORT + 1,
|
||||||
FD_ENGINE_QUEUE_PORT + 1,
|
FD_ENGINE_QUEUE_PORT + 1,
|
||||||
FD_METRICS_PORT + 1,
|
FD_METRICS_PORT + 1,
|
||||||
|
FD_CACHE_QUEUE_PORT + 1,
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@@ -116,6 +119,8 @@ def setup_and_run_server():
|
|||||||
str(FD_ENGINE_QUEUE_PORT),
|
str(FD_ENGINE_QUEUE_PORT),
|
||||||
"--metrics-port",
|
"--metrics-port",
|
||||||
str(FD_METRICS_PORT),
|
str(FD_METRICS_PORT),
|
||||||
|
"--cache-queue-port",
|
||||||
|
str(FD_CACHE_QUEUE_PORT),
|
||||||
"--max-model-len",
|
"--max-model-len",
|
||||||
"8192",
|
"8192",
|
||||||
"--max-num-seqs",
|
"--max-num-seqs",
|
||||||
@@ -157,7 +162,7 @@ def setup_and_run_server():
|
|||||||
"--metrics-port",
|
"--metrics-port",
|
||||||
str(FD_METRICS_PORT + 1),
|
str(FD_METRICS_PORT + 1),
|
||||||
"--cache-queue-port",
|
"--cache-queue-port",
|
||||||
str(FD_API_PORT + 2),
|
str(FD_CACHE_QUEUE_PORT + 1),
|
||||||
"--max-model-len",
|
"--max-model-len",
|
||||||
"8192",
|
"8192",
|
||||||
"--max-num-seqs",
|
"--max-num-seqs",
|
||||||
|
@@ -24,6 +24,7 @@ import pytest
|
|||||||
from fastdeploy import LLM, SamplingParams
|
from fastdeploy import LLM, SamplingParams
|
||||||
|
|
||||||
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8313))
|
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8313))
|
||||||
|
FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8333))
|
||||||
MAX_WAIT_SECONDS = 60
|
MAX_WAIT_SECONDS = 60
|
||||||
|
|
||||||
|
|
||||||
@@ -87,6 +88,7 @@ def llm(model_path):
|
|||||||
model=model_path,
|
model=model_path,
|
||||||
tensor_parallel_size=1,
|
tensor_parallel_size=1,
|
||||||
engine_worker_queue_port=FD_ENGINE_QUEUE_PORT,
|
engine_worker_queue_port=FD_ENGINE_QUEUE_PORT,
|
||||||
|
cache_queue_port=FD_CACHE_QUEUE_PORT,
|
||||||
max_model_len=32768,
|
max_model_len=32768,
|
||||||
quantization="wint8",
|
quantization="wint8",
|
||||||
)
|
)
|
||||||
|
@@ -31,9 +31,10 @@ from jsonschema import validate
|
|||||||
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
|
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
|
||||||
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
|
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
|
||||||
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
|
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
|
||||||
|
FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8333))
|
||||||
|
|
||||||
# List of ports to clean before and after tests
|
# List of ports to clean before and after tests
|
||||||
PORTS_TO_CLEAN = [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT]
|
PORTS_TO_CLEAN = [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT, FD_CACHE_QUEUE_PORT]
|
||||||
|
|
||||||
|
|
||||||
def is_port_open(host: str, port: int, timeout=1.0):
|
def is_port_open(host: str, port: int, timeout=1.0):
|
||||||
@@ -103,6 +104,8 @@ def setup_and_run_server():
|
|||||||
str(FD_ENGINE_QUEUE_PORT),
|
str(FD_ENGINE_QUEUE_PORT),
|
||||||
"--metrics-port",
|
"--metrics-port",
|
||||||
str(FD_METRICS_PORT),
|
str(FD_METRICS_PORT),
|
||||||
|
"--cache-queue-port",
|
||||||
|
str(FD_CACHE_QUEUE_PORT),
|
||||||
"--max-model-len",
|
"--max-model-len",
|
||||||
"32768",
|
"32768",
|
||||||
"--max-num-seqs",
|
"--max-num-seqs",
|
||||||
|
@@ -27,9 +27,10 @@ import requests
|
|||||||
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
|
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
|
||||||
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
|
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
|
||||||
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
|
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
|
||||||
|
FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8333))
|
||||||
|
|
||||||
# List of ports to clean before and after tests
|
# List of ports to clean before and after tests
|
||||||
PORTS_TO_CLEAN = [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT]
|
PORTS_TO_CLEAN = [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT, FD_CACHE_QUEUE_PORT]
|
||||||
|
|
||||||
|
|
||||||
def is_port_open(host: str, port: int, timeout=1.0):
|
def is_port_open(host: str, port: int, timeout=1.0):
|
||||||
@@ -99,6 +100,8 @@ def setup_and_run_server():
|
|||||||
str(FD_ENGINE_QUEUE_PORT),
|
str(FD_ENGINE_QUEUE_PORT),
|
||||||
"--metrics-port",
|
"--metrics-port",
|
||||||
str(FD_METRICS_PORT),
|
str(FD_METRICS_PORT),
|
||||||
|
"--cache-queue-port",
|
||||||
|
str(FD_CACHE_QUEUE_PORT),
|
||||||
"--max-model-len",
|
"--max-model-len",
|
||||||
"32768",
|
"32768",
|
||||||
"--max-num-seqs",
|
"--max-num-seqs",
|
||||||
|
@@ -29,9 +29,10 @@ import requests
|
|||||||
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
|
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
|
||||||
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
|
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
|
||||||
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
|
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
|
||||||
|
FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8333))
|
||||||
|
|
||||||
# List of ports to clean before and after tests
|
# List of ports to clean before and after tests
|
||||||
PORTS_TO_CLEAN = [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT]
|
PORTS_TO_CLEAN = [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT, FD_CACHE_QUEUE_PORT]
|
||||||
|
|
||||||
|
|
||||||
def is_port_open(host: str, port: int, timeout=1.0):
|
def is_port_open(host: str, port: int, timeout=1.0):
|
||||||
@@ -110,6 +111,8 @@ def setup_and_run_server():
|
|||||||
str(FD_ENGINE_QUEUE_PORT),
|
str(FD_ENGINE_QUEUE_PORT),
|
||||||
"--metrics-port",
|
"--metrics-port",
|
||||||
str(FD_METRICS_PORT),
|
str(FD_METRICS_PORT),
|
||||||
|
"--cache-queue-port",
|
||||||
|
str(FD_CACHE_QUEUE_PORT),
|
||||||
"--max-model-len",
|
"--max-model-len",
|
||||||
"32768",
|
"32768",
|
||||||
"--max-num-seqs",
|
"--max-num-seqs",
|
||||||
|
@@ -30,9 +30,10 @@ import requests
|
|||||||
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
|
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
|
||||||
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
|
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
|
||||||
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
|
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
|
||||||
|
FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8333))
|
||||||
|
|
||||||
# List of ports to clean before and after tests
|
# List of ports to clean before and after tests
|
||||||
PORTS_TO_CLEAN = [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT]
|
PORTS_TO_CLEAN = [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT, FD_CACHE_QUEUE_PORT]
|
||||||
|
|
||||||
|
|
||||||
def is_port_open(host: str, port: int, timeout=1.0):
|
def is_port_open(host: str, port: int, timeout=1.0):
|
||||||
@@ -114,6 +115,8 @@ def setup_and_run_server():
|
|||||||
str(FD_ENGINE_QUEUE_PORT),
|
str(FD_ENGINE_QUEUE_PORT),
|
||||||
"--metrics-port",
|
"--metrics-port",
|
||||||
str(FD_METRICS_PORT),
|
str(FD_METRICS_PORT),
|
||||||
|
"--cache-queue-port",
|
||||||
|
str(FD_CACHE_QUEUE_PORT),
|
||||||
"--enable-mm",
|
"--enable-mm",
|
||||||
"--max-model-len",
|
"--max-model-len",
|
||||||
"32768",
|
"32768",
|
||||||
|
@@ -32,9 +32,10 @@ from jsonschema import validate
|
|||||||
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
|
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
|
||||||
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
|
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
|
||||||
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
|
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
|
||||||
|
FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8333))
|
||||||
|
|
||||||
# List of ports to clean before and after tests
|
# List of ports to clean before and after tests
|
||||||
PORTS_TO_CLEAN = [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT]
|
PORTS_TO_CLEAN = [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT, FD_CACHE_QUEUE_PORT]
|
||||||
|
|
||||||
|
|
||||||
def is_port_open(host: str, port: int, timeout=1.0):
|
def is_port_open(host: str, port: int, timeout=1.0):
|
||||||
@@ -115,6 +116,8 @@ def setup_and_run_server():
|
|||||||
str(FD_ENGINE_QUEUE_PORT),
|
str(FD_ENGINE_QUEUE_PORT),
|
||||||
"--metrics-port",
|
"--metrics-port",
|
||||||
str(FD_METRICS_PORT),
|
str(FD_METRICS_PORT),
|
||||||
|
"--cache-queue-port",
|
||||||
|
str(FD_CACHE_QUEUE_PORT),
|
||||||
"--max-model-len",
|
"--max-model-len",
|
||||||
"32768",
|
"32768",
|
||||||
"--max-num-seqs",
|
"--max-num-seqs",
|
||||||
|
@@ -50,6 +50,7 @@ class TestGeneration(unittest.TestCase):
|
|||||||
max_num_batched_tokens=4096,
|
max_num_batched_tokens=4096,
|
||||||
tensor_parallel_size=1,
|
tensor_parallel_size=1,
|
||||||
engine_worker_queue_port=int(os.getenv("FD_ENGINE_QUEUE_PORT")),
|
engine_worker_queue_port=int(os.getenv("FD_ENGINE_QUEUE_PORT")),
|
||||||
|
cache_queue_port=int(os.getenv("FD_CACHE_QUEUE_PORT")),
|
||||||
)
|
)
|
||||||
cls.llm = weakref.proxy(llm)
|
cls.llm = weakref.proxy(llm)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@@ -360,6 +360,7 @@ class TestMobaAttention(unittest.TestCase):
|
|||||||
tensor_parallel_size=2,
|
tensor_parallel_size=2,
|
||||||
max_model_len=131072,
|
max_model_len=131072,
|
||||||
engine_worker_queue_port=int(os.getenv("FD_ENGINE_QUEUE_PORT")),
|
engine_worker_queue_port=int(os.getenv("FD_ENGINE_QUEUE_PORT")),
|
||||||
|
cache_queue_port=int(os.getenv("FD_CACHE_QUEUE_PORT")),
|
||||||
max_num_seqs=32,
|
max_num_seqs=32,
|
||||||
quantization="wint4",
|
quantization="wint4",
|
||||||
enable_chunked_prefill=True,
|
enable_chunked_prefill=True,
|
||||||
|
@@ -30,6 +30,7 @@ from tests.model_loader.utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8313))
|
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8313))
|
||||||
|
FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8333))
|
||||||
|
|
||||||
prompts = ["解释下“温故而知新", "Hello, how are you?"]
|
prompts = ["解释下“温故而知新", "Hello, how are you?"]
|
||||||
|
|
||||||
@@ -126,6 +127,7 @@ def test_common_model(
|
|||||||
"default",
|
"default",
|
||||||
FD_ENGINE_QUEUE_PORT,
|
FD_ENGINE_QUEUE_PORT,
|
||||||
prompts,
|
prompts,
|
||||||
|
FD_CACHE_QUEUE_PORT,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
fd_outputs_v1 = run_with_timeout(
|
fd_outputs_v1 = run_with_timeout(
|
||||||
@@ -140,6 +142,7 @@ def test_common_model(
|
|||||||
"default_v1",
|
"default_v1",
|
||||||
FD_ENGINE_QUEUE_PORT,
|
FD_ENGINE_QUEUE_PORT,
|
||||||
prompts,
|
prompts,
|
||||||
|
FD_CACHE_QUEUE_PORT,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
check_tokens_id_and_text_close(
|
check_tokens_id_and_text_close(
|
||||||
|
@@ -32,9 +32,10 @@ if project_root not in sys.path:
|
|||||||
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
|
FD_API_PORT = int(os.getenv("FD_API_PORT", 8188))
|
||||||
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
|
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8133))
|
||||||
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
|
FD_METRICS_PORT = int(os.getenv("FD_METRICS_PORT", 8233))
|
||||||
|
FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8333))
|
||||||
|
|
||||||
# List of ports to clean before and after tests
|
# List of ports to clean before and after tests
|
||||||
PORTS_TO_CLEAN = [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT]
|
PORTS_TO_CLEAN = [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT, FD_CACHE_QUEUE_PORT]
|
||||||
|
|
||||||
|
|
||||||
def is_port_open(host: str, port: int, timeout=1.0):
|
def is_port_open(host: str, port: int, timeout=1.0):
|
||||||
@@ -106,6 +107,8 @@ def setup_and_run_server():
|
|||||||
str(FD_ENGINE_QUEUE_PORT),
|
str(FD_ENGINE_QUEUE_PORT),
|
||||||
"--metrics-port",
|
"--metrics-port",
|
||||||
str(FD_METRICS_PORT),
|
str(FD_METRICS_PORT),
|
||||||
|
"--cache-queue-port",
|
||||||
|
str(FD_CACHE_QUEUE_PORT),
|
||||||
"--enable-mm",
|
"--enable-mm",
|
||||||
"--max-model-len",
|
"--max-model-len",
|
||||||
"32768",
|
"32768",
|
||||||
|
@@ -30,6 +30,7 @@ from tests.model_loader.utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8313))
|
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8313))
|
||||||
|
FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8333))
|
||||||
|
|
||||||
prompts = ["北京天安门在哪里?"]
|
prompts = ["北京天安门在哪里?"]
|
||||||
|
|
||||||
@@ -128,6 +129,7 @@ def test_model_against_baseline(
|
|||||||
"default_v1",
|
"default_v1",
|
||||||
FD_ENGINE_QUEUE_PORT,
|
FD_ENGINE_QUEUE_PORT,
|
||||||
prompts,
|
prompts,
|
||||||
|
FD_CACHE_QUEUE_PORT,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@@ -28,6 +28,7 @@ FD_ENGINE_QUEUE_PORTS = [
|
|||||||
[9981, 9982],
|
[9981, 9982],
|
||||||
[9991, 9992],
|
[9991, 9992],
|
||||||
]
|
]
|
||||||
|
FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8333))
|
||||||
|
|
||||||
|
|
||||||
models = [
|
models = [
|
||||||
@@ -53,6 +54,7 @@ def llm(request):
|
|||||||
max_model_len=8192,
|
max_model_len=8192,
|
||||||
num_gpu_blocks_override=1024,
|
num_gpu_blocks_override=1024,
|
||||||
engine_worker_queue_port=FD_ENGINE_QUEUE_PORTS[port_index],
|
engine_worker_queue_port=FD_ENGINE_QUEUE_PORTS[port_index],
|
||||||
|
cache_queue_port=FD_CACHE_QUEUE_PORT,
|
||||||
load_choices="default",
|
load_choices="default",
|
||||||
enable_expert_parallel=True,
|
enable_expert_parallel=True,
|
||||||
)
|
)
|
||||||
|
@@ -78,6 +78,7 @@ def form_model_get_output_topp0(
|
|||||||
load_choices,
|
load_choices,
|
||||||
engine_worker_queue_port,
|
engine_worker_queue_port,
|
||||||
prompts,
|
prompts,
|
||||||
|
cache_queue_port,
|
||||||
result_queue,
|
result_queue,
|
||||||
):
|
):
|
||||||
try:
|
try:
|
||||||
@@ -88,6 +89,7 @@ def form_model_get_output_topp0(
|
|||||||
load_choices=load_choices,
|
load_choices=load_choices,
|
||||||
quantization=quantization,
|
quantization=quantization,
|
||||||
engine_worker_queue_port=engine_worker_queue_port,
|
engine_worker_queue_port=engine_worker_queue_port,
|
||||||
|
cache_queue_port=cache_queue_port,
|
||||||
) as fd_model:
|
) as fd_model:
|
||||||
fd_outputs = fd_model.generate_topp0(prompts, max_tokens=max_tokens)
|
fd_outputs = fd_model.generate_topp0(prompts, max_tokens=max_tokens)
|
||||||
result_queue.put(fd_outputs)
|
result_queue.put(fd_outputs)
|
||||||
|
@@ -24,6 +24,7 @@ import pytest
|
|||||||
from fastdeploy import LLM, SamplingParams
|
from fastdeploy import LLM, SamplingParams
|
||||||
|
|
||||||
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8313))
|
FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8313))
|
||||||
|
FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8333))
|
||||||
MAX_WAIT_SECONDS = 60
|
MAX_WAIT_SECONDS = 60
|
||||||
|
|
||||||
os.environ["LD_LIBRARY_PATH"] = "/usr/local/nccl/"
|
os.environ["LD_LIBRARY_PATH"] = "/usr/local/nccl/"
|
||||||
@@ -76,6 +77,7 @@ def llm(model_path):
|
|||||||
tensor_parallel_size=2,
|
tensor_parallel_size=2,
|
||||||
num_gpu_blocks_override=1024,
|
num_gpu_blocks_override=1024,
|
||||||
engine_worker_queue_port=FD_ENGINE_QUEUE_PORT,
|
engine_worker_queue_port=FD_ENGINE_QUEUE_PORT,
|
||||||
|
cache_queue_port=FD_CACHE_QUEUE_PORT,
|
||||||
max_model_len=8192,
|
max_model_len=8192,
|
||||||
seed=1,
|
seed=1,
|
||||||
)
|
)
|
||||||
|
Reference in New Issue
Block a user