Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com>
This commit is contained in:
Zhang Yulong
2025-09-09 10:09:13 +08:00
committed by GitHub
parent 1dc1397ef6
commit 2359c8d21c
4 changed files with 28 additions and 1 deletions

View File

@@ -28,6 +28,7 @@ for subdir in "$run_path"*/; do
timeout 600 python -m pytest --disable-warnings -sv "$file"
exit_code=$?
set -e
# Best-effort cleanup of leftover processes whose `ps` line mentions the cache queue port.
# - Guard on a non-empty port: with an empty pattern `grep ""` matches every line and
#   the pipeline would `kill -9` every process visible to this user.
# - `|| true`: `set -e` is active again here, and `kill` fails when a PID has already
#   exited; that must not abort the script before the exit_code handling below.
if [ -n "${FD_CACHE_QUEUE_PORT:-}" ]; then
    ps -ef | grep -- "${FD_CACHE_QUEUE_PORT}" | grep -v grep | awk '{print $2}' | xargs -r kill -9 || true
fi
if [ $exit_code -ne 0 ]; then
if [ -f "${subdir%/}/log/workerlog.0" ]; then

View File

@@ -180,6 +180,19 @@ def stop_server(signum=None, frame=None):
os.killpg(os.getpgid(pid_port["PID"]), signal.SIGTERM)
except Exception as e:
print(f"Failed to stop server: {e}, {str(traceback.format_exc())}")
try:
    # Best-effort kill of leftover cache manager processes, identified by the cache
    # queue port appearing in their command line.
    # `ps` is run directly (no shell), so nothing is interpolated into a command
    # string and there is no helper `grep` process in the listing.
    result = subprocess.run(["ps", "-efww"], capture_output=True, text=True)
    port_token = str(FD_CACHE_QUEUE_PORT)
    own_pid = os.getpid()
    for line in result.stdout.splitlines():
        # Assumes the usual `ps -ef` layout: UID PID PPID C STIME TTY TIME CMD...
        # Only the CMD column is searched, so a PID/PPID/time value that happens to
        # contain the port digits no longer selects an unrelated process.
        fields = line.split(None, 7)
        if len(fields) < 8 or not fields[1].isdigit():
            continue
        # Lines mentioning "grep" were excluded by the original pipeline; keep that
        # exclusion so the set of killed processes only ever narrows.
        if port_token not in fields[7] or "grep" in line:
            continue
        pid = int(fields[1])
        if pid == own_pid:
            # Never SIGKILL the process that is doing the cleanup.
            continue
        print(f"Killing PID: {pid}")
        try:
            os.kill(pid, signal.SIGKILL)
        except OSError as kill_error:
            # The process may already have exited (or not be ours); keep going so
            # one failure does not leave the remaining processes alive.
            print(f"Failed to kill PID {pid}: {kill_error}")
except Exception as e:
    print(f"Failed to kill cache manager process: {e}, {str(traceback.format_exc())}")
for port in [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT, FD_CACHE_QUEUE_PORT]:
try:

View File

@@ -5,4 +5,3 @@ addopts =
--ignore=tests/ce
--ignore=tests/operators/test_fused_moe.py
--ignore=tests/operators/test_w4afp8_gemm.py
--ignore=tests/model_loader/test_common_model.py

View File

@@ -23,6 +23,7 @@ from multiprocessing import Process, Queue
import pytest
TokensIdText = list[tuple[list[int], str]]
FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8234))
def clear_logs():
@@ -122,6 +123,19 @@ def clean_ports(ports_to_clean: list[int]):
"""
Kill all processes occupying the ports listed in PORTS_TO_CLEAN.
"""
try:
    # Best-effort kill of leftover cache manager processes, identified by the cache
    # queue port appearing in their command line.
    # `ps` is run directly (no shell), so nothing is interpolated into a command
    # string and there is no helper `grep` process in the listing.
    result = subprocess.run(["ps", "-efww"], capture_output=True, text=True)
    port_token = str(FD_CACHE_QUEUE_PORT)
    own_pid = os.getpid()
    for line in result.stdout.splitlines():
        # Assumes the usual `ps -ef` layout: UID PID PPID C STIME TTY TIME CMD...
        # Only the CMD column is searched, so a PID/PPID/time value that happens to
        # contain the port digits no longer selects an unrelated process.
        fields = line.split(None, 7)
        if len(fields) < 8 or not fields[1].isdigit():
            continue
        # Lines mentioning "grep" were excluded by the original pipeline; keep that
        # exclusion so the set of killed processes only ever narrows.
        if port_token not in fields[7] or "grep" in line:
            continue
        pid = int(fields[1])
        if pid == own_pid:
            # Never SIGKILL the test process that is doing the cleanup.
            continue
        print(f"Killing PID: {pid}")
        try:
            os.kill(pid, signal.SIGKILL)
        except OSError as kill_error:
            # The process may already have exited (or not be ours); keep going so
            # one failure does not leave the remaining processes alive.
            print(f"Failed to kill PID {pid}: {kill_error}")
except Exception as e:
    print(f"Failed to kill cache manager process: {e}, {str(traceback.format_exc())}")
# Then free every requested port via the helper defined elsewhere in this file.
for port in ports_to_clean:
    kill_process_on_port(port)