diff --git a/scripts/run_pre_ce.sh b/scripts/run_pre_ce.sh
index ab36dac96..0dffd3a2a 100644
--- a/scripts/run_pre_ce.sh
+++ b/scripts/run_pre_ce.sh
@@ -28,6 +28,11 @@ for subdir in "$run_path"*/; do
             timeout 600 python -m pytest --disable-warnings -sv "$file"
             exit_code=$?
             set -e
+            # Guard against an unset/empty port: an empty pattern would match every process.
+            if [ -n "${FD_CACHE_QUEUE_PORT:-}" ]; then
+                # pkill -f matches the command line only; "|| true" keeps set -e from aborting when nothing matches.
+                pkill -9 -f "${FD_CACHE_QUEUE_PORT}" || true
+            fi

             if [ $exit_code -ne 0 ]; then
                 if [ -f "${subdir%/}/log/workerlog.0" ]; then
diff --git a/tests/ce/deploy/deploy.py b/tests/ce/deploy/deploy.py
index d9a2556e3..5ccac8106 100644
--- a/tests/ce/deploy/deploy.py
+++ b/tests/ce/deploy/deploy.py
@@ -180,6 +180,30 @@ def stop_server(signum=None, frame=None):
         os.killpg(os.getpgid(pid_port["PID"]), signal.SIGTERM)
     except Exception as e:
         print(f"Failed to stop server: {e}, {str(traceback.format_exc())}")
+    # Kill leftover processes whose command line mentions the cache queue port.
+    try:
+        port_token = str(FD_CACHE_QUEUE_PORT)
+        # Separate -o options print just "PID command line" with no header, so the
+        # port is only ever compared against the command line, never a PID column.
+        result = subprocess.run(
+            ["ps", "-e", "-ww", "-o", "pid=", "-o", "args="], capture_output=True, text=True
+        )
+        for line in result.stdout.splitlines():
+            pid_text, _, cmdline = line.strip().partition(" ")
+            # An empty token would match every process, so it never selects anything.
+            if not port_token or port_token not in cmdline or not pid_text.isdigit():
+                continue
+            pid = int(pid_text)
+            if pid == os.getpid():
+                continue  # never kill the process running this cleanup
+            print(f"Killing PID: {pid}")
+            try:
+                os.kill(pid, signal.SIGKILL)
+            except OSError as e:
+                # One failed kill (e.g. already exited) must not stop the sweep.
+                print(f"Failed to kill PID {pid}: {e}")
+    except Exception as e:
+        print(f"Failed to kill cache manager process: {e}, {str(traceback.format_exc())}")

     for port in [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT, FD_CACHE_QUEUE_PORT]:
         try:
diff --git a/tests/cov_pytest.ini b/tests/cov_pytest.ini
index 2ba465e6b..4e0c31e58 100644
--- a/tests/cov_pytest.ini
+++ b/tests/cov_pytest.ini
@@ -5,4 +5,3 @@ addopts =
     --ignore=tests/ce
     --ignore=tests/operators/test_fused_moe.py
     --ignore=tests/operators/test_w4afp8_gemm.py
-    --ignore=tests/model_loader/test_common_model.py
diff --git a/tests/model_loader/utils.py b/tests/model_loader/utils.py
index 67113bb0b..19f7c51aa 100644
--- a/tests/model_loader/utils.py
+++ b/tests/model_loader/utils.py
@@ -23,6 +23,7 @@ from multiprocessing import Process, Queue
 import pytest

 TokensIdText = list[tuple[list[int], str]]
+FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8234))


 def clear_logs():
@@ -122,5 +123,29 @@ def clean_ports(ports_to_clean: list[int]):
     """
     Kill all processes occupying the ports listed in PORTS_TO_CLEAN.
     """
+    # Kill leftover processes whose command line mentions the cache queue port.
+    try:
+        port_token = str(FD_CACHE_QUEUE_PORT)
+        # Separate -o options print just "PID command line" with no header, so the
+        # port is only ever compared against the command line, never a PID column.
+        result = subprocess.run(
+            ["ps", "-e", "-ww", "-o", "pid=", "-o", "args="], capture_output=True, text=True
+        )
+        for line in result.stdout.splitlines():
+            pid_text, _, cmdline = line.strip().partition(" ")
+            # An empty token would match every process, so it never selects anything.
+            if not port_token or port_token not in cmdline or not pid_text.isdigit():
+                continue
+            pid = int(pid_text)
+            if pid == os.getpid():
+                continue  # never kill the process running this cleanup
+            print(f"Killing PID: {pid}")
+            try:
+                os.kill(pid, signal.SIGKILL)
+            except OSError as e:
+                # One failed kill (e.g. already exited) must not stop the sweep.
+                print(f"Failed to kill PID {pid}: {e}")
+    except Exception as e:
+        print(f"Failed to kill cache manager process: {e}, {str(traceback.format_exc())}")
     for port in ports_to_clean:
         kill_process_on_port(port)