diff --git a/tests/conftest.py b/tests/conftest.py index 80e4047c0..143101c44 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,6 +15,7 @@ import os import signal import socket import subprocess +import time from typing import Any, Union import pytest @@ -70,6 +71,7 @@ class FDRunner: if "engine_worker_queue_port" in kwargs: ports_to_clean.append(kwargs["engine_worker_queue_port"]) clean_ports(ports_to_clean) + time.sleep(5) self.llm = LLM( model=model_name_or_path, tensor_parallel_size=tensor_parallel_size, diff --git a/tests/model_loader/test_common_model.py b/tests/model_loader/test_common_model.py index b7c918411..ef21e52b3 100644 --- a/tests/model_loader/test_common_model.py +++ b/tests/model_loader/test_common_model.py @@ -166,7 +166,7 @@ model_param_map = { ], }, "Qwen2-7B-Instruct": { - "quantizations": ["None", "wint8"], + "quantizations": ["wint4"], }, "Qwen3-30B-A3B": { "tensor_parallel_size": 2,