load hadamard_block_size from config (#3797)

2025-10-17 14:11:14 +08:00 · 2025-09-05 17:07:58 +08:00
parent 41aee08982
commit 2cf55168ca
10 changed files with 60 additions and 30 deletions
--- a/tests/model_loader/test_w4a8_model.py
+++ b/tests/model_loader/test_w4a8_model.py
@@ -23,10 +23,10 @@ from fastdeploy.entrypoints.llm import LLM

 bash_path = os.getenv("MODEL_PATH")
 FD_ENGINE_QUEUE_PORTS = [
-    [9961, 9962, 9963, 9964, 9965, 9966, 9967, 9968],
-    [9971, 9972, 9973, 9974, 9975, 9976, 9977, 9978],
-    [9981, 9982, 9983, 9984, 9985, 9986, 9987, 9988],
-    [9991, 9992, 9993, 9994, 9995, 9996, 9997, 9998],
+    [9961, 9962],
+    [9971, 9972],
+    [9981, 9982],
+    [9991, 9992],
 ]


@@ -49,7 +49,7 @@ def llm(request):
        llm_instance = LLM(
            model=model_path,
            tensor_parallel_size=1,
-            data_parallel_size=8,
+            data_parallel_size=2,
            max_model_len=8192,
            num_gpu_blocks_override=1024,
            engine_worker_queue_port=FD_ENGINE_QUEUE_PORTS[port_index],
@@ -58,7 +58,7 @@ def llm(request):
        )
        yield weakref.proxy(llm_instance)
    except Exception as e:
-        pytest.skip(f"LLM initialization failed: {e}")
+        assert False, f"LLM initialization failed: {e}"


@pytest.mark.timeout(60)