mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-08 01:50:27 +08:00
@@ -15,6 +15,7 @@ import os
|
||||
import signal
|
||||
import socket
|
||||
import subprocess
|
||||
import time
|
||||
from typing import Any, Union
|
||||
|
||||
import pytest
|
||||
@@ -70,6 +71,7 @@ class FDRunner:
|
||||
if "engine_worker_queue_port" in kwargs:
|
||||
ports_to_clean.append(kwargs["engine_worker_queue_port"])
|
||||
clean_ports(ports_to_clean)
|
||||
time.sleep(5)
|
||||
self.llm = LLM(
|
||||
model=model_name_or_path,
|
||||
tensor_parallel_size=tensor_parallel_size,
|
||||
|
@@ -166,7 +166,7 @@ model_param_map = {
|
||||
],
|
||||
},
|
||||
"Qwen2-7B-Instruct": {
|
||||
"quantizations": ["None", "wint8"],
|
||||
"quantizations": ["wint4"],
|
||||
},
|
||||
"Qwen3-30B-A3B": {
|
||||
"tensor_parallel_size": 2,
|
||||
|
Reference in New Issue
Block a user