mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-07 09:31:35 +08:00
@@ -15,6 +15,7 @@ import os
|
|||||||
import signal
|
import signal
|
||||||
import socket
|
import socket
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import time
|
||||||
from typing import Any, Union
|
from typing import Any, Union
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
@@ -70,6 +71,7 @@ class FDRunner:
|
|||||||
if "engine_worker_queue_port" in kwargs:
|
if "engine_worker_queue_port" in kwargs:
|
||||||
ports_to_clean.append(kwargs["engine_worker_queue_port"])
|
ports_to_clean.append(kwargs["engine_worker_queue_port"])
|
||||||
clean_ports(ports_to_clean)
|
clean_ports(ports_to_clean)
|
||||||
|
time.sleep(5)
|
||||||
self.llm = LLM(
|
self.llm = LLM(
|
||||||
model=model_name_or_path,
|
model=model_name_or_path,
|
||||||
tensor_parallel_size=tensor_parallel_size,
|
tensor_parallel_size=tensor_parallel_size,
|
||||||
|
@@ -166,7 +166,7 @@ model_param_map = {
|
|||||||
],
|
],
|
||||||
},
|
},
|
||||||
"Qwen2-7B-Instruct": {
|
"Qwen2-7B-Instruct": {
|
||||||
"quantizations": ["None", "wint8"],
|
"quantizations": ["wint4"],
|
||||||
},
|
},
|
||||||
"Qwen3-30B-A3B": {
|
"Qwen3-30B-A3B": {
|
||||||
"tensor_parallel_size": 2,
|
"tensor_parallel_size": 2,
|
||||||
|
Reference in New Issue
Block a user