mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-17 14:11:14 +08:00
load hadamard_block_size from config (#3797)
This commit is contained in:
@@ -23,10 +23,10 @@ from fastdeploy.entrypoints.llm import LLM
|
||||
|
||||
bash_path = os.getenv("MODEL_PATH")
|
||||
FD_ENGINE_QUEUE_PORTS = [
|
||||
[9961, 9962, 9963, 9964, 9965, 9966, 9967, 9968],
|
||||
[9971, 9972, 9973, 9974, 9975, 9976, 9977, 9978],
|
||||
[9981, 9982, 9983, 9984, 9985, 9986, 9987, 9988],
|
||||
[9991, 9992, 9993, 9994, 9995, 9996, 9997, 9998],
|
||||
[9961, 9962],
|
||||
[9971, 9972],
|
||||
[9981, 9982],
|
||||
[9991, 9992],
|
||||
]
|
||||
|
||||
|
||||
@@ -49,7 +49,7 @@ def llm(request):
|
||||
llm_instance = LLM(
|
||||
model=model_path,
|
||||
tensor_parallel_size=1,
|
||||
data_parallel_size=8,
|
||||
data_parallel_size=2,
|
||||
max_model_len=8192,
|
||||
num_gpu_blocks_override=1024,
|
||||
engine_worker_queue_port=FD_ENGINE_QUEUE_PORTS[port_index],
|
||||
@@ -58,7 +58,7 @@ def llm(request):
|
||||
)
|
||||
yield weakref.proxy(llm_instance)
|
||||
except Exception as e:
|
||||
pytest.skip(f"LLM initialization failed: {e}")
|
||||
assert False, f"LLM initialization failed: {e}"
|
||||
|
||||
|
||||
@pytest.mark.timeout(60)
|
||||
|
Reference in New Issue
Block a user