# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
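
# Summary (inferred from the code below): this module is an accuracy
# regression test for the FastDeploy model loader. Each configured
# model/quantization combination is launched with the "default_v1" loader,
# run on the two `prompts`, and the generated token ids and texts are
# compared against the stored `baseline` values via
# check_tokens_id_and_text_close.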

import os
import sys

import pytest

current_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.abspath(os.path.join(current_dir, ".."))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from tests.model_loader.utils import (
    check_tokens_id_and_text_close,
    form_model_get_output_topp0,
    get_paddle_model_path,
    run_with_timeout,
)

FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8313))
FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8333))

prompts = ["解释下”温故而知新”", "Hello, how are you?"]

# Baselines keyed by pytest parameter id: "{model}.{quantization}.{backend}".
# Each value is a list of (token_ids, text) tuples, one tuple per prompt above.
baseline = {
"Qwen3-0.6B.None.default": [
|
|
(
|
|
[
|
|
99487,
|
|
115040,
|
|
105855,
|
|
3837,
|
|
101034,
|
|
99652,
|
|
18493,
|
|
100384,
|
|
101047,
|
|
99892,
|
|
3837,
|
|
101034,
|
|
100007,
|
|
18493,
|
|
100384,
|
|
15946,
|
|
104026,
|
|
99487,
|
|
115040,
|
|
36407,
|
|
100627,
|
|
105683,
|
|
105520,
|
|
106579,
|
|
26850,
|
|
101140,
|
|
3837,
|
|
35946,
|
|
85106,
|
|
81167,
|
|
99487,
|
|
151645,
|
|
],
|
|
"这个成语的意思,以及它在教学中的应用,以及如何在教学中运用这个成语来提高学生的语文素养?\n\n首先,我需要确认这个",
|
|
),
|
|
(
|
|
[
|
|
358,
|
|
2776,
|
|
14589,
|
|
369,
|
|
279,
|
|
60009,
|
|
13,
|
|
358,
|
|
2776,
|
|
14589,
|
|
369,
|
|
279,
|
|
60009,
|
|
13,
|
|
358,
|
|
2776,
|
|
14589,
|
|
369,
|
|
279,
|
|
60009,
|
|
13,
|
|
358,
|
|
2776,
|
|
14589,
|
|
369,
|
|
279,
|
|
60009,
|
|
13,
|
|
358,
|
|
2776,
|
|
14589,
|
|
151645,
|
|
],
|
|
" I'm sorry for the inconvenience. I'm sorry for the inconvenience. I'm sorry for the inconvenience. I'm sorry for the inconvenience. I'm sorry",
|
|
),
|
|
],
|
|
"Qwen3-0.6B.wint8.default": [
|
|
(
|
|
[
|
|
99487,
|
|
115040,
|
|
105855,
|
|
3837,
|
|
101034,
|
|
99652,
|
|
18493,
|
|
100384,
|
|
101047,
|
|
99892,
|
|
3837,
|
|
101034,
|
|
100007,
|
|
18493,
|
|
100384,
|
|
15946,
|
|
104026,
|
|
99487,
|
|
115040,
|
|
36407,
|
|
100627,
|
|
99720,
|
|
105595,
|
|
101062,
|
|
8997,
|
|
2073,
|
|
99416,
|
|
99535,
|
|
68536,
|
|
52183,
|
|
16628,
|
|
151645,
|
|
],
|
|
"这个成语的意思,以及它在教学中的应用,以及如何在教学中运用这个成语来提高学生的学习效果。\n“温故而知新",
|
|
),
|
|
(
|
|
[
|
|
358,
|
|
2776,
|
|
14589,
|
|
369,
|
|
279,
|
|
60009,
|
|
13,
|
|
358,
|
|
2776,
|
|
14589,
|
|
369,
|
|
279,
|
|
60009,
|
|
13,
|
|
358,
|
|
2776,
|
|
14589,
|
|
369,
|
|
279,
|
|
60009,
|
|
13,
|
|
358,
|
|
2776,
|
|
14589,
|
|
369,
|
|
279,
|
|
60009,
|
|
13,
|
|
358,
|
|
2776,
|
|
14589,
|
|
151645,
|
|
],
|
|
" I'm sorry for the inconvenience. I'm sorry for the inconvenience. I'm sorry for the inconvenience. I'm sorry for the inconvenience. I'm sorry",
|
|
),
|
|
],
|
|
"Qwen3-0.6B.wint4.default": [
|
|
(
|
|
[
|
|
99487,
|
|
115040,
|
|
9370,
|
|
109091,
|
|
8997,
|
|
102349,
|
|
5122,
|
|
99487,
|
|
115040,
|
|
9370,
|
|
109091,
|
|
20412,
|
|
5122,
|
|
99416,
|
|
99535,
|
|
100052,
|
|
29826,
|
|
3837,
|
|
99794,
|
|
100052,
|
|
29826,
|
|
3837,
|
|
101982,
|
|
102009,
|
|
16628,
|
|
100032,
|
|
1773,
|
|
104136,
|
|
5122,
|
|
99416,
|
|
99535,
|
|
151645,
|
|
],
|
|
"这个成语的含义。\n答案:这个成语的含义是:温故旧事,了解旧事,从而掌握新知识。解释:温故",
|
|
),
|
|
(
|
|
[
|
|
358,
|
|
2776,
|
|
264,
|
|
5458,
|
|
518,
|
|
264,
|
|
12103,
|
|
13,
|
|
358,
|
|
2776,
|
|
264,
|
|
5458,
|
|
518,
|
|
264,
|
|
12103,
|
|
13,
|
|
358,
|
|
2776,
|
|
264,
|
|
5458,
|
|
518,
|
|
264,
|
|
12103,
|
|
13,
|
|
358,
|
|
2776,
|
|
264,
|
|
5458,
|
|
518,
|
|
264,
|
|
12103,
|
|
151645,
|
|
],
|
|
" I'm a student at a university. I'm a student at a university. I'm a student at a university. I'm a student at a university",
|
|
),
|
|
],
|
|
"ernie-4_5-21b-a3b-bf16-paddle.wint8.default": [
|
|
(
|
|
[
|
|
58544,
|
|
23,
|
|
5458,
|
|
93956,
|
|
1294,
|
|
94705,
|
|
94752,
|
|
55817,
|
|
94136,
|
|
94041,
|
|
93986,
|
|
94227,
|
|
80951,
|
|
94226,
|
|
1855,
|
|
18982,
|
|
78351,
|
|
93956,
|
|
94338,
|
|
35829,
|
|
5154,
|
|
93977,
|
|
24053,
|
|
58544,
|
|
706,
|
|
8290,
|
|
94022,
|
|
94035,
|
|
1594,
|
|
26635,
|
|
94029,
|
|
2,
|
|
],
|
|
"的含义\n不了, “温故而知新”是《论语》中的一句名言,由孔子提出。这句话的含义可以解释为:通过回顾和",
|
|
),
|
|
(
|
|
[
|
|
354,
|
|
4932,
|
|
536,
|
|
93968,
|
|
276,
|
|
4447,
|
|
1622,
|
|
93937,
|
|
25062,
|
|
93938,
|
|
354,
|
|
1481,
|
|
318,
|
|
7427,
|
|
441,
|
|
536,
|
|
274,
|
|
4497,
|
|
326,
|
|
57142,
|
|
38210,
|
|
385,
|
|
274,
|
|
24742,
|
|
18268,
|
|
56335,
|
|
93963,
|
|
3717,
|
|
82674,
|
|
23050,
|
|
45955,
|
|
2,
|
|
],
|
|
" I hope you're doing well. Today, I want to share with you a simple and delicious recipe for a classic Italian dish: Spaghetti Carbon",
|
|
),
|
|
],
|
|
"Qwen2-7B-Instruct.wint4.default": [
|
|
(
|
|
[
|
|
106599,
|
|
105855,
|
|
8997,
|
|
2073,
|
|
99416,
|
|
99535,
|
|
68536,
|
|
52183,
|
|
16628,
|
|
854,
|
|
110434,
|
|
26940,
|
|
67831,
|
|
72881,
|
|
25067,
|
|
101047,
|
|
26940,
|
|
47764,
|
|
68536,
|
|
99824,
|
|
87243,
|
|
103283,
|
|
17714,
|
|
36987,
|
|
99416,
|
|
99535,
|
|
68536,
|
|
52183,
|
|
16628,
|
|
3837,
|
|
73670,
|
|
151645,
|
|
],
|
|
"这句话的意思。\n“温故而知新”出自《论语》中的《学而篇》,原文为:“温故而知新,可以",
|
|
),
|
|
(
|
|
[
|
|
358,
|
|
2776,
|
|
1101,
|
|
264,
|
|
6366,
|
|
2025,
|
|
11,
|
|
773,
|
|
358,
|
|
1513,
|
|
944,
|
|
614,
|
|
15650,
|
|
476,
|
|
21261,
|
|
13,
|
|
358,
|
|
2776,
|
|
1588,
|
|
311,
|
|
1492,
|
|
498,
|
|
448,
|
|
894,
|
|
4755,
|
|
498,
|
|
2578,
|
|
614,
|
|
311,
|
|
279,
|
|
1850,
|
|
151645,
|
|
],
|
|
" I'm just a computer program, so I don't have feelings or emotions. I'm here to help you with any questions you might have to the best",
|
|
),
|
|
],
|
|
"Qwen3-30B-A3B.block_wise_fp8.triton": [
|
|
(
|
|
[
|
|
106599,
|
|
9370,
|
|
109091,
|
|
90395,
|
|
107485,
|
|
46944,
|
|
99912,
|
|
111564,
|
|
1773,
|
|
1036,
|
|
99416,
|
|
99535,
|
|
68536,
|
|
52183,
|
|
16628,
|
|
854,
|
|
99639,
|
|
99700,
|
|
110434,
|
|
26940,
|
|
67831,
|
|
72881,
|
|
25067,
|
|
9370,
|
|
115040,
|
|
3837,
|
|
111490,
|
|
67338,
|
|
107090,
|
|
100052,
|
|
107232,
|
|
151645,
|
|
],
|
|
"这句话的含义,并给出一个实际的例子。 “温故而知新”是一句出自《论语》的成语,意思是通过复习旧的知识",
|
|
),
|
|
(
|
|
[
|
|
358,
|
|
2776,
|
|
4460,
|
|
311,
|
|
1477,
|
|
279,
|
|
897,
|
|
315,
|
|
279,
|
|
25098,
|
|
315,
|
|
279,
|
|
729,
|
|
282,
|
|
2075,
|
|
8,
|
|
284,
|
|
220,
|
|
16,
|
|
11884,
|
|
87,
|
|
61,
|
|
17,
|
|
488,
|
|
220,
|
|
16,
|
|
8,
|
|
504,
|
|
856,
|
|
284,
|
|
481,
|
|
151645,
|
|
],
|
|
" I'm trying to find the value of the integral of the function f(x) = 1/(x^2 + 1) from x = -",
|
|
),
|
|
],
|
|
"Qwen3-30B-A3B.block_wise_fp8.deepgemm": [
|
|
(
|
|
[
|
|
106599,
|
|
9370,
|
|
109091,
|
|
90395,
|
|
107485,
|
|
46944,
|
|
99912,
|
|
111564,
|
|
1773,
|
|
1036,
|
|
99416,
|
|
99535,
|
|
68536,
|
|
52183,
|
|
16628,
|
|
854,
|
|
99639,
|
|
99700,
|
|
110434,
|
|
26940,
|
|
67831,
|
|
72881,
|
|
25067,
|
|
9370,
|
|
115040,
|
|
3837,
|
|
111490,
|
|
67338,
|
|
107090,
|
|
100052,
|
|
107232,
|
|
151645,
|
|
],
|
|
"这句话的含义,并给出一个实际的例子。 “温故而知新”是一句出自《论语》的成语,意思是通过复习旧的知识",
|
|
),
|
|
(
|
|
[
|
|
358,
|
|
2776,
|
|
4460,
|
|
311,
|
|
11625,
|
|
419,
|
|
3491,
|
|
25,
|
|
330,
|
|
9885,
|
|
279,
|
|
897,
|
|
315,
|
|
279,
|
|
7493,
|
|
25,
|
|
220,
|
|
16,
|
|
15,
|
|
15,
|
|
15,
|
|
14,
|
|
16,
|
|
15,
|
|
15,
|
|
15,
|
|
488,
|
|
220,
|
|
16,
|
|
15,
|
|
15,
|
|
151645,
|
|
],
|
|
" I'm trying to solve this problem: \"Find the value of the expression: 1000/1000 + 100",
|
|
),
|
|
],
|
|
}
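
# Per-model launch settings (summary inferred from how the loop below consumes
# them): each entry may set max_num_seqs, tensor_parallel_size, and a list of
# quantizations; a quantization is either a plain string or a dict with
# "quant_type", an optional "backend", and extra "env" variables to set before
# launching.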
model_param_map = {
    "Qwen3-0.6B": {
        "max_num_seqs": 1,
        "quantizations": ["None", "wint8", "wint4"],
    },
    "ernie-4_5-21b-a3b-bf16-paddle": {
        "max_num_seqs": 1,
        "tensor_parallel_size": 2,
        "quantizations": [
            "wint8",
        ],
    },
    "Qwen2-7B-Instruct": {
        "max_num_seqs": 1,
        "quantizations": ["wint4"],
    },
    "Qwen3-30B-A3B": {
        "tensor_parallel_size": 2,
        "max_num_seqs": 1,
        "quantizations": [
            {
                "quant_type": "block_wise_fp8",
                "backend": "triton",
                "env": {"DG_NVCC_OVERRIDE_CPP_STANDARD": "17"},
            },
            {
                "quant_type": "block_wise_fp8",
                "backend": "deepgemm",
                "env": {"DG_NVCC_OVERRIDE_CPP_STANDARD": "17", "FD_USE_DEEP_GEMM": "1"},
            },
        ],
    },
}
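
# Each pytest.param id is built as "{model}.{quant}.{backend}" so it matches a
# key in `baseline` and can be looked up via request.node.callspec.id below.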
params = []
for model, cfg in model_param_map.items():
    for q in cfg["quantizations"]:
        if isinstance(q, dict):
            quant, backend, env = q["quant_type"], q.get("backend", "default"), q.get("env", {})
        else:
            quant, backend, env = q, "default", {}
        params.append(
            pytest.param(
                model,
                cfg.get("torch_model_name_or_path", ""),
                cfg.get("tensor_parallel_size", 1),
                cfg.get("max_num_seqs", 1),
                cfg.get("max_model_len", 1024),
                quant,
                cfg.get("max_tokens", 32),
                env,
                marks=[pytest.mark.core_model],
                id=f"{model}.{quant}.{backend}",
            )
        )
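

# The test below (as far as can be read from the helper names) launches each
# parametrized model through fd_runner with the "default_v1" loader and
# top_p=0 sampling, then checks the generated token ids and texts against the
# stored baseline for that parameter id.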
@pytest.mark.parametrize(
    "model_name_or_path,torch_model_name_or_path,tensor_parallel_size,max_num_seqs,max_model_len,quantization,max_tokens,env",
    params,
)
def test_common_model(
    fd_runner,
    model_name_or_path: str,
    torch_model_name_or_path: str,
    tensor_parallel_size: int,
    max_num_seqs,
    max_model_len: int,
    max_tokens: int,
    quantization: str,
    env,
    request,
    monkeypatch,
) -> None:
    model_path = get_paddle_model_path(model_name_or_path)
    if env:
        for k, v in env.items():
            monkeypatch.setenv(k, v)

    form_model_get_output = form_model_get_output_topp0

    fd_outputs_v1 = run_with_timeout(
        target=form_model_get_output,
        args=(
            fd_runner,
            model_path,
            tensor_parallel_size,
            max_num_seqs,
            max_model_len,
            max_tokens,
            quantization,
            "default_v1",
            FD_ENGINE_QUEUE_PORT,
            prompts,
            FD_CACHE_QUEUE_PORT,
        ),
    )

    check_tokens_id_and_text_close(
        outputs_0_lst=baseline[request.node.callspec.id],
        outputs_1_lst=fd_outputs_v1,
        name_0="default loader",
        name_1="default_v1 loader",
    )
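
# Example invocation (illustrative only; the file path below is an assumption
# and may differ from the actual repository layout):
#   pytest tests/model_loader/test_common_model.py -m core_model -k "Qwen3-0.6B"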