[Features] support hugging face qwen3 dense and qwen2 model (#3574)

* support qwen2 and qwen3 hugging face

* fix moe

* default_v1 loader

* hugging_face_format deprecated

* modify hugging_face_format to model_format

* model_format auto

* fix environment

* fix bug

* fix qwen3-0.6 bug

* model_format is str

* fix
This commit is contained in:
lizexu123
2025-08-26 10:54:53 +08:00
committed by GitHub
parent 66c5addce4
commit c43a4bec00
10 changed files with 182 additions and 11 deletions

View File

@@ -27,6 +27,25 @@ TokensIdText = list[tuple[list[int], str]]
# (token_ids, text)
def get_model_paths(base_model_name: str) -> tuple[str, str]:
    """Return (fastdeploy_path, huggingface_path) for *base_model_name*.

    Both paths are rooted at the ``MODEL_PATH`` environment variable when it
    is set; the HuggingFace/torch copy lives under a ``torch/`` subdirectory.
    When ``MODEL_PATH`` is unset, the bare model name is used so the loader
    can resolve it itself (e.g. as a hub identifier).
    """
    fd_base_path = os.getenv("MODEL_PATH")
    if fd_base_path:
        fd_model_path = os.path.join(fd_base_path, base_model_name)
        torch_model_path = os.path.join(fd_base_path, "torch", base_model_name)
    else:
        # Original code unconditionally joined fd_base_path for the torch
        # path, which raised TypeError when MODEL_PATH was unset (None).
        fd_model_path = base_model_name
        torch_model_path = os.path.join("torch", base_model_name)
    return fd_model_path, torch_model_path
def check_tokens_id_and_text_close(
*,
outputs_0_lst: TokensIdText,
@@ -104,6 +123,7 @@ model_param_map = {
},
}
params = []
for model, cfg in model_param_map.items():
for q in cfg["quantizations"]:
@@ -176,3 +196,84 @@ def test_common_model(
name_0="default loader",
name_1="default_v1 loader",
)
# Models exercised by the Paddle-vs-HuggingFace comparison test; per-model
# settings fall back to defaults (tp=1, max_model_len=1024, max_tokens=32).
hugging_face_model_param_map = {
    "Qwen2.5-7B-Instruct": {
        "tensor_parallel_size": 2,
        "quantizations": ["None"],
    },
}

# One pytest parameter set per (model, quantization) combination.
hf_params = [
    pytest.param(
        model_name,
        model_cfg.get("tensor_parallel_size", 1),
        model_cfg.get("max_model_len", 1024),
        quant,
        model_cfg.get("max_tokens", 32),
        marks=[pytest.mark.core_model],
    )
    for model_name, model_cfg in hugging_face_model_param_map.items()
    for quant in model_cfg["quantizations"]
]
def _run_model_in_subprocess(
    fd_runner,
    model_path: str,
    tensor_parallel_size: int,
    max_model_len: int,
    max_tokens: int,
    quantization: str,
    load_choice: str,
):
    """Run form_model_get_output in a child process and return its result.

    Drains the result queue *before* joining the child: joining first can
    deadlock when the child's pickled output exceeds the queue's pipe buffer
    (the child blocks in ``put()`` while the parent blocks in ``join()``).
    Raises RuntimeError with the child's exit code if it dies without
    producing a result, instead of an opaque ``queue.Empty``.
    """
    from queue import Empty  # local: only needed for the poll loop below

    result_queue = Queue()
    proc = Process(
        target=form_model_get_output,
        args=(
            fd_runner,
            model_path,
            tensor_parallel_size,
            max_model_len,
            max_tokens,
            quantization,
            load_choice,
            result_queue,
        ),
    )
    proc.start()
    try:
        # Poll so a slow model load/inference is tolerated, but a dead child
        # is detected promptly rather than hanging forever.
        while True:
            try:
                outputs = result_queue.get(timeout=60)
                break
            except Empty:
                if not proc.is_alive():
                    raise RuntimeError(
                        f"child process for {model_path!r} (loader={load_choice!r}) "
                        f"exited with code {proc.exitcode} without returning results"
                    ) from None
    finally:
        proc.join(timeout=60)
        if proc.is_alive():
            proc.terminate()
            proc.join()
    return outputs


@pytest.mark.parametrize(
    "model_name_or_path,tensor_parallel_size,max_model_len,quantization,max_tokens",
    hf_params,
)
def test_paddle_vs_torch_model(
    fd_runner,
    model_name_or_path: str,
    tensor_parallel_size: int,
    max_model_len: int,
    max_tokens: int,
    quantization: str,
) -> None:
    """Check the Paddle checkpoint (default loader) and the HuggingFace
    checkpoint (default_v1 loader) of the same model produce close outputs.

    NOTE(review): the signature lists ``max_tokens`` before ``quantization``
    while the parametrize string has the reverse order — pytest matches by
    name, so this works, but keep the names in sync when editing.
    """
    fd_model_path, torch_model_path = get_model_paths(model_name_or_path)

    paddle_outputs = _run_model_in_subprocess(
        fd_runner,
        fd_model_path,
        tensor_parallel_size,
        max_model_len,
        max_tokens,
        quantization,
        "default",
    )
    hf_outputs = _run_model_in_subprocess(
        fd_runner,
        torch_model_path,
        tensor_parallel_size,
        max_model_len,
        max_tokens,
        quantization,
        "default_v1",
    )

    check_tokens_id_and_text_close(
        outputs_0_lst=paddle_outputs,
        outputs_1_lst=hf_outputs,
        name_0="Paddle model (default loader)",
        name_1="HuggingFace model (default_v1 loader)",
    )