mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[V1 loader] Qwen25 VL support v1 loader and torch style safetensors load (#4388)
* [BugFix] qwen2.5vl enable_thinking=true and image_patch_id bug fix * [Docs]offline infer add apply_chat_template add_generation_prompt parameter * [Model]qwen2.5VL support --use-cudagraph * [Model]qwen2.5VL support --use-cudagraph buffer and qwenvl test * [Model]qwen2.5VL support --use-cudagraph buffer and qwenvl test * [Model]qwen2.5VL support --use-cudagraph buffer and qwenvl test v2 * [Model]qwen2.5VL support --use-cudagraph buffer and qwenvl test v3 * [Model]qwen2.5VL support --use-cudagraph buffer and qwenvl test v4 * [Model]qwen2.5VL support --use-cudagraph buffer and qwenvl test v5 * [Model]qwen2.5VL support --use-cudagraph buffer and qwenvl test v6 * [Model]qwen2.5VL support --use-cudagraph buffer and qwenvl test v7 * qwen25vl v1 loader * qwen25vl v1 loader v2 * qwen25vl v1 loader v3 * qwen25vl v1 loader fix tp2 weight PySafeSlice * qwen25vl v1 loader no test * qwen25vl v1 loader add unit test * qwen25vl v1 loader add unit test v2 * qwen25vl v1 loader add torch unit test v3 * qwen25vl v1 loader add torch unit test v4 * qwen25vl v1 loader add torch unit test v5 * qwen25vl v1 loader add torch unit test v6
This commit is contained in:
@@ -239,7 +239,7 @@ class QwenVLProcessor(TextProcessor):
|
||||
request[k] = v
|
||||
else:
|
||||
raise ValueError("Invalid input: chat_template_kwargs must be a dict")
|
||||
request.setdefault("enable_thinking", True)
|
||||
request.setdefault("enable_thinking", False)
|
||||
outputs = self.processor.request2ids(request)
|
||||
|
||||
else:
|
||||
@@ -249,11 +249,8 @@ class QwenVLProcessor(TextProcessor):
|
||||
if request.get("completion_token_ids"):
|
||||
self.append_completion_tokens(outputs, request["completion_token_ids"])
|
||||
|
||||
enable_thinking = False
|
||||
if request.get("chat_template_kwargs"):
|
||||
chat_template_kwargs = request.get("chat_template_kwargs")
|
||||
enable_thinking = chat_template_kwargs.get("enable_thinking", False)
|
||||
request["enable_thinking"] = enable_thinking
|
||||
# qwen25_vl does not support thinking, so it is always disabled
|
||||
request["enable_thinking"] = False
|
||||
|
||||
outputs = self.pack_outputs(outputs)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user