[Bug fix] fix pooling models (#5358)

* fix

* fix

* fix test

* fix gpu_model_runner

---------

Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com>
This commit is contained in:
lizexu123
2025-12-04 11:06:30 +08:00
committed by GitHub
parent a52aea073c
commit 946025480e
5 changed files with 26 additions and 47 deletions

View File

@@ -922,6 +922,16 @@ class EmbeddingChatRequest(BaseModel):
# Optional string field; defaults to None when the request omits it.
user: Optional[str] = None
# Optional integer constrained to be >= -1 via Field(ge=-1); defaults to None.
# NOTE(review): the meaning of the -1 value is not visible in this hunk —
# confirm against the code that consumes this field.
truncate_prompt_tokens: Optional[Annotated[int, Field(ge=-1)]] = None
# --8<-- [start:chat-embedding-extra-params]
# Boolean flag, off by default. Per its own description, enabling it adds the
# generation prompt to the chat template; the description also states that the
# chat template in the model's tokenizer config is what uses this parameter.
add_generation_prompt: bool = Field(
default=False,
description=(
"If true, the generation prompt will be added to the chat template. "
"This is a parameter used by chat template in tokenizer config of the "
"model."
),
)
add_special_tokens: bool = Field(
default=False,
description=(