[Feature] support min_p_sampling (#2872)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled

* Fastdeploy support min_p

* add test_min_p

* fix

* min_p_sampling

* update

* delete vl_gpu_model_runner.py

* fix

* Align usage of min_p with vLLM

* fix

* modified unit test

* fix test_min_sampling

* pre-commit all files

* fix

* fix

* fix

* fix xpu_model_runner.py
This commit is contained in:
lizexu123
2025-07-21 14:17:59 +08:00
committed by GitHub
parent 95a214ae43
commit 67990e0572
15 changed files with 302 additions and 1 deletions

View File

@@ -339,6 +339,7 @@ class CompletionRequest(BaseModel):
temperature: Optional[float] = None
top_p: Optional[float] = None
top_k: Optional[int] = None
min_p: Optional[float] = None
user: Optional[str] = None
response_format: Optional[AnyResponseFormat] = None
@@ -460,6 +461,7 @@ class ChatCompletionRequest(BaseModel):
temperature: Optional[float] = None
top_p: Optional[float] = None
top_k: Optional[int] = None
min_p: Optional[float] = None
user: Optional[str] = None
metadata: Optional[dict] = None