[Feature] support logits processors (#4515)

* [feat] provide an interface for logits processors and a builtin LogitBiasLogitsProcessor

* [chore] fix code style

* [fix] add unit test & fix existing bugs

* [feat] add engine/worker arg --logits-processors

* [fix] redefine user args as logits_processors_args and fix some bugs

* [fix] fix test_sampler

* Update fastdeploy/model_executor/logits_processor/builtin.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update fastdeploy/model_executor/logits_processor/__init__.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update tests/model_executor/test_logits_processor.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* [fix] fix typo

* Update fastdeploy/engine/sampling_params.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* [fix] fix bracket

* [chore] redefine logits processor interface: pass the entire share_inputs into the logits processor (LP) without copying share_inputs or logits

* [doc] add docs

* [fix] fix logit bias processor not applied when decoding is too fast & add docs and tests

* [fix] fix redundant code

* [feat] skip apply() if no bias is specified

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
李泳桦
2025-10-29 00:08:53 +08:00
committed by GitHub
parent 24b9505971
commit a012e3608b
18 changed files with 882 additions and 14 deletions

View File

@@ -970,6 +970,9 @@ class PlasAttentionConfig:
"""
return json.dumps({key: value for key, value in self.__dict__.items() if value is not None})
def __str__(self) -> str:
    """Return all instance attributes as a JSON object string.

    Unlike a None-filtering serializer, every attribute is kept, so
    attributes set to ``None`` appear as ``null`` in the output.
    """
    attributes = dict(self.__dict__)
    return json.dumps(attributes)
class EarlyStopConfig:
def __init__(
@@ -1071,6 +1074,9 @@ class LoadConfig:
if hasattr(self, key):
setattr(self, key, value)
def __str__(self) -> str:
    """Render this object's attribute dictionary as a JSON string.

    All attributes are included as-is; values must be JSON-serializable
    for ``json.dumps`` to succeed.
    """
    return json.dumps(dict(vars(self).items()))
class PoolerConfig:
"""Controls the behavior of output pooling in pooling models."""
@@ -1339,11 +1345,15 @@ class StructuredOutputsConfig:
self.guided_decoding_backend: Optional[str] = None
# disable any whitespace for guided decoding
self.disable_any_whitespace: bool = True
self.logits_processors: Optional[list[str]] = None
for key, value in args.items():
if hasattr(self, key) and value != "None":
setattr(self, key, value)
def __str__(self) -> str:
    """Return a JSON string containing every attribute set on this instance."""
    snapshot = {}
    for name, value in self.__dict__.items():
        snapshot[name] = value
    return json.dumps(snapshot)
class FDConfig:
"""