[Feature] Add LLGuidance backend for guided decoding (#5124)

* llguidance

* add requirements_guided_decoding.txt and doc

* fix test_guidance_*.py

* fix test_guidance_*.py && mv

* fix llguidance choice

* test_guidance_*

* rm lazy loader

---------

Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com>
This commit is contained in:
Daci
2025-12-03 20:23:57 +08:00
committed by GitHub
parent 4e8096bd0d
commit 83dbc4e5dd
14 changed files with 1307 additions and 8 deletions

View File

@@ -1664,13 +1664,27 @@ class FDConfig:
if (
self.structured_outputs_config is not None
and self.structured_outputs_config.guided_decoding_backend == "auto"
and self.structured_outputs_config.guided_decoding_backend != "off"
):
if current_platform.is_xpu() or self.speculative_config.method is not None:
logger.warning("Speculative Decoding and XPU currently do not support Guided decoding, set off.")
self.structured_outputs_config.guided_decoding_backend = "off"
else:
elif self.structured_outputs_config.guided_decoding_backend in ["auto", "xgrammar"]:
self.structured_outputs_config.guided_decoding_backend = "xgrammar"
elif self.structured_outputs_config.guided_decoding_backend == "guidance":
try:
import llguidance.torch
llguidance.torch
except ImportError:
raise ImportError(
"The 'llguidance' package is required for using guidance as the guided decoding backend. "
"Please install it via the appropriate method."
)
else:
raise NotImplementedError(
f"Guided decoding backend '{self.structured_outputs_config.guided_decoding_backend}' is not implemented. [auto, xgrammar, guidance, off]"
)
if self.model_config.enable_mm:
if self.cache_config.max_encoder_cache is None or self.cache_config.max_encoder_cache < 0:
@@ -1790,7 +1804,8 @@ class FDConfig:
"XGrammar",
"auto",
"off",
], f"Only support xgrammar、auto guided decoding backend, but got {self.structured_outputs_config.guided_decoding_backend}."
"guidance",
], f"Only support [auto, xgrammar, guidance, off] guided decoding backend, but got {self.structured_outputs_config.guided_decoding_backend}."
if self.structured_outputs_config.guided_decoding_backend != "off":
# TODO: speculative decoding support guided_decoding