mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-09-27 04:46:16 +08:00
2228 lines
50 KiB
HTML
2228 lines
50 KiB
HTML
|
|
<!doctype html>
|
|
<html lang="en" class="no-js">
|
|
<head>
|
|
|
|
<meta charset="utf-8">
|
|
<meta name="viewport" content="width=device-width,initial-scale=1">
|
|
|
|
|
|
|
|
|
|
<link rel="prev" href="../speculative_decoding/">
|
|
|
|
|
|
<link rel="next" href="../reasoning_output/">
|
|
|
|
|
|
<link rel="icon" href="../../assets/images/favicon.ico">
|
|
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.6.20">
|
|
|
|
|
|
|
|
<title>Structured Outputs - FastDeploy: Large Language Model Deployment</title>
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="../../assets/stylesheets/main.e53b48f4.min.css">
|
|
|
|
|
|
<link rel="stylesheet" href="../../assets/stylesheets/palette.06af60db.min.css">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
|
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
|
|
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
|
|
|
|
|
|
|
|
<script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
</head>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<body dir="ltr" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo">
|
|
|
|
|
|
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
|
|
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
|
|
<label class="md-overlay" for="__drawer"></label>
|
|
<div data-md-component="skip">
|
|
|
|
|
|
<a href="#structured-outputs" class="md-skip">
|
|
Skip to content
|
|
</a>
|
|
|
|
</div>
|
|
<div data-md-component="announce">
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<header class="md-header md-header--shadow" data-md-component="header">
|
|
<nav class="md-header__inner md-grid" aria-label="Header">
|
|
<a href="../.." title="FastDeploy: Large Language Model Deployment" class="md-header__button md-logo" aria-label="FastDeploy: Large Language Model Deployment" data-md-component="logo">
|
|
|
|
<img src="../../assets/images/logo.jpg" alt="logo">
|
|
|
|
</a>
|
|
<label class="md-header__button md-icon" for="__drawer">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
|
|
</label>
|
|
<div class="md-header__title" data-md-component="header-title">
|
|
<div class="md-header__ellipsis">
|
|
<div class="md-header__topic">
|
|
<span class="md-ellipsis">
|
|
FastDeploy: Large Language Model Deployment
|
|
</span>
|
|
</div>
|
|
<div class="md-header__topic" data-md-component="header-topic">
|
|
<span class="md-ellipsis">
|
|
|
|
Structured Outputs
|
|
|
|
</span>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
<form class="md-header__option" data-md-component="palette">
|
|
|
|
|
|
|
|
|
|
<input class="md-option" data-md-color-media="(prefers-color-scheme: light)" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_0">
|
|
|
|
<label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_1" hidden>
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a4 4 0 0 0-4 4 4 4 0 0 0 4 4 4 4 0 0 0 4-4 4 4 0 0 0-4-4m0 10a6 6 0 0 1-6-6 6 6 0 0 1 6-6 6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
|
|
</label>
|
|
|
|
|
|
|
|
|
|
|
|
<input class="md-option" data-md-color-media="(prefers-color-scheme: dark)" data-md-color-scheme="slate" data-md-color-primary="black" data-md-color-accent="indigo" aria-label="Switch to system preference" type="radio" name="__palette" id="__palette_1">
|
|
|
|
<label class="md-header__button md-icon" title="Switch to system preference" for="__palette_0" hidden>
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 18c-.89 0-1.74-.2-2.5-.55C11.56 16.5 13 14.42 13 12s-1.44-4.5-3.5-5.45C10.26 6.2 11.11 6 12 6a6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
|
|
</label>
|
|
|
|
|
|
</form>
|
|
|
|
|
|
|
|
<script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent")}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
|
|
|
|
|
|
<div class="md-header__option">
|
|
<div class="md-select">
|
|
|
|
<button class="md-header__button md-icon" aria-label="Select language">
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m12.87 15.07-2.54-2.51.03-.03A17.5 17.5 0 0 0 14.07 6H17V4h-7V2H8v2H1v2h11.17C11.5 7.92 10.44 9.75 9 11.35 8.07 10.32 7.3 9.19 6.69 8h-2c.73 1.63 1.73 3.17 2.98 4.56l-5.09 5.02L4 19l5-5 3.11 3.11zM18.5 10h-2L12 22h2l1.12-3h4.75L21 22h2zm-2.62 7 1.62-4.33L19.12 17z"/></svg>
|
|
</button>
|
|
<div class="md-select__inner">
|
|
<ul class="md-select__list">
|
|
|
|
<li class="md-select__item">
|
|
<a href="./" hreflang="en" class="md-select__link">
|
|
English
|
|
</a>
|
|
</li>
|
|
|
|
<li class="md-select__item">
|
|
<a href="../../zh/features/structured_outputs/" hreflang="zh" class="md-select__link">
|
|
简体中文
|
|
</a>
|
|
</li>
|
|
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<label class="md-header__button md-icon" for="__search">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
|
|
</label>
|
|
<div class="md-search" data-md-component="search" role="dialog">
|
|
<label class="md-search__overlay" for="__search"></label>
|
|
<div class="md-search__inner" role="search">
|
|
<form class="md-search__form" name="search">
|
|
<input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
|
|
<label class="md-search__icon md-icon" for="__search">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
|
|
</label>
|
|
<nav class="md-search__options" aria-label="Search">
|
|
|
|
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
|
|
</button>
|
|
</nav>
|
|
|
|
</form>
|
|
<div class="md-search__output">
|
|
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
|
|
<div class="md-search-result" data-md-component="search-result">
|
|
<div class="md-search-result__meta">
|
|
Initializing search
|
|
</div>
|
|
<ol class="md-search-result__list" role="presentation"></ol>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
<div class="md-header__source">
|
|
<a href="https://github.com/PaddlePaddle/FastDeploy" title="Go to repository" class="md-source" data-md-component="source">
|
|
<div class="md-source__icon md-icon">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.0.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
|
|
</div>
|
|
<div class="md-source__repository">
|
|
FastDeploy
|
|
</div>
|
|
</a>
|
|
</div>
|
|
|
|
</nav>
|
|
|
|
</header>
|
|
|
|
<div class="md-container" data-md-component="container">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<main class="md-main" data-md-component="main">
|
|
<div class="md-main__inner md-grid">
|
|
|
|
|
|
|
|
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
|
|
<div class="md-sidebar__scrollwrap">
|
|
<div class="md-sidebar__inner">
|
|
|
|
|
|
|
|
|
|
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
|
<label class="md-nav__title" for="__drawer">
|
|
<a href="../.." title="FastDeploy: Large Language Model Deployment" class="md-nav__button md-logo" aria-label="FastDeploy: Large Language Model Deployment" data-md-component="logo">
|
|
|
|
<img src="../../assets/images/logo.jpg" alt="logo">
|
|
|
|
</a>
|
|
FastDeploy: Large Language Model Deployment
|
|
</label>
|
|
|
|
<div class="md-nav__source">
|
|
<a href="https://github.com/PaddlePaddle/FastDeploy" title="Go to repository" class="md-source" data-md-component="source">
|
|
<div class="md-source__icon md-icon">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.0.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
|
|
</div>
|
|
<div class="md-source__repository">
|
|
FastDeploy
|
|
</div>
|
|
</a>
|
|
</div>
|
|
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../.." class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
FastDeploy
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Quick Start
|
|
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_2">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Quick Start
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2_1" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_2_1" id="__nav_2_1_label" tabindex="0">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Installation
|
|
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_2_1_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_2_1">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Installation
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../get_started/installation/nvidia_gpu/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Nvidia GPU
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../get_started/installation/kunlunxin_xpu/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
KunlunXin XPU
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../get_started/installation/hygon_dcu/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
HYGON DCU
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../get_started/installation/Enflame_gcu/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Enflame S60
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../get_started/installation/iluvatar_gpu/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Iluvatar CoreX
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../get_started/installation/metax_gpu/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Metax C550
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../get_started/quick_start/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Quick Deployment For ERNIE-4.5-0.3B
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../get_started/quick_start_vl/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Quick Deployment for ERNIE-4.5-VL-28B-A3B
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../get_started/ernie-4.5/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
ERNIE-4.5-300B-A47B
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../get_started/ernie-4.5-vl/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
ERNIE-4.5-VL-424B-A47B
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../get_started/quick_start_qwen/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Quick Deployment For QWEN
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Online Serving
|
|
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_3">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Online Serving
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../online_serving/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
OpenAI-Compatible API Server
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../online_serving/metrics/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Monitor Metrics
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../online_serving/scheduler/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Scheduler
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../online_serving/graceful_shutdown_service/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Graceful Shutdown
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../offline_inference/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Offline Inference
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_5" id="__nav_5_label" tabindex="0">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Best Practices
|
|
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_5_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_5">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Best Practices
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../best_practices/ERNIE-4.5-0.3B-Paddle/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
ERNIE-4.5-0.3B
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../best_practices/ERNIE-4.5-21B-A3B-Paddle/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
ERNIE-4.5-21B-A3B
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../best_practices/ERNIE-4.5-300B-A47B-Paddle/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
ERNIE-4.5-300B-A47B
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../best_practices/ERNIE-4.5-21B-A3B-Thinking/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
ERNIE-4.5-21B-A3B-Thinking
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../best_practices/ERNIE-4.5-VL-28B-A3B-Paddle/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
ERNIE-4.5-VL-28B-A3B
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../best_practices/ERNIE-4.5-VL-424B-A47B-Paddle/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
ERNIE-4.5-VL-424B-A47B
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../best_practices/FAQ/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
FAQ
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_6" id="__nav_6_label" tabindex="0">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Quantization
|
|
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_6_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_6">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Quantization
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../quantization/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Overview
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../quantization/online_quantization/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Online Quantization
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../quantization/wint2/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
WINT2 Quantization
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_7" checked>
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_7" id="__nav_7_label" tabindex="0">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Features
|
|
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="true">
|
|
<label class="md-nav__title" for="__nav_7">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Features
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../prefix_caching/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Prefix Caching
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../disaggregated/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Disaggregation
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../chunked_prefill/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Chunked Prefill
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../load_balance/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Load Balance
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../speculative_decoding/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Speculative Decoding
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--active">
|
|
|
|
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
|
|
|
|
|
|
|
|
|
|
|
|
<label class="md-nav__link md-nav__link--active" for="__toc">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Structured Outputs
|
|
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<a href="./" class="md-nav__link md-nav__link--active">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Structured Outputs
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
|
|
|
|
|
|
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<label class="md-nav__title" for="__toc">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Table of contents
|
|
</label>
|
|
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#overview" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Overview
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#usage" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Usage
|
|
</span>
|
|
</a>
|
|
|
|
<nav class="md-nav" aria-label="Usage">
|
|
<ul class="md-nav__list">
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#openai-interface" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
OpenAI Interface
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#openai-beta-interface" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
OpenAI Beta Interface
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#offline-inference" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Offline Inference
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../reasoning_output/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Reasoning Output
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../early_stop/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Early Stop
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../plugins/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Plugins
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../sampling/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Sampling
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../multi-node_deployment/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
MultiNode Deployment
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../graph_optimization/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Graph Optimization
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../data_parallel_service/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Data Parallelism
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../plas_attention/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
PLAS
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../supported_models/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Supported Models
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../benchmark/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Benchmark
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_10" >
|
|
|
|
|
|
<label class="md-nav__link" for="__nav_10" id="__nav_10_label" tabindex="0">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Usage
|
|
|
|
</span>
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_10_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_10">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Usage
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../usage/log/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Log Description
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../usage/code_overview/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Code Overview
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../usage/environment_variables/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
Environment Variables
|
|
|
|
</span>
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
|
|
<div class="md-sidebar__scrollwrap">
|
|
<div class="md-sidebar__inner">
|
|
|
|
|
|
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<label class="md-nav__title" for="__toc">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Table of contents
|
|
</label>
|
|
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#overview" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Overview
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#usage" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Usage
|
|
</span>
|
|
</a>
|
|
|
|
<nav class="md-nav" aria-label="Usage">
|
|
<ul class="md-nav__list">
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#openai-interface" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
OpenAI Interface
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#openai-beta-interface" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
OpenAI Beta Interface
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#offline-inference" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
Offline Inference
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
<div class="md-content" data-md-component="content">
|
|
<article class="md-content__inner md-typeset">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<h1 id="structured-outputs">Structured Outputs</h1>
|
|
<h2 id="overview">Overview</h2>
|
|
<p>Structured Outputs refer to predefined format constraints that force large language models to generate content strictly following specified structures. This feature significantly improves output controllability and is suitable for scenarios requiring precise format outputs (such as API calls, data parsing, code generation, etc.), while supporting dynamic grammar extensions to balance flexibility and standardization.</p>
|
|
<p>FastDeploy supports using the <a href="https://xgrammar.mlc.ai/docs/">XGrammar</a> backend to generate structured outputs.</p>
|
|
<p>Supported output formats:</p>
|
|
<ul>
|
|
<li><code>json</code>: Output content in standard JSON format</li>
|
|
<li><code>regex</code>: Precise control over text patterns (e.g., dates, emails, ID numbers), supporting complex regex syntax</li>
|
|
<li><code>choice</code>: Output strictly limited to predefined options</li>
|
|
<li><code>grammar</code>: Uses extended BNF grammar to define complex structures, supporting field names, types and logical relationship constraints</li>
|
|
<li><code>structural tag</code>: Defines structured outputs through tag trees, supporting nested tags, attribute validation and mixed constraints</li>
|
|
</ul>
|
|
<h2 id="usage">Usage</h2>
|
|
<p>When starting the service, you can specify the desired backend using the <code>--guided-decoding-backend</code> parameter. If set to <code>auto</code>, FastDeploy will automatically select the appropriate backend.</p>
|
|
<h3 id="openai-interface">OpenAI Interface</h3>
|
|
<p>FastDeploy supports OpenAI's <a href="https://platform.openai.com/docs/api-reference/completions">Completions</a> and <a href="https://platform.openai.com/docs/api-reference/chat">Chat Completions</a> APIs. OpenAI specifies structured output formats through the <code>response_format</code> parameter.</p>
|
|
<p>FastDeploy ensures JSON output by specifying <code>json_object</code>. To define specific parameter names and types within JSON, use <code>json_schema</code>. Refer to examples for detailed usage.</p>
|
|
<pre><code class="language-python">import openai
|
|
|
|
port = "8170"
|
|
client = openai.Client(base_url=f"http://127.0.0.1:{port}/v1", api_key="null")
|
|
|
|
completion = client.chat.completions.create(
|
|
model="null",
|
|
messages=[
|
|
{
|
|
"role": "user",
|
|
"content": "Generate a JSON object containing: names of China's Four Great Inventions, their dynasties of origin, and brief descriptions (each under 50 characters)",
|
|
}
|
|
],
|
|
response_format={"type": "json_object"}
|
|
)
|
|
print(completion.choices[0].message.content)
|
|
print("\n")
|
|
</code></pre>
|
|
<p>Output:</p>
|
|
<pre><code class="language-json">{"inventions": [{"name": "Paper Making", "dynasty": "Han Dynasty", "description": "Invented during Han Dynasty, revolutionized writing and storage."}, {"name": "Printing", "dynasty": "Tang Dynasty", "description": "Woodblock printing developed in Tang, movable type in Song Dynasty."}, {"name": "Compass", "dynasty": "Han Dynasty (concept)", "description": "Early use in Han Dynasty, refined for navigation by Song Dynasty."}, {"name": "Gunpowder", "dynasty": "Tang Dynasty", "description": "Discovered in Tang Dynasty, later used in weapons and fireworks."}]}
|
|
</code></pre>
|
|
<p>The following example requires the model to return JSON data about a book, which must include <code>author</code>, <code>title</code>, and <code>genre</code> fields, with <code>genre</code> restricted to given <code>BookType</code> options.</p>
|
|
<pre><code class="language-python">import openai
|
|
from pydantic import BaseModel
|
|
from enum import Enum
|
|
|
|
class BookType(str, Enum):
|
|
romance = "Romance"
|
|
historical = "Historical"
|
|
adventure = "Adventure"
|
|
mystery = "Mystery"
|
|
dystopian = "Dystopian"
|
|
|
|
class BookDescription(BaseModel):
|
|
author: str
|
|
title: str
|
|
genre: BookType
|
|
|
|
port = "8170"
|
|
client = openai.Client(base_url=f"http://127.0.0.1:{port}/v1", api_key="null")
|
|
|
|
completion = client.chat.completions.create(
|
|
model="null",
|
|
messages=[
|
|
{
|
|
"role": "user",
|
|
"content": "Generate a JSON describing a literary work, including author, title and book type.",
|
|
}
|
|
],
|
|
response_format={
|
|
"type": "json_schema",
|
|
"json_schema": {
|
|
"name": "book-description",
|
|
"schema": BookDescription.model_json_schema()
|
|
},
|
|
},
|
|
)
|
|
print(completion.choices[0].message.content)
|
|
print("\n")
|
|
</code></pre>
|
|
<p>Output:</p>
|
|
<pre><code class="language-json">{"author": "George Orwell", "title": "1984", "genre": "Dystopian"}
|
|
</code></pre>
|
|
<p><code>structural_tag</code> can extract key information from input and call specified methods to return predefined structured output. The following example demonstrates getting a specified timezone using <code>structural_tag</code>:</p>
|
|
<pre><code class="language-python">import openai
|
|
|
|
port = "8170"
|
|
client = openai.Client(base_url=f"http://127.0.0.1:{port}/v1", api_key="null")
|
|
|
|
content_str = """
|
|
You have the following function available:
|
|
|
|
{
|
|
"name": "get_current_date",
|
|
"description": "Get current date and time for given timezone",
|
|
"parameters": {
|
|
"type": "object",
|
|
"properties": {
|
|
"timezone": {
|
|
"type": "string",
|
|
"description": "Timezone to get current date/time, e.g.: Asia/Shanghai",
|
|
}
|
|
},
|
|
"required": ["timezone"],
|
|
}
|
|
}
|
|
|
|
If you choose to call only this function, reply in this format:
|
|
<{start_tag}={function_name}>{parameters}{end_tag}
|
|
where:
|
|
|
|
start_tag => `<function`
|
|
parameters => JSON dictionary with parameter names as keys
|
|
end_tag => `</function>`
|
|
|
|
Example:
|
|
<function=example_function>{"param": "value"}</function>
|
|
|
|
Note:
|
|
- Function call must follow specified format
|
|
- Required parameters must be specified
|
|
- Only one function can be called at a time
|
|
- Place entire function call response on a single line
|
|
|
|
You are an AI assistant. Answer the following question.
|
|
"""
|
|
|
|
completion = client.chat.completions.create(
|
|
model="null",
|
|
messages=[
|
|
{
|
|
"role": "system",
|
|
"content": content_str,
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": "You're traveling to Shanghai today",
|
|
}
|
|
],
|
|
response_format={
|
|
"type": "structural_tag",
|
|
"structures": [
|
|
{
|
|
"begin": "<function=get_current_date>",
|
|
"schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"timezone": {
|
|
"type": "string",
|
|
"description": "Timezone to get current date/time, e.g.: Asia/Shanghai",
|
|
}
|
|
},
|
|
"required": ["timezone"],
|
|
},
|
|
"end": "</function>",
|
|
}
|
|
],
|
|
"triggers": ["<function="],
|
|
},
|
|
)
|
|
print(completion.choices[0].message.content)
|
|
print("\n")
|
|
</code></pre>
|
|
<p>Output:</p>
|
|
<pre><code><function=get_current_date>{"timezone": "Asia/Shanghai"}</function>
|
|
</code></pre>
|
|
<p>For <code>choice</code>, <code>grammar</code>, and <code>regex</code> formats, FastDeploy supports them through the <code>extra_body</code> parameter. <code>choice</code> is straightforward - the model selects the best option from predefined choices. Example:</p>
|
|
<pre><code class="language-python">import openai
|
|
|
|
port = "8170"
|
|
client = openai.Client(base_url=f"http://127.0.0.1:{port}/v1", api_key="null")
|
|
|
|
completion = client.chat.completions.create(
|
|
model="null",
|
|
messages=[
|
|
{"role": "user", "content": "What is the landmark building in Shenzhen?"}
|
|
],
|
|
extra_body={"guided_choice": ["Ping An Finance Centre", "China Resources Headquarters", "KK100", "Diwang Mansion"]},
|
|
)
|
|
print(completion.choices[0].message.content)
|
|
print("\n")
|
|
</code></pre>
|
|
<p>Output:</p>
|
|
<pre><code>Ping An Finance Centre
|
|
</code></pre>
|
|
<p><code>regex</code> allows restricting output format via regular expressions. Example for generating a standard web address:</p>
|
|
<pre><code class="language-python">import openai
|
|
|
|
port = "8170"
|
|
client = openai.Client(base_url=f"http://127.0.0.1:{port}/v1", api_key="null")
|
|
|
|
completion = client.chat.completions.create(
|
|
model="null",
|
|
messages=[
|
|
{
|
|
"role": "user",
|
|
"content": "Generate a standard format web address including protocol and domain.\n",
|
|
}
|
|
],
|
|
extra_body={"guided_regex": r"^https:\/\/www\.[a-zA-Z]+\.com\/?$\n"},
|
|
)
|
|
print(completion.choices[0].message.content)
|
|
print("\n")
|
|
</code></pre>
|
|
<p>Output:</p>
|
|
<pre><code>https://www.example.com/
|
|
</code></pre>
|
|
<p><code>grammar</code> allows defining complex constraints using EBNF syntax. Example for generating HTML code:</p>
|
|
<pre><code class="language-python">import openai
|
|
|
|
port = "8170"
|
|
client = openai.Client(base_url=f"http://127.0.0.1:{port}/v1", api_key="null")
|
|
|
|
html_h1_grammar = """
|
|
root ::= html_statement
|
|
|
|
html_statement ::= "<h1" style_attribute? ">" text "</h1>"
|
|
|
|
style_attribute ::= " style=" dq style_value dq
|
|
|
|
style_value ::= (font_style ("; " font_weight)?) | (font_weight ("; " font_style)?)
|
|
|
|
font_style ::= "font-family: '" font_name "'"
|
|
|
|
font_weight ::= "font-weight: " weight_value
|
|
|
|
font_name ::= "Arial" | "Times New Roman" | "Courier New"
|
|
|
|
weight_value ::= "normal" | "bold"
|
|
|
|
text ::= [A-Za-z0-9 ]+
|
|
|
|
dq ::= ["]
|
|
"""
|
|
|
|
completion = client.chat.completions.create(
|
|
model="null",
|
|
messages=[
|
|
{
|
|
"role": "user",
|
|
"content": "Generate HTML code for this heading in bold Times New Roman font: ERNIE Bot",
|
|
}
|
|
],
|
|
extra_body={"guided_grammar": html_h1_grammar},
|
|
)
|
|
print(completion.choices[0].message.content)
|
|
print("\n")
|
|
</code></pre>
|
|
<p>Output:</p>
|
|
<pre><code><h1 style="font-family: 'Times New Roman'; font-weight: bold">ERNIE Bot</h1>
|
|
</code></pre>
|
|
<h3 id="openai-beta-interface">OpenAI Beta Interface</h3>
|
|
<p>Starting from OpenAI Client v1.54.4, the <code>client.beta.chat.completions.parse</code> interface provides better support for Python native types. Example using <code>pydantic</code>:</p>
|
|
<pre><code class="language-python">from pydantic import BaseModel
|
|
import openai
|
|
|
|
class Info(BaseModel):
|
|
addr: str
|
|
height: int
|
|
|
|
port = "8170"
|
|
client = openai.Client(base_url=f"http://127.0.0.1:{port}/v1", api_key="null")
|
|
|
|
completion = client.beta.chat.completions.parse(
|
|
model="null",
|
|
messages=[
|
|
{"role": "user", "content": "The Oriental Pearl Tower is located at No.1 Century Avenue, Pudong New Area, Shanghai, with a height of 468 meters. What is its address and height?"},
|
|
],
|
|
response_format=Info,
|
|
)
|
|
|
|
message = completion.choices[0].message
|
|
print(message)
|
|
print("\n")
|
|
assert message.parsed
|
|
print("Address:", message.parsed.addr)
|
|
print("Height:", message.parsed.height)
|
|
</code></pre>
|
|
<p>Output:</p>
|
|
<pre><code>ParsedChatCompletionMessage[Info](content='{"addr": "No.1 Century Avenue, Pudong New Area, Shanghai", "height": 468}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None, parsed=Info(addr='No.1 Century Avenue, Pudong New Area, Shanghai', height=468), reasoning_content=None)
|
|
|
|
|
|
Address: No.1 Century Avenue, Pudong New Area, Shanghai
|
|
Height: 468
|
|
</code></pre>
|
|
<h3 id="offline-inference">Offline Inference</h3>
|
|
<p>Offline inference allows restricting the model's output format by pre-specified constraints. In <code>FastDeploy</code>, constraints can be specified through the <code>GuidedDecodingParams</code> class in <code>SamplingParams</code>. <code>GuidedDecodingParams</code> supports the following constraint types, with usage similar to online inference:</p>
|
|
<pre><code class="language-python">json: Optional[Union[str, dict]] = None
|
|
regex: Optional[str] = None
|
|
choice: Optional[List[str]] = None
|
|
grammar: Optional[str] = None
|
|
json_object: Optional[bool] = None
|
|
structural_tag: Optional[str] = None
|
|
</code></pre>
|
|
<p>The following example demonstrates how to use offline inference to generate a structured json:</p>
|
|
<pre><code class="language-python">from fastdeploy import LLM, SamplingParams
|
|
from fastdeploy.engine.sampling_params import GuidedDecodingParams
|
|
from pydantic import BaseModel
|
|
from enum import Enum
|
|
|
|
class BookType(str, Enum):
|
|
romance = "Romance"
|
|
historical = "Historical"
|
|
adventure = "Adventure"
|
|
mystery = "Mystery"
|
|
dystopian = "Dystopian"
|
|
|
|
class BookDescription(BaseModel):
|
|
author: str
|
|
title: str
|
|
genre: BookType
|
|
|
|
# Constrained decoding parameters
|
|
guided_decoding_params = GuidedDecodingParams(json=BookDescription.model_json_schema())
|
|
|
|
# Sampling parameters
|
|
sampling_params = SamplingParams(
|
|
top_p=0.95,
|
|
max_tokens=6400,
|
|
guided_decoding=guided_decoding_params,
|
|
)
|
|
|
|
# Load model
|
|
llm = LLM(model="ERNIE-4.5-0.3B", tensor_parallel_size=1, max_model_len=8192, guided_decoding_backend="auto")
|
|
|
|
outputs = llm.generate(
|
|
prompts="Generate a JSON describing a literary work, including author, title and book type.",
|
|
sampling_params=sampling_params,
|
|
)
|
|
|
|
# Output results
|
|
for output in outputs:
|
|
print(output.outputs.text)
|
|
</code></pre>
|
|
<p>Output:</p>
|
|
<pre><code>{"author": "George Orwell", "title": "1984", "genre": "Dystopian"}
|
|
</code></pre>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
</article>
|
|
</div>
|
|
|
|
|
|
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
|
</div>
|
|
|
|
</main>
|
|
|
|
<footer class="md-footer">
|
|
|
|
<div class="md-footer-meta md-typeset">
|
|
<div class="md-footer-meta__inner md-grid">
|
|
<div class="md-copyright">
|
|
|
|
<div class="md-copyright__highlight">
|
|
Copyright © 2025 Maintained by FastDeploy
|
|
</div>
|
|
|
|
|
|
Made with
|
|
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
|
|
Material for MkDocs
|
|
</a>
|
|
|
|
</div>
|
|
|
|
</div>
|
|
</div>
|
|
</footer>
|
|
|
|
</div>
|
|
<div class="md-dialog" data-md-component="dialog">
|
|
<div class="md-dialog__inner md-typeset"></div>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<script id="__config" type="application/json">{"base": "../..", "features": [], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
|
|
|
|
|
<script src="../../assets/javascripts/bundle.f55a23d4.min.js"></script>
|
|
|
|
|
|
</body>
|
|
</html> |