mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-09-26 20:41:53 +08:00
2135 lines
45 KiB
HTML
2135 lines
45 KiB
HTML
|
||
<!doctype html>
|
||
<html lang="en" class="no-js">
|
||
<head>
|
||
|
||
<meta charset="utf-8">
|
||
<meta name="viewport" content="width=device-width,initial-scale=1">
|
||
|
||
|
||
|
||
|
||
<link rel="prev" href="../graph_optimization/">
|
||
|
||
|
||
<link rel="next" href="../plas_attention/">
|
||
|
||
|
||
<link rel="icon" href="../../assets/images/favicon.ico">
|
||
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.6.20">
|
||
|
||
|
||
|
||
<title>Data Parallelism - FastDeploy: Large Language Model Deployment</title>
|
||
|
||
|
||
|
||
<link rel="stylesheet" href="../../assets/stylesheets/main.e53b48f4.min.css">
|
||
|
||
|
||
<link rel="stylesheet" href="../../assets/stylesheets/palette.06af60db.min.css">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
||
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
|
||
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
|
||
|
||
|
||
|
||
<script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
</head>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<body dir="ltr" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo">
|
||
|
||
|
||
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
|
||
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
|
||
<label class="md-overlay" for="__drawer"></label>
|
||
<div data-md-component="skip">
|
||
|
||
|
||
<a href="#data-parallelism" class="md-skip">
|
||
Skip to content
|
||
</a>
|
||
|
||
</div>
|
||
<div data-md-component="announce">
|
||
|
||
</div>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<header class="md-header md-header--shadow" data-md-component="header">
|
||
<nav class="md-header__inner md-grid" aria-label="Header">
|
||
<a href="../.." title="FastDeploy: Large Language Model Deployment" class="md-header__button md-logo" aria-label="FastDeploy: Large Language Model Deployment" data-md-component="logo">
|
||
|
||
<img src="../../assets/images/logo.jpg" alt="logo">
|
||
|
||
</a>
|
||
<label class="md-header__button md-icon" for="__drawer">
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
|
||
</label>
|
||
<div class="md-header__title" data-md-component="header-title">
|
||
<div class="md-header__ellipsis">
|
||
<div class="md-header__topic">
|
||
<span class="md-ellipsis">
|
||
FastDeploy: Large Language Model Deployment
|
||
</span>
|
||
</div>
|
||
<div class="md-header__topic" data-md-component="header-topic">
|
||
<span class="md-ellipsis">
|
||
|
||
Data Parallelism
|
||
|
||
</span>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
<form class="md-header__option" data-md-component="palette">
|
||
|
||
|
||
|
||
|
||
<input class="md-option" data-md-color-media="(prefers-color-scheme: light)" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_0">
|
||
|
||
<label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_1" hidden>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a4 4 0 0 0-4 4 4 4 0 0 0 4 4 4 4 0 0 0 4-4 4 4 0 0 0-4-4m0 10a6 6 0 0 1-6-6 6 6 0 0 1 6-6 6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
|
||
</label>
|
||
|
||
|
||
|
||
|
||
|
||
<input class="md-option" data-md-color-media="(prefers-color-scheme: dark)" data-md-color-scheme="slate" data-md-color-primary="black" data-md-color-accent="indigo" aria-label="Switch to system preference" type="radio" name="__palette" id="__palette_1">
|
||
|
||
<label class="md-header__button md-icon" title="Switch to system preference" for="__palette_0" hidden>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 18c-.89 0-1.74-.2-2.5-.55C11.56 16.5 13 14.42 13 12s-1.44-4.5-3.5-5.45C10.26 6.2 11.11 6 12 6a6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
|
||
</label>
|
||
|
||
|
||
</form>
|
||
|
||
|
||
|
||
<script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent")}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
|
||
|
||
|
||
<div class="md-header__option">
|
||
<div class="md-select">
|
||
|
||
<button class="md-header__button md-icon" aria-label="Select language">
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m12.87 15.07-2.54-2.51.03-.03A17.5 17.5 0 0 0 14.07 6H17V4h-7V2H8v2H1v2h11.17C11.5 7.92 10.44 9.75 9 11.35 8.07 10.32 7.3 9.19 6.69 8h-2c.73 1.63 1.73 3.17 2.98 4.56l-5.09 5.02L4 19l5-5 3.11 3.11zM18.5 10h-2L12 22h2l1.12-3h4.75L21 22h2zm-2.62 7 1.62-4.33L19.12 17z"/></svg>
|
||
</button>
|
||
<div class="md-select__inner">
|
||
<ul class="md-select__list">
|
||
|
||
<li class="md-select__item">
|
||
<a href="./" hreflang="en" class="md-select__link">
|
||
English
|
||
</a>
|
||
</li>
|
||
|
||
<li class="md-select__item">
|
||
<a href="../../zh/features/data_parallel_service/" hreflang="zh" class="md-select__link">
|
||
简体中文
|
||
</a>
|
||
</li>
|
||
|
||
</ul>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
|
||
|
||
<label class="md-header__button md-icon" for="__search">
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
|
||
</label>
|
||
<div class="md-search" data-md-component="search" role="dialog">
|
||
<label class="md-search__overlay" for="__search"></label>
|
||
<div class="md-search__inner" role="search">
|
||
<form class="md-search__form" name="search">
|
||
<input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
|
||
<label class="md-search__icon md-icon" for="__search">
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
|
||
</label>
|
||
<nav class="md-search__options" aria-label="Search">
|
||
|
||
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
|
||
</button>
|
||
</nav>
|
||
|
||
</form>
|
||
<div class="md-search__output">
|
||
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
|
||
<div class="md-search-result" data-md-component="search-result">
|
||
<div class="md-search-result__meta">
|
||
Initializing search
|
||
</div>
|
||
<ol class="md-search-result__list" role="presentation"></ol>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
|
||
<div class="md-header__source">
|
||
<a href="https://github.com/PaddlePaddle/FastDeploy" title="Go to repository" class="md-source" data-md-component="source">
|
||
<div class="md-source__icon md-icon">
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.0.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
|
||
</div>
|
||
<div class="md-source__repository">
|
||
FastDeploy
|
||
</div>
|
||
</a>
|
||
</div>
|
||
|
||
</nav>
|
||
|
||
</header>
|
||
|
||
<div class="md-container" data-md-component="container">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<main class="md-main" data-md-component="main">
|
||
<div class="md-main__inner md-grid">
|
||
|
||
|
||
|
||
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
|
||
<div class="md-sidebar__scrollwrap">
|
||
<div class="md-sidebar__inner">
|
||
|
||
|
||
|
||
|
||
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
||
<label class="md-nav__title" for="__drawer">
|
||
<a href="../.." title="FastDeploy: Large Language Model Deployment" class="md-nav__button md-logo" aria-label="FastDeploy: Large Language Model Deployment" data-md-component="logo">
|
||
|
||
<img src="../../assets/images/logo.jpg" alt="logo">
|
||
|
||
</a>
|
||
FastDeploy: Large Language Model Deployment
|
||
</label>
|
||
|
||
<div class="md-nav__source">
|
||
<a href="https://github.com/PaddlePaddle/FastDeploy" title="Go to repository" class="md-source" data-md-component="source">
|
||
<div class="md-source__icon md-icon">
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.0.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
|
||
</div>
|
||
<div class="md-source__repository">
|
||
FastDeploy
|
||
</div>
|
||
</a>
|
||
</div>
|
||
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../.." class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
FastDeploy
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--nested">
|
||
|
||
|
||
|
||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" >
|
||
|
||
|
||
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Quick Start
|
||
|
||
</span>
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
|
||
<label class="md-nav__title" for="__nav_2">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
Quick Start
|
||
</label>
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--nested">
|
||
|
||
|
||
|
||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2_1" >
|
||
|
||
|
||
<label class="md-nav__link" for="__nav_2_1" id="__nav_2_1_label" tabindex="0">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Installation
|
||
|
||
</span>
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_2_1_label" aria-expanded="false">
|
||
<label class="md-nav__title" for="__nav_2_1">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
Installation
|
||
</label>
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../get_started/installation/nvidia_gpu/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Nvidia GPU
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../get_started/installation/kunlunxin_xpu/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
KunlunXin XPU
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../get_started/installation/hygon_dcu/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
HYGON DCU
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../get_started/installation/Enflame_gcu/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Enflame S60
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../get_started/installation/iluvatar_gpu/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Iluvatar CoreX
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../get_started/installation/metax_gpu/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Metax C550
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../get_started/quick_start/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Quick Deployment For ERNIE-4.5-0.3B
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../get_started/quick_start_vl/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Quick Deployment for ERNIE-4.5-VL-28B-A3B
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../get_started/ernie-4.5/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
ERNIE-4.5-300B-A47B
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../get_started/ernie-4.5-vl/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
ERNIE-4.5-VL-424B-A47B
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../get_started/quick_start_qwen/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Quick Deployment For QWEN
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--nested">
|
||
|
||
|
||
|
||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" >
|
||
|
||
|
||
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Online Serving
|
||
|
||
</span>
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
|
||
<label class="md-nav__title" for="__nav_3">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
Online Serving
|
||
</label>
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../online_serving/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
OpenAI-Compatible API Server
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../online_serving/metrics/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Monitor Metrics
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../online_serving/scheduler/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Scheduler
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../online_serving/graceful_shutdown_service/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Graceful Shutdown
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../offline_inference/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Offline Inference
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--nested">
|
||
|
||
|
||
|
||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5" >
|
||
|
||
|
||
<label class="md-nav__link" for="__nav_5" id="__nav_5_label" tabindex="0">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Best Practices
|
||
|
||
</span>
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_5_label" aria-expanded="false">
|
||
<label class="md-nav__title" for="__nav_5">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
Best Practices
|
||
</label>
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../best_practices/ERNIE-4.5-0.3B-Paddle/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
ERNIE-4.5-0.3B
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../best_practices/ERNIE-4.5-21B-A3B-Paddle/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
ERNIE-4.5-21B-A3B
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../best_practices/ERNIE-4.5-300B-A47B-Paddle/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
ERNIE-4.5-300B-A47B
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../best_practices/ERNIE-4.5-21B-A3B-Thinking/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
ERNIE-4.5-21B-A3B-Thinking
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../best_practices/ERNIE-4.5-VL-28B-A3B-Paddle/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
ERNIE-4.5-VL-28B-A3B
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../best_practices/ERNIE-4.5-VL-424B-A47B-Paddle/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
ERNIE-4.5-VL-424B-A47B
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../best_practices/FAQ/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
FAQ
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--nested">
|
||
|
||
|
||
|
||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6" >
|
||
|
||
|
||
<label class="md-nav__link" for="__nav_6" id="__nav_6_label" tabindex="0">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Quantization
|
||
|
||
</span>
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_6_label" aria-expanded="false">
|
||
<label class="md-nav__title" for="__nav_6">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
Quantization
|
||
</label>
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../quantization/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Overview
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../quantization/online_quantization/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Online Quantization
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../quantization/wint2/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
WINT2 Quantization
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
|
||
|
||
|
||
|
||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_7" checked>
|
||
|
||
|
||
<label class="md-nav__link" for="__nav_7" id="__nav_7_label" tabindex="0">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Features
|
||
|
||
</span>
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="true">
|
||
<label class="md-nav__title" for="__nav_7">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
Features
|
||
</label>
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../prefix_caching/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Prefix Caching
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../disaggregated/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Disaggregation
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../chunked_prefill/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Chunked Prefill
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../load_balance/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Load Balance
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../speculative_decoding/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Speculative Decoding
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../structured_outputs/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Structured Outputs
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../reasoning_output/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Reasoning Output
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../early_stop/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Early Stop
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../plugins/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Plugins
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../sampling/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Sampling
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../multi-node_deployment/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
MultiNode Deployment
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../graph_optimization/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Graph Optimization
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--active">
|
||
|
||
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
|
||
|
||
|
||
|
||
|
||
|
||
<label class="md-nav__link md-nav__link--active" for="__toc">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Data Parallelism
|
||
|
||
</span>
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<a href="./" class="md-nav__link md-nav__link--active">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Data Parallelism
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
|
||
|
||
|
||
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<label class="md-nav__title" for="__toc">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
Table of contents
|
||
</label>
|
||
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#data-distribution-strategy" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Data Distribution Strategy
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Data Distribution Strategy">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#expert-parallelism-hybrid-deployment" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Expert Parallelism + Hybrid Deployment
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Expert Parallelism + Hybrid Deployment">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#offline-inference" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Offline Inference
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#online-inference" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Online Inference
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#user-managed-scheduling" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
User-Managed Scheduling
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="User-Managed Scheduling">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#online-inference_1" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Online Inference
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#parameter-description" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Parameter Description
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#data-parallelism-disaggregated-deployment" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Data Parallelism + Disaggregated Deployment
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Data Parallelism + Disaggregated Deployment">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#online-inference_2" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Online Inference
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../plas_attention/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
PLAS
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../supported_models/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Supported Models
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../benchmark/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Benchmark
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--nested">
|
||
|
||
|
||
|
||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_10" >
|
||
|
||
|
||
<label class="md-nav__link" for="__nav_10" id="__nav_10_label" tabindex="0">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Usage
|
||
|
||
</span>
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_10_label" aria-expanded="false">
|
||
<label class="md-nav__title" for="__nav_10">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
Usage
|
||
</label>
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../usage/log/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Log Description
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../usage/code_overview/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Code Overview
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../usage/environment_variables/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Environment Variables
|
||
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
|
||
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
|
||
<div class="md-sidebar__scrollwrap">
|
||
<div class="md-sidebar__inner">
|
||
|
||
|
||
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<label class="md-nav__title" for="__toc">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
Table of contents
|
||
</label>
|
||
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#data-distribution-strategy" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Data Distribution Strategy
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Data Distribution Strategy">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#expert-parallelism-hybrid-deployment" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Expert Parallelism + Hybrid Deployment
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Expert Parallelism + Hybrid Deployment">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#offline-inference" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Offline Inference
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#online-inference" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Online Inference
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#user-managed-scheduling" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
User-Managed Scheduling
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="User-Managed Scheduling">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#online-inference_1" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Online Inference
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#parameter-description" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Parameter Description
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#data-parallelism-disaggregated-deployment" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Data Parallelism + Disaggregated Deployment
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Data Parallelism + Disaggregated Deployment">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#online-inference_2" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Online Inference
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
|
||
<div class="md-content" data-md-component="content">
|
||
<article class="md-content__inner md-typeset">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<h1 id="data-parallelism">Data Parallelism</h1>
|
||
<p>Under the MOE model, enabling Expert Parallelism (EP) combined with Data Parallelism (DP), where EP distributes expert workloads and DP enables parallel request processing.</p>
|
||
<h2 id="data-distribution-strategy">Data Distribution Strategy</h2>
|
||
<p>FastDeploy uses the splitwise scheduler to monitor the load status of each DP node and distribute incoming data accordingly.</p>
|
||
<p>The splitwise scheduler relies on Redis to store DP load status and distribute received data.</p>
|
||
<h3 id="expert-parallelism-hybrid-deployment">Expert Parallelism + Hybrid Deployment</h3>
|
||
<p>FastDeploy provides the splitwise scheduler that monitors DP load status and schedules incoming data.
|
||
The scheduling flow is shown below - users randomly request IP and port, obtain load status via Redis, and data is distributed to less-loaded DPs for inference.
|
||
<img alt="Scheduling Architecture" src="../images/scheduler_img.png" /></p>
|
||
<h4 id="offline-inference">Offline Inference</h4>
|
||
<pre><code class="language-python">prompts = [
|
||
"Hello, my name is",
|
||
"你好,请问今天是星期",
|
||
"请写6个以数字开头的成语",
|
||
"写一个300字的小说大纲,内容是李白穿越到现代,最后成为公司文职人员的故事",
|
||
"我要采访一位科幻作家,创建一个包含5个问题的列表"
|
||
]
|
||
|
||
sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=128)
|
||
|
||
llm = LLM(
|
||
model="ERNIE-4_5-300B-A47B-FP8-Paddle",
|
||
tensor_parallel_size=1,
|
||
data_parallel_size=8,
|
||
max_model_len=8192,
|
||
num_gpu_blocks_override=1024,
|
||
engine_worker_queue_port="6077,6078,6079,6080,6081,6082,6083,6084",
|
||
enable_expert_parallel=True,
|
||
scheduler_name="splitwise",
|
||
scheduler_host="127.0.0.1",
|
||
scheduler_topic="test",
|
||
scheduler_port=6379
|
||
)
|
||
outputs = llm.generate(prompts, sampling_params)
|
||
|
||
for output in outputs:
|
||
prompt = output.prompt
|
||
generated_text = output.outputs.text
|
||
print("generated_text: ", generated_text)
|
||
print("\n")
|
||
</code></pre>
|
||
<h4 id="online-inference">Online Inference</h4>
|
||
<pre><code class="language-shell">python -m fastdeploy.entrypoints.openai.api_server \
|
||
--model ERNIE-4_5-300B-A47B-FP8-Paddle \
|
||
--port 8184 --metrics-port 8185 \
|
||
--engine-worker-queue-port "6077,6078,6079,6080,6081,6082,6083,6084" \
|
||
--data-parallel-size 8 --tensor-parallel-size 1\
|
||
--enable-expert-parallel \
|
||
--scheduler-name "splitwise" \
|
||
--scheduler-host "127.0.0.1" \
|
||
--scheduler-port 6379 \
|
||
--scheduler-topic "test" \
|
||
--scheduler-ttl 9000
|
||
</code></pre>
|
||
<h3 id="user-managed-scheduling">User-Managed Scheduling</h3>
|
||
<p>FastDeploy provides multi_api_server, allowing users to launch multiple API servers and manually select DPs for requests. In this case, users can add their own load balancing models for scheduling. (Currently only supports online inference)</p>
|
||
<h4 id="online-inference_1">Online Inference</h4>
|
||
<p><img alt="Scheduling Architecture" src="../images/no_scheduler_img.png" /></p>
|
||
<pre><code class="language-shell">export FD_ENABLE_MULTI_API_SERVER=1
|
||
python -m fastdeploy.entrypoints.openai.multi_api_server \
|
||
--ports "1811,1822,1833,1844,1855,1866,1877,1888" \
|
||
--num-servers 8 \
|
||
--metrics-ports "3101,3201,3301,3401,3501,3601,3701,3801" \
|
||
--args --model ERNIE-4_5-300B-A47B-FP8-Paddle \
|
||
--engine-worker-queue-port "25611,25621,25631,25641,25651,25661,25671,25681" \
|
||
--tensor-parallel-size 1 \
|
||
--data-parallel-size 8 \
|
||
--max-model-len 12288 \
|
||
--max-num-seqs 64 \
|
||
--num-gpu-blocks-override 256 \
|
||
--enable-expert-parallel
|
||
</code></pre>
|
||
<h3 id="parameter-description">Parameter Description</h3>
|
||
<ul>
|
||
<li>num-servers: Number of API servers to launch </li>
|
||
<li>ports: Ports for API servers </li>
|
||
<li>args: Arguments for API servers </li>
|
||
</ul>
|
||
<h3 id="data-parallelism-disaggregated-deployment">Data Parallelism + Disaggregated Deployment</h3>
|
||
<p>Refer to <a href="../disaggregated/#multi-machine-disaggregated-deployment">Disaggregated Deployment</a></p>
|
||
<h4 id="online-inference_2">Online Inference</h4>
|
||
<p>For multi-machine deployment, ensure network cards support RDMA and all cluster nodes are interconnected.</p>
|
||
<p><strong>Note</strong>:
|
||
* <code>KVCACHE_RDMA_NICS</code> specifies RDMA network cards for the current machine, multiple cards should be separated by commas.
|
||
* The repository provides an automatic RDMA network card detection script <code>bash scripts/get_rdma_nics.sh <device></code>, where <device> can be <code>cpu</code> or <code>gpu</code>.</p>
|
||
<p><strong>Prefill Instance</strong></p>
|
||
<pre><code class="language-bash">export FD_LOG_DIR="log_prefill"
|
||
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
|
||
echo "set RDMA NICS"
|
||
export $(bash scripts/get_rdma_nics.sh gpu)
|
||
echo "KVCACHE_RDMA_NICS ${KVCACHE_RDMA_NICS}"
|
||
python -m fastdeploy.entrypoints.openai.api_server \
|
||
--model ERNIE-4_5-300B-A47B-FP8-Paddle \
|
||
--port 8180 --metrics-port 8181 \
|
||
--engine-worker-queue-port "25611,25621,25631,25641,25651,25661,25671,25681" \
|
||
--cache-queue-port 8183 \
|
||
--tensor-parallel-size 1 \
|
||
--data-parallel-size 4 \
|
||
--enable-expert-parallel \
|
||
--cache-transfer-protocol "rdma,ipc" \
|
||
--rdma-comm-ports "7671,7672,7673,7674,7675,7676,7677,7678" \
|
||
--pd-comm-port "2334" \
|
||
--splitwise-role "prefill" \
|
||
--scheduler-name "splitwise" \
|
||
--scheduler-host "127.0.0.1" \
|
||
--scheduler-port 6379 \
|
||
--scheduler-topic "test" \
|
||
--scheduler-ttl 9000
|
||
</code></pre>
|
||
<p><strong>Decode Instance</strong></p>
|
||
<pre><code class="language-bash">export FD_LOG_DIR="log_decode"
|
||
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
|
||
echo "set RDMA NICS"
|
||
export $(bash scripts/get_rdma_nics.sh gpu)
|
||
echo "KVCACHE_RDMA_NICS ${KVCACHE_RDMA_NICS}"
|
||
python -m fastdeploy.entrypoints.openai.api_server \
|
||
--model ERNIE-4_5-300B-A47B-FP8-Paddle \
|
||
--port 8184 --metrics-port 8185 \
|
||
--engine-worker-queue-port "25611,25621,25631,25641,25651,25661,25671,25681" \
|
||
--cache-queue-port 8187 \
|
||
--tensor-parallel-size 1 \
|
||
--data-parallel-size 4 \
|
||
--enable-expert-parallel \
|
||
--scheduler-name "splitwise" \
|
||
--cache-transfer-protocol "rdma,ipc" \
|
||
--rdma-comm-ports "7671,7672,7673,7674,7675,7676,7677,7678" \
|
||
--pd-comm-port "2334" \
|
||
--scheduler-host "127.0.0.1" \
|
||
--scheduler-port 6379 \
|
||
--scheduler-ttl 9000
|
||
--scheduler-topic "test" \
|
||
--splitwise-role "decode"
|
||
</code></pre>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
</article>
|
||
</div>
|
||
|
||
|
||
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
||
</div>
|
||
|
||
</main>
|
||
|
||
<footer class="md-footer">
|
||
|
||
<div class="md-footer-meta md-typeset">
|
||
<div class="md-footer-meta__inner md-grid">
|
||
<div class="md-copyright">
|
||
|
||
<div class="md-copyright__highlight">
|
||
Copyright © 2025 Maintained by FastDeploy
|
||
</div>
|
||
|
||
|
||
Made with
|
||
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
|
||
Material for MkDocs
|
||
</a>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
</div>
|
||
</footer>
|
||
|
||
</div>
|
||
<div class="md-dialog" data-md-component="dialog">
|
||
<div class="md-dialog__inner md-typeset"></div>
|
||
</div>
|
||
|
||
|
||
|
||
|
||
<script id="__config" type="application/json">{"base": "../..", "features": [], "search": "../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||
|
||
|
||
<script src="../../assets/javascripts/bundle.f55a23d4.min.js"></script>
|
||
|
||
|
||
</body>
|
||
</html> |