Files
FastDeploy/zh/features/multi-node_deployment/index.html

2893 lines
47 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<!doctype html>
<html lang="zh" class="no-js">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<link rel="prev" href="../sampling/">
<link rel="next" href="../graph_optimization/">
<link rel="alternate" href="../../../features/multi-node_deployment/" hreflang="en">
<link rel="alternate" href="./" hreflang="zh">
<link rel="icon" href="../../../assets/images/favicon.ico">
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.1">
<title>多机部署 - 飞桨大语言模型推理部署工具包</title>
<link rel="stylesheet" href="../../../assets/stylesheets/main.484c7ddc.min.css">
<link rel="stylesheet" href="../../../assets/stylesheets/palette.ab4e12ef.min.css">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
<script>
  // Site root, resolved against the current page URL; its pathname prefixes
  // every storage key built by __md_get / __md_set below.
  __md_scope = new URL("../../..", location);

  // String hash: starting from 0, folds each character in as
  // h = (h << 5) - h + charCode.
  __md_hash = (text) => {
    let acc = 0;
    for (const ch of text) {
      acc = (acc << 5) - acc + ch.charCodeAt(0);
    }
    return acc;
  };

  // Reads the entry "<scope.pathname>.<name>" from the given storage
  // (localStorage by default) and returns it JSON-decoded.
  __md_get = (name, storage = localStorage, scope = __md_scope) => {
    const raw = storage.getItem(scope.pathname + "." + name);
    return JSON.parse(raw);
  };

  // JSON-encodes the value and writes it under "<scope.pathname>.<name>".
  __md_set = (name, value, storage = localStorage, scope = __md_scope) => {
    try {
      storage.setItem(scope.pathname + "." + name, JSON.stringify(value));
    } catch (err) {
      // Intentionally ignored: a failed write must not break the page.
    }
  };
</script>
</head>
<body dir="ltr" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo">
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
<label class="md-overlay" for="__drawer"></label>
<div data-md-component="skip">
<a href="#_1" class="md-skip">
跳转至
</a>
</div>
<div data-md-component="announce">
</div>
<header class="md-header md-header--shadow" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="页眉">
<a href="../../" title="飞桨大语言模型推理部署工具包" class="md-header__button md-logo" aria-label="飞桨大语言模型推理部署工具包" data-md-component="logo">
<img src="../../../assets/images/logo.jpg" alt="logo">
</a>
<label class="md-header__button md-icon" for="__drawer">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
</label>
<div class="md-header__title" data-md-component="header-title">
<div class="md-header__ellipsis">
<div class="md-header__topic">
<span class="md-ellipsis">
飞桨大语言模型推理部署工具包
</span>
</div>
<div class="md-header__topic" data-md-component="header-topic">
<span class="md-ellipsis">
多机部署
</span>
</div>
</div>
</div>
<form class="md-header__option" data-md-component="palette">
<input class="md-option" data-md-color-media="(prefers-color-scheme: light)" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_0">
<label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_1" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a4 4 0 0 0-4 4 4 4 0 0 0 4 4 4 4 0 0 0 4-4 4 4 0 0 0-4-4m0 10a6 6 0 0 1-6-6 6 6 0 0 1 6-6 6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
</label>
<input class="md-option" data-md-color-media="(prefers-color-scheme: dark)" data-md-color-scheme="slate" data-md-color-primary="black" data-md-color-accent="indigo" aria-label="Switch to system preference" type="radio" name="__palette" id="__palette_1">
<label class="md-header__button md-icon" title="Switch to system preference" for="__palette_0" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 18c-.89 0-1.74-.2-2.5-.55C11.56 16.5 13 14.42 13 12s-1.44-4.5-3.5-5.45C10.26 6.2 11.11 6 12 6a6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
</label>
</form>
<script>
  // Restore the stored colour palette and mirror it onto <body> as
  // data-md-color-* attributes.
  var palette = __md_get("__palette");
  if (palette && palette.color) {
    if (palette.color.media === "(prefers-color-scheme)") {
      // "Follow the system" was stored: resolve it to the concrete light or
      // dark palette option and copy that option's settings.
      var media = matchMedia("(prefers-color-scheme: light)"), input;
      if (media.matches) {
        input = document.querySelector("[data-md-color-media='(prefers-color-scheme: light)']");
      } else {
        input = document.querySelector("[data-md-color-media='(prefers-color-scheme: dark)']");
      }
      for (const field of ["media", "scheme", "primary", "accent"]) {
        palette.color[field] = input.getAttribute("data-md-color-" + field);
      }
    }
    for (var [key, value] of Object.entries(palette.color)) {
      document.body.setAttribute("data-md-color-" + key, value);
    }
  }
</script>
<div class="md-header__option">
<div class="md-select">
<button class="md-header__button md-icon" aria-label="选择当前语言">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m12.87 15.07-2.54-2.51.03-.03A17.5 17.5 0 0 0 14.07 6H17V4h-7V2H8v2H1v2h11.17C11.5 7.92 10.44 9.75 9 11.35 8.07 10.32 7.3 9.19 6.69 8h-2c.73 1.63 1.73 3.17 2.98 4.56l-5.09 5.02L4 19l5-5 3.11 3.11zM18.5 10h-2L12 22h2l1.12-3h4.75L21 22h2zm-2.62 7 1.62-4.33L19.12 17z"/></svg>
</button>
<div class="md-select__inner">
<ul class="md-select__list">
<li class="md-select__item">
<a href="../../../features/multi-node_deployment/" hreflang="en" class="md-select__link">
English
</a>
</li>
<li class="md-select__item">
<a href="./" hreflang="zh" class="md-select__link">
简体中文
</a>
</li>
</ul>
</div>
</div>
</div>
<label class="md-header__button md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
</label>
<div class="md-search" data-md-component="search" role="dialog">
<label class="md-search__overlay" for="__search"></label>
<div class="md-search__inner" role="search">
<form class="md-search__form" name="search">
<input type="text" class="md-search__input" name="query" aria-label="搜索" placeholder="搜索" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
<label class="md-search__icon md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
</label>
<nav class="md-search__options" aria-label="查找">
<button type="reset" class="md-search__icon md-icon" title="清空当前内容" aria-label="清空当前内容" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
</button>
</nav>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
<div class="md-search-result" data-md-component="search-result">
<div class="md-search-result__meta">
正在初始化搜索引擎
</div>
<ol class="md-search-result__list" role="presentation"></ol>
</div>
</div>
</div>
</div>
</div>
<div class="md-header__source">
<a href="https://github.com/PaddlePaddle/FastDeploy" title="前往仓库" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
</div>
<div class="md-source__repository">
FastDeploy
</div>
</a>
</div>
</nav>
</header>
<div class="md-container" data-md-component="container">
<main class="md-main" data-md-component="main">
<div class="md-main__inner md-grid">
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--primary" aria-label="导航栏" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="../../" title="飞桨大语言模型推理部署工具包" class="md-nav__button md-logo" aria-label="飞桨大语言模型推理部署工具包" data-md-component="logo">
<img src="../../../assets/images/logo.jpg" alt="logo">
</a>
飞桨大语言模型推理部署工具包
</label>
<div class="md-nav__source">
<a href="https://github.com/PaddlePaddle/FastDeploy" title="前往仓库" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
</div>
<div class="md-source__repository">
FastDeploy
</div>
</a>
</div>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../" class="md-nav__link">
<span class="md-ellipsis">
FastDeploy
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" >
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
<span class="md-ellipsis">
快速入门
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_2">
<span class="md-nav__icon md-icon"></span>
快速入门
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2_1" >
<label class="md-nav__link" for="__nav_2_1" id="__nav_2_1_label" tabindex="0">
<span class="md-ellipsis">
安装
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_2_1_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_2_1">
<span class="md-nav__icon md-icon"></span>
安装
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../get_started/installation/nvidia_gpu/" class="md-nav__link">
<span class="md-ellipsis">
英伟达 GPU
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../get_started/installation/kunlunxin_xpu/" class="md-nav__link">
<span class="md-ellipsis">
昆仑芯 XPU
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../get_started/installation/intel_gaudi/" class="md-nav__link">
<span class="md-ellipsis">
英特尔 Gaudi
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../get_started/installation/hygon_dcu/" class="md-nav__link">
<span class="md-ellipsis">
海光 DCU
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../get_started/installation/Enflame_gcu/" class="md-nav__link">
<span class="md-ellipsis">
燧原 S60
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../get_started/installation/iluvatar_gpu/" class="md-nav__link">
<span class="md-ellipsis">
天数 CoreX
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../get_started/installation/metax_gpu/" class="md-nav__link">
<span class="md-ellipsis">
沐曦 C550
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../get_started/quick_start/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-0.3B快速部署
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../get_started/quick_start_vl/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-VL-28B-A3B快速部署
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../get_started/ernie-4.5/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-300B-A47B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../get_started/ernie-4.5-vl/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-VL-424B-A47B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../get_started/quick_start_qwen/" class="md-nav__link">
<span class="md-ellipsis">
Qwen3-0.6b快速部署
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../get_started/quick_start_qwen25_vl/" class="md-nav__link">
<span class="md-ellipsis">
Qwen2.5-VL系列快速部署
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" >
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
<span class="md-ellipsis">
在线服务
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3">
<span class="md-nav__icon md-icon"></span>
在线服务
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../online_serving/" class="md-nav__link">
<span class="md-ellipsis">
兼容 OpenAI 协议的服务化部署
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../online_serving/metrics/" class="md-nav__link">
<span class="md-ellipsis">
监控Metrics
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../online_serving/scheduler/" class="md-nav__link">
<span class="md-ellipsis">
调度器
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../online_serving/graceful_shutdown_service/" class="md-nav__link">
<span class="md-ellipsis">
服务优雅关闭
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../offline_inference/" class="md-nav__link">
<span class="md-ellipsis">
离线推理
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5" >
<label class="md-nav__link" for="__nav_5" id="__nav_5_label" tabindex="0">
<span class="md-ellipsis">
Best Practices
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_5_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_5">
<span class="md-nav__icon md-icon"></span>
Best Practices
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../best_practices/ERNIE-4.5-0.3B-Paddle/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-0.3B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../best_practices/ERNIE-4.5-21B-A3B-Paddle/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-21B-A3B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../best_practices/ERNIE-4.5-300B-A47B-Paddle/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-300B-A47B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../best_practices/ERNIE-4.5-21B-A3B-Thinking/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-21B-A3B-Thinking
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../best_practices/ERNIE-4.5-VL-28B-A3B-Paddle/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-VL-28B-A3B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../best_practices/ERNIE-4.5-VL-424B-A47B-Paddle/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-VL-424B-A47B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../best_practices/PaddleOCR-VL-0.9B/" class="md-nav__link">
<span class="md-ellipsis">
PaddleOCR-VL-0.9B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../best_practices/FAQ/" class="md-nav__link">
<span class="md-ellipsis">
常见问题
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6" >
<label class="md-nav__link" for="__nav_6" id="__nav_6_label" tabindex="0">
<span class="md-ellipsis">
量化
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_6_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_6">
<span class="md-nav__icon md-icon"></span>
量化
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../quantization/" class="md-nav__link">
<span class="md-ellipsis">
概述
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../quantization/online_quantization/" class="md-nav__link">
<span class="md-ellipsis">
在线量化
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../quantization/wint2/" class="md-nav__link">
<span class="md-ellipsis">
WINT2量化
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_7" checked>
<label class="md-nav__link" for="__nav_7" id="__nav_7_label" tabindex="0">
<span class="md-ellipsis">
特性
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_7">
<span class="md-nav__icon md-icon"></span>
特性
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../prefix_caching/" class="md-nav__link">
<span class="md-ellipsis">
前缀缓存
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../disaggregated/" class="md-nav__link">
<span class="md-ellipsis">
分离式部署
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../chunked_prefill/" class="md-nav__link">
<span class="md-ellipsis">
分块预填充
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../load_balance/" class="md-nav__link">
<span class="md-ellipsis">
负载均衡
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../speculative_decoding/" class="md-nav__link">
<span class="md-ellipsis">
投机解码
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../structured_outputs/" class="md-nav__link">
<span class="md-ellipsis">
结构化输出
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../reasoning_output/" class="md-nav__link">
<span class="md-ellipsis">
思考链内容
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../early_stop/" class="md-nav__link">
<span class="md-ellipsis">
早停功能
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../plugins/" class="md-nav__link">
<span class="md-ellipsis">
插件机制
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../sampling/" class="md-nav__link">
<span class="md-ellipsis">
采样策略
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--active">
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
<label class="md-nav__link md-nav__link--active" for="__toc">
<span class="md-ellipsis">
多机部署
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<a href="./" class="md-nav__link md-nav__link--active">
<span class="md-ellipsis">
多机部署
</span>
</a>
<nav class="md-nav md-nav--secondary" aria-label="目录">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
目录
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#_2" class="md-nav__link">
<span class="md-ellipsis">
概述
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#_3" class="md-nav__link">
<span class="md-ellipsis">
环境准备
</span>
</a>
<nav class="md-nav" aria-label="环境准备">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#_4" class="md-nav__link">
<span class="md-ellipsis">
网络要求
</span>
</a>
<nav class="md-nav" aria-label="网络要求">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#_5" class="md-nav__link">
<span class="md-ellipsis">
软件要求
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#_6" class="md-nav__link">
<span class="md-ellipsis">
张量并行部署
</span>
</a>
<nav class="md-nav" aria-label="张量并行部署">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#_7" class="md-nav__link">
<span class="md-ellipsis">
推荐启动方式
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#_8" class="md-nav__link">
<span class="md-ellipsis">
使用说明
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#_9" class="md-nav__link">
<span class="md-ellipsis">
参数说明
</span>
</a>
<nav class="md-nav" aria-label="参数说明">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#ips" class="md-nav__link">
<span class="md-ellipsis">
ips参数
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#tensor_parallel_size" class="md-nav__link">
<span class="md-ellipsis">
tensor_parallel_size参数
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../graph_optimization/" class="md-nav__link">
<span class="md-ellipsis">
图优化
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../data_parallel_service/" class="md-nav__link">
<span class="md-ellipsis">
数据并行
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../plas_attention/" class="md-nav__link">
<span class="md-ellipsis">
PLAS
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../supported_models/" class="md-nav__link">
<span class="md-ellipsis">
支持模型列表
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../benchmark/" class="md-nav__link">
<span class="md-ellipsis">
基准测试
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_10" >
<label class="md-nav__link" for="__nav_10" id="__nav_10_label" tabindex="0">
<span class="md-ellipsis">
用法
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_10_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_10">
<span class="md-nav__icon md-icon"></span>
用法
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../usage/log/" class="md-nav__link">
<span class="md-ellipsis">
日志说明
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../usage/code_overview/" class="md-nav__link">
<span class="md-ellipsis">
代码概述
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../usage/environment_variables/" class="md-nav__link">
<span class="md-ellipsis">
环境变量
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_11" >
<label class="md-nav__link" for="__nav_11" id="__nav_11_label" tabindex="0">
<span class="md-ellipsis">
CLI 使用说明
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_11_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_11">
<span class="md-nav__icon md-icon"></span>
CLI 使用说明
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../cli/" class="md-nav__link">
<span class="md-ellipsis">
概述
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../cli/chat/" class="md-nav__link">
<span class="md-ellipsis">
Chat命令
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../cli/complete/" class="md-nav__link">
<span class="md-ellipsis">
Complete命令
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../cli/serve/" class="md-nav__link">
<span class="md-ellipsis">
Server命令
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../cli/collect-env/" class="md-nav__link">
<span class="md-ellipsis">
Collect Env命令
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../cli/bench/" class="md-nav__link">
<span class="md-ellipsis">
Bench命令
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../cli/run-batch/" class="md-nav__link">
<span class="md-ellipsis">
Run Batch命令
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../cli/tokenizer/" class="md-nav__link">
<span class="md-ellipsis">
Tokenizer命令
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_12" >
<label class="md-nav__link" for="__nav_12" id="__nav_12_label" tabindex="0">
<span class="md-ellipsis">
可观测性
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_12_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_12">
<span class="md-nav__icon md-icon"></span>
可观测性
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../observability/trace/" class="md-nav__link">
<span class="md-ellipsis">
Trace服务
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--secondary" aria-label="目录">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
目录
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#_2" class="md-nav__link">
<span class="md-ellipsis">
概述
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#_3" class="md-nav__link">
<span class="md-ellipsis">
环境准备
</span>
</a>
<nav class="md-nav" aria-label="环境准备">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#_4" class="md-nav__link">
<span class="md-ellipsis">
网络要求
</span>
</a>
<nav class="md-nav" aria-label="网络要求">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#_5" class="md-nav__link">
<span class="md-ellipsis">
软件要求
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#_6" class="md-nav__link">
<span class="md-ellipsis">
张量并行部署
</span>
</a>
<nav class="md-nav" aria-label="张量并行部署">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#_7" class="md-nav__link">
<span class="md-ellipsis">
推荐启动方式
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#_8" class="md-nav__link">
<span class="md-ellipsis">
使用说明
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#_9" class="md-nav__link">
<span class="md-ellipsis">
参数说明
</span>
</a>
<nav class="md-nav" aria-label="参数说明">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#ips" class="md-nav__link">
<span class="md-ellipsis">
ips参数
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#tensor_parallel_size" class="md-nav__link">
<span class="md-ellipsis">
tensor_parallel_size参数
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-content" data-md-component="content">
<article class="md-content__inner md-typeset">
<p><a href="../../../features/multi-node_deployment/">English</a></p>
<h1 id="_1">多节点部署</h1>
<h2 id="_2">概述</h2>
<p>多节点部署用于在单台机器GPU显存不足时,支持跨多台机器的张量并行执行。</p>
<h2 id="_3">环境准备</h2>
<h3 id="_4">网络要求</h3>
<ol>
<li>所有节点必须在同一本地网络中</li>
<li>确保所有节点之间双向连通(可使用<code>ping</code>或<code>nc -zv</code>测试)</li>
</ol>
<h3 id="_5">软件要求</h3>
<ol>
<li>所有节点安装相同版本的FastDeploy</li>
<li>[建议安装]安装并配置MPI(OpenMPI或MPICH)</li>
</ol>
<h2 id="_6">张量并行部署</h2>
<h3 id="_7">推荐启动方式</h3>
<p>我们推荐使用mpirun进行一键启动,无需手动启动每个节点。</p>
<h3 id="_8">使用说明</h3>
<ol>
<li>在所有机器上执行相同的命令</li>
<li><code>ips</code>参数中的IP顺序决定了节点启动顺序</li>
<li>第一个IP将被指定为主节点</li>
<li>
<p>确保所有节点能够解析彼此的主机名</p>
</li>
<li>
<p>在线推理启动示例:</p>
<pre><code class="language-shell">python -m fastdeploy.entrypoints.openai.api_server \
--model baidu/ERNIE-4.5-300B-A47B-Paddle \
--port 8180 \
--metrics-port 8181 \
--engine-worker-queue-port 8182 \
--max-model-len 32768 \
--max-num-seqs 32 \
--tensor-parallel-size 16 \
--graph-optimization-config '{"use_cudagraph":false}' \
--no-enable-prefix-caching \
--disable-custom-all-reduce \
--ips 192.168.1.101,192.168.1.102
</code></pre>
</li>
</ol>
<blockquote>
<p>💡 多机张量并行部署暂不支持CUDAGraphs、PrefixCaching与CustomAllReduce,需在部署命令中显式关闭。</p>
</blockquote>
<ul>
<li>
<p>离线启动示例:</p>
<pre><code class="language-python">from fastdeploy.engine.sampling_params import SamplingParams
from fastdeploy.entrypoints.llm import LLM

model_name_or_path = "baidu/ERNIE-4.5-300B-A47B-Paddle"

sampling_params = SamplingParams(temperature=0.1, max_tokens=30)
llm = LLM(model=model_name_or_path, tensor_parallel_size=16, ips="192.168.1.101,192.168.1.102")
if llm._check_master():
    output = llm.generate(prompts="你是谁?", use_tqdm=True, sampling_params=sampling_params)
    print(output)
</code></pre>
</li>
<li>
<p>注意:</p>
</li>
<li>只有主节点可以接收完成请求</li>
<li>请始终将请求发送到主节点(ips列表中的第一个IP)</li>
<li>主节点将在所有节点间分配工作负载</li>
</ul>
<h3 id="_9">参数说明</h3>
<h4 id="ips"><code>ips</code>参数</h4>
<ul>
<li><strong>类型</strong>: <code>字符串</code></li>
<li><strong>格式</strong>: 逗号分隔的IPv4地址</li>
<li><strong>描述</strong>: 指定部署组中所有节点的IP地址</li>
<li><strong>必填</strong>: 仅多节点部署时需要</li>
<li><strong>示例</strong>: <code>"192.168.1.101,192.168.1.102,192.168.1.103"</code></li>
</ul>
<h4 id="tensor_parallel_size"><code>tensor_parallel_size</code>参数</h4>
<ul>
<li><strong>类型</strong>: <code>整数</code></li>
<li><strong>描述</strong>: 所有节点上的GPU总数</li>
<li><strong>必填</strong>: 是</li>
<li><strong>示例</strong>: 对于2个节点、每个节点8个GPU的部署,设置为16</li>
</ul>
</article>
</div>
<script>
  // Look up the element whose id matches the URL fragment (without the "#").
  // If it exists and has a name, set its checked state to whether that name
  // starts with "__tabbed_".
  var target = document.getElementById(location.hash.slice(1));
  if (target && target.name) {
    target.checked = target.name.startsWith("__tabbed_");
  }
</script>
</div>
</main>
<footer class="md-footer">
<div class="md-footer-meta md-typeset">
<div class="md-footer-meta__inner md-grid">
<div class="md-copyright">
<div class="md-copyright__highlight">
Copyright &copy; 2025 Maintained by FastDeploy
</div>
Made with
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
Material for MkDocs
</a>
</div>
</div>
</div>
</footer>
</div>
<div class="md-dialog" data-md-component="dialog">
<div class="md-dialog__inner md-typeset"></div>
</div>
<script id="__config" type="application/json">{"annotate": null, "base": "../../..", "features": [], "search": "../../../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "\u5df2\u590d\u5236", "clipboard.copy": "\u590d\u5236", "search.result.more.one": "\u5728\u8be5\u9875\u4e0a\u8fd8\u6709 1 \u4e2a\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c", "search.result.more.other": "\u5728\u8be5\u9875\u4e0a\u8fd8\u6709 # \u4e2a\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c", "search.result.none": "\u6ca1\u6709\u627e\u5230\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c", "search.result.one": "\u627e\u5230 1 \u4e2a\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c", "search.result.other": "# \u4e2a\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c", "search.result.placeholder": "\u952e\u5165\u4ee5\u5f00\u59cb\u641c\u7d22", "search.result.term.missing": "\u7f3a\u5c11", "select.version": "\u9009\u62e9\u5f53\u524d\u7248\u672c"}, "version": null}</script>
<script src="../../../assets/javascripts/bundle.79ae519e.min.js"></script>
</body>
</html>