Files

1955 lines
39 KiB
HTML
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!doctype html>
<html lang="zh" class="no-js">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<link rel="prev" href="../iluvatar_gpu/">
<link rel="next" href="../../quick_start/">
<link rel="icon" href="../../../../assets/images/favicon.ico">
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.6.20">
<title>沐曦 C550 - 飞桨大语言模型推理部署工具包</title>
<link rel="stylesheet" href="../../../../assets/stylesheets/main.e53b48f4.min.css">
<link rel="stylesheet" href="../../../../assets/stylesheets/palette.06af60db.min.css">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
<script>__md_scope=new URL("../../../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
</head>
<body dir="ltr" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo">
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
<label class="md-overlay" for="__drawer"></label>
<div data-md-component="skip">
<a href="#metax-gpu-c550-ernie-45" class="md-skip">
跳转至
</a>
</div>
<div data-md-component="announce">
</div>
<header class="md-header md-header--shadow" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="页眉">
<a href="../../../" title="飞桨大语言模型推理部署工具包" class="md-header__button md-logo" aria-label="飞桨大语言模型推理部署工具包" data-md-component="logo">
<img src="../../../../assets/images/logo.jpg" alt="logo">
</a>
<label class="md-header__button md-icon" for="__drawer">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
</label>
<div class="md-header__title" data-md-component="header-title">
<div class="md-header__ellipsis">
<div class="md-header__topic">
<span class="md-ellipsis">
飞桨大语言模型推理部署工具包
</span>
</div>
<div class="md-header__topic" data-md-component="header-topic">
<span class="md-ellipsis">
沐曦 C550
</span>
</div>
</div>
</div>
<form class="md-header__option" data-md-component="palette">
<input class="md-option" data-md-color-media="(prefers-color-scheme: light)" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_0">
<label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_1" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a4 4 0 0 0-4 4 4 4 0 0 0 4 4 4 4 0 0 0 4-4 4 4 0 0 0-4-4m0 10a6 6 0 0 1-6-6 6 6 0 0 1 6-6 6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
</label>
<input class="md-option" data-md-color-media="(prefers-color-scheme: dark)" data-md-color-scheme="slate" data-md-color-primary="black" data-md-color-accent="indigo" aria-label="Switch to system preference" type="radio" name="__palette" id="__palette_1">
<label class="md-header__button md-icon" title="Switch to system preference" for="__palette_0" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 18c-.89 0-1.74-.2-2.5-.55C11.56 16.5 13 14.42 13 12s-1.44-4.5-3.5-5.45C10.26 6.2 11.11 6 12 6a6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
</label>
</form>
<script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent")}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
<div class="md-header__option">
<div class="md-select">
<button class="md-header__button md-icon" aria-label="选择当前语言">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m12.87 15.07-2.54-2.51.03-.03A17.5 17.5 0 0 0 14.07 6H17V4h-7V2H8v2H1v2h11.17C11.5 7.92 10.44 9.75 9 11.35 8.07 10.32 7.3 9.19 6.69 8h-2c.73 1.63 1.73 3.17 2.98 4.56l-5.09 5.02L4 19l5-5 3.11 3.11zM18.5 10h-2L12 22h2l1.12-3h4.75L21 22h2zm-2.62 7 1.62-4.33L19.12 17z"/></svg>
</button>
<div class="md-select__inner">
<ul class="md-select__list">
<li class="md-select__item">
<a href="../../../../get_started/installation/metax_gpu/" hreflang="en" class="md-select__link">
English
</a>
</li>
<li class="md-select__item">
<a href="./" hreflang="zh" class="md-select__link">
简体中文
</a>
</li>
</ul>
</div>
</div>
</div>
<label class="md-header__button md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
</label>
<div class="md-search" data-md-component="search" role="dialog">
<label class="md-search__overlay" for="__search"></label>
<div class="md-search__inner" role="search">
<form class="md-search__form" name="search">
<input type="text" class="md-search__input" name="query" aria-label="搜索" placeholder="搜索" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
<label class="md-search__icon md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
</label>
<nav class="md-search__options" aria-label="查找">
<button type="reset" class="md-search__icon md-icon" title="清空当前内容" aria-label="清空当前内容" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
</button>
</nav>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
<div class="md-search-result" data-md-component="search-result">
<div class="md-search-result__meta">
正在初始化搜索引擎
</div>
<ol class="md-search-result__list" role="presentation"></ol>
</div>
</div>
</div>
</div>
</div>
<div class="md-header__source">
<a href="https://github.com/PaddlePaddle/FastDeploy" title="前往仓库" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.0.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
</div>
<div class="md-source__repository">
FastDeploy
</div>
</a>
</div>
</nav>
</header>
<div class="md-container" data-md-component="container">
<main class="md-main" data-md-component="main">
<div class="md-main__inner md-grid">
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--primary" aria-label="导航栏" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="../../../" title="飞桨大语言模型推理部署工具包" class="md-nav__button md-logo" aria-label="飞桨大语言模型推理部署工具包" data-md-component="logo">
<img src="../../../../assets/images/logo.jpg" alt="logo">
</a>
飞桨大语言模型推理部署工具包
</label>
<div class="md-nav__source">
<a href="https://github.com/PaddlePaddle/FastDeploy" title="前往仓库" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.0.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
</div>
<div class="md-source__repository">
FastDeploy
</div>
</a>
</div>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../" class="md-nav__link">
<span class="md-ellipsis">
FastDeploy
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" checked>
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
<span class="md-ellipsis">
快速入门
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_2">
<span class="md-nav__icon md-icon"></span>
快速入门
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2_1" checked>
<label class="md-nav__link" for="__nav_2_1" id="__nav_2_1_label" tabindex="0">
<span class="md-ellipsis">
安装
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_2_1_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_2_1">
<span class="md-nav__icon md-icon"></span>
安装
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../nvidia_gpu/" class="md-nav__link">
<span class="md-ellipsis">
英伟达 GPU
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../kunlunxin_xpu/" class="md-nav__link">
<span class="md-ellipsis">
昆仑芯 XPU
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../hygon_dcu/" class="md-nav__link">
<span class="md-ellipsis">
海光 DCU
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../Enflame_gcu/" class="md-nav__link">
<span class="md-ellipsis">
燧原 S60
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../iluvatar_gpu/" class="md-nav__link">
<span class="md-ellipsis">
天数 CoreX
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--active">
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
<label class="md-nav__link md-nav__link--active" for="__toc">
<span class="md-ellipsis">
沐曦 C550
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<a href="./" class="md-nav__link md-nav__link--active">
<span class="md-ellipsis">
沐曦 C550
</span>
</a>
<nav class="md-nav md-nav--secondary" aria-label="目录">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
目录
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#1" class="md-nav__link">
<span class="md-ellipsis">
1. 容器镜像获取
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#2" class="md-nav__link">
<span class="md-ellipsis">
2. 预安装
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#3-fastdeploy" class="md-nav__link">
<span class="md-ellipsis">
3. FastDeploy代码下载并编译
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#4" class="md-nav__link">
<span class="md-ellipsis">
4. 环境验证
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#5" class="md-nav__link">
<span class="md-ellipsis">
5. 示例
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../quick_start/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-0.3B快速部署
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../quick_start_vl/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-VL-28B-A3B快速部署
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../ernie-4.5/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-300B-A47B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../ernie-4.5-vl/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-VL-424B-A47B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../quick_start_qwen/" class="md-nav__link">
<span class="md-ellipsis">
Qwen3-0.6b快速部署
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" >
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
<span class="md-ellipsis">
在线服务
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3">
<span class="md-nav__icon md-icon"></span>
在线服务
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../online_serving/" class="md-nav__link">
<span class="md-ellipsis">
兼容 OpenAI 协议的服务化部署
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../online_serving/metrics/" class="md-nav__link">
<span class="md-ellipsis">
监控Metrics
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../online_serving/scheduler/" class="md-nav__link">
<span class="md-ellipsis">
调度器
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../online_serving/graceful_shutdown_service/" class="md-nav__link">
<span class="md-ellipsis">
服务优雅关闭
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../../offline_inference/" class="md-nav__link">
<span class="md-ellipsis">
离线推理
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5" >
<label class="md-nav__link" for="__nav_5" id="__nav_5_label" tabindex="0">
<span class="md-ellipsis">
最佳实践
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_5_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_5">
<span class="md-nav__icon md-icon"></span>
最佳实践
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../best_practices/ERNIE-4.5-0.3B-Paddle/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-0.3B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../best_practices/ERNIE-4.5-21B-A3B-Paddle/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-21B-A3B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../best_practices/ERNIE-4.5-300B-A47B-Paddle/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-300B-A47B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../best_practices/ERNIE-4.5-21B-A3B-Thinking/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-21B-A3B-Thinking
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../best_practices/ERNIE-4.5-VL-28B-A3B-Paddle/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-VL-28B-A3B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../best_practices/ERNIE-4.5-VL-424B-A47B-Paddle/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-VL-424B-A47B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../best_practices/FAQ/" class="md-nav__link">
<span class="md-ellipsis">
常见问题
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6" >
<label class="md-nav__link" for="__nav_6" id="__nav_6_label" tabindex="0">
<span class="md-ellipsis">
量化
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_6_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_6">
<span class="md-nav__icon md-icon"></span>
量化
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../quantization/" class="md-nav__link">
<span class="md-ellipsis">
概述
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../quantization/online_quantization/" class="md-nav__link">
<span class="md-ellipsis">
在线量化
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../quantization/wint2/" class="md-nav__link">
<span class="md-ellipsis">
WINT2量化
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_7" >
<label class="md-nav__link" for="__nav_7" id="__nav_7_label" tabindex="0">
<span class="md-ellipsis">
特性
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_7">
<span class="md-nav__icon md-icon"></span>
特性
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../features/prefix_caching/" class="md-nav__link">
<span class="md-ellipsis">
前缀缓存
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/disaggregated/" class="md-nav__link">
<span class="md-ellipsis">
分离式部署
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/chunked_prefill/" class="md-nav__link">
<span class="md-ellipsis">
分块预填充
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/load_balance/" class="md-nav__link">
<span class="md-ellipsis">
负载均衡
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/speculative_decoding/" class="md-nav__link">
<span class="md-ellipsis">
投机解码
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/structured_outputs/" class="md-nav__link">
<span class="md-ellipsis">
结构化输出
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/reasoning_output/" class="md-nav__link">
<span class="md-ellipsis">
思考链内容
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/early_stop/" class="md-nav__link">
<span class="md-ellipsis">
早停功能
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/plugins/" class="md-nav__link">
<span class="md-ellipsis">
插件机制
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/sampling/" class="md-nav__link">
<span class="md-ellipsis">
采样策略
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/multi-node_deployment/" class="md-nav__link">
<span class="md-ellipsis">
多机部署
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/graph_optimization/" class="md-nav__link">
<span class="md-ellipsis">
图优化
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/data_parallel_service/" class="md-nav__link">
<span class="md-ellipsis">
数据并行
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/plas_attention/" class="md-nav__link">
<span class="md-ellipsis">
PLAS
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../../supported_models/" class="md-nav__link">
<span class="md-ellipsis">
支持模型列表
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../benchmark/" class="md-nav__link">
<span class="md-ellipsis">
基准测试
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_10" >
<label class="md-nav__link" for="__nav_10" id="__nav_10_label" tabindex="0">
<span class="md-ellipsis">
用法
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_10_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_10">
<span class="md-nav__icon md-icon"></span>
用法
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../usage/log/" class="md-nav__link">
<span class="md-ellipsis">
日志说明
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../usage/code_overview/" class="md-nav__link">
<span class="md-ellipsis">
代码概述
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../usage/environment_variables/" class="md-nav__link">
<span class="md-ellipsis">
环境变量
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--secondary" aria-label="目录">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
目录
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#1" class="md-nav__link">
<span class="md-ellipsis">
1. 容器镜像获取
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#2" class="md-nav__link">
<span class="md-ellipsis">
2. 预安装
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#3-fastdeploy" class="md-nav__link">
<span class="md-ellipsis">
3. FastDeploy代码下载并编译
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#4" class="md-nav__link">
<span class="md-ellipsis">
4. 环境验证
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#5" class="md-nav__link">
<span class="md-ellipsis">
5. 示例
</span>
</a>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-content" data-md-component="content">
<article class="md-content__inner md-typeset">
<h1 id="metax-gpu-c550-ernie-45">使用 Metax GPU C550 运行ERNIE 4.5 系列模型</h1>
<p>FastDeploy在Metax C550上对ERNIE 4.5系列模型进行了深度适配和优化实现了推理入口和GPU的统一无需修改即可完成推理任务的迁移。</p>
<p>环境准备:
- Python &gt;= 3.10
- Linux X86_64</p>
<table>
<thead>
<tr>
<th style="text-align: center;">Chip Type</th>
<th style="text-align: center;">Driver Version</th>
<th style="text-align: center;">KMD Version</th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align: center;">MetaX C550</td>
<td style="text-align: center;">3.0.0.1</td>
<td style="text-align: center;">2.14.6</td>
</tr>
</tbody>
</table>
<h2 id="1">1. 容器镜像获取</h2>
<pre><code class="language-shell">docker login --username=cr_temp_user --password=eyJpbnN0YW5jZUlkIjoiY3JpLXpxYTIzejI2YTU5M3R3M2QiLCJ0aW1lIjoiMTc1NTUxODEwODAwMCIsInR5cGUiOiJzdWIiLCJ1c2VySWQiOiIyMDcwOTQwMTA1NjYzNDE3OTIifQ:8226ca50ce5476c42062e24d3c465545de1c1780 cr.metax-tech.com &amp;&amp; docker pull cr.metax-tech.com/public-library/maca-native:3.0.0.4-ubuntu20.04-amd64
</code></pre>
<h2 id="2">2. 预安装</h2>
<pre><code class="language-shell">1pip install paddlepaddle==3.0.0.dev20250729 -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/
2pip install paddle-metax-gpu==3.0.0.dev20250807 -i https://www.paddlepaddle.org.cn/packages/nightly/maca/
</code></pre>
<h2 id="3-fastdeploy">3. FastDeploy代码下载并编译</h2>
<pre><code class="language-shell">git clone https://github.com/PaddlePaddle/FastDeploy
cd FastDeploy
bash build.sh
</code></pre>
<p>The built packages will be in the <code>FastDeploy/dist</code> directory.</p>
<h2 id="4">4. 环境验证</h2>
<p>After installation, verify the environment with this Python code:</p>
<pre><code class="language-python">import paddle
from paddle.jit.marker import unified
# Verify GPU availability
paddle.utils.run_check()
# Verify FastDeploy custom operators compilation
from fastdeploy.model_executor.ops.gpu import beam_search_softmax
</code></pre>
<p>If the above code executes successfully, the environment is ready.</p>
<h2 id="5">5. 示例</h2>
<p>from fastdeploy import LLM, SamplingParams</p>
<p>prompts = [
"Hello. My name is",
]</p>
<p>sampling_params = SamplingParams(top_p=0.95, max_tokens=32, temperature=0.6)</p>
<p>llm = LLM(model="/root/model/ERNIE-4.5-21B-A3B-Paddle", tensor_parallel_size=1, max_model_len=256, engine_worker_queue_port=9135, quantization='wint8', static_decode_blocks=0, gpu_memory_utilization=0.9)</p>
<p>outputs = llm.generate(prompts, sampling_params)</p>
<p>print(f"Generated {len(outputs)} outputs")
print("=" * 50 + "\n")</p>
<p>for output in outputs:
prompt = output.prompt
generated_text = output.outputs.text
print(prompt)
print(generated_text)
print("-" * 50)</p>
<p>输出:
INFO 2025-08-18 10:54:18,455 416822 engine.py[line:202] Waiting worker processes ready...
Loading Weights: 100%|█████████████████████████████████████████████████████████████████████████| 100/100 [03:33&lt;00:00, 2.14s/it]
Loading Layers: 100%|██████████████████████████████████████████████████████████████████████████| 100/100 [00:18&lt;00:00, 5.54it/s]
INFO 2025-08-18 10:58:16,149 416822 engine.py[line:247] Worker processes are launched with 240.08204197883606 seconds.
Processed prompts: 100%|███████████████████████| 1/1 [00:21&lt;00:00, 21.84s/it, est. speed input: 0.00 toks/s, output: 0.00 toks/s]
Generated 1 outputs
==================================================</p>
<p>Hello. My name is
Alice and I'm here to help you. What can I do for you today?
Hello Alice! I'm trying to organize a small party</p>
</article>
</div>
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
</div>
</main>
<footer class="md-footer">
<div class="md-footer-meta md-typeset">
<div class="md-footer-meta__inner md-grid">
<div class="md-copyright">
<div class="md-copyright__highlight">
Copyright &copy; 2025 Maintained by FastDeploy
</div>
Made with
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
Material for MkDocs
</a>
</div>
</div>
</div>
</footer>
</div>
<div class="md-dialog" data-md-component="dialog">
<div class="md-dialog__inner md-typeset"></div>
</div>
<script id="__config" type="application/json">{"base": "../../../..", "features": [], "search": "../../../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "\u5df2\u590d\u5236", "clipboard.copy": "\u590d\u5236", "search.result.more.one": "\u5728\u8be5\u9875\u4e0a\u8fd8\u6709 1 \u4e2a\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c", "search.result.more.other": "\u5728\u8be5\u9875\u4e0a\u8fd8\u6709 # \u4e2a\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c", "search.result.none": "\u6ca1\u6709\u627e\u5230\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c", "search.result.one": "\u627e\u5230 1 \u4e2a\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c", "search.result.other": "# \u4e2a\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c", "search.result.placeholder": "\u952e\u5165\u4ee5\u5f00\u59cb\u641c\u7d22", "search.result.term.missing": "\u7f3a\u5c11", "select.version": "\u9009\u62e9\u5f53\u524d\u7248\u672c"}, "version": null}</script>
<script src="../../../../assets/javascripts/bundle.f55a23d4.min.js"></script>
</body>
</html>