Files
FastDeploy/get_started/installation/Enflame_gcu/index.html

2052 lines
45 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!doctype html>
<html lang="en" class="no-js">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<link rel="prev" href="../hygon_dcu/">
<link rel="next" href="../iluvatar_gpu/">
<link rel="icon" href="../../../assets/images/favicon.ico">
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.6.20">
<title>Enflame S60 - FastDeploy: Large Language Model Deployment</title>
<link rel="stylesheet" href="../../../assets/stylesheets/main.e53b48f4.min.css">
<link rel="stylesheet" href="../../../assets/stylesheets/palette.06af60db.min.css">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
<script>__md_scope=new URL("../../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
</head>
<body dir="ltr" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo">
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
<label class="md-overlay" for="__drawer"></label>
<div data-md-component="skip">
<a href="#running-ernie-45-series-models-with-fastdeploy" class="md-skip">
Skip to content
</a>
</div>
<div data-md-component="announce">
</div>
<header class="md-header md-header--shadow" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<a href="../../.." title="FastDeploy: Large Language Model Deployment" class="md-header__button md-logo" aria-label="FastDeploy: Large Language Model Deployment" data-md-component="logo">
<img src="../../../assets/images/logo.jpg" alt="logo">
</a>
<label class="md-header__button md-icon" for="__drawer">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
</label>
<div class="md-header__title" data-md-component="header-title">
<div class="md-header__ellipsis">
<div class="md-header__topic">
<span class="md-ellipsis">
FastDeploy: Large Language Model Deployment
</span>
</div>
<div class="md-header__topic" data-md-component="header-topic">
<span class="md-ellipsis">
Enflame S60
</span>
</div>
</div>
</div>
<form class="md-header__option" data-md-component="palette">
<input class="md-option" data-md-color-media="(prefers-color-scheme: light)" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_0">
<label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_1" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a4 4 0 0 0-4 4 4 4 0 0 0 4 4 4 4 0 0 0 4-4 4 4 0 0 0-4-4m0 10a6 6 0 0 1-6-6 6 6 0 0 1 6-6 6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
</label>
<input class="md-option" data-md-color-media="(prefers-color-scheme: dark)" data-md-color-scheme="slate" data-md-color-primary="black" data-md-color-accent="indigo" aria-label="Switch to system preference" type="radio" name="__palette" id="__palette_1">
<label class="md-header__button md-icon" title="Switch to system preference" for="__palette_0" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 18c-.89 0-1.74-.2-2.5-.55C11.56 16.5 13 14.42 13 12s-1.44-4.5-3.5-5.45C10.26 6.2 11.11 6 12 6a6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
</label>
</form>
<script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent")}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
<div class="md-header__option">
<div class="md-select">
<button class="md-header__button md-icon" aria-label="Select language">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m12.87 15.07-2.54-2.51.03-.03A17.5 17.5 0 0 0 14.07 6H17V4h-7V2H8v2H1v2h11.17C11.5 7.92 10.44 9.75 9 11.35 8.07 10.32 7.3 9.19 6.69 8h-2c.73 1.63 1.73 3.17 2.98 4.56l-5.09 5.02L4 19l5-5 3.11 3.11zM18.5 10h-2L12 22h2l1.12-3h4.75L21 22h2zm-2.62 7 1.62-4.33L19.12 17z"/></svg>
</button>
<div class="md-select__inner">
<ul class="md-select__list">
<li class="md-select__item">
<a href="./" hreflang="en" class="md-select__link">
English
</a>
</li>
<li class="md-select__item">
<a href="../../../zh/get_started/installation/Enflame_gcu/" hreflang="zh" class="md-select__link">
简体中文
</a>
</li>
</ul>
</div>
</div>
</div>
<label class="md-header__button md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
</label>
<div class="md-search" data-md-component="search" role="dialog">
<label class="md-search__overlay" for="__search"></label>
<div class="md-search__inner" role="search">
<form class="md-search__form" name="search">
<input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
<label class="md-search__icon md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
</label>
<nav class="md-search__options" aria-label="Search">
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
</button>
</nav>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
<div class="md-search-result" data-md-component="search-result">
<div class="md-search-result__meta">
Initializing search
</div>
<ol class="md-search-result__list" role="presentation"></ol>
</div>
</div>
</div>
</div>
</div>
<div class="md-header__source">
<a href="https://github.com/PaddlePaddle/FastDeploy" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.0.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
</div>
<div class="md-source__repository">
FastDeploy
</div>
</a>
</div>
</nav>
</header>
<div class="md-container" data-md-component="container">
<main class="md-main" data-md-component="main">
<div class="md-main__inner md-grid">
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="../../.." title="FastDeploy: Large Language Model Deployment" class="md-nav__button md-logo" aria-label="FastDeploy: Large Language Model Deployment" data-md-component="logo">
<img src="../../../assets/images/logo.jpg" alt="logo">
</a>
FastDeploy: Large Language Model Deployment
</label>
<div class="md-nav__source">
<a href="https://github.com/PaddlePaddle/FastDeploy" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.0.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
</div>
<div class="md-source__repository">
FastDeploy
</div>
</a>
</div>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../.." class="md-nav__link">
<span class="md-ellipsis">
FastDeploy
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" checked>
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
<span class="md-ellipsis">
Quick Start
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_2">
<span class="md-nav__icon md-icon"></span>
Quick Start
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2_1" checked>
<label class="md-nav__link" for="__nav_2_1" id="__nav_2_1_label" tabindex="0">
<span class="md-ellipsis">
Installation
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_2_1_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_2_1">
<span class="md-nav__icon md-icon"></span>
Installation
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../nvidia_gpu/" class="md-nav__link">
<span class="md-ellipsis">
Nvidia GPU
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../kunlunxin_xpu/" class="md-nav__link">
<span class="md-ellipsis">
KunlunXin XPU
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../hygon_dcu/" class="md-nav__link">
<span class="md-ellipsis">
HYGON DCU
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--active">
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
<label class="md-nav__link md-nav__link--active" for="__toc">
<span class="md-ellipsis">
Enflame S60
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<a href="./" class="md-nav__link md-nav__link--active">
<span class="md-ellipsis">
Enflame S60
</span>
</a>
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
Table of contents
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#quick-start" class="md-nav__link">
<span class="md-ellipsis">
🚀 Quick Start 🚀
</span>
</a>
<nav class="md-nav" aria-label="🚀 Quick Start 🚀">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#0-machine-preparation" class="md-nav__link">
<span class="md-ellipsis">
0. Machine Preparation
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#1-environment-setup-estimated-time-5-10-minutes" class="md-nav__link">
<span class="md-ellipsis">
1. Environment Setup (Estimated time: 5-10 minutes)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#2-data-preparation-estimated-time-2-5-minutes" class="md-nav__link">
<span class="md-ellipsis">
2. Data Preparation (Estimated time: 2-5 minutes)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#3-inference-estimated-time-2-5-minutes" class="md-nav__link">
<span class="md-ellipsis">
3. Inference (Estimated time: 2-5 minutes)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#4-accuracy-testing-estimated-time-60180-minutes" class="md-nav__link">
<span class="md-ellipsis">
4. Accuracy Testing (Estimated time: 60180 minutes)
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../iluvatar_gpu/" class="md-nav__link">
<span class="md-ellipsis">
Iluvatar CoreX
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../metax_gpu/" class="md-nav__link">
<span class="md-ellipsis">
Metax C550
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../quick_start/" class="md-nav__link">
<span class="md-ellipsis">
Quick Deployment For ERNIE-4.5-0.3B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../quick_start_vl/" class="md-nav__link">
<span class="md-ellipsis">
Quick Deployment for ERNIE-4.5-VL-28B-A3B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../ernie-4.5/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-300B-A47B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../ernie-4.5-vl/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-VL-424B-A47B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../quick_start_qwen/" class="md-nav__link">
<span class="md-ellipsis">
Quick Deployment For QWEN
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" >
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
<span class="md-ellipsis">
Online Serving
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3">
<span class="md-nav__icon md-icon"></span>
Online Serving
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../online_serving/" class="md-nav__link">
<span class="md-ellipsis">
OpenAI-Compatible API Server
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../online_serving/metrics/" class="md-nav__link">
<span class="md-ellipsis">
Monitor Metrics
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../online_serving/scheduler/" class="md-nav__link">
<span class="md-ellipsis">
Scheduler
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../online_serving/graceful_shutdown_service/" class="md-nav__link">
<span class="md-ellipsis">
Graceful Shutdown
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../../offline_inference/" class="md-nav__link">
<span class="md-ellipsis">
Offline Inference
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5" >
<label class="md-nav__link" for="__nav_5" id="__nav_5_label" tabindex="0">
<span class="md-ellipsis">
Best Practices
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_5_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_5">
<span class="md-nav__icon md-icon"></span>
Best Practices
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../best_practices/ERNIE-4.5-0.3B-Paddle/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-0.3B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../best_practices/ERNIE-4.5-21B-A3B-Paddle/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-21B-A3B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../best_practices/ERNIE-4.5-300B-A47B-Paddle/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-300B-A47B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../best_practices/ERNIE-4.5-21B-A3B-Thinking/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-21B-A3B-Thinking
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../best_practices/ERNIE-4.5-VL-28B-A3B-Paddle/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-VL-28B-A3B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../best_practices/ERNIE-4.5-VL-424B-A47B-Paddle/" class="md-nav__link">
<span class="md-ellipsis">
ERNIE-4.5-VL-424B-A47B
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../best_practices/FAQ/" class="md-nav__link">
<span class="md-ellipsis">
FAQ
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6" >
<label class="md-nav__link" for="__nav_6" id="__nav_6_label" tabindex="0">
<span class="md-ellipsis">
Quantization
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_6_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_6">
<span class="md-nav__icon md-icon"></span>
Quantization
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../quantization/" class="md-nav__link">
<span class="md-ellipsis">
Overview
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../quantization/online_quantization/" class="md-nav__link">
<span class="md-ellipsis">
Online Quantization
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../quantization/wint2/" class="md-nav__link">
<span class="md-ellipsis">
WINT2 Quantization
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_7" >
<label class="md-nav__link" for="__nav_7" id="__nav_7_label" tabindex="0">
<span class="md-ellipsis">
Features
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_7">
<span class="md-nav__icon md-icon"></span>
Features
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../features/prefix_caching/" class="md-nav__link">
<span class="md-ellipsis">
Prefix Caching
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/disaggregated/" class="md-nav__link">
<span class="md-ellipsis">
Disaggregation
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/chunked_prefill/" class="md-nav__link">
<span class="md-ellipsis">
Chunked Prefill
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/load_balance/" class="md-nav__link">
<span class="md-ellipsis">
Load Balance
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/speculative_decoding/" class="md-nav__link">
<span class="md-ellipsis">
Speculative Decoding
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/structured_outputs/" class="md-nav__link">
<span class="md-ellipsis">
Structured Outputs
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/reasoning_output/" class="md-nav__link">
<span class="md-ellipsis">
Reasoning Output
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/early_stop/" class="md-nav__link">
<span class="md-ellipsis">
Early Stop
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/plugins/" class="md-nav__link">
<span class="md-ellipsis">
Plugins
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/sampling/" class="md-nav__link">
<span class="md-ellipsis">
Sampling
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/multi-node_deployment/" class="md-nav__link">
<span class="md-ellipsis">
MultiNode Deployment
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/graph_optimization/" class="md-nav__link">
<span class="md-ellipsis">
Graph Optimization
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/data_parallel_service/" class="md-nav__link">
<span class="md-ellipsis">
Data Parallelism
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../features/plas_attention/" class="md-nav__link">
<span class="md-ellipsis">
PLAS
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../../supported_models/" class="md-nav__link">
<span class="md-ellipsis">
Supported Models
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../benchmark/" class="md-nav__link">
<span class="md-ellipsis">
Benchmark
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_10" >
<label class="md-nav__link" for="__nav_10" id="__nav_10_label" tabindex="0">
<span class="md-ellipsis">
Usage
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_10_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_10">
<span class="md-nav__icon md-icon"></span>
Usage
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../usage/log/" class="md-nav__link">
<span class="md-ellipsis">
Log Description
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../usage/code_overview/" class="md-nav__link">
<span class="md-ellipsis">
Code Overview
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../usage/environment_variables/" class="md-nav__link">
<span class="md-ellipsis">
Environment Variables
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
Table of contents
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#quick-start" class="md-nav__link">
<span class="md-ellipsis">
🚀 Quick Start 🚀
</span>
</a>
<nav class="md-nav" aria-label="🚀 Quick Start 🚀">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#0-machine-preparation" class="md-nav__link">
<span class="md-ellipsis">
0. Machine Preparation
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#1-environment-setup-estimated-time-5-10-minutes" class="md-nav__link">
<span class="md-ellipsis">
1. Environment Setup (Estimated time: 5-10 minutes)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#2-data-preparation-estimated-time-2-5-minutes" class="md-nav__link">
<span class="md-ellipsis">
2. Data Preparation (Estimated time: 2-5 minutes)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#3-inference-estimated-time-2-5-minutes" class="md-nav__link">
<span class="md-ellipsis">
3. Inference (Estimated time: 2-5 minutes)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#4-accuracy-testing-estimated-time-60180-minutes" class="md-nav__link">
<span class="md-ellipsis">
4. Accuracy Testing (Estimated time: 60180 minutes)
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-content" data-md-component="content">
<article class="md-content__inner md-typeset">
<h1 id="running-ernie-45-series-models-with-fastdeploy">Running ERNIE 4.5 Series Models with FastDeploy</h1>
<p>The Enflame S60 (<a href="https://www.enflame-tech.com/">Learn about Enflame</a>) is a next-generation AI inference accelerator card designed for large-scale deployment in data centers. It meets the demands of large language models (LLMs), search/advertising/recommendation systems, and traditional models. Characterized by broad model coverage, user-friendliness, and high portability, it is widely applicable to mainstream inference scenarios such as image and text generation applications, search and recommendation systems, and text/image/speech recognition.</p>
<p>FastDeploy has deeply adapted and optimized the ERNIE 4.5 Series Models for the Enflame S60, achieving a unified inference interface between GCU and GPU. This allows seamless migration of inference tasks without code modifications.</p>
<h2 id="quick-start">🚀 Quick Start 🚀</h2>
<h3 id="0-machine-preparation">0. Machine Preparation</h3>
<p>Before starting, prepare a machine equipped with Enflame S60 accelerator cards. Requirements:</p>
<table>
<thead>
<tr>
<th style="text-align: center;">Chip Type</th>
<th style="text-align: center;">Driver Version</th>
<th style="text-align: center;">TopsRider Version</th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align: center;">Enflame S60</td>
<td style="text-align: center;">1.5.0.5</td>
<td style="text-align: center;">3.4.623</td>
</tr>
</tbody>
</table>
<p><strong>Note: To verify if your machine has Enflame S60 accelerator cards installed, run the following command in the host environment and check for output:</strong></p>
<pre><code class="language-bash">lspci | grep S60
# Example: lspci | grep S60, Example Output:
08:00.0 Processing accelerators: Shanghai Enflame Technology Co. Ltd S60 [Enflame] (rev 01)
09:00.0 Processing accelerators: Shanghai Enflame Technology Co. Ltd S60 [Enflame] (rev 01)
0e:00.0 Processing accelerators: Shanghai Enflame Technology Co. Ltd S60 [Enflame] (rev 01)
11:00.0 Processing accelerators: Shanghai Enflame Technology Co. Ltd S60 [Enflame] (rev 01)
32:00.0 Processing accelerators: Shanghai Enflame Technology Co. Ltd S60 [Enflame] (rev 01)
38:00.0 Processing accelerators: Shanghai Enflame Technology Co. Ltd S60 [Enflame] (rev 01)
3b:00.0 Processing accelerators: Shanghai Enflame Technology Co. Ltd S60 [Enflame] (rev 01)
3c:00.0 Processing accelerators: Shanghai Enflame Technology Co. Ltd S60 [Enflame] (rev 01)
</code></pre>
<h3 id="1-environment-setup-estimated-time-5-10-minutes">1. Environment Setup (Estimated time: 5-10 minutes)</h3>
<ol>
<li>Pull the Docker image</li>
</ol>
<pre><code class="language-bash"># Note: This image only contains the Paddle development environment, not precompiled PaddlePaddle packages
docker pull ccr-2vdh3abv-pub.cnc.bj.baidubce.com/device/paddle-gcu:topsrider3.5.102-ubuntu20-x86_64-gcc84
</code></pre>
<ol>
<li>Start the container</li>
</ol>
<pre><code class="language-bash">docker run --name paddle-gcu-llm -v /home:/home -v /work:/work --network=host --ipc=host -it --privileged ccr-2vdh3abv-pub.cnc.bj.baidubce.com/device/paddle-gcu:topsrider3.5.102-ubuntu20-x86_64-gcc84 /bin/bash
</code></pre>
<ol>
<li>Obtain and install drivers<br/>
<strong>Full software packages are preloaded in the Docker container. Copy them to an external directory, e.g., <code>/home/workspace/deps/</code></strong></li>
</ol>
<pre><code class="language-bash">mkdir -p /home/workspace/deps/ &amp;&amp; cp /root/TopsRider_i3x_*/TopsRider_i3x_*_deb_amd64.run /home/workspace/deps/
</code></pre>
<ol>
<li>Install drivers<br/>
<strong>Execute this operation in the host environment</strong></li>
</ol>
<pre><code class="language-bash">cd /home/workspace/deps/
bash TopsRider_i3x_*_deb_amd64.run --driver --no-auto-load -y
</code></pre>
<p>After driver installation, <strong>re-enter the Docker container</strong>:</p>
<pre><code class="language-bash">docker start paddle-gcu-llm
docker exec -it paddle-gcu-llm bash
</code></pre>
<ol>
<li>Install PaddlePaddle &amp; PaddleCustomDevice<br/></li>
</ol>
<pre><code class="language-bash"># PaddlePaddle Deep Learning Framework provides fundamental computing capabilities
python -m pip install paddlepaddle==3.1.1 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/
# PaddleCustomDevice implements custom hardware backend for PaddlePaddle, providing GCU operator implementations
python -m pip install paddle-custom-gcu==3.1.1 -i https://www.paddlepaddle.org.cn/packages/stable/gcu/
# For source compilation, refer to: https://github.com/PaddlePaddle/PaddleCustomDevice/blob/develop/backends/gcu/README_cn.md
</code></pre>
<p>For latest paddle version on iluvatar. Refer to <a href="https://www.paddlepaddle.org.cn/">PaddlePaddle Installation</a></p>
<ol>
<li>Install FastDeploy and dependencies</li>
</ol>
<pre><code class="language-bash">python -m pip install fastdeploy -i https://www.paddlepaddle.org.cn/packages/stable/gcu/ --extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simplels
</code></pre>
<p>You can build FastDeploy from source if you need the <code>latest version</code>.</p>
<pre><code class="language-bash">git clone https://github.com/PaddlePaddle/FastDeploy
cd FastDeploy
python -m pip install -r requirements.txt --extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simplels
bash build.sh 1
</code></pre>
<h3 id="2-data-preparation-estimated-time-2-5-minutes">2. Data Preparation (Estimated time: 2-5 minutes)</h3>
<p>Use a trained model for inference on GSM8K dataset:</p>
<pre><code class="language-bash">mkdir -p /home/workspace/benchmark/ &amp;&amp; cd /home/workspace/benchmark/
wget https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl
</code></pre>
<p>Place model weights in a directory, e.g., <code>/work/models/ERNIE-4.5-300B-A47B-Paddle/</code></p>
<h3 id="3-inference-estimated-time-2-5-minutes">3. Inference (Estimated time: 2-5 minutes)</h3>
<p>Start the inference service:</p>
<pre><code class="language-bash">python -m fastdeploy.entrypoints.openai.api_server \
--model &quot;/work/models/ERNIE-4.5-300B-A47B-Paddle/&quot; \
--port 8188 \
--metrics-port 8200 \
--tensor-parallel-size 8 \
--max-model-len 32768 \
--num-gpu-blocks-override 4096 \
--max-num-batched-tokens 32768 \
--quantization &quot;wint4&quot;
</code></pre>
<p>Query the model service:</p>
<pre><code class="language-bash">curl -X POST &quot;http://0.0.0.0:8188/v1/chat/completions&quot; \
-H &quot;Content-Type: application/json&quot; \
-d '{
&quot;messages&quot;: [
{&quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;Where is Beijing?&quot;}
]
}'
</code></pre>
<p>Successful execution returns inference results, e.g.:</p>
<pre><code class="language-json">{&quot;id&quot;:&quot;chatcmpl-20f1210d-6943-4110-ad2d-c76ba11604ad&quot;,&quot;object&quot;:&quot;chat.completion&quot;,&quot;created&quot;:1751621261,&quot;model&quot;:&quot;default&quot;,&quot;choices&quot;:[{&quot;index&quot;:0,&quot;message&quot;:{&quot;role&quot;:&quot;assistant&quot;,&quot;content&quot;:&quot;Beijing is the capital city of the People's Republic of China, located in the northern part of the country. It is situated in the North China Plain, bordered by the mountains to the west, north, and northeast. Beijing serves as China's political, cultural, and international exchange center, playing a crucial role in the nation's development and global interactions.&quot;,&quot;reasoning_content&quot;:null,&quot;tool_calls&quot;:null},&quot;finish_reason&quot;:&quot;stop&quot;}],&quot;usage&quot;:{&quot;prompt_tokens&quot;:11,&quot;total_tokens&quot;:88,&quot;completion_tokens&quot;:77,&quot;prompt_tokens_details&quot;:{&quot;cached_tokens&quot;:0}}}
</code></pre>
<h3 id="4-accuracy-testing-estimated-time-60180-minutes">4. Accuracy Testing (Estimated time: 60180 minutes)</h3>
<p>Place the accuracy script <code>bench_gsm8k.py</code> in <code>/home/workspace/benchmark/</code> and modify sampling parameters, e.g.:</p>
<pre><code class="language-bash">data = {
&quot;messages&quot;: [
{
&quot;role&quot;: &quot;user&quot;,
&quot;content&quot;: prompt,
}
],
&quot;temperature&quot;: 0.6,
&quot;max_tokens&quot;: 2047,
&quot;top_p&quot;: 0.95,
&quot;do_sample&quot;: True,
}
</code></pre>
<p>Run accuracy tests:</p>
<pre><code class="language-bash">cd /home/workspace/benchmark/
python -u bench_gsm8k.py --port 8188 --num-questions 1319 --num-shots 5 --parallel 8
</code></pre>
<p>Upon completion, accuracy results are saved in <code>result.jsonl</code>, e.g.:</p>
<pre><code class="language-json">{&quot;task&quot;: &quot;gsm8k&quot;, &quot;backend&quot;: &quot;paddlepaddle&quot;, &quot;num_gpus&quot;: 1, &quot;latency&quot;: 13446.01, &quot;accuracy&quot;: 0.956, &quot;num_requests&quot;: 1319, &quot;other&quot;: {&quot;num_questions&quot;: 1319, &quot;parallel&quot;: 8}}
</code></pre>
</article>
</div>
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
</div>
</main>
<footer class="md-footer">
<div class="md-footer-meta md-typeset">
<div class="md-footer-meta__inner md-grid">
<div class="md-copyright">
<div class="md-copyright__highlight">
Copyright &copy; 2025 Maintained by FastDeploy
</div>
Made with
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
Material for MkDocs
</a>
</div>
</div>
</div>
</footer>
</div>
<div class="md-dialog" data-md-component="dialog">
<div class="md-dialog__inner md-typeset"></div>
</div>
<script id="__config" type="application/json">{"base": "../../..", "features": [], "search": "../../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script src="../../../assets/javascripts/bundle.f55a23d4.min.js"></script>
</body>
</html>