# Mirror of https://github.com/PaddlePaddle/FastDeploy.git
# Synced 2025-09-26 20:41:53 +08:00 (44 lines, 1.9 KiB, YAML)
# Site title. Quoted because the value contains ": ", which a plain scalar
# would parse as a nested mapping.
site_name: 'FastDeploy 2.0: Large Language Model Deployment'
# Navigation tree. Each leaf maps a display label to a Markdown page path;
# an entry whose value is a list is a section grouping the pages beneath it.
nav:
  - 'FastDeploy 2.0': index.md
  - 'Quick Start':
      # Per-hardware installation guides live under get_started/installation/.
      - Installation:
          - 'Nvidia GPU': get_started/installation/nvidia_gpu.md
          - 'KunlunXin XPU': get_started/installation/kunlunxin_xpu.md
          - 'Enflame S60': get_started/installation/Enflame_gcu.md
          - 'Iluvatar CoreX': get_started/installation/iluvatar_gpu.md
      - 'Quick Deployment for ERNIE-4.5-0.3B-Paddle': get_started/quick_start.md
      - 'Quick Deployment for ERNIE-4.5-VL-28B-A3B': get_started/quick_start_vl.md
      - 'ERNIE-4.5-300B-A47B': get_started/ernie-4.5.md
      - 'ERNIE-4.5-VL-424B-A47B': get_started/ernie-4.5-vl.md
  - 'Online Serving':
      - 'OpenAI-Compatible API Server': online_serving/README.md
      - 'Monitor Metrics': online_serving/metrics.md
      - 'Scheduler': online_serving/scheduler.md
  - 'Offline Inference': offline_inference.md
  - Quantization:
      - 'Overview': quantization/README.md
      - 'Online Quantization': quantization/online_quantization.md
      - 'WINT2 Quantization': quantization/wint2.md
  - Features:
      - 'Prefix Caching': features/prefix_caching.md
      - 'Disaggregation': features/disaggregated.md
      - 'Chunked Prefill': features/chunked_prefill.md
      - 'Load Balance': features/load_balance.md
      - 'Speculative Decoding': features/speculative_decoding.md
      - 'Structured Outputs': features/structured_outputs.md
      - 'Reasoning Output': features/reasoning_output.md
  - 'Supported Models': supported_models.md
  - Benchmark: benchmark.md
  - Usage:
      - 'Log Description': usage/log.md
      - 'Code Overview': usage/code_overview.md
      - 'Environment Variables': usage/environment_variables.md
# Theme selection and theme-level options.
theme:
  name: 'material'
  # NOTE(review): `highlightjs` is an option of MkDocs' built-in themes;
  # confirm the Material theme actually honours it, otherwise drop it.
  highlightjs: true
  icon:
    # Icon shown next to the repository link.
    repo: fontawesome/brands/github
# Source repository link and the label displayed for it.
repo_url: https://github.com/PaddlePaddle/FastDeploy
repo_name: FastDeploy