mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
Sync v2.0 version of code to github repo
This commit is contained in:
56
fastdeploy/demo/offline_prefix_caching_demo.py
Normal file
56
fastdeploy/demo/offline_prefix_caching_demo.py
Normal file
@@ -0,0 +1,56 @@
|
||||
"""
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
|
||||
from fastdeploy import LLM, SamplingParams
|
||||
|
||||
# Shared context passage (kept in Chinese, since it is model input).  Every
# request below starts with this exact text, so all prompts have a long
# identical leading span.  The adjacent string literals are concatenated by
# the parser into a single string.
common_prefix = (
    "北京,中华人民共和国的首都,是一座融合了厚重历史与现代活力的超大城市。作为国家的政治中心、文化中心、国际交往中心和科技创新中心,北京承载着国家最高权力机关和众多国际机构。\n"
    "北京的历史可追溯至三千年前。它是元、明、清三朝古都,拥有众多举世闻名的文化遗产:世界上规模最大、保存最完整的古代宫殿建筑群——故宫,历经六百余年风雨;被誉为世界建筑奇迹的万里长城,"
    "其精华段蜿蜒于北京北部群山;庄严肃穆的天坛,是古代帝王祭天的圣地;贯穿城市南北、体现传统规划智慧的中轴线,串联起众多历史地标。\n"
    "步入现代,北京展现出蓬勃的活力。鸟巢(国家体育场)和水立方(国家游泳中心)是2008年奥运会的标志性遗产。中央商务区(CBD) 摩天大楼林立,彰显着经济实力。"
    "同时,传统的胡同和四合院依然散发着独特的生活气息,北京烤鸭等美食吸引着世界各地的游客。北京,这座古老而年轻的城市,正以其兼容并蓄的魅力,续写着辉煌篇章。\n"
    "阅读以上文字,回答下列问题"
)

# Questions about the passage; each one becomes the tail of one request.
prompts = [
    "北京作为中国的首都,主要承担着哪几个方面的中心职能?",
    "文中提到了哪两处最具代表性的古代皇家祭祀与居所建筑?",
    "文章分别列举了哪些具体实例来展现北京的“厚重历史”与“现代活力”?请各举两例",
]

# Full request texts: the shared passage immediately followed by a question
# (no separator is inserted between the two parts).
generating_prompts = [common_prefix + prompt for prompt in prompts]
|
||||
|
||||
|
||||
# Sampling configuration handed unchanged to generate() below.
# NOTE(review): top_p=0.0 presumably makes decoding effectively greedy despite
# temperature=1 -- confirm against the SamplingParams documentation.
sampling_params = SamplingParams(temperature=1, top_p=0.0)

# Model identifier passed to the engine constructor.
model = "baidu/ERNIE-4.5-21B-A3B-Paddle"

# Build the engine with prefix caching enabled and "wint4" quantization.
# NOTE(review): enabling prefix caching is presumably what lets the passage
# shared by all prompts be reused across requests -- confirm in the engine docs.
prefix_cached_llm = LLM(
    model=model,
    quantization="wint4",
    enable_prefix_caching=True,
)

# Run all prompts through the engine in a single generate() call.
prefix_outputs = prefix_cached_llm.generate(generating_prompts, sampling_params)

# Print the results: the generated text of each request followed by a
# 50-character dashed separator line.
for output in prefix_outputs:
    generated_text = output.outputs.text
    print("generated_text", generated_text)
    print("-" * 50)
|
Reference in New Issue
Block a user