Files
FastDeploy/fastdeploy/demo/offline_prefix_caching_demo.py
2025-07-19 23:19:27 +08:00

57 lines
2.7 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
from fastdeploy import LLM, SamplingParams
# Passage that every request begins with. Because each final prompt starts
# with this exact text, all requests share an identical leading segment.
common_prefix = "".join(
    (
        "北京,中华人民共和国的首都,是一座融合了厚重历史与现代活力的超大城市。作为国家的政治中心、文化中心、国际交往中心和科技创新中心,北京承载着国家最高权力机关和众多国际机构。\n",
        "北京的历史可追溯至三千年前。它是元、明、清三朝古都,拥有众多举世闻名的文化遗产:世界上规模最大、保存最完整的古代宫殿建筑群——故宫,历经六百余年风雨;被誉为世界建筑奇迹的万里长城,",
        "其精华段蜿蜒于北京北部群山;庄严肃穆的天坛,是古代帝王祭天的圣地;贯穿城市南北、体现传统规划智慧的中轴线,串联起众多历史地标。\n",
        "步入现代北京展现出蓬勃的活力。鸟巢国家体育场和水立方国家游泳中心是2008年奥运会的标志性遗产。中央商务区CBD 摩天大楼林立,彰显着经济实力。",
        "同时,传统的胡同和四合院依然散发着独特的生活气息,北京烤鸭等美食吸引着世界各地的游客。北京,这座古老而年轻的城市,正以其兼容并蓄的魅力,续写着辉煌篇章。\n",
        "阅读以上文字,回答下列问题",
    )
)

# Questions about the passage; each one becomes the tail of a separate prompt.
prompts = [
    "北京作为中国的首都,主要承担着哪几个方面的中心职能?",
    "文中提到了哪两处最具代表性的古代皇家祭祀与居所建筑?",
    "文章分别列举了哪些具体实例来展现北京的“厚重历史”与“现代活力”?请各举两例",
]

# Final model inputs: the shared passage immediately followed by one question
# (no separator is inserted between the two parts).
generating_prompts = [f"{common_prefix}{question}" for question in prompts]
# Sampling configuration passed to every request in the batch.
sampling_params = SamplingParams(temperature=1, top_p=0.0)

# Model identifier handed to the engine.
model = "baidu/ERNIE-4.5-21B-A3B-Paddle"

# Build the offline engine with prefix caching switched on. The prompts in
# `generating_prompts` all start with the same passage, which is the scenario
# this demo is meant to exercise.
# NOTE(review): "wint4" is passed through as the quantization setting;
# confirm the intended scheme against the FastDeploy quantization docs.
prefix_cached_llm = LLM(
    model=model,
    quantization="wint4",
    enable_prefix_caching=True,
)

# Run generation for all prompts in a single call.
prefix_outputs = prefix_cached_llm.generate(generating_prompts, sampling_params)

# Print the generated text of each result, followed by a separator line.
for output in prefix_outputs:
    generated_text = output.outputs.text
    print("generated_text", generated_text)
    print("-" * 50)