Sync v2.0 version of code to github repo

2025-10-05 08:37:06 +08:00 · 2025-06-29 23:29:37 +00:00
parent d151496038
commit 92c2cfa2e7
597 changed files with 78776 additions and 22905 deletions
--- a/fastdeploy/demo/offline_prefix_caching_demo.py
+++ b/fastdeploy/demo/offline_prefix_caching_demo.py
@@ -0,0 +1,56 @@
+"""
+# Copyright (c) 2025  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+from fastdeploy import LLM, SamplingParams
+
+common_prefix = (
+    "北京，中华人民共和国的首都，是一座融合了厚重历史与现代活力的超大城市。作为国家的政治中心、文化中心、国际交往中心和科技创新中心，北京承载着国家最高权力机关和众多国际机构。\n"
+    "北京的历史可追溯至三千年前。它是元、明、清三朝古都，拥有众多举世闻名的文化遗产：世界上规模最大、保存最完整的古代宫殿建筑群——故宫，历经六百余年风雨；被誉为世界建筑奇迹的万里长城，"
+    "其精华段蜿蜒于北京北部群山；庄严肃穆的天坛，是古代帝王祭天的圣地；贯穿城市南北、体现传统规划智慧的中轴线，串联起众多历史地标。\n"
+    "步入现代，北京展现出蓬勃的活力。鸟巢（国家体育场）和水立方（国家游泳中心）是2008年奥运会的标志性遗产。中央商务区（CBD） 摩天大楼林立，彰显着经济实力。"
+    "同时，传统的胡同和四合院依然散发着独特的生活气息，北京烤鸭等美食吸引着世界各地的游客。北京，这座古老而年轻的城市，正以其兼容并蓄的魅力，续写着辉煌篇章。\n"
+    "阅读以上文字，回答下列问题"
+)
+
+
+prompts = [
+    "北京作为中国的首都，主要承担着哪几个方面的中心职能？",
+    "文中提到了哪两处最具代表性的古代皇家祭祀与居所建筑？",
+    "文章分别列举了哪些具体实例来展现北京的“厚重历史”与“现代活力”？请各举两例",
+]
+
+generating_prompts = [common_prefix + prompt for prompt in prompts]
+
+
+sampling_params = SamplingParams(temperature=1, top_p=0.0)
+
+model = "baidu/ERNIE-4.5-21B-A3B-Paddle"
+
+prefix_cached_llm = LLM(
+        model=model,
+        quantization="wint4",
+        enable_prefix_caching=True,
+    )
+
+
+prefix_outputs = prefix_cached_llm.generate(generating_prompts, sampling_params)
+
+# 输出结果
+for output in prefix_outputs:
+    prompt = output.prompt
+    generated_text = output.outputs.text
+    print("generated_text", generated_text)
+    print("-" * 50)