Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-12-24 13:28:13 +08:00)
[Metax] add ci yaml (#5520)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
Co-authored-by: Jiaxin Sui <95567040+plusNew001@users.noreply.github.com>
tests/ci_use/Metax_UT/run_ernie_vl_28B.py (new file, 36 lines added)
@@ -0,0 +1,36 @@
import os

os.environ["MACA_VISIBLE_DEVICES"] = "0,1"
os.environ["FD_MOE_BACKEND"] = "cutlass"
os.environ["PADDLE_XCCL_BACKEND"] = "metax_gpu"
os.environ["FLAGS_weight_only_linear_arch"] = "80"
os.environ["FD_METAX_KVCACHE_MEM"] = "8"
os.environ["ENABLE_V1_KVCACHE_SCHEDULER"] = "1"
os.environ["FD_ENC_DEC_BLOCK_NUM"] = "2"


import fastdeploy

sampling_params = fastdeploy.SamplingParams(top_p=0.95, max_tokens=2048, temperature=0.6)

llm = fastdeploy.LLM(
    model="/data/models/PaddlePaddle/ERNIE-4.5-VL-28B-A3B-Thinking",
    tensor_parallel_size=2,
    engine_worker_queue_port=8899,
    max_model_len=2048,
    quantization="wint8",
    load_choices="default_v1",
    disable_custom_all_reduce=True,
)

prompts = [
    "A robe takes 2 bolts of blue fiber and half that much white fiber. How many bolts in total does it take?",
]

outputs = llm.generate(prompts, sampling_params)

for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs.text
    print(f"Prompt: {prompt!r}")
    print(f"Generated: {generated_text!r}")
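The script above only prints the generations, so a CI job has no pass/fail signal beyond the exit code. A minimal sketch of how such a job could wrap it is shown below; this wrapper is a hypothetical addition (not part of this commit) and assumes the script is invoked from the repository root on a machine with the Metax GPUs and the model path available.

# Hypothetical CI wrapper (not part of this commit): runs the Metax test
# script and fails if it exits non-zero or prints no generated text.
import subprocess
import sys

SCRIPT = "tests/ci_use/Metax_UT/run_ernie_vl_28B.py"  # path added by this commit

proc = subprocess.run(
    [sys.executable, SCRIPT],
    capture_output=True,
    text=True,
)

print(proc.stdout)
print(proc.stderr, file=sys.stderr)

# Fail fast if the script crashed or produced no "Generated:" line.
assert proc.returncode == 0, f"{SCRIPT} exited with {proc.returncode}"
assert "Generated:" in proc.stdout, "no generated text found in output"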