From eae34a416c187c3a28832f8c1455022a003f6290 Mon Sep 17 00:00:00 2001
From: xiegegege <46314656+xiegegege@users.noreply.github.com>
Date: Tue, 25 Nov 2025 19:53:30 +0800
Subject: [PATCH] [benchmark]add qwen3-235b pd+ep yaml (#5225)

---
 benchmarks/yaml/eb45-32k-wint4-mtp-h800-tp4.yaml         |  2 +-
 .../yaml/qwen3-235b-32k-fp8-tp1-dp4_decode.yaml          | 13 +++++++++++++
 .../yaml/qwen3-235b-32k-fp8-tp1-dp4_prefill.yaml         | 13 +++++++++++++
 3 files changed, 27 insertions(+), 1 deletion(-)
 create mode 100644 benchmarks/yaml/qwen3-235b-32k-fp8-tp1-dp4_decode.yaml
 create mode 100644 benchmarks/yaml/qwen3-235b-32k-fp8-tp1-dp4_prefill.yaml

diff --git a/benchmarks/yaml/eb45-32k-wint4-mtp-h800-tp4.yaml b/benchmarks/yaml/eb45-32k-wint4-mtp-h800-tp4.yaml
index 974c2eaf7..c71c247ee 100644
--- a/benchmarks/yaml/eb45-32k-wint4-mtp-h800-tp4.yaml
+++ b/benchmarks/yaml/eb45-32k-wint4-mtp-h800-tp4.yaml
@@ -1,6 +1,6 @@
 max_model_len: 32768
 max_num_seqs: 96
-gpu_memory_utilization: 0.85
+gpu_memory_utilization: 0.8
 kv_cache_ratio: 0.71
 tensor_parallel_size: 4
 quantization: wint4
diff --git a/benchmarks/yaml/qwen3-235b-32k-fp8-tp1-dp4_decode.yaml b/benchmarks/yaml/qwen3-235b-32k-fp8-tp1-dp4_decode.yaml
new file mode 100644
index 000000000..28acf9994
--- /dev/null
+++ b/benchmarks/yaml/qwen3-235b-32k-fp8-tp1-dp4_decode.yaml
@@ -0,0 +1,13 @@
+max_model_len: 32768
+max_num_seqs: 32
+data_parallel_size: 4
+tensor_parallel_size: 1
+enable_expert_parallel: True
+enable_prefix_caching: False
+splitwise_role: decode
+cache_transfer_protocol: "rdma"
+rdma_comm_ports: "7671,7672,7673,7674"
+pd_comm_port: "2335"
+engine_worker_queue_port: "4582,4583,4584,4585"
+graph_optimization_config:
+  use_cudagraph: False
diff --git a/benchmarks/yaml/qwen3-235b-32k-fp8-tp1-dp4_prefill.yaml b/benchmarks/yaml/qwen3-235b-32k-fp8-tp1-dp4_prefill.yaml
new file mode 100644
index 000000000..d9e879e72
--- /dev/null
+++ b/benchmarks/yaml/qwen3-235b-32k-fp8-tp1-dp4_prefill.yaml
@@ -0,0 +1,13 @@
+max_model_len: 32768
+max_num_seqs: 32
+data_parallel_size: 4
+tensor_parallel_size: 1
+enable_expert_parallel: True
+enable_prefix_caching: False
+splitwise_role: prefill
+cache_transfer_protocol: "rdma"
+rdma_comm_ports: "7675,7676,7677,7678"
+pd_comm_port: "2334"
+engine_worker_queue_port: "4368,4369,4360,4361"
+graph_optimization_config:
+  use_cudagraph: False