diff --git a/benchmarks/yaml/eb45-8k-fp8-tp1-dp8_ep.yaml b/benchmarks/yaml/eb45-8k-fp8-tp1-dp8_ep.yaml
new file mode 100644
index 000000000..a65fc42e6
--- /dev/null
+++ b/benchmarks/yaml/eb45-8k-fp8-tp1-dp8_ep.yaml
@@ -0,0 +1,9 @@
+# Benchmark config; the values below mirror the tokens in the file name.
+# NOTE(review): the file name also says "fp8", but no quantization key is
+# set here - presumably it is supplied elsewhere; confirm.
+num_gpu_blocks_override: 1024
+max_model_len: 8192  # "8k" in the file name
+max_num_seqs: 64
+data_parallel_size: 8  # "dp8" in the file name
+tensor_parallel_size: 1  # "tp1" in the file name
+enable_expert_parallel: true  # "ep" in the file name