From b23e684b672fc14af713ba09e8d98463b01744d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=91=A8=E5=91=A8=E5=91=A8?= <39978853+zhoutianzi666@users.noreply.github.com>
Date: Mon, 17 Nov 2025 18:54:13 +0800
Subject: [PATCH] revert group size 3 (#5079)

---
 custom_ops/gpu_ops/append_attn/template_config.json | 6 +++---
 tests/layers/test_attention_layer.py                | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/custom_ops/gpu_ops/append_attn/template_config.json b/custom_ops/gpu_ops/append_attn/template_config.json
index b750965a5..c462afe07 100644
--- a/custom_ops/gpu_ops/append_attn/template_config.json
+++ b/custom_ops/gpu_ops/append_attn/template_config.json
@@ -17,7 +17,7 @@
             "IsDynamicC8"
         ],
         "dispatch_params": {
-            "GROUP_SIZE": [1, 2, 3, 4, 5, 6, 7, 8, 12, 14, 16],
+            "GROUP_SIZE": [1, 2, 4, 5, 6, 7, 8, 12, 14, 16],
             "HEAD_DIM": [128],
             "BLOCK_SIZE": [64],
             "CAUSAL": [0, 1],
@@ -54,7 +54,7 @@
             "ENABLE_PREFILL"
         ],
         "dispatch_params": {
-            "GROUP_SIZE": [1, 2, 3, 4, 5, 6, 7, 8, 12, 14, 16],
+            "GROUP_SIZE": [1, 2, 4, 5, 6, 7, 8, 12, 14, 16],
             "HEAD_DIM": [128],
             "BLOCK_SIZE": [64],
             "CAUSAL": [0, 1],
@@ -89,7 +89,7 @@
             "ENABLE_PREFILL"
         ],
         "dispatch_params": {
-            "GROUP_SIZE": [1, 2, 3, 4, 5, 6, 7, 8, 12, 14, 16],
+            "GROUP_SIZE": [1, 2, 4, 5, 6, 7, 8, 12, 14, 16],
             "HEAD_DIM": [64,128],
             "BLOCK_SIZE": [64],
             "CAUSAL": [0, 1],
diff --git a/tests/layers/test_attention_layer.py b/tests/layers/test_attention_layer.py
index 5e803b8a2..82f8f0798 100644
--- a/tests/layers/test_attention_layer.py
+++ b/tests/layers/test_attention_layer.py
@@ -117,10 +117,10 @@ class TestAttentionPerformance(unittest.TestCase):
         config_dict = {
             "architectures": ["Ernie4_5_MoeForCausalLM"],
             "dtype": "bfloat16",
-            "hidden_size": 1536,
+            "hidden_size": 4096,
             "max_position_embeddings": 131072,
             "max_model_len": 2 * (9000 + 128),
-            "num_attention_heads": 12,
+            "num_attention_heads": 32,
             "num_key_value_heads": 4,
             "num_hidden_layers": 39,
         }
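
Note on why the two files change together: if GROUP_SIZE in these append_attn
templates is the GQA ratio (num_attention_heads / num_key_value_heads), then
removing 3 from every dispatch list means a model with 12 query heads and 4 KV
heads (ratio 3) would no longer hit a compiled kernel, so the test config is
bumped to 32 / 4 (ratio 8), which stays in the list. A minimal Python sketch of
that reasoning, assuming the GQA interpretation; the names below are
illustrative, not FastDeploy APIs:

    # Post-patch dispatch list from template_config.json.
    SUPPORTED_GROUP_SIZES = [1, 2, 4, 5, 6, 7, 8, 12, 14, 16]

    def group_size(num_attention_heads: int, num_key_value_heads: int) -> int:
        """Query heads per KV head (the assumed GQA group size)."""
        assert num_attention_heads % num_key_value_heads == 0
        return num_attention_heads // num_key_value_heads

    # Old test config: 12 / 4 -> group size 3, no longer instantiated.
    assert group_size(12, 4) == 3
    assert group_size(12, 4) not in SUPPORTED_GROUP_SIZES

    # New test config: 32 / 4 -> group size 8, still compiled.
    assert group_size(32, 4) in SUPPORTED_GROUP_SIZES

Under that assumption, the test-config change is not cosmetic: it keeps the
attention-layer performance test on a head layout the reverted kernel set can
actually dispatch.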