From b23e684b672fc14af713ba09e8d98463b01744d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=91=A8=E5=91=A8=E5=91=A8?= <39978853+zhoutianzi666@users.noreply.github.com>
Date: Mon, 17 Nov 2025 18:54:13 +0800
Subject: [PATCH] revert group size 3 (#5079)

---
 custom_ops/gpu_ops/append_attn/template_config.json | 6 +++---
 tests/layers/test_attention_layer.py                | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/custom_ops/gpu_ops/append_attn/template_config.json b/custom_ops/gpu_ops/append_attn/template_config.json
index b750965a5..c462afe07 100644
--- a/custom_ops/gpu_ops/append_attn/template_config.json
+++ b/custom_ops/gpu_ops/append_attn/template_config.json
@@ -17,7 +17,7 @@
             "IsDynamicC8"
         ],
         "dispatch_params": {
-            "GROUP_SIZE": [1, 2, 3, 4, 5, 6, 7, 8, 12, 14, 16],
+            "GROUP_SIZE": [1, 2, 4, 5, 6, 7, 8, 12, 14, 16],
             "HEAD_DIM": [128],
             "BLOCK_SIZE": [64],
             "CAUSAL": [0, 1],
@@ -54,7 +54,7 @@
             "ENABLE_PREFILL"
         ],
         "dispatch_params": {
-            "GROUP_SIZE": [1, 2, 3, 4, 5, 6, 7, 8, 12, 14, 16],
+            "GROUP_SIZE": [1, 2, 4, 5, 6, 7, 8, 12, 14, 16],
             "HEAD_DIM": [128],
             "BLOCK_SIZE": [64],
             "CAUSAL": [0, 1],
@@ -89,7 +89,7 @@
             "ENABLE_PREFILL"
         ],
         "dispatch_params": {
-            "GROUP_SIZE": [1, 2, 3, 4, 5, 6, 7, 8, 12, 14, 16],
+            "GROUP_SIZE": [1, 2, 4, 5, 6, 7, 8, 12, 14, 16],
             "HEAD_DIM": [64,128],
             "BLOCK_SIZE": [64],
             "CAUSAL": [0, 1],
diff --git a/tests/layers/test_attention_layer.py b/tests/layers/test_attention_layer.py
index 5e803b8a2..82f8f0798 100644
--- a/tests/layers/test_attention_layer.py
+++ b/tests/layers/test_attention_layer.py
@@ -117,10 +117,10 @@ class TestAttentionPerformance(unittest.TestCase):
         config_dict = {
             "architectures": ["Ernie4_5_MoeForCausalLM"],
             "dtype": "bfloat16",
-            "hidden_size": 1536,
+            "hidden_size": 4096,
             "max_position_embeddings": 131072,
             "max_model_len": 2 * (9000 + 128),
-            "num_attention_heads": 12,
+            "num_attention_heads": 32,
             "num_key_value_heads": 4,
             "num_hidden_layers": 39,
         }
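
Note on why the two files change together: if GROUP_SIZE in these append_attn
templates is the GQA ratio (num_attention_heads / num_key_value_heads), then
removing 3 from every dispatch list means a model with 12 query heads and 4 KV
heads (ratio 3) would no longer hit a compiled kernel, so the test config is
bumped to 32 / 4 (ratio 8), which stays in the list. A minimal Python sketch of
that reasoning, assuming the GQA interpretation; the names below are
illustrative, not FastDeploy APIs:

    # Post-patch dispatch list from template_config.json.
    SUPPORTED_GROUP_SIZES = [1, 2, 4, 5, 6, 7, 8, 12, 14, 16]

    def group_size(num_attention_heads: int, num_key_value_heads: int) -> int:
        """Query heads per KV head (the assumed GQA group size)."""
        assert num_attention_heads % num_key_value_heads == 0
        return num_attention_heads // num_key_value_heads

    # Old test config: 12 / 4 -> group size 3, no longer instantiated.
    assert group_size(12, 4) == 3
    assert group_size(12, 4) not in SUPPORTED_GROUP_SIZES

    # New test config: 32 / 4 -> group size 8, still compiled.
    assert group_size(32, 4) in SUPPORTED_GROUP_SIZES

Under that assumption, the test-config change is not cosmetic: it keeps the
attention-layer performance test on a head layout the reverted kernel set can
actually dispatch.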