revert group size 3 (#5079)

Authored by 周周周 on 2025-11-17 18:54:13 +08:00, committed by GitHub
parent d9f64adb0e
commit b23e684b67
2 changed files with 5 additions and 5 deletions


@@ -17,7 +17,7 @@
"IsDynamicC8"
],
"dispatch_params": {
"GROUP_SIZE": [1, 2, 3, 4, 5, 6, 7, 8, 12, 14, 16],
"GROUP_SIZE": [1, 2, 4, 5, 6, 7, 8, 12, 14, 16],
"HEAD_DIM": [128],
"BLOCK_SIZE": [64],
"CAUSAL": [0, 1],
@@ -54,7 +54,7 @@
"ENABLE_PREFILL"
],
"dispatch_params": {
"GROUP_SIZE": [1, 2, 3, 4, 5, 6, 7, 8, 12, 14, 16],
"GROUP_SIZE": [1, 2, 4, 5, 6, 7, 8, 12, 14, 16],
"HEAD_DIM": [128],
"BLOCK_SIZE": [64],
"CAUSAL": [0, 1],
@@ -89,7 +89,7 @@
"ENABLE_PREFILL"
],
"dispatch_params": {
"GROUP_SIZE": [1, 2, 3, 4, 5, 6, 7, 8, 12, 14, 16],
"GROUP_SIZE": [1, 2, 4, 5, 6, 7, 8, 12, 14, 16],
"HEAD_DIM": [64,128],
"BLOCK_SIZE": [64],
"CAUSAL": [0, 1],


@@ -117,10 +117,10 @@ class TestAttentionPerformance(unittest.TestCase):
config_dict = {
"architectures": ["Ernie4_5_MoeForCausalLM"],
"dtype": "bfloat16",
"hidden_size": 1536,
"hidden_size": 4096,
"max_position_embeddings": 131072,
"max_model_len": 2 * (9000 + 128),
"num_attention_heads": 12,
"num_attention_heads": 32,
"num_key_value_heads": 4,
"num_hidden_layers": 39,
}
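
Note (an assumption, not stated in the diff): GROUP_SIZE in the dispatch tables above plausibly means the GQA group, num_attention_heads // num_key_value_heads. Read that way, the old test config (hidden_size 1536, 12 heads over 4 KV heads) exercised group size 3, which this commit reverts, while the new config (hidden_size 4096, 32 heads over 4 KV heads) gives group size 8, which stays in the GROUP_SIZE lists. A minimal check:

# Hypothetical helper, not from the repo: the usual GQA group-size definition.
def gqa_group_size(num_attention_heads: int, num_key_value_heads: int) -> int:
    assert num_attention_heads % num_key_value_heads == 0
    return num_attention_heads // num_key_value_heads

print(gqa_group_size(12, 4))  # 3 -> old test config; the group size being reverted
print(gqa_group_size(32, 4))  # 8 -> new test config; still covered by the dispatch lists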