mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
revert group size 3 (#5079)
This commit is contained in:
@@ -17,7 +17,7 @@
|
||||
"IsDynamicC8"
|
||||
],
|
||||
"dispatch_params": {
|
||||
"GROUP_SIZE": [1, 2, 3, 4, 5, 6, 7, 8, 12, 14, 16],
|
||||
"GROUP_SIZE": [1, 2, 4, 5, 6, 7, 8, 12, 14, 16],
|
||||
"HEAD_DIM": [128],
|
||||
"BLOCK_SIZE": [64],
|
||||
"CAUSAL": [0, 1],
|
||||
@@ -54,7 +54,7 @@
|
||||
"ENABLE_PREFILL"
|
||||
],
|
||||
"dispatch_params": {
|
||||
"GROUP_SIZE": [1, 2, 3, 4, 5, 6, 7, 8, 12, 14, 16],
|
||||
"GROUP_SIZE": [1, 2, 4, 5, 6, 7, 8, 12, 14, 16],
|
||||
"HEAD_DIM": [128],
|
||||
"BLOCK_SIZE": [64],
|
||||
"CAUSAL": [0, 1],
|
||||
@@ -89,7 +89,7 @@
|
||||
"ENABLE_PREFILL"
|
||||
],
|
||||
"dispatch_params": {
|
||||
"GROUP_SIZE": [1, 2, 3, 4, 5, 6, 7, 8, 12, 14, 16],
|
||||
"GROUP_SIZE": [1, 2, 4, 5, 6, 7, 8, 12, 14, 16],
|
||||
"HEAD_DIM": [64,128],
|
||||
"BLOCK_SIZE": [64],
|
||||
"CAUSAL": [0, 1],
|
||||
|
||||
@@ -117,10 +117,10 @@ class TestAttentionPerformance(unittest.TestCase):
|
||||
config_dict = {
|
||||
"architectures": ["Ernie4_5_MoeForCausalLM"],
|
||||
"dtype": "bfloat16",
|
||||
"hidden_size": 1536,
|
||||
"hidden_size": 4096,
|
||||
"max_position_embeddings": 131072,
|
||||
"max_model_len": 2 * (9000 + 128),
|
||||
"num_attention_heads": 12,
|
||||
"num_attention_heads": 32,
|
||||
"num_key_value_heads": 4,
|
||||
"num_hidden_layers": 39,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user