deepgemm pre-compile tool supports mixed parallelism (#4282)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
Publish Job / publish_pre_check (push) Has been cancelled
Publish Job / print_publish_pre_check_outputs (push) Has been cancelled
Publish Job / FD-Clone-Linux (push) Has been cancelled
Publish Job / Show Code Archive Output (push) Has been cancelled
Publish Job / BUILD_SM8090 (push) Has been cancelled
Publish Job / BUILD_SM8689 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8090 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8689 (push) Has been cancelled
Publish Job / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
Publish Job / Run FastDeploy LogProb Tests (push) Has been cancelled
Publish Job / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
Publish Job / Run Base Tests (push) Has been cancelled
Publish Job / Run Accuracy Tests (push) Has been cancelled
Publish Job / Run Stable Tests (push) Has been cancelled
CI Images Build / FD-Clone-Linux (push) Has been cancelled
CI Images Build / Show Code Archive Output (push) Has been cancelled
CI Images Build / CI Images Build (push) Has been cancelled
CI Images Build / BUILD_SM8090 (push) Has been cancelled
CI Images Build / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
CI Images Build / Run FastDeploy LogProb Tests (push) Has been cancelled
CI Images Build / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
CI Images Build / Run Base Tests (push) Has been cancelled
CI Images Build / Run Accuracy Tests (push) Has been cancelled
CI Images Build / Run Stable Tests (push) Has been cancelled
CI Images Build / Publish Docker Images Pre Check (push) Has been cancelled

This commit is contained in:
GoldPancake
2025-09-26 18:43:39 +08:00
committed by GitHub
parent 5c6e859681
commit 79c2c52756

View File

@@ -41,7 +41,6 @@ def generate_kn_pairs(args, model_cfg: dict) -> Tuple[List, List, List]:
gemm_kn_pairs = [] gemm_kn_pairs = []
grouped_gemm_contiguous_kn_pairs = [] grouped_gemm_contiguous_kn_pairs = []
grouped_gemm_masked_kn_pairs = [] grouped_gemm_masked_kn_pairs = []
if tp_size > 1 and ep_size == 1:
logger.debug("Generating kn pairs for tensor parallel.") logger.debug("Generating kn pairs for tensor parallel.")
# Dense normal gemm # Dense normal gemm
gemm_kn_pairs.extend( gemm_kn_pairs.extend(
@@ -60,32 +59,8 @@ def generate_kn_pairs(args, model_cfg: dict) -> Tuple[List, List, List]:
[hidden_size, int(moe_intermediate_size * 2 / tp_size)], [hidden_size, int(moe_intermediate_size * 2 / tp_size)],
] ]
) )
if has_shared_experts:
logger.debug("Generating kn pairs for models with shared experts.") if ep_size > 1:
gemm_kn_pairs.extend(
[
[hidden_size, int(moe_intermediate_size * 4 / tp_size)],
[int(moe_intermediate_size * 2 / tp_size), hidden_size],
]
)
elif tp_size == 1 and ep_size > 1:
logger.debug("Generating kn pairs for expert parallel.")
# Dense normal gemm
gemm_kn_pairs.extend(
[
[intermediate_size, hidden_size],
[hidden_size, int(head_dim * (num_attention_heads + num_key_value_heads * 2))],
[hidden_size, int(intermediate_size * 2)],
[hidden_size, hidden_size],
]
)
# Moe grouped gemm contiguous
grouped_gemm_contiguous_kn_pairs.extend(
[
[moe_intermediate_size, hidden_size],
[hidden_size, int(moe_intermediate_size * 2)],
]
)
# Moe grouped gemm masked # Moe grouped gemm masked
grouped_gemm_masked_kn_pairs.extend( grouped_gemm_masked_kn_pairs.extend(
[ [
@@ -97,14 +72,10 @@ def generate_kn_pairs(args, model_cfg: dict) -> Tuple[List, List, List]:
logger.debug("Generating kn pairs for models with shared experts.") logger.debug("Generating kn pairs for models with shared experts.")
gemm_kn_pairs.extend( gemm_kn_pairs.extend(
[ [
[hidden_size, int(moe_intermediate_size * 4)], [hidden_size, int(moe_intermediate_size * 4 / tp_size)],
[int(moe_intermediate_size * 2), hidden_size], [int(moe_intermediate_size * 2 / tp_size), hidden_size],
] ]
) )
elif tp_size > 1 and ep_size > 1:
raise ValueError("Not supported to enable EP and TP at the same time for now.")
else:
raise ValueError("Please check the tensor parallel size and expert parallel size.")
return ( return (
gemm_kn_pairs, gemm_kn_pairs,