mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
DeepGEMM pre-compile tool: support mixed (TP + EP) parallelism (#4282)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
Publish Job / publish_pre_check (push) Has been cancelled
Publish Job / print_publish_pre_check_outputs (push) Has been cancelled
Publish Job / FD-Clone-Linux (push) Has been cancelled
Publish Job / Show Code Archive Output (push) Has been cancelled
Publish Job / BUILD_SM8090 (push) Has been cancelled
Publish Job / BUILD_SM8689 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8090 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8689 (push) Has been cancelled
Publish Job / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
Publish Job / Run FastDeploy LogProb Tests (push) Has been cancelled
Publish Job / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
Publish Job / Run Base Tests (push) Has been cancelled
Publish Job / Run Accuracy Tests (push) Has been cancelled
Publish Job / Run Stable Tests (push) Has been cancelled
CI Images Build / FD-Clone-Linux (push) Has been cancelled
CI Images Build / Show Code Archive Output (push) Has been cancelled
CI Images Build / CI Images Build (push) Has been cancelled
CI Images Build / BUILD_SM8090 (push) Has been cancelled
CI Images Build / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
CI Images Build / Run FastDeploy LogProb Tests (push) Has been cancelled
CI Images Build / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
CI Images Build / Run Base Tests (push) Has been cancelled
CI Images Build / Run Accuracy Tests (push) Has been cancelled
CI Images Build / Run Stable Tests (push) Has been cancelled
CI Images Build / Publish Docker Images Pre Check (push) Has been cancelled
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
Publish Job / publish_pre_check (push) Has been cancelled
Publish Job / print_publish_pre_check_outputs (push) Has been cancelled
Publish Job / FD-Clone-Linux (push) Has been cancelled
Publish Job / Show Code Archive Output (push) Has been cancelled
Publish Job / BUILD_SM8090 (push) Has been cancelled
Publish Job / BUILD_SM8689 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8090 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8689 (push) Has been cancelled
Publish Job / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
Publish Job / Run FastDeploy LogProb Tests (push) Has been cancelled
Publish Job / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
Publish Job / Run Base Tests (push) Has been cancelled
Publish Job / Run Accuracy Tests (push) Has been cancelled
Publish Job / Run Stable Tests (push) Has been cancelled
CI Images Build / FD-Clone-Linux (push) Has been cancelled
CI Images Build / Show Code Archive Output (push) Has been cancelled
CI Images Build / CI Images Build (push) Has been cancelled
CI Images Build / BUILD_SM8090 (push) Has been cancelled
CI Images Build / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
CI Images Build / Run FastDeploy LogProb Tests (push) Has been cancelled
CI Images Build / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
CI Images Build / Run Base Tests (push) Has been cancelled
CI Images Build / Run Accuracy Tests (push) Has been cancelled
CI Images Build / Run Stable Tests (push) Has been cancelled
CI Images Build / Publish Docker Images Pre Check (push) Has been cancelled
This commit is contained in:
@@ -41,7 +41,6 @@ def generate_kn_pairs(args, model_cfg: dict) -> Tuple[List, List, List]:
|
|||||||
gemm_kn_pairs = []
|
gemm_kn_pairs = []
|
||||||
grouped_gemm_contiguous_kn_pairs = []
|
grouped_gemm_contiguous_kn_pairs = []
|
||||||
grouped_gemm_masked_kn_pairs = []
|
grouped_gemm_masked_kn_pairs = []
|
||||||
if tp_size > 1 and ep_size == 1:
|
|
||||||
logger.debug("Generating kn pairs for tensor parallel.")
|
logger.debug("Generating kn pairs for tensor parallel.")
|
||||||
# Dense normal gemm
|
# Dense normal gemm
|
||||||
gemm_kn_pairs.extend(
|
gemm_kn_pairs.extend(
|
||||||
@@ -60,32 +59,8 @@ def generate_kn_pairs(args, model_cfg: dict) -> Tuple[List, List, List]:
|
|||||||
[hidden_size, int(moe_intermediate_size * 2 / tp_size)],
|
[hidden_size, int(moe_intermediate_size * 2 / tp_size)],
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
if has_shared_experts:
|
|
||||||
logger.debug("Generating kn pairs for models with shared experts.")
|
if ep_size > 1:
|
||||||
gemm_kn_pairs.extend(
|
|
||||||
[
|
|
||||||
[hidden_size, int(moe_intermediate_size * 4 / tp_size)],
|
|
||||||
[int(moe_intermediate_size * 2 / tp_size), hidden_size],
|
|
||||||
]
|
|
||||||
)
|
|
||||||
elif tp_size == 1 and ep_size > 1:
|
|
||||||
logger.debug("Generating kn pairs for expert parallel.")
|
|
||||||
# Dense normal gemm
|
|
||||||
gemm_kn_pairs.extend(
|
|
||||||
[
|
|
||||||
[intermediate_size, hidden_size],
|
|
||||||
[hidden_size, int(head_dim * (num_attention_heads + num_key_value_heads * 2))],
|
|
||||||
[hidden_size, int(intermediate_size * 2)],
|
|
||||||
[hidden_size, hidden_size],
|
|
||||||
]
|
|
||||||
)
|
|
||||||
# Moe grouped gemm contiguous
|
|
||||||
grouped_gemm_contiguous_kn_pairs.extend(
|
|
||||||
[
|
|
||||||
[moe_intermediate_size, hidden_size],
|
|
||||||
[hidden_size, int(moe_intermediate_size * 2)],
|
|
||||||
]
|
|
||||||
)
|
|
||||||
# Moe grouped gemm masked
|
# Moe grouped gemm masked
|
||||||
grouped_gemm_masked_kn_pairs.extend(
|
grouped_gemm_masked_kn_pairs.extend(
|
||||||
[
|
[
|
||||||
@@ -97,14 +72,10 @@ def generate_kn_pairs(args, model_cfg: dict) -> Tuple[List, List, List]:
|
|||||||
logger.debug("Generating kn pairs for models with shared experts.")
|
logger.debug("Generating kn pairs for models with shared experts.")
|
||||||
gemm_kn_pairs.extend(
|
gemm_kn_pairs.extend(
|
||||||
[
|
[
|
||||||
[hidden_size, int(moe_intermediate_size * 4)],
|
[hidden_size, int(moe_intermediate_size * 4 / tp_size)],
|
||||||
[int(moe_intermediate_size * 2), hidden_size],
|
[int(moe_intermediate_size * 2 / tp_size), hidden_size],
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
elif tp_size > 1 and ep_size > 1:
|
|
||||||
raise ValueError("Not supported to enable EP and TP at the same time for now.")
|
|
||||||
else:
|
|
||||||
raise ValueError("Please check the tensor parallel size and expert parallel size.")
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
gemm_kn_pairs,
|
gemm_kn_pairs,
|
||||||
|
Reference in New Issue
Block a user