ep support logprob (#4089)

Author: chen
Date:   2025-09-12 21:11:16 +08:00 (committed via GitHub)
Commit: 2485333f71 (parent: 10768a4d79)
3 changed files with 11 additions and 17 deletions

@@ -39,9 +39,6 @@ void GetOutputTopK(const paddle::Tensor& x,
                    int k,
                    int64_t rank_id,
                    bool wait_flag) {
-  if (rank_id > 0) {
-    return;
-  }
   static struct msgdata msg_rcv;
   int msg_queue_id = 1;
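With the rank_id > 0 early return removed, GetOutputTopK no longer no-ops on non-zero ranks: every rank now reads top-k results from the message queue, which the expert-parallel logprob path below depends on. A minimal Python stand-in (hypothetical stub, not the actual Paddle custom op) to illustrate the behavioural change:

# Hypothetical stand-in for the custom op, only to show the effect of
# dropping the rank gate; the real IPC message-queue read is elided.
def get_output_topk_before(tokens, k, rank_id, wait_flag):
    if rank_id > 0:
        return                      # old: non-zero ranks received nothing
    drain_topk_msg_queue(tokens, k, wait_flag)

def get_output_topk_after(tokens, k, rank_id, wait_flag):
    drain_topk_msg_queue(tokens, k, wait_flag)   # new: every rank reads

def drain_topk_msg_queue(tokens, k, wait_flag):
    """Placeholder for the message-queue read done by the C++ op."""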

@@ -400,8 +400,6 @@ class EngineArgs:
         if self.enable_logprob:
             if self.speculative_config is not None:
                 raise NotImplementedError("Logprob does not support speculation_config.")
-            if self.enable_expert_parallel:
-                raise NotImplementedError("Logprob does not support enable_expert_parallel.")
             if not current_platform.is_cuda():
                 raise NotImplementedError("Only CUDA platform supports logprob.")
             if self.splitwise_role != "mixed":
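After dropping the enable_expert_parallel check, enabling logprob together with expert parallelism is no longer rejected at argument-validation time. A hedged sketch of the validation that remains (free-standing function with illustrative names, not the real EngineArgs method):

# Illustrative sketch of the remaining logprob validation after this change.
def validate_logprob_args(enable_logprob, speculative_config, is_cuda,
                          enable_expert_parallel):
    if not enable_logprob:
        return
    if speculative_config is not None:
        raise NotImplementedError("Logprob does not support speculation_config.")
    # NOTE: enable_expert_parallel is no longer checked here.
    if not is_cuda:
        raise NotImplementedError("Only CUDA platform supports logprob.")
    # The original also checks splitwise_role != "mixed"; that branch is
    # truncated in the diff, so it is omitted from this sketch.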

@@ -160,13 +160,6 @@ class TokenProcessor:
                 if self.output_tokens[0] == -2:
                     continue
                 else:
-                    if (
-                        self.cfg.parallel_config.enable_expert_parallel
-                        and self.cfg.parallel_config.data_parallel_size > 1
-                    ):
-                        get_output_ep(self.output_tokens, rank_id, is_blocking)
-                    else:
                     if self.use_logprobs:
                         get_output_topk(
@@ -177,6 +170,12 @@ class TokenProcessor:
                             rank_id,
                             is_blocking,
                         )
+                    elif (
+                        self.cfg.parallel_config.enable_expert_parallel
+                        and self.cfg.parallel_config.data_parallel_size > 1
+                    ):
+                        get_output_ep(self.output_tokens, rank_id, is_blocking)
+                    else:
+                        get_output(self.output_tokens, rank_id, is_blocking)
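Taken together, the TokenProcessor dispatch now prefers the logprob reader even when expert parallelism is enabled; get_output_ep is only the fallback when logprobs are off. A self-contained sketch of the new order (stubbed readers with simplified signatures and assumed names such as k; not the real C++ bindings):

# Stubbed readers, used only to show the new dispatch order.
def get_output_topk(tokens, k, rank_id, is_blocking):
    print(f"rank {rank_id}: top-{k} tokens + logprobs")

def get_output_ep(tokens, rank_id, is_blocking):
    print(f"rank {rank_id}: expert-parallel output")

def get_output(tokens, rank_id, is_blocking):
    print(f"rank {rank_id}: output")

def dispatch(use_logprobs, enable_expert_parallel, data_parallel_size,
             tokens, k, rank_id, is_blocking):
    if use_logprobs:
        # Logprobs now take priority; before this change, the EP branch was
        # checked first and the logprob path was unreachable under EP + DP > 1.
        get_output_topk(tokens, k, rank_id, is_blocking)
    elif enable_expert_parallel and data_parallel_size > 1:
        get_output_ep(tokens, rank_id, is_blocking)
    else:
        get_output(tokens, rank_id, is_blocking)

# Example: EP + data parallelism with logprobs enabled now reaches get_output_topk.
dispatch(True, True, 2, tokens=[], k=5, rank_id=0, is_blocking=True)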