Support logprob with expert parallelism (EP) (#4089) (#4151)

This commit is contained in:
chen
2025-09-19 14:07:31 +08:00
committed by GitHub
parent a685e5ad35
commit 66a98b44ed
3 changed files with 11 additions and 17 deletions

View File

@@ -39,9 +39,6 @@ void GetOutputTopK(const paddle::Tensor& x,
int k,
int64_t rank_id,
bool wait_flag) {
if (rank_id > 0) {
return;
}
static struct msgdata msg_rcv;
int msg_queue_id = 1;

View File

@@ -401,8 +401,6 @@ class EngineArgs:
if self.enable_logprob:
if self.speculative_config is not None:
raise NotImplementedError("Logprob does not support speculation_config.")
if self.enable_expert_parallel:
raise NotImplementedError("Logprob does not support enable_expert_parallel.")
if not current_platform.is_cuda():
raise NotImplementedError("Only CUDA platform supports logprob.")
if self.speculative_config is not None:

View File

@@ -302,13 +302,6 @@ class TokenProcessor:
if self.output_tokens[0] == -2:
continue
else:
if (
self.cfg.parallel_config.enable_expert_parallel
and self.cfg.parallel_config.data_parallel_size > 1
):
get_output_ep(self.output_tokens, rank_id, is_blocking)
else:
if self.use_logprobs:
get_output_topk(
@@ -319,6 +312,12 @@ class TokenProcessor:
rank_id,
is_blocking,
)
elif (
self.cfg.parallel_config.enable_expert_parallel
and self.cfg.parallel_config.data_parallel_size > 1
):
get_output_ep(self.output_tokens, rank_id, is_blocking)
else:
get_output(self.output_tokens, rank_id, is_blocking)