ep support logprob (#4089) (#4151)

This commit is contained in:
chen
2025-09-19 14:07:31 +08:00
committed by GitHub
parent a685e5ad35
commit 66a98b44ed
3 changed files with 11 additions and 17 deletions

View File

@@ -39,9 +39,6 @@ void GetOutputTopK(const paddle::Tensor& x,
                    int k,
                    int64_t rank_id,
                    bool wait_flag) {
-  if (rank_id > 0) {
-    return;
-  }
   static struct msgdata msg_rcv;
   int msg_queue_id = 1;

View File

@@ -401,8 +401,6 @@ class EngineArgs:
         if self.enable_logprob:
             if self.speculative_config is not None:
                 raise NotImplementedError("Logprob does not support speculation_config.")
-            if self.enable_expert_parallel:
-                raise NotImplementedError("Logprob does not support enable_expert_parallel.")
             if not current_platform.is_cuda():
                 raise NotImplementedError("Only CUDA platform supports logprob.")
         if self.speculative_config is not None:

View File

@@ -303,24 +303,23 @@ class TokenProcessor:
                     continue
                 else:
-                    if (
+                    if self.use_logprobs:
+                        get_output_topk(
+                            self.output_tokens,
+                            self.output_scores,
+                            self.output_ranks,
+                            K,
+                            rank_id,
+                            is_blocking,
+                        )
+                    elif (
                         self.cfg.parallel_config.enable_expert_parallel
                         and self.cfg.parallel_config.data_parallel_size > 1
                     ):
                         get_output_ep(self.output_tokens, rank_id, is_blocking)
                     else:
-                        if self.use_logprobs:
-                            get_output_topk(
-                                self.output_tokens,
-                                self.output_scores,
-                                self.output_ranks,
-                                K,
-                                rank_id,
-                                is_blocking,
-                            )
-                        else:
-                            get_output(self.output_tokens, rank_id, is_blocking)
+                        get_output(self.output_tokens, rank_id, is_blocking)
                 if self.output_tokens[0, 0] == -2:
                     continue