mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
@@ -39,9 +39,6 @@ void GetOutputTopK(const paddle::Tensor& x,
|
|||||||
int k,
|
int k,
|
||||||
int64_t rank_id,
|
int64_t rank_id,
|
||||||
bool wait_flag) {
|
bool wait_flag) {
|
||||||
if (rank_id > 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct msgdata msg_rcv;
|
static struct msgdata msg_rcv;
|
||||||
int msg_queue_id = 1;
|
int msg_queue_id = 1;
|
||||||
|
@@ -401,8 +401,6 @@ class EngineArgs:
|
|||||||
if self.enable_logprob:
|
if self.enable_logprob:
|
||||||
if self.speculative_config is not None:
|
if self.speculative_config is not None:
|
||||||
raise NotImplementedError("Logprob does not support speculation_config.")
|
raise NotImplementedError("Logprob does not support speculation_config.")
|
||||||
if self.enable_expert_parallel:
|
|
||||||
raise NotImplementedError("Logprob does not support enable_expert_parallel.")
|
|
||||||
if not current_platform.is_cuda():
|
if not current_platform.is_cuda():
|
||||||
raise NotImplementedError("Only CUDA platform supports logprob.")
|
raise NotImplementedError("Only CUDA platform supports logprob.")
|
||||||
if self.speculative_config is not None:
|
if self.speculative_config is not None:
|
||||||
|
@@ -303,24 +303,23 @@ class TokenProcessor:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if (
|
if self.use_logprobs:
|
||||||
|
get_output_topk(
|
||||||
|
self.output_tokens,
|
||||||
|
self.output_scores,
|
||||||
|
self.output_ranks,
|
||||||
|
K,
|
||||||
|
rank_id,
|
||||||
|
is_blocking,
|
||||||
|
)
|
||||||
|
elif (
|
||||||
self.cfg.parallel_config.enable_expert_parallel
|
self.cfg.parallel_config.enable_expert_parallel
|
||||||
and self.cfg.parallel_config.data_parallel_size > 1
|
and self.cfg.parallel_config.data_parallel_size > 1
|
||||||
):
|
):
|
||||||
get_output_ep(self.output_tokens, rank_id, is_blocking)
|
get_output_ep(self.output_tokens, rank_id, is_blocking)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if self.use_logprobs:
|
get_output(self.output_tokens, rank_id, is_blocking)
|
||||||
get_output_topk(
|
|
||||||
self.output_tokens,
|
|
||||||
self.output_scores,
|
|
||||||
self.output_ranks,
|
|
||||||
K,
|
|
||||||
rank_id,
|
|
||||||
is_blocking,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
get_output(self.output_tokens, rank_id, is_blocking)
|
|
||||||
|
|
||||||
if self.output_tokens[0, 0] == -2:
|
if self.output_tokens[0, 0] == -2:
|
||||||
continue
|
continue
|
||||||
|
Reference in New Issue
Block a user