mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
@@ -39,9 +39,6 @@ void GetOutputTopK(const paddle::Tensor& x,
|
||||
int k,
|
||||
int64_t rank_id,
|
||||
bool wait_flag) {
|
||||
if (rank_id > 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
static struct msgdata msg_rcv;
|
||||
int msg_queue_id = 1;
|
||||
|
@@ -401,8 +401,6 @@ class EngineArgs:
|
||||
if self.enable_logprob:
|
||||
if self.speculative_config is not None:
|
||||
raise NotImplementedError("Logprob does not support speculation_config.")
|
||||
if self.enable_expert_parallel:
|
||||
raise NotImplementedError("Logprob does not support enable_expert_parallel.")
|
||||
if not current_platform.is_cuda():
|
||||
raise NotImplementedError("Only CUDA platform supports logprob.")
|
||||
if self.speculative_config is not None:
|
||||
|
@@ -302,13 +302,6 @@ class TokenProcessor:
|
||||
if self.output_tokens[0] == -2:
|
||||
continue
|
||||
|
||||
else:
|
||||
if (
|
||||
self.cfg.parallel_config.enable_expert_parallel
|
||||
and self.cfg.parallel_config.data_parallel_size > 1
|
||||
):
|
||||
get_output_ep(self.output_tokens, rank_id, is_blocking)
|
||||
|
||||
else:
|
||||
if self.use_logprobs:
|
||||
get_output_topk(
|
||||
@@ -319,6 +312,12 @@ class TokenProcessor:
|
||||
rank_id,
|
||||
is_blocking,
|
||||
)
|
||||
elif (
|
||||
self.cfg.parallel_config.enable_expert_parallel
|
||||
and self.cfg.parallel_config.data_parallel_size > 1
|
||||
):
|
||||
get_output_ep(self.output_tokens, rank_id, is_blocking)
|
||||
|
||||
else:
|
||||
get_output(self.output_tokens, rank_id, is_blocking)
|
||||
|
||||
|
Reference in New Issue
Block a user