mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
support logprob in v1 for release/2.1 (#3446)
This commit is contained in:
@@ -315,6 +315,11 @@ class TokenProcessor:
|
|||||||
scores = self.output_scores[: batch * (K + 1)].numpy().reshape([batch, K + 1])[:, : (K + 1)]
|
scores = self.output_scores[: batch * (K + 1)].numpy().reshape([batch, K + 1])[:, : (K + 1)]
|
||||||
ranks = self.output_ranks[:batch].numpy()
|
ranks = self.output_ranks[:batch].numpy()
|
||||||
batch_result = list()
|
batch_result = list()
|
||||||
|
if envs.ENABLE_V1_KVCACHE_SCHEDULER:
|
||||||
|
need_to_be_reschedule_req_ids = list(self.resource_manager.to_be_rescheduled_request_id_set)
|
||||||
|
for request_id in need_to_be_reschedule_req_ids:
|
||||||
|
if self.resource_manager.requests[request_id].idx >= (batch - 1): # No more token generated for preempted request
|
||||||
|
self.resource_manager.reschedule_preempt_task(request_id)
|
||||||
for i in range(batch):
|
for i in range(batch):
|
||||||
if self.resource_manager.stop_flags[i]:
|
if self.resource_manager.stop_flags[i]:
|
||||||
continue
|
continue
|
||||||
@@ -326,6 +331,9 @@ class TokenProcessor:
|
|||||||
if recovery_stop:
|
if recovery_stop:
|
||||||
llm_logger.info(f"recovery stop signal found at task {task_id}")
|
llm_logger.info(f"recovery stop signal found at task {task_id}")
|
||||||
if not recovery_stop and token_id < 0:
|
if not recovery_stop and token_id < 0:
|
||||||
|
if envs.ENABLE_V1_KVCACHE_SCHEDULER:
|
||||||
|
if task_id in self.resource_manager.to_be_rescheduled_request_id_set:
|
||||||
|
self.resource_manager.reschedule_preempt_task(task_id)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if task.get("prefill_chunk_info", None) is not None:
|
if task.get("prefill_chunk_info", None) is not None:
|
||||||
@@ -383,6 +391,7 @@ class TokenProcessor:
|
|||||||
self.tokens_counter[task_id] += 1
|
self.tokens_counter[task_id] += 1
|
||||||
if token_id != RECOVERY_STOP_SIGNAL:
|
if token_id != RECOVERY_STOP_SIGNAL:
|
||||||
result.outputs.token_ids.append(token_id)
|
result.outputs.token_ids.append(token_id)
|
||||||
|
task.output_token_ids.append(token_id)
|
||||||
result.outputs.logprob = float(scores[i, 0])
|
result.outputs.logprob = float(scores[i, 0])
|
||||||
# Construct top_logprobs
|
# Construct top_logprobs
|
||||||
topk_token_ids = tokens[i, :].tolist()
|
topk_token_ids = tokens[i, :].tolist()
|
||||||
|
Reference in New Issue
Block a user