mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
[fix] qwen output inconsistency when top_p=0 (#3634)
* [fix] qwen output inconsistency when top_p=0
* [fix] remove decode pre_id code
This commit is contained in:
@@ -288,6 +288,7 @@ class GPUModelRunner(ModelRunnerBase):
     self.share_inputs["step_idx"][idx : idx + 1] = (
         len(request.output_token_ids) if prefill_end_index >= len(input_ids) else 0
     )
+    self.share_inputs["pre_ids"][idx : idx + 1] = -1
     has_prefill_task = True
 elif request.task_type.value == RequestType.DECODE.value: # decode task
     logger.debug(f"Handle decode request {request} at idx {idx}")
@@ -276,6 +276,7 @@ class MetaxModelRunner(ModelRunnerBase):
     self.share_inputs["step_idx"][idx : idx + 1] = (
         len(request.output_token_ids) if prefill_end_index >= len(input_ids) else 0
     )
+    self.share_inputs["pre_ids"][idx : idx + 1] = -1
     has_prefill_task = True
 elif request.task_type.value == RequestType.DECODE.value: # decode task
     logger.debug(f"Handle decode request {request} at idx {idx}")
@@ -411,6 +411,7 @@ class XPUModelRunner(ModelRunnerBase):
     self.share_inputs["step_idx"][idx : idx + 1] = (
         len(request.output_token_ids) if prefill_end_index >= len(input_ids) else 0
     )
+    self.share_inputs["pre_ids"][idx : idx + 1] = -1
     has_prefill_task = True
 elif request.task_type.value == RequestType.DECODE.value: # decode task
     logger.debug(f"Handle decode request {request} at idx {idx}")
Reference in New Issue
Block a user