[fix] qwen output inconsistency when top_p=0 (#3634)

* [fix] qwen output inconsistency when top_p=0

* [fix] remove decode pre_id code
This commit is contained in:
李泳桦
2025-08-27 17:16:23 +08:00
committed by GitHub
parent 1265f6c192
commit b2afdf4fc6
3 changed files with 3 additions and 0 deletions

View File

@@ -276,6 +276,7 @@ class MetaxModelRunner(ModelRunnerBase):
self.share_inputs["step_idx"][idx : idx + 1] = (
len(request.output_token_ids) if prefill_end_index >= len(input_ids) else 0
)
self.share_inputs["pre_ids"][idx : idx + 1] = -1
has_prefill_task = True
elif request.task_type.value == RequestType.DECODE.value: # decode task
logger.debug(f"Handle decode request {request} at idx {idx}")