fix request_output sampling_params (#3154) (#3464)

This commit is contained in:
chen
2025-08-19 13:52:50 +08:00
committed by GitHub
parent bca8905b40
commit 6735626014

View File

@@ -734,10 +734,6 @@ class LLMEngine:
"""
Insert tasks to engine.
"""
for task in tasks:
start_span_request("DEQUEUE", task, trace.SpanKind.CONSUMER)
if task.sampling_params.bad_words is not None:
task.sampling_params.update_from_tokenizer(self.data_processor.tokenizer)
# TODO: return to scheduler
if allocated:
current_tasks = []
@@ -764,6 +760,11 @@ class LLMEngine:
self.engine_worker_queue.put_tasks((current_tasks, self.resource_manager.real_bsz))
return True
for task in tasks:
start_span_request("DEQUEUE", task, trace.SpanKind.CONSUMER)
if task.sampling_params.bad_words is not None:
task.sampling_params.update_from_tokenizer(self.data_processor.tokenizer)
self.resource_manager.check_and_free_block_tables()
if not isinstance(tasks, list):