Fix output caching bug when a request is preempted (#5502)

This commit is contained in:
chenjian
2025-12-15 17:25:35 +08:00
committed by GitHub
parent 9f70f4310e
commit 0100ee885f

View File

@@ -799,7 +799,9 @@ class TokenProcessor:
and self.cfg.cache_config.enable_prefix_caching
and self.cfg.cache_config.enable_output_caching
):
- if (task.num_total_tokens - 1) % self.cfg.cache_config.block_size == 0:
+ if (task.num_total_tokens - 1) % self.cfg.cache_config.block_size == 0 and (
+ task_id not in self.resource_manager.to_be_rescheduled_request_id_set
+ ):
self.resource_manager.cache_output_tokens(
task
) # when enable prefix caching, cache kv cache for output tokens