[Bug fix] Fix block number setting in scheduler v1 for release 2.0.4 (#3314)

* fix bug for scheduler v0

* fix block num setting in scheduler v1

* fix block num setting in scheduler v1

* fix block num setting in scheduler v1

* fix block num setting in scheduler v1

* fix block num setting in scheduler v1
This commit is contained in:
chenjian
2025-08-11 23:55:45 +08:00
committed by GitHub
parent 4646aff25c
commit c6a133d573
5 changed files with 34 additions and 12 deletions

View File

@@ -18,6 +18,7 @@ import json
 from dataclasses import asdict, dataclass
 from dataclasses import fields as dataclass_fields
 from typing import Any, Dict, List, Optional
+import os
 from fastdeploy.engine.config import (
     CacheConfig,
@@ -854,7 +855,10 @@ class EngineArgs:
 if self.enable_chunked_prefill:
     self.max_num_batched_tokens = 2048
 else:
-    self.max_num_batched_tokens = self.max_model_len
+    if not int(os.getenv('ENABLE_V1_KVCACHE_SCHEDULER', '0')):
+        self.max_num_batched_tokens = self.max_model_len
+    else:
+        self.max_num_batched_tokens = 8192
 scheduler_cfg = self.create_scheduler_config()
 speculative_cfg = self.create_speculative_config()
 graph_opt_cfg = self.create_graph_optimization_config()