Fix block num in scheduler v1 for release 2.1 (#3315)

* fix bug for scheduler v0

* fix block num setting in scheduler v1 for release 2.1

* fix block num setting in scheduler v1 for release 2.1

---------

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
Co-authored-by: YUNSHEN XIE <1084314248@qq.com>
This commit is contained in:
chenjian
2025-08-12 00:41:05 +08:00
committed by GitHub
parent 9b07f85f6d
commit 25f51b0611
5 changed files with 33 additions and 12 deletions

View File

@@ -18,6 +18,7 @@ import json
from dataclasses import asdict, dataclass
from dataclasses import fields as dataclass_fields
from typing import Any, Dict, List, Optional
import os
from fastdeploy.config import (
CacheConfig,
@@ -865,7 +866,10 @@ class EngineArgs:
if self.enable_chunked_prefill:
self.max_num_batched_tokens = 2048
else:
self.max_num_batched_tokens = self.max_model_len
if not int(os.getenv('ENABLE_V1_KVCACHE_SCHEDULER', '0')):
self.max_num_batched_tokens = self.max_model_len
else:
self.max_num_batched_tokens = 8192
all_dict = asdict(self)
all_dict["model_cfg"] = model_cfg