mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
Fix block num in scheduler v1 for release 2.1 (#3315)
* fix bug for scheduler v0
* fix block num setting in scheduler v1 for release 2.1
* fix block num setting in scheduler v1 for release 2.1
---------
Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
Co-authored-by: YUNSHEN XIE <1084314248@qq.com>
This commit is contained in:
@@ -18,6 +18,7 @@ import json
|
||||
from dataclasses import asdict, dataclass
|
||||
from dataclasses import fields as dataclass_fields
|
||||
from typing import Any, Dict, List, Optional
|
||||
import os
|
||||
|
||||
from fastdeploy.config import (
|
||||
CacheConfig,
|
||||
@@ -865,7 +866,10 @@ class EngineArgs:
|
||||
if self.enable_chunked_prefill:
|
||||
self.max_num_batched_tokens = 2048
|
||||
else:
|
||||
self.max_num_batched_tokens = self.max_model_len
|
||||
if not int(os.getenv('ENABLE_V1_KVCACHE_SCHEDULER', '0')):
|
||||
self.max_num_batched_tokens = self.max_model_len
|
||||
else:
|
||||
self.max_num_batched_tokens = 8192
|
||||
|
||||
all_dict = asdict(self)
|
||||
all_dict["model_cfg"] = model_cfg
|
||||
|
Reference in New Issue
Block a user