mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[XPU]Moe uses a new operator (#4585)
* [XPU]Moe uses a new operator * [XPU]Moe uses a new operator * update response
This commit is contained in:
@@ -20,8 +20,6 @@ from dataclasses import asdict, dataclass
|
||||
from dataclasses import fields as dataclass_fields
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
import paddle
|
||||
|
||||
from fastdeploy import envs
|
||||
from fastdeploy.config import (
|
||||
CacheConfig,
|
||||
@@ -1025,10 +1023,7 @@ class EngineArgs:
|
||||
|
||||
if self.max_num_batched_tokens is None:
|
||||
if int(envs.ENABLE_V1_KVCACHE_SCHEDULER):
|
||||
if paddle.is_compiled_with_xpu():
|
||||
self.max_num_batched_tokens = self.max_model_len
|
||||
else:
|
||||
self.max_num_batched_tokens = 8192 # if set to max_model_len, it's easy to be OOM
|
||||
self.max_num_batched_tokens = 8192 # if set to max_model_len, it's easy to be OOM
|
||||
else:
|
||||
if self.enable_chunked_prefill:
|
||||
self.max_num_batched_tokens = 2048
|
||||
|
||||
Reference in New Issue
Block a user