[XPU] MoE uses a new operator (#4585)

* [XPU] MoE uses a new operator

* [XPU] MoE uses a new operator

* update response
This commit is contained in:
yyssys
2025-10-24 23:01:46 +08:00
committed by GitHub
parent f42ed6d5f2
commit 822dea8d5f
3 changed files with 9 additions and 15 deletions

View File

@@ -20,8 +20,6 @@ from dataclasses import asdict, dataclass
from dataclasses import fields as dataclass_fields
from typing import Any, Dict, List, Optional, Union
import paddle
from fastdeploy import envs
from fastdeploy.config import (
CacheConfig,
@@ -1025,10 +1023,7 @@ class EngineArgs:
if self.max_num_batched_tokens is None:
if int(envs.ENABLE_V1_KVCACHE_SCHEDULER):
if paddle.is_compiled_with_xpu():
self.max_num_batched_tokens = self.max_model_len
else:
self.max_num_batched_tokens = 8192 # if set to max_model_len, it's easy to be OOM
self.max_num_batched_tokens = 8192 # if set to max_model_len, it's easy to be OOM
else:
if self.enable_chunked_prefill:
self.max_num_batched_tokens = 2048