【FIX】Change the name of sparse attn from moba to plas (#4006)

* Update docs

* 【docs】 update readme (#4000)

* Update docs

* update readme

* update docs

* 【FIX】Change the name of sparse attn from moba to plas (#3845)

* Update docs

* Update docs

* Update docs

* Update docs

* Change moba to plas

* code style

* update ci

* code style

* update ci

* code style

---------

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
Author: yangjianfengo1
Date: 2025-09-10 10:04:29 +08:00
Committed by: GitHub
Parent: 35b8362804
Commit: dfc94371ee
14 changed files with 151 additions and 151 deletions


@@ -29,9 +29,9 @@ from fastdeploy.config import (
     FDConfig,
     GraphOptimizationConfig,
     LoadConfig,
-    MobaAttentionConfig,
     ModelConfig,
     ParallelConfig,
+    PlasAttentionConfig,
     SpeculativeConfig,
     TaskOption,
 )
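
For context, a minimal sketch of how the renamed class is imported after this change (assuming a FastDeploy build that includes this commit; constructing it with None mirrors the default branch of create_plas_attention_config further down):

    # Sketch only: import the renamed config class from fastdeploy.config.
    from fastdeploy.config import PlasAttentionConfig

    # Passing None mirrors the default path taken when no --plas-attention-config is given.
    plas_cfg = PlasAttentionConfig(None)
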
@@ -344,9 +344,9 @@ class EngineArgs:
     """
     Configuration for graph optimization backend execution.
     """
-    moba_attention_config: Optional[Dict[str, Any]] = None
+    plas_attention_config: Optional[Dict[str, Any]] = None
     """
-    Configuration for moba attention.
+    Configuration for plas attention.
     """
     enable_logprob: bool = False
@@ -571,9 +571,9 @@ class EngineArgs:
             help="",
         )
         model_group.add_argument(
-            "--moba-attention-config",
+            "--plas-attention-config",
             type=json.loads,
-            default=EngineArgs.moba_attention_config,
+            default=EngineArgs.plas_attention_config,
             help="",
         )
         model_group.add_argument(
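
Because the flag is declared with type=json.loads, the CLI value must be a JSON object string that argparse converts to a dict before EngineArgs sees it. A minimal sketch of that conversion (the key name below is hypothetical, used only for illustration):

    import json

    # --plas-attention-config is parsed with json.loads, so the raw CLI string
    # becomes a plain dict; the key here is hypothetical, not taken from this commit.
    raw_value = '{"plas_block_size": 128}'
    plas_attention_config = json.loads(raw_value)
    assert isinstance(plas_attention_config, dict)
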
@@ -971,17 +971,17 @@ class EngineArgs:
                 graph_optimization_args[k] = v
         return GraphOptimizationConfig(graph_optimization_args)
 
-    def create_moba_attention_config(self) -> MobaAttentionConfig:
+    def create_plas_attention_config(self) -> PlasAttentionConfig:
         """
-        Create and retuan a MobaAttentionConfig object based on the current settings.
+        Create and return a PlasAttentionConfig object based on the current settings.
         """
         attention_args = asdict(self)
-        if self.moba_attention_config is not None:
-            for k, v in self.moba_attention_config.items():
+        if self.plas_attention_config is not None:
+            for k, v in self.plas_attention_config.items():
                 attention_args[k] = v
-            return MobaAttentionConfig(attention_args)
+            return PlasAttentionConfig(attention_args)
         else:
-            return MobaAttentionConfig(None)
+            return PlasAttentionConfig(None)
 
     def create_early_stop_config(self) -> EarlyStopConfig:
         """
@@ -1037,7 +1037,7 @@ class EngineArgs:
         scheduler_cfg = self.create_scheduler_config()
         graph_opt_cfg = self.create_graph_optimization_config()
         graph_opt_cfg.update_use_cudagraph(self.use_cudagraph)
-        moba_attention_config = self.create_moba_attention_config()
+        plas_attention_config = self.create_plas_attention_config()
         early_stop_cfg = self.create_early_stop_config()
         early_stop_cfg.update_enable_early_stop(self.enable_early_stop)
@@ -1075,7 +1075,7 @@ class EngineArgs:
             max_long_partial_prefills=self.max_long_partial_prefills,
             long_prefill_token_threshold=self.long_prefill_token_threshold,
             graph_opt_config=graph_opt_cfg,
-            moba_attention_config=moba_attention_config,
+            plas_attention_config=plas_attention_config,
             guided_decoding_backend=self.guided_decoding_backend,
             disable_any_whitespace=self.guided_decoding_disable_any_whitespace,
             early_stop_config=early_stop_cfg,