【FIX】Change the name of sparse attn from moba to plas (#4006)

* Update docs

* 【docs】 update readme (#4000)

* Update docs

* update readme

* update docs

* 【FIX】Change the name of sparse attn from moba to plas (#3845)

* Update docs

* Update docs

* Update docs

* Update docs

* Change moba to plas

* code style

* update ci

* code style

* update ci

* code style

---------

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
Author: yangjianfengo1
Date: 2025-09-10 10:04:29 +08:00
Committed by: GitHub
Parent: 35b8362804
Commit: dfc94371ee
14 changed files with 151 additions and 151 deletions


@@ -29,9 +29,9 @@ from fastdeploy.config import (
     FDConfig,
     GraphOptimizationConfig,
     LoadConfig,
-    MobaAttentionConfig,
     ModelConfig,
     ParallelConfig,
+    PlasAttentionConfig,
     SpeculativeConfig,
     TaskOption,
 )
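
For context, a minimal sketch of how the renamed class is imported after this change (assuming a FastDeploy build that includes this commit; constructing it with None mirrors the default branch of create_plas_attention_config further down):

    # Sketch only: import the renamed config class from fastdeploy.config.
    from fastdeploy.config import PlasAttentionConfig

    # Passing None mirrors the default path taken when no --plas-attention-config is given.
    plas_cfg = PlasAttentionConfig(None)
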
@@ -344,9 +344,9 @@ class EngineArgs:
     """
     Configuration for graph optimization backend execution.
     """
-    moba_attention_config: Optional[Dict[str, Any]] = None
+    plas_attention_config: Optional[Dict[str, Any]] = None
     """
-    Configuration for moba attention.
+    Configuration for plas attention.
     """
     enable_logprob: bool = False
@@ -571,9 +571,9 @@ class EngineArgs:
             help="",
         )
         model_group.add_argument(
-            "--moba-attention-config",
+            "--plas-attention-config",
             type=json.loads,
-            default=EngineArgs.moba_attention_config,
+            default=EngineArgs.plas_attention_config,
             help="",
         )
         model_group.add_argument(
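
Because the flag is declared with type=json.loads, the CLI value must be a JSON object string that argparse converts to a dict before EngineArgs sees it. A minimal sketch of that conversion (the key name below is hypothetical, used only for illustration):

    import json

    # --plas-attention-config is parsed with json.loads, so the raw CLI string
    # becomes a plain dict; the key here is hypothetical, not taken from this commit.
    raw_value = '{"plas_block_size": 128}'
    plas_attention_config = json.loads(raw_value)
    assert isinstance(plas_attention_config, dict)
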
@@ -971,17 +971,17 @@ class EngineArgs:
                 graph_optimization_args[k] = v
         return GraphOptimizationConfig(graph_optimization_args)
 
-    def create_moba_attention_config(self) -> MobaAttentionConfig:
+    def create_plas_attention_config(self) -> PlasAttentionConfig:
         """
-        Create and retuan a MobaAttentionConfig object based on the current settings.
+        Create and return a PlasAttentionConfig object based on the current settings.
         """
         attention_args = asdict(self)
-        if self.moba_attention_config is not None:
-            for k, v in self.moba_attention_config.items():
+        if self.plas_attention_config is not None:
+            for k, v in self.plas_attention_config.items():
                 attention_args[k] = v
-            return MobaAttentionConfig(attention_args)
+            return PlasAttentionConfig(attention_args)
         else:
-            return MobaAttentionConfig(None)
+            return PlasAttentionConfig(None)
 
     def create_early_stop_config(self) -> EarlyStopConfig:
         """
@@ -1037,7 +1037,7 @@ class EngineArgs:
         scheduler_cfg = self.create_scheduler_config()
         graph_opt_cfg = self.create_graph_optimization_config()
         graph_opt_cfg.update_use_cudagraph(self.use_cudagraph)
-        moba_attention_config = self.create_moba_attention_config()
+        plas_attention_config = self.create_plas_attention_config()
         early_stop_cfg = self.create_early_stop_config()
         early_stop_cfg.update_enable_early_stop(self.enable_early_stop)
@@ -1075,7 +1075,7 @@ class EngineArgs:
             max_long_partial_prefills=self.max_long_partial_prefills,
             long_prefill_token_threshold=self.long_prefill_token_threshold,
             graph_opt_config=graph_opt_cfg,
-            moba_attention_config=moba_attention_config,
+            plas_attention_config=plas_attention_config,
             guided_decoding_backend=self.guided_decoding_backend,
             disable_any_whitespace=self.guided_decoding_disable_any_whitespace,
             early_stop_config=early_stop_cfg,