mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
Revert "【FIX】Change the name of sparse attn from moba to plas (#3845)" (#4001)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
This reverts commit e31c8f7336.
This commit is contained in:
@@ -28,9 +28,9 @@ from fastdeploy.config import (
|
||||
FDConfig,
|
||||
GraphOptimizationConfig,
|
||||
LoadConfig,
|
||||
MobaAttentionConfig,
|
||||
ModelConfig,
|
||||
ParallelConfig,
|
||||
PlasAttentionConfig,
|
||||
SpeculativeConfig,
|
||||
TaskOption,
|
||||
)
|
||||
@@ -342,9 +342,9 @@ class EngineArgs:
|
||||
"""
|
||||
Configuration for graph optimization backend execution.
|
||||
"""
|
||||
plas_attention_config: Optional[Dict[str, Any]] = None
|
||||
moba_attention_config: Optional[Dict[str, Any]] = None
|
||||
"""
|
||||
Configuration for plas attention.
|
||||
Configuration for moba attention.
|
||||
"""
|
||||
|
||||
enable_logprob: bool = False
|
||||
@@ -559,9 +559,9 @@ class EngineArgs:
|
||||
help="",
|
||||
)
|
||||
model_group.add_argument(
|
||||
"--plas-attention-config",
|
||||
"--moba-attention-config",
|
||||
type=json.loads,
|
||||
default=EngineArgs.plas_attention_config,
|
||||
default=EngineArgs.moba_attention_config,
|
||||
help="",
|
||||
)
|
||||
model_group.add_argument(
|
||||
@@ -959,17 +959,17 @@ class EngineArgs:
|
||||
graph_optimization_args[k] = v
|
||||
return GraphOptimizationConfig(graph_optimization_args)
|
||||
|
||||
def create_plas_attention_config(self) -> PlasAttentionConfig:
|
||||
def create_moba_attention_config(self) -> MobaAttentionConfig:
|
||||
"""
|
||||
Create and return a PlasAttentionConfig object based on the current settings.
|
||||
Create and return a MobaAttentionConfig object based on the current settings.
|
||||
"""
|
||||
attention_args = asdict(self)
|
||||
if self.plas_attention_config is not None:
|
||||
for k, v in self.plas_attention_config.items():
|
||||
if self.moba_attention_config is not None:
|
||||
for k, v in self.moba_attention_config.items():
|
||||
attention_args[k] = v
|
||||
return PlasAttentionConfig(attention_args)
|
||||
return MobaAttentionConfig(attention_args)
|
||||
else:
|
||||
return PlasAttentionConfig(None)
|
||||
return MobaAttentionConfig(None)
|
||||
|
||||
def create_early_stop_config(self) -> EarlyStopConfig:
|
||||
"""
|
||||
@@ -1025,7 +1025,7 @@ class EngineArgs:
|
||||
scheduler_cfg = self.create_scheduler_config()
|
||||
graph_opt_cfg = self.create_graph_optimization_config()
|
||||
graph_opt_cfg.update_use_cudagraph(self.use_cudagraph)
|
||||
plas_attention_config = self.create_plas_attention_config()
|
||||
moba_attention_config = self.create_moba_attention_config()
|
||||
|
||||
early_stop_cfg = self.create_early_stop_config()
|
||||
early_stop_cfg.update_enable_early_stop(self.enable_early_stop)
|
||||
@@ -1063,7 +1063,7 @@ class EngineArgs:
|
||||
max_long_partial_prefills=self.max_long_partial_prefills,
|
||||
long_prefill_token_threshold=self.long_prefill_token_threshold,
|
||||
graph_opt_config=graph_opt_cfg,
|
||||
plas_attention_config=plas_attention_config,
|
||||
moba_attention_config=moba_attention_config,
|
||||
guided_decoding_backend=self.guided_decoding_backend,
|
||||
disable_any_whitespace=self.guided_decoding_disable_any_whitespace,
|
||||
early_stop_config=early_stop_cfg,
|
||||
|
Reference in New Issue
Block a user