[SOT] Mark dynamic dims by type annotations (#2771)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled

* [SOT] Mark dynamic dims by type annotations

* fix conflict of forward_meta

* mark more attn backend

* fix missing annotated and add env SOT_SPECIALIZED_DIM_NUMBERS

* auto infer implicit 0 dim dynamic dim

* revert manual marked dims

* revert missing update

* auto-infer can use unsafe code in the warmup stage

* check -> type_match

* fix codestyle

* restore blank line

* empty commit

* add need_warmup nonlocal

* add doc for resolver

* add missing type hints

* unquote "ForwardMeta"
This commit is contained in:
Nyakku Shigure
2025-07-22 15:23:52 +08:00
committed by GitHub
parent e991777757
commit 48e6a0ca26
13 changed files with 330 additions and 28 deletions

View File

@@ -66,13 +66,13 @@ class AppendAttentionMetadata(AttentionMetadata):
block_tables: Optional[paddle.Tensor] = None
rotary_embs: Optional[paddle.Tensor] = None
attn_mask: Optional[paddle.Tensor] = None
- encoder_block_shape_q: Optional[paddle.Tensor] = None
- decoder_block_shape_q: Optional[paddle.Tensor] = None
+ encoder_block_shape_q: int = -1
+ decoder_block_shape_q: int = -1
_fuse_kernel_compute_dtype: str = "bf16"
# pd_disaggregation
kv_signal_metadata: Optional[paddle.Tensor] = None
- kv_signal_data_list: List[paddle.Tensor] = field(default_factory=list)
+ kv_signal_data_list: List[Optional[paddle.Tensor]] = field(default_factory=list)
class AppendAttentionBackend(AttentionBackend):
@@ -80,6 +80,9 @@ class AppendAttentionBackend(AttentionBackend):
AppendAttentionBackend backend implementation.
"""
+ __infer_dynamic_dims_fields__ = ["attention_metadata"]
+ attention_metadata: AppendAttentionMetadata
def __init__(
self,
fd_config: FDConfig,