Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-05 08:37:06 +08:00)
[GCU] Enable gcu CI (#3190)
Some checks failed: Deploy GitHub Pages / deploy (push) has been cancelled.
* [GCU] Update to the latest version
* [GCU] Enable CI
@@ -76,6 +76,8 @@ class GCUFlashAttnBackend(AttentionBackend):
         kv_num_heads: int,
         num_heads: int,
         head_dim: int,
+        encoder_block_shape_q: int = -1,
+        decoder_block_shape_q: int = -1,
     ):
         """
         GCUFlashAttnBackend __init__
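Both added parameters default to -1, so existing call sites keep working. A minimal sketch of constructing the backend with the new keywords (the fd_config argument and the concrete head counts are assumptions for illustration; only the parameter names shown in the hunk come from the diff):

# Hypothetical call site; fd_config and the numeric values are assumed.
backend = GCUFlashAttnBackend(
    fd_config,
    kv_num_heads=8,
    num_heads=32,
    head_dim=128,
    encoder_block_shape_q=-1,  # -1 keeps the backend's default block shape
    decoder_block_shape_q=-1,
)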
@@ -94,7 +96,7 @@ class GCUFlashAttnBackend(AttentionBackend):
         self.head_dim = head_dim
         self.scaling = 1.0 / (self.head_dim**0.5)
         self.num_layers = fd_config.model_config.num_hidden_layers
-        self.position_ids_base = paddle.arange(self.max_seq_len)
+        self.position_ids_base = np.arange(self.max_seq_len)

         # TODO(zhengjun): Need to adapt the allocation logic and
         # temporarily allocate according to fixed size
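The functional change here replaces paddle.arange with np.arange, so position_ids_base is built once as a host-side NumPy array rather than a device tensor. A minimal sketch of the difference, assuming paddle and numpy are importable:

import numpy as np
import paddle

max_seq_len = 8

# paddle.arange returns a paddle.Tensor placed on the current device.
device_ids = paddle.arange(max_seq_len)

# np.arange stays on the host as a plain ndarray; it can be sliced cheaply
# and converted to a tensor only when a kernel actually needs it.
host_ids = np.arange(max_seq_len)

print(type(device_ids).__name__, type(host_ids).__name__)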
@@ -74,6 +74,8 @@ class GCUMemEfficientAttnBackend(AttentionBackend):
         kv_num_heads: int,
         num_heads: int,
         head_dim: int,
+        encoder_block_shape_q: int = -1,
+        decoder_block_shape_q: int = -1,
     ):
         """
         GCUMemEfficientAttnBackend __init__
@@ -92,7 +94,7 @@ class GCUMemEfficientAttnBackend(AttentionBackend):
         self.head_dim = head_dim
         self.scaling = 1.0 / (self.head_dim**0.5)
         self.num_layers = fd_config.model_config.num_hidden_layers
-        self.position_ids_base = paddle.arange(self.max_seq_len)
+        self.position_ids_base = np.arange(self.max_seq_len)

         # TODO(zhengjun): Need to adapt the allocation logic and
         # temporarily allocate according to fixed size
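Both backends keep the standard scaled-dot-product factor self.scaling = 1.0 / (self.head_dim**0.5), i.e. 1/sqrt(d_k). A generic NumPy sketch of where that factor enters attention (not FastDeploy's actual GCU kernel, just the textbook computation it corresponds to):

import numpy as np

def scaled_dot_product_attention(q, k, v, head_dim):
    # softmax(q @ k^T / sqrt(d_k)) @ v, with the same scaling as the diff.
    scaling = 1.0 / (head_dim ** 0.5)
    scores = (q @ k.T) * scaling
    weights = np.exp(scores - scores.max(axis=-1, keepdims=True))
    weights /= weights.sum(axis=-1, keepdims=True)
    return weights @ v

q = k = v = np.random.rand(4, 64).astype(np.float32)
print(scaled_dot_product_attention(q, k, v, head_dim=64).shape)  # (4, 64)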