Adapt for Iluvatar GPU (#2684)

This commit is contained in:
liddk1121
2025-07-07 16:53:14 +08:00
committed by GitHub
parent 2579e8fea8
commit 1b54a2831e
50 changed files with 4485 additions and 80 deletions

View File

@@ -57,7 +57,8 @@ class LinearBase(nn.Layer):
NotImplementedError: Raised if the current platform is not a CUDA platform.
"""
super().__init__()
if current_platform.is_cuda() or current_platform.is_xpu():
if current_platform.is_cuda() or current_platform.is_xpu(
) or current_platform.is_iluvatar():
self.forward = self.forward_cuda
else:
raise NotImplementedError
@@ -411,9 +412,14 @@ class QKVParallelLinear(ColumnParallelLinear):
self.head_dim = fd_config.model_config.head_dim
self.nranks = fd_config.parallel_config.tensor_parallel_degree
self.num_heads_per_rank = divide(self.num_heads, self.nranks)
self.kv_num_heads_per_rank = divide(self.kv_num_heads, self.nranks)
if self.kv_num_heads < self.nranks and self.nranks % self.kv_num_heads == 0:
self.kv_num_heads_per_rank = 1
output_size = (self.num_heads + 2 * self.nranks) * self.head_dim
else:
self.kv_num_heads_per_rank = divide(self.kv_num_heads, self.nranks)
output_size = (self.num_heads +
2 * self.kv_num_heads) * self.head_dim
input_size = self.hidden_size
output_size = (self.num_heads + 2 * self.kv_num_heads) * self.head_dim
super().__init__(fd_config=fd_config,
prefix=prefix,
input_size=input_size,