[Intel HPU] Support intel hpu platform (#4161)

* [Intel HPU] Support intel hpu platform

* fix some issues

* apply precommit and move AttentionBackend_HPU

* fix format issue

* correct ops import

* fix ci issue

* update code in layers

* fix code style issue

* remove dense tp moe ep mode

* fix enc_dec_block_num

* fix rebase issue

* rename hpu to gaudi in readme

* rename ForwardMeta_HPU to HPUForwardMeta
This commit is contained in:
fmiao2372
2025-09-24 12:27:50 +08:00
committed by GitHub
parent a1c5d930bb
commit f1b5392e20
35 changed files with 2814 additions and 19 deletions

View File

@@ -209,6 +209,8 @@ class Sampler(nn.Layer):
or current_platform.is_maca()
):
self.forward = self.forward_cuda
elif current_platform.is_intel_hpu():
self.forward = self.forward_intel_hpu
else:
raise NotImplementedError
@@ -377,6 +379,49 @@ class Sampler(nn.Layer):
return sampler_output
def forward_intel_hpu(
    self,
    logits: paddle.Tensor,
    sampling_metadata: SamplingMetadata,
    batch_ids: paddle.Tensor,
    max_batch: int,
    rank: int,
    local_rank: int,
) -> paddle.Tensor:
    """
    Forward pass of the sampler on Intel HPU, backed by the ``fused_sampler`` op.

    The logits are cast to float32 when they are not already, then handed to
    ``fused_sampler`` together with the penalty / temperature / top-p / stop
    fields carried by ``sampling_metadata``. Only the second output of the op
    (the sampled tokens) is used.

    Args:
        logits: Logits to sample from; cast to ``paddle.float32`` if needed.
        sampling_metadata: Source of the per-request sampling fields that are
            forwarded to the op.
        batch_ids: Row indices used to scatter the sampled tokens into the
            padded result. NOTE(review): presumably the batch slots of the
            rows present in ``logits`` - confirm against the caller.
        max_batch: Number of rows the returned tensor must have.
        rank: Forwarded unchanged to ``fused_sampler``.
        local_rank: Forwarded unchanged to ``fused_sampler``.

    Returns:
        The sampled tokens as returned by the op when their first dimension
        already equals ``max_batch``; otherwise a ``(max_batch, dim)`` tensor
        pre-filled with ``-1`` into which the first ``batch_ids.shape[0]``
        sampled rows are scattered at ``batch_ids``.
    """
    # Imported lazily, inside the method, exactly as the op is only needed on
    # this code path.
    from fastdeploy.model_executor.ops.intel_hpu import fused_sampler

    if logits.dtype != paddle.float32:
        logits = paddle.cast(logits, paddle.float32)

    meta = sampling_metadata
    # Arguments are positional and their order must not change. ``step_idx``
    # appears twice, as in the op call this replaces.
    # NOTE(review): presumably the op signature has two step-index slots - confirm.
    _unused, sampled = fused_sampler(
        meta.pre_token_ids,
        meta.prompt_ids,
        meta.seq_lens_encoder,
        meta.seq_lens_decoder,
        meta.step_idx,
        meta.stop_flags,
        logits,
        meta.repetition_penalties,
        meta.frequency_penalties,
        meta.presence_penalties,
        meta.temperature,
        meta.bad_words_token_ids,
        meta.step_idx,
        meta.min_dec_lens,
        meta.eos_token_ids,
        meta.top_p,
        rank,
        local_rank,
    )

    # Nothing to pad when the op already produced one row per batch slot.
    if sampled.shape[0] == max_batch:
        return sampled

    # Otherwise build a (max_batch, dim) tensor of -1 and place the sampled
    # rows at the positions given by batch_ids.
    width = sampled.shape[-1]
    padded = paddle.full((max_batch, width), -1, dtype=sampled.dtype)
    return paddle.scatter(padded, batch_ids, sampled[: batch_ids.shape[0], :])
class SpeculativeSampler(nn.Layer):
"""