mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 17:17:14 +08:00
[MetaxGPU] Support FastDeploy on metax gpu (#3241)
* [MetaxGPU] Support FastDeploy on metax gpu
* Update metax_worker.py
  1. change worker log;
  2. remove custom allreduce, adapt it later;
  3. remove cuda graph;
* Update __init__.py
  1. remove metax's key word comment
* Update __init__.py
  1. remove metax's key word comment;
  2. add fused_moe_kernel_paddle import
---------
Co-authored-by: yongqiangma <xing.wo@163.com>
This commit is contained in:
@@ -119,6 +119,23 @@ def apply_penalty_multi_scores(
|
||||
min_dec_lens,
|
||||
eos_token_ids,
|
||||
)
|
||||
elif current_platform.is_maca():
|
||||
from fastdeploy.model_executor.ops.gpu import get_token_penalty_multi_scores
|
||||
|
||||
logits = get_token_penalty_multi_scores(
|
||||
pre_token_ids,
|
||||
prompt_ids,
|
||||
prompt_lens,
|
||||
logits,
|
||||
repetition_penalties,
|
||||
frequency_penalties,
|
||||
presence_penalties,
|
||||
temperature,
|
||||
bad_words_token_ids,
|
||||
step_idx,
|
||||
min_dec_lens,
|
||||
eos_token_ids,
|
||||
)
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
|
@@ -177,6 +177,7 @@ class Sampler(nn.Layer):
|
||||
or current_platform.is_iluvatar()
|
||||
or current_platform.is_gcu()
|
||||
or current_platform.is_dcu()
|
||||
or current_platform.is_maca()
|
||||
):
|
||||
self.forward = self.forward_cuda
|
||||
else:
|
||||
|
Reference in New Issue
Block a user