mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-04 08:16:42 +08:00
[Iluvatar GPU] Optimize attention and moe performance (#3234)
This commit is contained in:
@@ -211,7 +211,7 @@ def post_process_normal(
|
||||
model_output.stop_flags,
|
||||
)
|
||||
|
||||
-if current_platform.is_cuda():
|
||||
+if current_platform.is_cuda() or current_platform.is_iluvatar():
|
||||
set_stop_value_multi_ends(
|
||||
sampler_output.sampled_token_ids,
|
||||
model_output.stop_flags,
|
||||
|
Reference in New Issue
Block a user