mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00

* fix noaux_tc op * fix * update * fix qk norm * fix linear for prequant loader * test * fix * fix * rm some print * fix noaux_tc op * test * Fix the confused enable_early_stop when only set early_stop_config (#3214) * fix the confused early_stop_config when only set early_stop_config * pre-commit * write a general method * Add ci case for min token and max token (#3229) Co-authored-by: xujing43 <xujing43@baidu.com> * add some evil cases (#3240) * add repitation early stop cases * add repitation early stop cases * add bad cases * add bad cases * add evil cases * qwen3_moe (#3084) * [Feature] support seed parameter (#3161) * support seed * fix * add SamplingMetadata seed test * The next_tokens values are inconsistent! * add air and rejection seed test * fix * add SamplingParams seed test * fix seed=0 * Default to defualt * fix * fix args_utils * fix review * fix review * fix * fix * add xpu,gcu,iluvatar support seed * fix * 【Fix Bug】 修复 fa3 支持集中式bug (#3235) * fix fa3 集中式bug * 增加qknorm参数 * fix qk norm * fix * update * fix linear for prequant loader * fix * fix * rm some print * fix * fix moe init weight&scale * fix moe init weight&scale --------- Co-authored-by: bukejiyu <395822456@qq.com> Co-authored-by: yuanxiaolan <yuanxiaolan01@baidu.com> Co-authored-by: Zero Rains <linjunlu@zerorains.top> Co-authored-by: xjkmfa <108254620+xjkmfa@users.noreply.github.com> Co-authored-by: xujing43 <xujing43@baidu.com> Co-authored-by: Divano <dddivano@outlook.com> Co-authored-by: bukejiyu <52310069+bukejiyu@users.noreply.github.com> Co-authored-by: lizexu123 <39205361+lizexu123@users.noreply.github.com> Co-authored-by: yangjianfengo1 <125249383+yangjianfengo1@users.noreply.github.com> Co-authored-by: qingqing01 <dangqingqing@baidu.com>
83 lines
3.2 KiB
Plaintext
83 lines
3.2 KiB
Plaintext
|
|
// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#pragma once
|
|
|
|
#include <algorithm>
|
|
#include <optional>
|
|
|
|
#include "helper.h"
|
|
#include "noauxtc_kernel.h"
|
|
|
|
// Dispatches the "no auxiliary loss" top-k routing kernel.
// NOTE(review): presumably this implements group-limited MoE gating — the
// kernel picks topk_group of the n_group expert groups per token and then the
// top-k experts inside them; confirm against noauxtc_kernel.h.
//
// Inputs:
//   scores                - [num_tokens, num_experts] float gating scores;
//                           updated in place by the kernel and also returned.
//   scores_with_bias      - [num_tokens, num_experts] float scores with the
//                           expert bias added; used for expert selection.
//   n_group               - number of expert groups.
//   topk_group            - number of groups kept per token.
//   topk                  - number of experts selected per token.
//   routed_scaling_factor - scale applied to the routed weights.
//
// Returns {scores (in-place updated), topk_values, topk_indices}.
std::vector<paddle::Tensor> NoauxTc(paddle::Tensor& scores,
                                    paddle::Tensor& scores_with_bias,
                                    int n_group,
                                    int topk_group,
                                    int topk,
                                    float routed_scaling_factor) {
  auto input_shape = scores_with_bias.shape();
  PD_CHECK(input_shape.size() == 2,
           "scores_with_bias must be 2-D [num_tokens, num_experts]");
  int64_t num_tokens = input_shape[0];
  int64_t num_experts = input_shape[1];
  auto input_type = scores_with_bias.dtype();
  auto place = scores_with_bias.place();
  // Scratch buffer for the per-group reduction plus the two kernel outputs.
  auto group_scores = paddle::empty({num_tokens, n_group}, input_type, place);
  auto topk_values = paddle::empty({num_tokens, topk}, input_type, place);
  auto topk_indices =
      paddle::empty({num_tokens, topk}, paddle::DataType::INT64, place);
  auto stream = scores_with_bias.stream();

  // data<T>() already returns T*, so the reinterpret_casts the original
  // wrapped every argument in were redundant and have been dropped.
  invokeNoAuxTc<float, int64_t>(scores.data<float>(),
                                group_scores.data<float>(),
                                topk_values.data<float>(),
                                topk_indices.data<int64_t>(),
                                scores_with_bias.data<float>(),
                                num_tokens,
                                num_experts,
                                n_group,
                                topk_group,
                                topk,
                                routed_scaling_factor,
                                stream);

  return {scores, topk_values, topk_indices};
}
std::vector<paddle::DataType> NoauxTcInferDtype(
|
|
const paddle::DataType& scores_dtype,
|
|
const paddle::DataType& scores_with_bias_dtype) {
|
|
return {scores_dtype, scores_dtype, paddle::DataType::INT64};
|
|
}
|
|
|
|
// Output shapes for noaux_tc: the in-place-updated scores keep their input
// shape, and both top-k outputs are [num_tokens, topk]. The second argument
// (scores_with_bias shape) is intentionally unused.
std::vector<std::vector<int64_t>> NoauxTcInferShape(
    const std::vector<int64_t>& scores_shape,
    const std::vector<int64_t>& ,
    const int topk) {
  const int64_t num_tokens = scores_shape[0];
  const std::vector<int64_t> topk_shape{num_tokens, topk};
  return {scores_shape, topk_shape, topk_shape};
}
// Registers the noaux_tc custom op: outputs are the in-place-updated scores
// ("output_tensor") plus the per-token top-k routing values and indices.
PD_BUILD_STATIC_OP(noaux_tc)
    .Inputs({"scores", "scores_with_bias"})
    .Outputs({"output_tensor", "topk_values", "topk_indices"})
    .Attrs({"n_group: int",
            "topk_group: int",
            // normalized "name: type" spacing to match the sibling attrs
            "topk: int",
            "routed_scaling_factor: float"})
    .SetKernelFn(PD_KERNEL(NoauxTc))
    .SetInferShapeFn(PD_INFER_SHAPE(NoauxTcInferShape))
    .SetInferDtypeFn(PD_INFER_DTYPE(NoauxTcInferDtype));