// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/function/softmax.h"

#include <cstdlib>

#include "fastdeploy/function/eigen.h"
#include "fastdeploy/utils/axis_utils.h"
#include "fastdeploy/utils/utils.h"

namespace fastdeploy {

// Clamps a shifted logit from below at -64 so that exp() of it stays a tiny
// but representable value instead of underflowing to exactly zero.
template <typename T>
struct ValueClip {
  T operator()(const T& x) const {
    const T kThreshold = static_cast<T>(-64.);
    return x < kThreshold ? kThreshold : x;
  }
};

// Computes softmax over a 2-D view (batch, classes); when the softmax axis is
// not the last dimension, the class dimension is treated as (axis, remain).
template <typename T>
struct SoftmaxEigen {
  void operator()(const FDTensor& x, FDTensor* out, int axis_dim) {
    constexpr int kBatchDim = 0;
    constexpr int kClassDim = 1;
    constexpr int kAxisDim = 1;

    auto logits = EigenMatrix<T>::From(x);
    auto softmax = EigenMatrix<T>::From(*out);

    const int batch_size = logits.dimension(kBatchDim);
    const int num_classes = logits.dimension(kClassDim);
    const int num_remain = num_classes / axis_dim;

    Eigen::DSizes<int, 1> along_axis(kAxisDim);
    Eigen::DSizes<int, 2> batch_classes(batch_size, num_classes);
    Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
    Eigen::DSizes<int, 2> one_by_class(1, num_classes);
    Eigen::DSizes<int, 3> batch_one_remain(batch_size, 1, num_remain);
    Eigen::DSizes<int, 3> one_axis_one(1, axis_dim, 1);
    Eigen::DSizes<int, 2> one_axis(1, axis_dim);
    Eigen::DSizes<int, 3> batch_axis_remain(batch_size, axis_dim, num_remain);

    const auto& dev = *EigenDeviceWrapper::GetInstance()->GetDevice();
    // For numerical stability, shift the logits by the maximum along the
    // axis; the shifted logits are written into the softmax tensor so its
    // memory is reused.
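    // The identity relied on here, restated for reference: for any constant
    // c, softmax(x)_i = exp(x_i - c) / sum_j exp(x_j - c). Choosing
    // c = max(x) makes every exponent <= 0, so exp() cannot overflow, and
    // ValueClip then bounds each exponent below by -64.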
    if (num_remain == 1) {
      // axis == -1: the axis and class dimensions coincide, so compute
      // directly along the class dimension for higher performance.
      softmax.device(dev) = (logits -
                             logits.maximum(along_axis)
                                 .eval()
                                 .reshape(batch_by_one)
                                 .broadcast(one_by_class))
                                .unaryExpr(ValueClip<T>());
    } else {
      // axis != -1: the class dimension is split into (axis, remain); the
      // max and sum must be taken along the axis dimension.
      softmax.device(dev) = (logits.reshape(batch_axis_remain) -
                             logits.reshape(batch_axis_remain)
                                 .maximum(along_axis)
                                 .eval()
                                 .reshape(batch_one_remain)
                                 .broadcast(one_axis_one)
                                 .reshape(batch_axis_remain))
                                .reshape(batch_classes)
                                .unaryExpr(ValueClip<T>());
    }
    // Exponentiate the shifted logits, then normalize by the sum taken along
    // the axis dimension.
    softmax.device(dev) = softmax.exp();
    softmax.device(dev) = (softmax * softmax.reshape(batch_axis_remain)
                                         .sum(along_axis)
                                         .inverse()
                                         .eval()
                                         .broadcast(one_axis));
  }
};

template <typename T>
void SoftmaxFunctor(const FDTensor& x, FDTensor* out, int axis_dim) {
  SoftmaxEigen<T>()(x, out, axis_dim);
}

template <typename T>
void SoftmaxKernel(const FDTensor& x, FDTensor* out, int axis) {
  const int rank = x.shape.size();
  const int calc_axis = CanonicalAxis(axis, rank);
  int axis_dim = x.shape[calc_axis];
  out->Allocate(x.shape, x.dtype);
  if (out->Numel() == 0) {
    return;
  }
  const int n = SizeToAxis(calc_axis, x.shape);
  const int d = SizeFromAxis(calc_axis, x.shape);
  // Reshape to a 2-D view: n rows (dims before the axis) by d columns (dims
  // from the axis onward). The views share storage with x and out.
  FDTensor x_2d, out_2d;
  x_2d.SetExternalData({n, d}, x.dtype, const_cast<void*>(x.Data()));
  out_2d.SetExternalData({n, d}, out->dtype, out->Data());

  SoftmaxFunctor<T>(x_2d, &out_2d, axis_dim);
}

void Softmax(const FDTensor& x, FDTensor* out, int axis) {
  FDASSERT(
      std::abs(axis) < static_cast<int>(x.shape.size()),
      "The absolute value of the given axis should be smaller than the "
      "input's rank. Expected |axis| < %lu, but received %d.",
      x.shape.size(), std::abs(axis));
  FD_VISIT_FLOAT_TYPES(x.dtype, "SoftmaxKernel",
                       ([&] { SoftmaxKernel<data_t>(x, out, axis); }));
}

}  // namespace fastdeploy
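// Usage sketch (illustrative; it assumes FDDataType::FP32 names the float
// dtype, and otherwise relies only on the FDTensor calls used above):
//
//   fastdeploy::FDTensor x, out;
//   x.Allocate({2, 3}, fastdeploy::FDDataType::FP32);
//   auto* data = reinterpret_cast<float*>(x.Data());
//   for (int i = 0; i < x.Numel(); ++i) data[i] = static_cast<float>(i);
//   fastdeploy::Softmax(x, &out, /*axis=*/-1);
//   // Each row of out is now a probability distribution summing to 1.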