diff --git a/.clang-format b/.clang-format index 79aa44660..c91ec19fb 100644 --- a/.clang-format +++ b/.clang-format @@ -149,7 +149,7 @@ # SpaceBeforeRangeBasedForLoopColon: true # SpaceInEmptyBlock: false # SpaceInEmptyParentheses: false -# SpacesBeforeTrailingComments: 1 +# SpacesBeforeTrailingComments: 2 # SpacesInAngles: Never # SpacesInConditionalStatement: false # SpacesInContainerLiterals: true diff --git a/fastdeploy/core/fd_tensor.cc b/fastdeploy/core/fd_tensor.cc old mode 100755 new mode 100644 index 86ce866f4..200e51ade --- a/fastdeploy/core/fd_tensor.cc +++ b/fastdeploy/core/fd_tensor.cc @@ -11,11 +11,11 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#include - #include "fastdeploy/core/fd_tensor.h" #include "fastdeploy/core/float16.h" #include "fastdeploy/utils/utils.h" +#include +#include #ifdef WITH_GPU #include #endif @@ -151,9 +151,63 @@ void FDTensor::Resize(const std::vector& new_shape, shape.assign(new_shape.begin(), new_shape.end()); } +bool FDTensor::Reshape(const std::vector& new_shape) { + int numel = Numel(); + const int64_t unk_dim_val = -1; + const int64_t copy_dim_val = 0; + + std::vector output_shape(new_shape.size(), 0); + int64_t capacity = 1; + int unk_dim_idx = -1; + for (size_t i = 0; i < new_shape.size(); ++i) { + if (new_shape[i] == unk_dim_val) { + FDASSERT(unk_dim_idx == -1, + "Only one dimension value of 'shape' in ReshapeOp can " + "be -1. But received shape = [%s], shape[%d] is also -1.", + Str(new_shape).c_str(), i); + unk_dim_idx = i; + } else if (new_shape[i] == copy_dim_val) { + FDASSERT(i < shape.size(), + "The index of 0 in `shape` must be less than " + "the input tensor X's dimensions. " + "But received shape = [%s], shape[%d] = 0, X's shape = [%s], " + "X's dimensions = %d.", + Str(new_shape).c_str(), i, Str(shape).c_str(), shape.size()); + } else { + FDASSERT(new_shape[i] > 0, + "Each dimension value of 'shape' in ReshapeOp must not " + "be negative except one unknown dimension. " + "But received shape = [%s], shape[%d] = %d.", + Str(new_shape).c_str(), i, new_shape[i]); + } + capacity *= (new_shape[i] ? new_shape[i] : shape[i]); + output_shape[i] = (new_shape[i] ? new_shape[i] : shape[i]); + } + if (unk_dim_idx != -1) { + output_shape[unk_dim_idx] = -numel / capacity; + FDASSERT(output_shape[unk_dim_idx] * capacity == -numel, + "The 'shape' attribute in ReshapeOp is invalid. " + "The input tensor X'size must be divisible by known " + "capacity of 'shape'. " + "But received X's shape = [%s], X's size = %d, " + "'shape' is [%s], known capacity of 'shape' is %d.", + Str(shape).c_str(), numel, Str(new_shape).c_str(), capacity); + } else { + FDASSERT(numel == capacity, + "The 'shape' in ReshapeOp is invalid. " + "The input tensor X'size must be equal to the capacity of " + "'shape'. " + "But received X's shape = [%s], X's size = %d, 'shape' is " + "[%s], the capacity of 'shape' is %d.", + Str(shape).c_str(), numel, Str(shape).c_str(), capacity); + } + shape = output_shape; + return true; +} + template -void CalculateStatisInfo(const void* src_ptr, int size, double* mean, double* max, - double* min) { +void CalculateStatisInfo(const void* src_ptr, int size, double* mean, + double* max, double* min) { const T* ptr = static_cast(src_ptr); *mean = 0; *max = -99999999; @@ -213,10 +267,9 @@ bool FDTensor::ReallocFn(size_t nbytes) { } return buffer_ != nullptr; #else - FDASSERT(false, - "The FastDeploy FDTensor allocator didn't compile under " - "-DWITH_GPU=ON," - "so this is an unexpected problem happend."); + FDASSERT(false, "The FastDeploy FDTensor allocator didn't compile under " + "-DWITH_GPU=ON," + "so this is an unexpected problem happend."); #endif } else { if (is_pinned_memory) { @@ -230,10 +283,9 @@ bool FDTensor::ReallocFn(size_t nbytes) { } return buffer_ != nullptr; #else - FDASSERT(false, - "The FastDeploy FDTensor allocator didn't compile under " - "-DWITH_GPU=ON," - "so this is an unexpected problem happend."); + FDASSERT(false, "The FastDeploy FDTensor allocator didn't compile under " + "-DWITH_GPU=ON," + "so this is an unexpected problem happend."); #endif } buffer_ = realloc(buffer_, nbytes); @@ -242,7 +294,8 @@ bool FDTensor::ReallocFn(size_t nbytes) { } void FDTensor::FreeFn() { - if (external_data_ptr != nullptr) external_data_ptr = nullptr; + if (external_data_ptr != nullptr) + external_data_ptr = nullptr; if (buffer_ != nullptr) { if (device == Device::GPU) { #ifdef WITH_GPU @@ -293,11 +346,8 @@ void FDTensor::CopyBuffer(void* dst, const void* src, size_t nbytes, FDTensor::FDTensor(const std::string& tensor_name) { name = tensor_name; } FDTensor::FDTensor(const FDTensor& other) - : shape(other.shape), - name(other.name), - dtype(other.dtype), - device(other.device), - external_data_ptr(other.external_data_ptr) { + : shape(other.shape), name(other.name), dtype(other.dtype), + device(other.device), external_data_ptr(other.external_data_ptr) { // Copy buffer if (other.buffer_ == nullptr) { buffer_ = nullptr; @@ -310,12 +360,9 @@ FDTensor::FDTensor(const FDTensor& other) } FDTensor::FDTensor(FDTensor&& other) - : buffer_(other.buffer_), - shape(std::move(other.shape)), - name(std::move(other.name)), - dtype(other.dtype), - external_data_ptr(other.external_data_ptr), - device(other.device) { + : buffer_(other.buffer_), shape(std::move(other.shape)), + name(std::move(other.name)), dtype(other.dtype), + external_data_ptr(other.external_data_ptr), device(other.device) { other.name = ""; // Note(zhoushunjie): Avoid double free. other.buffer_ = nullptr; diff --git a/fastdeploy/core/fd_tensor.h b/fastdeploy/core/fd_tensor.h old mode 100755 new mode 100644 index 7deb48229..6a86bba1b --- a/fastdeploy/core/fd_tensor.h +++ b/fastdeploy/core/fd_tensor.h @@ -57,9 +57,7 @@ struct FASTDEPLOY_DECL FDTensor { void* Data(); - bool IsShared() { - return external_data_ptr != nullptr; - } + bool IsShared() { return external_data_ptr != nullptr; } void StopSharing(); @@ -116,6 +114,7 @@ struct FASTDEPLOY_DECL FDTensor { const FDDataType& data_type, const std::string& tensor_name = "", const Device& new_device = Device::CPU); + bool Reshape(const std::vector& new_shape); // Debug function // Use this function to print shape, dtype, mean, max, min // prefix will also be printed as tag @@ -141,7 +140,7 @@ struct FASTDEPLOY_DECL FDTensor { static void CopyBuffer(void* dst, const void* src, size_t nbytes, const Device& device = Device::CPU, - bool is_pinned_memory = false); + bool is_pinned_memory = false); }; } // namespace fastdeploy diff --git a/fastdeploy/function/concat.cc b/fastdeploy/function/concat.cc index 3a59e7910..295c3c25a 100644 --- a/fastdeploy/function/concat.cc +++ b/fastdeploy/function/concat.cc @@ -14,26 +14,17 @@ #include "fastdeploy/function/concat.h" +#include "fastdeploy/utils/utils.h" #include #include #include #include -#include "fastdeploy/utils/utils.h" namespace fastdeploy { namespace function { -std::string Str(const std::vector& shape) { - std::ostringstream oss; - oss << "[ " << shape[0]; - for (int i = 1; i < shape.size(); ++i) { - oss << " ," << shape[i]; - } - oss << " ]"; - return oss.str(); -} -std::vector ComputeAndCheckConcatOutputShape( - const std::vector& input, int axis) { +std::vector +ComputeAndCheckConcatOutputShape(const std::vector& input, int axis) { const size_t n = input.size(); auto out_dims = input[0].shape; size_t in_zero_dims_size = out_dims.size(); @@ -58,8 +49,7 @@ std::vector ComputeAndCheckConcatOutputShape( return out_dims; } -template -struct ConcatFunctor { +template struct ConcatFunctor { void operator()(const std::vector& input, int axis, FDTensor* output) { size_t num = input.size(); diff --git a/fastdeploy/function/elementwise.cc b/fastdeploy/function/elementwise.cc new file mode 100644 index 000000000..27dacbcd9 --- /dev/null +++ b/fastdeploy/function/elementwise.cc @@ -0,0 +1,75 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/function/elementwise.h" +#include "fastdeploy/function/eigen.h" +#include "fastdeploy/function/elementwise_base.h" +#include "fastdeploy/function/elementwise_functor.h" +#include "fastdeploy/utils/utils.h" +#include + +namespace fastdeploy { +namespace function { + +DEFINE_ELEMENTWISE_OP(Add); +DEFINE_ELEMENTWISE_OP(Multiply); +DEFINE_ELEMENTWISE_OP(Subtract); +DEFINE_ELEMENTWISE_OP(Divide); + +void Add(const FDTensor& x, const FDTensor& y, FDTensor* out) { + FD_VISIT_ALL_TYPES(x.dtype, "AddRawKernel", + ([&] { AddRawKernel()(x, y, -1, out); })); +} + +FDTensor operator+(const FDTensor& x, const FDTensor& y) { + FDTensor out; + Add(x, y, &out); + return out; +} + +void Subtract(const FDTensor& x, const FDTensor& y, FDTensor* out) { + FD_VISIT_ALL_TYPES(x.dtype, "SubtractRawKernel", + ([&] { SubtractRawKernel()(x, y, -1, out); })); +} + +FDTensor operator-(const FDTensor& x, const FDTensor& y) { + FDTensor out; + Subtract(x, y, &out); + return out; +} + +void Multiply(const FDTensor& x, const FDTensor& y, FDTensor* out) { + FD_VISIT_ALL_TYPES(x.dtype, "MultiplyRawKernel", + ([&] { MultiplyRawKernel()(x, y, -1, out); })); +} + +FDTensor operator*(const FDTensor& x, const FDTensor& y) { + FDTensor out; + Multiply(x, y, &out); + return out; +} + +void Divide(const FDTensor& x, const FDTensor& y, FDTensor* out) { + FD_VISIT_ALL_TYPES(x.dtype, "DivideRawKernel", + ([&] { DivideRawKernel()(x, y, -1, out); })); +} + +FDTensor operator/(const FDTensor& x, const FDTensor& y) { + FDTensor out; + Divide(x, y, &out); + return out; +} + +} // namespace function +} // namespace fastdeploy diff --git a/fastdeploy/function/elementwise.h b/fastdeploy/function/elementwise.h new file mode 100644 index 000000000..33eb5b762 --- /dev/null +++ b/fastdeploy/function/elementwise.h @@ -0,0 +1,60 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "fastdeploy/core/fd_tensor.h" + +namespace fastdeploy { +namespace function { + +/** Excute the add operation for input FDTensors. *out = x + y. + @param x The input tensor. + @param y The input tensor. + @param out The output tensor which stores the result. +*/ +FASTDEPLOY_DECL void Add(const FDTensor& x, const FDTensor& y, FDTensor* out); + +FASTDEPLOY_DECL FDTensor operator+(const FDTensor& x, const FDTensor& y); + +/** Excute the subtract operation for input FDTensors. *out = x - y. + @param x The input tensor. + @param y The input tensor. + @param out The output tensor which stores the result. +*/ +FASTDEPLOY_DECL void Subtract(const FDTensor& x, const FDTensor& y, + FDTensor* out); + +FASTDEPLOY_DECL FDTensor operator-(const FDTensor& x, const FDTensor& y); + +/** Excute the multiply operation for input FDTensors. *out = x * y. + @param x The input tensor. + @param y The input tensor. + @param out The output tensor which stores the result. +*/ +FASTDEPLOY_DECL void Multiply(const FDTensor& x, const FDTensor& y, + FDTensor* out); + +FASTDEPLOY_DECL FDTensor operator*(const FDTensor& x, const FDTensor& y); +/** Excute the divide operation for input FDTensors. *out = x / y. + @param x The input tensor. + @param y The input tensor. + @param out The output tensor which stores the result. +*/ +FASTDEPLOY_DECL void Divide(const FDTensor& x, const FDTensor& y, + FDTensor* out); +FASTDEPLOY_DECL FDTensor operator/(const FDTensor& x, const FDTensor& y); + +} // namespace function +} // namespace fastdeploy diff --git a/fastdeploy/function/elementwise_base.h b/fastdeploy/function/elementwise_base.h new file mode 100644 index 000000000..e2fab684e --- /dev/null +++ b/fastdeploy/function/elementwise_base.h @@ -0,0 +1,263 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +#include "fastdeploy/core/fd_tensor.h" +#include "fastdeploy/function/eigen.h" + +namespace fastdeploy { +namespace function { + +#define DEFINE_ELEMENTWISE_OP(name) \ + template struct name##RawKernel { \ + void operator()(const FDTensor& x, const FDTensor& y, int axis, \ + FDTensor* out) { \ + if (x.Shape() == y.Shape()) { \ + SameDimsElementwiseCompute>()(x, y, out); \ + } else { \ + auto x_dims = x.Shape(); \ + auto y_dims = y.Shape(); \ + if (x_dims.size() >= y_dims.size()) { \ + ElementwiseCompute, T>(x, y, axis, \ + name##Functor(), out); \ + } else { \ + ElementwiseCompute, T>( \ + x, y, axis, Inverse##name##Functor(), out); \ + } \ + } \ + } \ + } + +inline void GetMidDims(const std::vector& x_dims, + const std::vector& y_dims, const int axis, + int* pre, int* n, int* post, + int* is_run_common_broadcast) { + *pre = 1; + *n = 1; + *post = 1; + *is_run_common_broadcast = 0; + for (int i = 0; i < axis; ++i) { + (*pre) *= x_dims[i]; + } + for (int i = 0; i < y_dims.size(); ++i) { + if (x_dims[i + axis] != y_dims[i]) { + FDASSERT(y_dims[i] == 1 || x_dims[i + axis] == 1, + "Broadcast dimension mismatch. Operands " + "could not be broadcast together with the shape of " + "X = [%s] and the shape of Y = [%s]. Received [%d] " + "in X is not equal to [%d] in Y.", + Str(x_dims).c_str(), Str(y_dims).c_str(), x_dims[i + axis], + y_dims[i]); + *is_run_common_broadcast = 1; + return; + } + (*n) *= y_dims[i]; + } + for (int i = axis + y_dims.size(); i < x_dims.size(); ++i) { + (*post) *= x_dims[i]; + } +} + +inline std::vector +TrimTrailingSingularDims(const std::vector& dims) { + // Remove trailing dimensions of size 1 for y + auto actual_dims_size = dims.size(); + for (; actual_dims_size != 0; --actual_dims_size) { + if (dims[actual_dims_size - 1] != 1) + break; + } + if (actual_dims_size == dims.size()) + return dims; + std::vector trim_dims; + trim_dims.resize(actual_dims_size); + for (int i = 0; i < actual_dims_size; ++i) { + trim_dims[i] = dims[i]; + } + return trim_dims; +} + +inline int GetElementwiseIndex(const int64_t* x_dims_array, const int max_dim, + const int64_t* index_array) { + int index_ = 0; + for (int i = 0; i < max_dim; i++) { + if (x_dims_array[i] > 1) { + index_ = index_ * x_dims_array[i] + index_array[i]; + } + } + return index_; +} + +inline void UpdateElementwiseIndexArray(const int64_t* out_dims_array, + const int max_dim, + int64_t* index_array) { + for (int i = max_dim - 1; i >= 0; --i) { + ++index_array[i]; + if (index_array[i] >= out_dims_array[i]) { + index_array[i] -= out_dims_array[i]; + } else { + break; + } + } +} + +inline void GetBroadcastDimsArrays(const std::vector& x_dims, + const std::vector& y_dims, + int64_t* x_dims_array, int64_t* y_dims_array, + int64_t* out_dims_array, const int max_dim, + const int axis) { + FDASSERT(axis >= 0, + "Axis should be great than or equal to 0, but received axis is %d.", + axis); + FDASSERT(axis < max_dim, + "Axis should be less than %d, but received axis is %d.", max_dim, + axis); + if (x_dims.size() > y_dims.size()) { + std::fill(y_dims_array, y_dims_array + axis, 1); + if (axis + y_dims.size() < max_dim) { + std::fill(y_dims_array + axis + y_dims.size(), y_dims_array + max_dim, 1); + } + std::copy(x_dims.data(), x_dims.data() + x_dims.size(), x_dims_array); + std::copy(y_dims.data(), y_dims.data() + y_dims.size(), + y_dims_array + axis); + } else { + std::fill(x_dims_array, x_dims_array + axis, 1); + if (axis + x_dims.size() < max_dim) { + std::fill(x_dims_array + axis + x_dims.size(), x_dims_array + max_dim, 1); + } + std::copy(x_dims.data(), x_dims.data() + x_dims.size(), + x_dims_array + axis); + std::copy(y_dims.data(), y_dims.data() + y_dims.size(), y_dims_array); + } + + for (int i = 0; i < max_dim; i++) { + FDASSERT(x_dims_array[i] == y_dims_array[i] || x_dims_array[i] <= 1 || + y_dims_array[i] <= 1, + "Broadcast dimension mismatch. Operands " + "could not be broadcast together with the shape of " + "X = [%s] and the shape of Y = [%s]. Received [%d] " + "in X is not equal to [%d] in Y.", + Str(x_dims).c_str(), Str(y_dims).c_str(), x_dims[i + axis], + y_dims[i]); + if ((x_dims_array[i] > 1 || y_dims_array[i] > 1) || + (x_dims_array[i] == 1 && y_dims_array[i] == 1)) { + out_dims_array[i] = (std::max)(x_dims_array[i], y_dims_array[i]); + } else { + out_dims_array[i] = -1; + } + } +} + +template +void CommonForwardBroadcastCPU(const FDTensor& x, const FDTensor& y, + FDTensor* z, int64_t* x_dims_array, + int64_t* y_dims_array, int64_t* out_dims_array, + int max_dim, Functor func, + const bool is_xsize_larger = true) { + std::vector index_array(max_dim, 0); + const T* x_data = reinterpret_cast(x.Data()); + const T* y_data = reinterpret_cast(y.Data()); + FDASSERT(x_data != nullptr, "The input X should not be empty."); + FDASSERT(y_data != nullptr, "The input X should not be empty."); + OutType* out_data = reinterpret_cast(z->Data()); + + const int out_size = std::accumulate(out_dims_array, out_dims_array + max_dim, + 1, std::multiplies()); + int x_index, y_index; + for (int out_index = 0; out_index < out_size; ++out_index) { + x_index = GetElementwiseIndex(x_dims_array, max_dim, index_array.data()); + y_index = GetElementwiseIndex(y_dims_array, max_dim, index_array.data()); + if (is_xsize_larger) { + out_data[out_index] = func(x_data[x_index], y_data[y_index]); + } else { + out_data[out_index] = func(y_data[y_index], x_data[x_index]); + } + + UpdateElementwiseIndexArray(out_dims_array, max_dim, index_array.data()); + } +} + +template +void CommonElementwiseBroadcastForward(const FDTensor& x, const FDTensor& y, + FDTensor* z, + const std::vector& x_dims, + const std::vector& y_dims, + Functor func, int axis, + const bool is_xsize_larger = true) { + int x_dims_size = x_dims.size(); + int y_dims_size = y_dims.size(); + int max_dim = (std::max)(x_dims_size, y_dims_size); + axis = (axis == -1 ? std::abs(x_dims_size - y_dims_size) : axis); + FDASSERT(axis >= 0, + "Axis should be great than or equal to 0, but received axis is %d.", + axis); + FDASSERT(axis < max_dim, + "Axis should be less than %d, but received axis is %d.", max_dim, + axis); + std::vector x_dims_array(max_dim); + std::vector y_dims_array(max_dim); + std::vector out_dims_array(max_dim); + GetBroadcastDimsArrays(x_dims, y_dims, x_dims_array.data(), + y_dims_array.data(), out_dims_array.data(), max_dim, + axis); + z->Allocate(out_dims_array, TypeToDataType::dtype); + CommonForwardBroadcastCPU( + x, y, z, x_dims_array.data(), y_dims_array.data(), out_dims_array.data(), + max_dim, func, is_xsize_larger); +} + +template +void ElementwiseCompute(const FDTensor& x, const FDTensor& y, int axis, + Functor func, FDTensor* z) { + auto x_dims = x.Shape(); + auto y_dims = y.Shape(); + bool is_xsize_larger = true; + int max_dim = x_dims.size(); + if (x_dims.size() < y_dims.size()) { + is_xsize_larger = false; + max_dim = y_dims.size(); + } + + int diff_size = x_dims.size() - y_dims.size(); + axis = (axis == -1 ? std::abs(diff_size) : axis); + FDASSERT(axis >= 0, + "Axis should be great than or equal to 0, but received axis is %d.", + axis); + FDASSERT(axis < max_dim, + "Axis should be less than %d, but received axis is %d.", max_dim, + axis); + + int pre, n, post, is_run_common_broadcast, axis_trim = 0; + if (is_xsize_larger) { + auto y_dims_trimed = TrimTrailingSingularDims(y_dims); + axis_trim = (y_dims_trimed.size() == 0) ? x_dims.size() : axis; + GetMidDims(x_dims, y_dims_trimed, axis_trim, &pre, &n, &post, + &is_run_common_broadcast); + } else { + auto x_dims_trimed = TrimTrailingSingularDims(x_dims); + axis_trim = (x_dims_trimed.size() == 0) ? y_dims.size() : axis; + GetMidDims(y_dims, x_dims_trimed, axis_trim, &pre, &n, &post, + &is_run_common_broadcast); + } + // special case for common implementation. + // case 1: x=[2,3,1,5], y=[2,1,4,1] + // case 2: x=[2,3,4], y=[1,1,4] + CommonElementwiseBroadcastForward( + x, y, z, x_dims, y_dims, func, axis, is_xsize_larger); +} + +} // namespace function +} // namespace fastdeploy diff --git a/fastdeploy/function/elementwise_functor.h b/fastdeploy/function/elementwise_functor.h new file mode 100644 index 000000000..6a0c02e71 --- /dev/null +++ b/fastdeploy/function/elementwise_functor.h @@ -0,0 +1,126 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "fastdeploy/function/eigen.h" +#include "fastdeploy/function/elementwise.h" +#include "fastdeploy/function/elementwise_base.h" +#include + +namespace fastdeploy { +namespace function { + +template struct SameDimsElementwiseCompute { + void operator()(const FDTensor& x, const FDTensor& y, FDTensor* z) { + z->Allocate(x.Shape(), x.Dtype()); + Functor()(x, y, z); + } +}; + +template struct SameDimsAddFunctor { + void operator()(const FDTensor& x, const FDTensor& y, FDTensor* z) { + const auto& dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + auto eigen_x = EigenVector::Flatten(x); + auto eigen_y = EigenVector::Flatten(y); + auto eigen_z = EigenVector::Flatten(*z); + eigen_z.device(dev) = eigen_x + eigen_y; + } +}; + +template struct SameDimsSubtractFunctor { + void operator()(const FDTensor& x, const FDTensor& y, FDTensor* z) { + const auto& dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + auto eigen_x = EigenVector::Flatten(x); + auto eigen_y = EigenVector::Flatten(y); + auto eigen_z = EigenVector::Flatten(*z); + eigen_z.device(dev) = eigen_x - eigen_y; + } +}; + +template struct SameDimsMultiplyFunctor { + void operator()(const FDTensor& x, const FDTensor& y, FDTensor* z) { + const auto& dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + auto eigen_x = EigenVector::Flatten(x); + auto eigen_y = EigenVector::Flatten(y); + auto eigen_z = EigenVector::Flatten(*z); + eigen_z.device(dev) = eigen_x * eigen_y; + } +}; + +template struct SameDimsDivideFunctor { + void operator()(const FDTensor& x, const FDTensor& y, FDTensor* z) { + const auto& dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); + auto eigen_x = EigenVector::Flatten(x); + auto eigen_y = EigenVector::Flatten(y); + auto eigen_z = EigenVector::Flatten(*z); + eigen_z.device(dev) = eigen_x / eigen_y; + } +}; + +// Add +template struct AddFunctor { + inline T operator()(const T a, const T b) const { return a + b; } +}; +template struct InverseAddFunctor { + inline T operator()(const T a, const T b) const { return b + a; } +}; + +// Subtract +template struct SubtractFunctor { + inline T operator()(const T a, const T b) const { return a - b; } +}; +template struct InverseSubtractFunctor { + inline T operator()(const T a, const T b) const { return b - a; } +}; + +// Multiply +template struct MultiplyFunctor { + inline T operator()(const T a, const T b) const { return a * b; } +}; +template <> struct MultiplyFunctor { + inline bool operator()(const bool a, const bool b) const { return a && b; } +}; +template struct InverseMultiplyFunctor { + inline T operator()(const T a, const T b) const { return b * a; } +}; +template <> struct InverseMultiplyFunctor { + inline bool operator()(const bool a, const bool b) const { return b && a; } +}; + +// Divide +#define DIV_ERROR_INFO \ + "InvalidArgumentError: Integer division by zero encountered in " \ + "(floor) divide. Please check the input value." + +template struct DivideFunctor { + inline T operator()(const T a, const T b) const { return a / b; } +}; + +template +struct DivideFunctor< + T, typename std::enable_if::value>::type> { + inline T operator()(const T a, const T b) const { + // For int32/int64, need to check whether the divison is zero. + FDASSERT(b != 0, DIV_ERROR_INFO); + return a / b; + } +}; + +template struct InverseDivideFunctor { + inline T operator()(const T a, const T b) const { return b / a; } +}; + +} // namespace function +} // namespace fastdeploy diff --git a/fastdeploy/utils/utils.cc b/fastdeploy/utils/utils.cc index 6e76c7888..d89b1d555 100644 --- a/fastdeploy/utils/utils.cc +++ b/fastdeploy/utils/utils.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "fastdeploy/utils/utils.h" +#include namespace fastdeploy { @@ -55,4 +56,14 @@ std::vector GetStride(const std::vector& dims) { return result; } +std::string Str(const std::vector& shape) { + std::ostringstream oss; + oss << "[ " << shape[0]; + for (int i = 1; i < shape.size(); ++i) { + oss << " ," << shape[i]; + } + oss << " ]"; + return oss.str(); +} + } // namespace fastdeploy diff --git a/fastdeploy/utils/utils.h b/fastdeploy/utils/utils.h index 994ea9baa..45b0f57cd 100644 --- a/fastdeploy/utils/utils.h +++ b/fastdeploy/utils/utils.h @@ -14,15 +14,15 @@ #pragma once -#include #include +#include #include #include +#include #include #include #include -#include #if defined(_WIN32) #ifdef FASTDEPLOY_LIB @@ -45,8 +45,7 @@ class FASTDEPLOY_DECL FDLogger { } explicit FDLogger(bool verbose, const std::string& prefix = "[FastDeploy]"); - template - FDLogger& operator<<(const T& val) { + template FDLogger& operator<<(const T& val) { if (!verbose_) { return *this; } @@ -75,37 +74,37 @@ FASTDEPLOY_DECL bool ReadBinaryFromFile(const std::string& file, #define __REL_FILE__ __FILE__ #endif -#define FDERROR \ - FDLogger(true, "[ERROR]") << __REL_FILE__ << "(" << __LINE__ \ - << ")::" << __FUNCTION__ << "\t" +#define FDERROR \ + FDLogger(true, "[ERROR]") \ + << __REL_FILE__ << "(" << __LINE__ << ")::" << __FUNCTION__ << "\t" -#define FDWARNING \ - FDLogger(true, "[WARNING]") << __REL_FILE__ << "(" << __LINE__ \ - << ")::" << __FUNCTION__ << "\t" +#define FDWARNING \ + FDLogger(true, "[WARNING]") \ + << __REL_FILE__ << "(" << __LINE__ << ")::" << __FUNCTION__ << "\t" -#define FDINFO \ - FDLogger(true, "[INFO]") << __REL_FILE__ << "(" << __LINE__ \ +#define FDINFO \ + FDLogger(true, "[INFO]") << __REL_FILE__ << "(" << __LINE__ \ << ")::" << __FUNCTION__ << "\t" -#define FDASSERT(condition, format, ...) \ - if (!(condition)) { \ - int n = std::snprintf(nullptr, 0, format, ##__VA_ARGS__); \ - std::vector buffer(n + 1); \ - std::snprintf(buffer.data(), n + 1, format, ##__VA_ARGS__); \ - FDERROR << buffer.data() << std::endl; \ - std::abort(); \ +#define FDASSERT(condition, format, ...) \ + if (!(condition)) { \ + int n = std::snprintf(nullptr, 0, format, ##__VA_ARGS__); \ + std::vector buffer(n + 1); \ + std::snprintf(buffer.data(), n + 1, format, ##__VA_ARGS__); \ + FDERROR << buffer.data() << std::endl; \ + std::abort(); \ } ///////// Basic Marco /////////// -#define FD_PRIVATE_CASE_TYPE_USING_HINT(NAME, enum_type, type, HINT, ...) \ - case enum_type: { \ - using HINT = type; \ - __VA_ARGS__(); \ - break; \ +#define FD_PRIVATE_CASE_TYPE_USING_HINT(NAME, enum_type, type, HINT, ...) \ + case enum_type: { \ + using HINT = type; \ + __VA_ARGS__(); \ + break; \ } -#define FD_PRIVATE_CASE_TYPE(NAME, enum_type, type, ...) \ +#define FD_PRIVATE_CASE_TYPE(NAME, enum_type, type, ...) \ FD_PRIVATE_CASE_TYPE_USING_HINT(NAME, enum_type, type, data_t, __VA_ARGS__) // Visit different data type to match the corresponding function of FDTensor @@ -123,68 +122,70 @@ FASTDEPLOY_DECL bool ReadBinaryFromFile(const std::string& file, __VA_ARGS__) \ FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP64, double, \ __VA_ARGS__) \ - default: \ - FDASSERT( \ - false, \ - "Invalid enum data type. Expect to accept data type BOOL, INT32, " \ - "INT64, FP32, FP64, but receive type %s.", \ - Str(__dtype__).c_str()); \ + default: \ + FDASSERT(false, \ + "Invalid enum data type. Expect to accept data " \ + "type BOOL, INT32, " \ + "INT64, FP32, FP64, but receive type %s.", \ + Str(__dtype__).c_str()); \ } \ }() -#define FD_VISIT_INT_FLOAT_TYPES(TYPE, NAME, ...) \ - [&] { \ - const auto& __dtype__ = TYPE; \ - switch (__dtype__) { \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT32, int32_t, \ - __VA_ARGS__) \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT64, int64_t, \ - __VA_ARGS__) \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP32, float, \ - __VA_ARGS__) \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP64, double, \ - __VA_ARGS__) \ - default: \ - FDASSERT(false, \ - "Invalid enum data type. Expect to accept data type INT32, " \ - "INT64, FP32, FP64, but receive type %s.", \ - Str(__dtype__).c_str()); \ - } \ +#define FD_VISIT_INT_FLOAT_TYPES(TYPE, NAME, ...) \ + [&] { \ + const auto& __dtype__ = TYPE; \ + switch (__dtype__) { \ + FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT32, int32_t, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT64, int64_t, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP32, float, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP64, double, \ + __VA_ARGS__) \ + default: \ + FDASSERT(false, \ + "Invalid enum data type. Expect to accept data type INT32, " \ + "INT64, FP32, FP64, but receive type %s.", \ + Str(__dtype__).c_str()); \ + } \ }() -#define FD_VISIT_FLOAT_TYPES(TYPE, NAME, ...) \ - [&] { \ - const auto& __dtype__ = TYPE; \ - switch (__dtype__) { \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP32, float, \ - __VA_ARGS__) \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP64, double, \ - __VA_ARGS__) \ - default: \ - FDASSERT(false, \ - "Invalid enum data type. Expect to accept data type FP32, " \ - "FP64, but receive type %s.", \ - Str(__dtype__).c_str()); \ - } \ +#define FD_VISIT_FLOAT_TYPES(TYPE, NAME, ...) \ + [&] { \ + const auto& __dtype__ = TYPE; \ + switch (__dtype__) { \ + FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP32, float, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP64, double, \ + __VA_ARGS__) \ + default: \ + FDASSERT(false, \ + "Invalid enum data type. Expect to accept data type FP32, " \ + "FP64, but receive type %s.", \ + Str(__dtype__).c_str()); \ + } \ }() -#define FD_VISIT_INT_TYPES(TYPE, NAME, ...) \ - [&] { \ - const auto& __dtype__ = TYPE; \ - switch (__dtype__) { \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT32, int32_t, \ - __VA_ARGS__) \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT64, int64_t, \ - __VA_ARGS__) \ - default: \ - FDASSERT(false, \ - "Invalid enum data type. Expect to accept data type INT32, " \ - "INT64, but receive type %s.", \ - Str(__dtype__).c_str()); \ - } \ +#define FD_VISIT_INT_TYPES(TYPE, NAME, ...) \ + [&] { \ + const auto& __dtype__ = TYPE; \ + switch (__dtype__) { \ + FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT32, int32_t, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT64, int64_t, \ + __VA_ARGS__) \ + default: \ + FDASSERT(false, \ + "Invalid enum data type. Expect to accept data type INT32, " \ + "INT64, but receive type %s.", \ + Str(__dtype__).c_str()); \ + } \ }() -FASTDEPLOY_DECL std::vector GetStride( - const std::vector& dims); +FASTDEPLOY_DECL std::vector +GetStride(const std::vector& dims); + +FASTDEPLOY_DECL std::string Str(const std::vector& shape); } // namespace fastdeploy diff --git a/tests/core/test_fd_tensor.cc b/tests/core/test_fd_tensor.cc index ad4d639e4..286d14579 100644 --- a/tests/core/test_fd_tensor.cc +++ b/tests/core/test_fd_tensor.cc @@ -12,12 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "fastdeploy/core/fd_tensor.h" +#include "gtest_utils.h" +#include "gtest/gtest.h" #include #include #include -#include "fastdeploy/core/fd_tensor.h" -#include "gtest/gtest.h" -#include "gtest_utils.h" namespace fastdeploy { @@ -86,4 +86,18 @@ TEST(fastdeploy, fd_tensor_assignment) { ASSERT_EQ(tensor1.Data(), nullptr); } -} // namespace fastdeploy \ No newline at end of file +TEST(fastdeploy, fd_tensor_reshape) { + CheckShape check_shape; + FDTensor x; + x.Allocate({2, 3, 4, 5}, FDDataType::FP32); + x.Reshape({-1, 3, 2, 2, 5}); + check_shape(x.Shape(), {2, 3, 2, 2, 5}); + + x.Reshape({0, -1, 5, 2}); + check_shape(x.Shape(), {2, 6, 5, 2}); + + x.Reshape({2, 3, 0, 0, 2}); + check_shape(x.Shape(), {2, 3, 5, 2, 2}); +} + +} // namespace fastdeploy diff --git a/tests/function/test_elementwise.cc b/tests/function/test_elementwise.cc new file mode 100644 index 000000000..6319f37cc --- /dev/null +++ b/tests/function/test_elementwise.cc @@ -0,0 +1,451 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/core/fd_tensor.h" +#include "fastdeploy/function/elementwise.h" +#include "glog/logging.h" +#include "gtest_utils.h" +#include "gtest/gtest.h" +#include +#include +#include + +namespace fastdeploy { +namespace function { + +std::tuple, std::vector> CreateSameDimeData() { + // Shape: [2, 3, 4] + std::vector x_data = { + 0.8428625, 0.6461913, 0.13740455, 0.11430702, 0.659926, 0.535816, + 0.7429162, 0.8456049, 0.21228176, 0.29970083, 0.8621713, 0.40894133, + 0.12684688, 0.1566195, 0.42884097, 0.8476526, 0.2458633, 0.669046, + 0.87888306, 0.6762589, 0.666453, 0.32523027, 0.4139388, 0.8341406}; + // Shape: [2, 3, 4] + std::vector y_data = { + 0.8345295, 0.551608, 0.77101785, 0.386742, 0.12658621, 0.41240612, + 0.20051356, 0.68455917, 0.37947154, 0.2953741, 0.97703844, 0.2931625, + 0.2344262, 0.5054064, 0.40617892, 0.16315177, 0.71458364, 0.3748885, + 0.65257984, 0.83870554, 0.55464447, 0.38836837, 0.472637, 0.5546991}; + return std::make_tuple(x_data, y_data); +} + +std::tuple, std::vector> CreateBroadcastDim1Data() { + // Shape: [2, 3, 4] + std::vector x_data = { + 0.8428625, 0.6461913, 0.13740455, 0.11430702, 0.659926, 0.535816, + 0.7429162, 0.8456049, 0.21228176, 0.29970083, 0.8621713, 0.40894133, + 0.12684688, 0.1566195, 0.42884097, 0.8476526, 0.2458633, 0.669046, + 0.87888306, 0.6762589, 0.666453, 0.32523027, 0.4139388, 0.8341406}; + // Shape: [2, 1, 1] + std::vector y_data = {0.97375137, 0.11732706}; + return std::make_tuple(x_data, y_data); +} + +std::tuple, std::vector> CreateBroadcastDim2Data() { + // Shape: [2, 3, 4] + std::vector x_data = { + 0.8428625, 0.6461913, 0.13740455, 0.11430702, 0.659926, 0.535816, + 0.7429162, 0.8456049, 0.21228176, 0.29970083, 0.8621713, 0.40894133, + 0.12684688, 0.1566195, 0.42884097, 0.8476526, 0.2458633, 0.669046, + 0.87888306, 0.6762589, 0.666453, 0.32523027, 0.4139388, 0.8341406}; + // Shape: [1, 3, 1] + std::vector y_data = {0.30803263, 0.41172066, 0.5588573}; + return std::make_tuple(x_data, y_data); +} + +std::tuple, std::vector> CreateBroadcastDim3Data() { + // Shape: [2, 3, 4] + std::vector x_data = { + 0.8428625, 0.6461913, 0.13740455, 0.11430702, 0.659926, 0.535816, + 0.7429162, 0.8456049, 0.21228176, 0.29970083, 0.8621713, 0.40894133, + 0.12684688, 0.1566195, 0.42884097, 0.8476526, 0.2458633, 0.669046, + 0.87888306, 0.6762589, 0.666453, 0.32523027, 0.4139388, 0.8341406}; + // Shape: [1, 1, 4] + std::vector y_data = {0.62653106, 0.5128424, 0.9891219, 0.32416528}; + return std::make_tuple(x_data, y_data); +} + +std::tuple, std::vector> CreateBroadcastDim4Data() { + // Shape: [2, 1, 4] + std::vector x_data = {0.8428625, 0.6461913, 0.13740455, 0.11430702, + 0.659926, 0.535816, 0.7429162, 0.8456049}; + // Shape: [2, 2, 1] + std::vector y_data = {0.62653106, 0.5128424, 0.9891219, 0.32416528}; + return std::make_tuple(x_data, y_data); +} + +TEST(fastdeploy, check_same_dim) { + CheckShape check_shape; + CheckData check_data; + FDTensor x, y, z; + + auto test_data = CreateSameDimeData(); + auto x_data = std::get<0>(test_data); + auto y_data = std::get<1>(test_data); + x.SetExternalData({2, 3, 4}, FDDataType::FP32, x_data.data()); + y.SetExternalData({2, 3, 4}, FDDataType::FP32, y_data.data()); + + // Test Add functions + std::vector add_result = { + 1.677392, 1.1977993, 0.9084224, 0.50104904, 0.7865122, 0.94822216, + 0.94342977, 1.530164, 0.5917533, 0.5950749, 1.8392098, 0.70210385, + 0.36127308, 0.66202587, 0.8350199, 1.0108044, 0.96044695, 1.0439345, + 1.5314629, 1.5149645, 1.2210975, 0.7135986, 0.8865758, 1.3888397}; + + Add(x, y, &z); + check_shape(z.shape, {2, 3, 4}); + check_data(reinterpret_cast(z.Data()), add_result.data(), + add_result.size()); + z = x + y; + check_data(reinterpret_cast(z.Data()), add_result.data(), + add_result.size()); + // Test subtract + std::vector sub_result = { + 0.008332968, 0.09458327, -0.6336133, -0.27243498, 0.5333398, + 0.1234099, 0.5424027, 0.16104573, -0.16718978, 0.004326731, + -0.11486715, 0.11577883, -0.10757932, -0.3487869, 0.022662044, + 0.6845008, -0.46872032, 0.29415748, 0.22630322, -0.16244662, + 0.11180854, -0.0631381, -0.058698207, 0.27944148}; + Subtract(x, y, &z); + check_shape(z.shape, {2, 3, 4}); + check_data(reinterpret_cast(z.Data()), sub_result.data(), + sub_result.size()); + z = x - y; + check_data(reinterpret_cast(z.Data()), sub_result.data(), + sub_result.size()); + + // Test multiply + std::vector mul_result = { + 0.70339364, 0.3564443, 0.105941355, 0.044207327, 0.083537534, + 0.2209738, 0.14896478, 0.5788666, 0.08055489, 0.08852386, + 0.8423745, 0.11988626, 0.029736232, 0.079156496, 0.17418616, + 0.13829602, 0.17568989, 0.25081766, 0.57354134, 0.5671821, + 0.36964446, 0.12630916, 0.19564278, 0.46269706}; + Multiply(x, y, &z); + check_shape(z.shape, {2, 3, 4}); + check_data(reinterpret_cast(z.Data()), mul_result.data(), + mul_result.size()); + z = x * y; + check_data(reinterpret_cast(z.Data()), mul_result.data(), + mul_result.size()); + + // Test divide + std::vector div_result = { + 1.0099852, 1.1714683, 0.17821188, 0.29556403, 5.2132535, 1.2992436, + 3.7050674, 1.2352546, 0.5594142, 1.0146483, 0.88243335, 1.3949306, + 0.54109514, 0.30988827, 1.0557933, 5.195485, 0.34406513, 1.7846532, + 1.3467824, 0.8063127, 1.201586, 0.8374273, 0.875807, 1.5037713}; + Divide(x, y, &z); + check_shape(z.shape, {2, 3, 4}); + check_data(reinterpret_cast(z.Data()), div_result.data(), + div_result.size()); + z = x / y; + check_data(reinterpret_cast(z.Data()), div_result.data(), + div_result.size()); +} + +TEST(fastdeploy, check_broadcast_dim1) { + CheckShape check_shape; + CheckData check_data; + FDTensor x, y, z; + + auto test_data = CreateBroadcastDim1Data(); + auto x_data = std::get<0>(test_data); + auto y_data = std::get<1>(test_data); + x.SetExternalData({2, 3, 4}, FDDataType::FP32, x_data.data()); + y.SetExternalData({2, 1, 1}, FDDataType::FP32, y_data.data()); + + // Test Add functions + std::vector add_result = { + 1.816614, 1.619943, 1.111156, 1.088058, 1.633677, 1.509567, + 1.716668, 1.819356, 1.186033, 1.273452, 1.835923, 1.382693, + 0.244174, 0.273947, 0.546168, 0.96498, 0.36319, 0.786373, + 0.99621, 0.793586, 0.78378, 0.442557, 0.531266, 0.951468}; + + Add(x, y, &z); + check_shape(z.shape, {2, 3, 4}); + check_data(reinterpret_cast(z.Data()), add_result.data(), + add_result.size()); + z = x + y; + check_data(reinterpret_cast(z.Data()), add_result.data(), + add_result.size()); + + // Test subtract + std::vector sub_result = { + -0.130889, -0.32756, -0.836347, -0.859444, -0.313825, -0.437935, + -0.230835, -0.128146, -0.76147, -0.674051, -0.11158, -0.56481, + 0.00952, 0.039292, 0.311514, 0.730326, 0.128536, 0.551719, + 0.761556, 0.558932, 0.549126, 0.207903, 0.296612, 0.716814}; + Subtract(x, y, &z); + check_shape(z.shape, {2, 3, 4}); + check_data(reinterpret_cast(z.Data()), sub_result.data(), + sub_result.size()); + z = x - y; + check_data(reinterpret_cast(z.Data()), sub_result.data(), + sub_result.size()); + + // Test multiply + std::vector mul_result = { + 0.820738, 0.62923, 0.133798, 0.111307, 0.642604, 0.521752, + 0.723416, 0.823409, 0.20671, 0.291834, 0.83954, 0.398207, + 0.014883, 0.018376, 0.050315, 0.099453, 0.028846, 0.078497, + 0.103117, 0.079343, 0.078193, 0.038158, 0.048566, 0.097867}; + Multiply(x, y, &z); + check_shape(z.shape, {2, 3, 4}); + check_data(reinterpret_cast(z.Data()), mul_result.data(), + mul_result.size()); + z = x * y; + check_data(reinterpret_cast(z.Data()), mul_result.data(), + mul_result.size()); + + // Test divide + std::vector div_result = { + 0.865583, 0.66361, 0.141108, 0.117388, 0.677715, 0.55026, + 0.762942, 0.868399, 0.218004, 0.30778, 0.885412, 0.419965, + 1.081139, 1.334897, 3.65509, 7.224699, 2.095538, 5.702402, + 7.490881, 5.763879, 5.680301, 2.771997, 3.528076, 7.109533}; + Divide(x, y, &z); + check_shape(z.shape, {2, 3, 4}); + check_data(reinterpret_cast(z.Data()), div_result.data(), + div_result.size()); + z = x / y; + check_data(reinterpret_cast(z.Data()), div_result.data(), + div_result.size()); +} + +TEST(fastdeploy, check_broadcast_dim2) { + CheckShape check_shape; + CheckData check_data; + FDTensor x, y, z; + + auto test_data = CreateBroadcastDim2Data(); + auto x_data = std::get<0>(test_data); + auto y_data = std::get<1>(test_data); + x.SetExternalData({2, 3, 4}, FDDataType::FP32, x_data.data()); + y.SetExternalData({1, 3, 1}, FDDataType::FP32, y_data.data()); + + // Test Add functions + std::vector add_result = { + 1.150895, 0.954224, 0.445437, 0.42234, 1.071647, 0.947537, + 1.154637, 1.257326, 0.771139, 0.858558, 1.421029, 0.967799, + 0.43488, 0.464652, 0.736874, 1.155685, 0.657584, 1.080767, + 1.290604, 1.08798, 1.22531, 0.884088, 0.972796, 1.392998}; + + Add(x, y, &z); + check_shape(z.shape, {2, 3, 4}); + check_data(reinterpret_cast(z.Data()), add_result.data(), + add_result.size()); + z = x + y; + check_data(reinterpret_cast(z.Data()), add_result.data(), + add_result.size()); + + // Test subtract + std::vector sub_result = { + 0.53483, 0.338159, -0.170628, -0.193726, 0.248205, 0.124095, + 0.331196, 0.433884, -0.346576, -0.259156, 0.303314, -0.149916, + -0.181186, -0.151413, 0.120808, 0.53962, -0.165857, 0.257325, + 0.467162, 0.264538, 0.107596, -0.233627, -0.144919, 0.275283}; + Subtract(x, y, &z); + check_shape(z.shape, {2, 3, 4}); + check_data(reinterpret_cast(z.Data()), sub_result.data(), + sub_result.size()); + z = x - y; + check_data(reinterpret_cast(z.Data()), sub_result.data(), + sub_result.size()); + // Test multiply + std::vector mul_result = { + 0.259629, 0.199048, 0.042325, 0.03521, 0.271705, 0.220607, + 0.305874, 0.348153, 0.118635, 0.16749, 0.481831, 0.22854, + 0.039073, 0.048244, 0.132097, 0.261105, 0.101227, 0.27546, + 0.361854, 0.27843, 0.372452, 0.181757, 0.231333, 0.466166}; + Multiply(x, y, &z); + check_shape(z.shape, {2, 3, 4}); + check_data(reinterpret_cast(z.Data()), mul_result.data(), + mul_result.size()); + + // Test divide + std::vector div_result = { + 2.736277, 2.097801, 0.446071, 0.371087, 1.602849, 1.301407, + 1.804418, 2.053832, 0.37985, 0.536274, 1.54274, 0.731745, + 0.411797, 0.508451, 1.392193, 2.751827, 0.59716, 1.625, + 2.134659, 1.642519, 1.192528, 0.581956, 0.740688, 1.492582}; + Divide(x, y, &z); + check_shape(z.shape, {2, 3, 4}); + check_data(reinterpret_cast(z.Data()), div_result.data(), + div_result.size()); +} + +TEST(fastdeploy, check_broadcast_dim3) { + CheckShape check_shape; + CheckData check_data; + FDTensor x, y, z; + + auto test_data = CreateBroadcastDim3Data(); + auto x_data = std::get<0>(test_data); + auto y_data = std::get<1>(test_data); + x.SetExternalData({2, 3, 4}, FDDataType::FP32, x_data.data()); + y.SetExternalData({4}, FDDataType::FP32, y_data.data()); + + // Test Add functions + std::vector add_result = { + 1.469393, 1.159034, 1.126526, 0.438472, 1.286457, 1.048658, + 1.732038, 1.16977, 0.838813, 0.812543, 1.851293, 0.733107, + 0.753378, 0.669462, 1.417963, 1.171818, 0.872394, 1.181888, + 1.868005, 1.000424, 1.292984, 0.838073, 1.403061, 1.158306}; + + Add(x, y, &z); + check_shape(z.shape, {2, 3, 4}); + check_data(reinterpret_cast(z.Data()), add_result.data(), + add_result.size()); + z = x + y; + check_data(reinterpret_cast(z.Data()), add_result.data(), + add_result.size()); + + // Test subtract + std::vector sub_result = { + 0.216331, 0.133349, -0.851717, -0.209858, 0.033395, 0.022974, + -0.246206, 0.52144, -0.414249, -0.213142, -0.126951, 0.084776, + -0.499684, -0.356223, -0.560281, 0.523487, -0.380668, 0.156204, + -0.110239, 0.352094, 0.039922, -0.187612, -0.575183, 0.509975}; + Subtract(x, y, &z); + check_shape(z.shape, {2, 3, 4}); + check_data(reinterpret_cast(z.Data()), sub_result.data(), + sub_result.size()); + z = x - y; + check_data(reinterpret_cast(z.Data()), sub_result.data(), + sub_result.size()); + // Test multiply + std::vector mul_result = { + 0.52808, 0.331394, 0.13591, 0.037054, 0.413464, 0.274789, + 0.734835, 0.274116, 0.133001, 0.153699, 0.852793, 0.132565, + 0.079474, 0.080321, 0.424176, 0.27478, 0.154041, 0.343115, + 0.869322, 0.21922, 0.417554, 0.166792, 0.409436, 0.270399}; + Multiply(x, y, &z); + check_shape(z.shape, {2, 3, 4}); + check_data(reinterpret_cast(z.Data()), mul_result.data(), + mul_result.size()); + z = x * y; + check_data(reinterpret_cast(z.Data()), mul_result.data(), + mul_result.size()); + // Test divide + std::vector div_result = { + 1.345284, 1.260019, 0.138916, 0.35262, 1.053301, 1.044797, + 0.751087, 2.608561, 0.338821, 0.584392, 0.871653, 1.261521, + 0.202459, 0.305395, 0.433557, 2.614878, 0.39242, 1.304584, + 0.888549, 2.086155, 1.063719, 0.634172, 0.418491, 2.573195}; + Divide(x, y, &z); + check_shape(z.shape, {2, 3, 4}); + check_data(reinterpret_cast(z.Data()), div_result.data(), + div_result.size()); + z = x / y; + check_data(reinterpret_cast(z.Data()), div_result.data(), + div_result.size()); +} + +TEST(fastdeploy, check_broadcast_dim4) { + CheckShape check_shape; + CheckData check_data; + FDTensor x, y, z; + + auto test_data = CreateBroadcastDim4Data(); + auto x_data = std::get<0>(test_data); + auto y_data = std::get<1>(test_data); + x.SetExternalData({2, 1, 4}, FDDataType::FP32, x_data.data()); + y.SetExternalData({2, 2, 1}, FDDataType::FP32, y_data.data()); + + // Test Add functions + std::vector add_result = {1.469393, 1.272722, 0.763936, 0.740838, + 1.355705, 1.159034, 0.650247, 0.627149, + 1.649048, 1.524938, 1.732038, 1.834727, + 0.984091, 0.859981, 1.067081, 1.16977}; + + Add(x, y, &z); + check_shape(z.shape, {2, 2, 4}); + check_data(reinterpret_cast(z.Data()), add_result.data(), + add_result.size()); + + z = x + y; + check_data(reinterpret_cast(z.Data()), add_result.data(), + add_result.size()); + // Test subtract + std::vector sub_result = {0.216331, 0.01966, -0.489127, -0.512224, + 0.33002, 0.133349, -0.375438, -0.398535, + -0.329196, -0.453306, -0.246206, -0.143517, + 0.335761, 0.211651, 0.418751, 0.52144}; + Subtract(x, y, &z); + check_shape(z.shape, {2, 2, 4}); + check_data(reinterpret_cast(z.Data()), sub_result.data(), + sub_result.size()); + z = x - y; + check_data(reinterpret_cast(z.Data()), sub_result.data(), + sub_result.size()); + // Test multiply + std::vector mul_result = {0.52808, 0.404859, 0.086088, 0.071617, + 0.432256, 0.331394, 0.070467, 0.058621, + 0.652747, 0.529987, 0.734835, 0.836406, + 0.213925, 0.173693, 0.240828, 0.274116}; + Multiply(x, y, &z); + check_shape(z.shape, {2, 2, 4}); + check_data(reinterpret_cast(z.Data()), mul_result.data(), + mul_result.size()); + z = x * y; + check_data(reinterpret_cast(z.Data()), mul_result.data(), + mul_result.size()); + + // Test divide + std::vector div_result = {1.345284, 1.031379, 0.21931, 0.182444, + 1.643512, 1.260019, 0.267927, 0.222889, + 0.667184, 0.541709, 0.751087, 0.854905, + 2.03577, 1.65291, 2.291782, 2.608561}; + Divide(x, y, &z); + check_shape(z.shape, {2, 2, 4}); + check_data(reinterpret_cast(z.Data()), div_result.data(), + div_result.size()); + z = x / y; + check_data(reinterpret_cast(z.Data()), div_result.data(), + div_result.size()); +} + +TEST(fastdeploy, mixed_operation) { + CheckShape check_shape; + CheckData check_data; + FDTensor a, b, c, d, e, output; + + auto test_data = CreateSameDimeData(); + auto a_data = std::get<0>(test_data); + auto b_data = std::get<1>(test_data); + auto c_data = std::get<1>(CreateBroadcastDim1Data()); + auto d_data = std::get<1>(CreateBroadcastDim2Data()); + auto e_data = std::get<1>(CreateBroadcastDim3Data()); + + a.SetExternalData({2, 3, 4}, FDDataType::FP32, a_data.data()); + b.SetExternalData({2, 3, 4}, FDDataType::FP32, b_data.data()); + c.SetExternalData({2, 1, 1}, FDDataType::FP32, c_data.data()); + d.SetExternalData({1, 3, 1}, FDDataType::FP32, d_data.data()); + e.SetExternalData({1, 1, 4}, FDDataType::FP32, e_data.data()); + + std::vector result = { + 3.238058, 3.004797, 2.278015, 2.881238, 1.822084, 2.073209, + 1.524921, 2.619779, 1.196421, 1.318079, 1.59565, 1.538118, + -0.215903, -0.052794, -0.434044, 0.195022, -0.165874, 0.022943, + -0.130613, 0.527984, -0.046946, -0.176592, -0.583538, 0.348473}; + + output = a * b + c / d - e; + check_shape(output.shape, {2, 3, 4}); + check_data(reinterpret_cast(output.Data()), result.data(), + result.size()); +} + +} // namespace function +} // namespace fastdeploy \ No newline at end of file