[Functions] Add +-*/ operators and reshape for FDTensor (#655)

* Add +-*/ functions

* Add same dims test case for operations

* add broadcast 0

* Add broadcast dim2 testcase

* Add broadcast dim3 and dim4 testcase

* Add +-*/ operators

* Add mixed operation

* refresh code style

* Add reshape op

* update code style
This commit is contained in:
Jack Zhou
2022-11-23 11:34:02 +08:00
committed by GitHub
parent c11bfb8341
commit de98163efa
12 changed files with 1163 additions and 126 deletions

View File

@@ -149,7 +149,7 @@
# SpaceBeforeRangeBasedForLoopColon: true # SpaceBeforeRangeBasedForLoopColon: true
# SpaceInEmptyBlock: false # SpaceInEmptyBlock: false
# SpaceInEmptyParentheses: false # SpaceInEmptyParentheses: false
# SpacesBeforeTrailingComments: 1 # SpacesBeforeTrailingComments: 2
# SpacesInAngles: Never # SpacesInAngles: Never
# SpacesInConditionalStatement: false # SpacesInConditionalStatement: false
# SpacesInContainerLiterals: true # SpacesInContainerLiterals: true

95
fastdeploy/core/fd_tensor.cc Executable file → Normal file
View File

@@ -11,11 +11,11 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include <cstring>
#include "fastdeploy/core/fd_tensor.h" #include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/core/float16.h" #include "fastdeploy/core/float16.h"
#include "fastdeploy/utils/utils.h" #include "fastdeploy/utils/utils.h"
#include <algorithm>
#include <cstring>
#ifdef WITH_GPU #ifdef WITH_GPU
#include <cuda_runtime_api.h> #include <cuda_runtime_api.h>
#endif #endif
@@ -151,9 +151,63 @@ void FDTensor::Resize(const std::vector<int64_t>& new_shape,
shape.assign(new_shape.begin(), new_shape.end()); shape.assign(new_shape.begin(), new_shape.end());
} }
// Rewrites the `shape` member so the tensor is viewed with `new_shape`.
// Only the shape is modified here; the data buffer is not touched.
//
// Meaning of the entries of `new_shape`:
//   -1 : unknown extent, inferred from Numel(); at most one entry may be -1.
//    0 : copy the extent of the current shape at the same index.
//   >0 : used as given.
//
// Every violation is reported through FDASSERT. Returns true once the shape
// has been updated.
bool FDTensor::Reshape(const std::vector<int64_t>& new_shape) {
  const int64_t numel = Numel();
  const int64_t unk_dim_val = -1;
  const int64_t copy_dim_val = 0;

  std::vector<int64_t> output_shape(new_shape.size(), 0);
  // Product of all known extents; the -1 placeholder is excluded.
  int64_t capacity = 1;
  int unk_dim_idx = -1;
  for (size_t i = 0; i < new_shape.size(); ++i) {
    // printf-style formatting needs argument types matching the specifiers,
    // so the index is narrowed once and 64-bit values are printed via %lld.
    const int idx = static_cast<int>(i);
    if (new_shape[i] == unk_dim_val) {
      FDASSERT(unk_dim_idx == -1,
               "Only one dimension value of 'shape' in ReshapeOp can "
               "be -1. But received shape = [%s], shape[%d] is also -1.",
               Str(new_shape).c_str(), idx);
      unk_dim_idx = idx;
      continue;
    }
    if (new_shape[i] == copy_dim_val) {
      FDASSERT(i < shape.size(),
               "The index of 0 in `shape` must be less than "
               "the input tensor X's dimensions. "
               "But received shape = [%s], shape[%d] = 0, X's shape = [%s], "
               "X's dimensions = %d.",
               Str(new_shape).c_str(), idx, Str(shape).c_str(),
               static_cast<int>(shape.size()));
      output_shape[i] = shape[i];
    } else {
      FDASSERT(new_shape[i] > 0,
               "Each dimension value of 'shape' in ReshapeOp must not "
               "be negative except one unknown dimension. "
               "But received shape = [%s], shape[%d] = %lld.",
               Str(new_shape).c_str(), idx,
               static_cast<long long>(new_shape[i]));
      output_shape[i] = new_shape[i];
    }
    capacity *= output_shape[i];
  }

  if (unk_dim_idx != -1) {
    // A zero known capacity (a copied extent of 0) would divide by zero.
    FDASSERT(capacity != 0,
             "The 'shape' attribute in ReshapeOp is invalid. "
             "The unknown dimension cannot be inferred because the known "
             "capacity of 'shape' is 0. "
             "But received X's shape = [%s], 'shape' is [%s].",
             Str(shape).c_str(), Str(new_shape).c_str());
    output_shape[unk_dim_idx] = numel / capacity;
    FDASSERT(output_shape[unk_dim_idx] * capacity == numel,
             "The 'shape' attribute in ReshapeOp is invalid. "
             "The input tensor X'size must be divisible by known "
             "capacity of 'shape'. "
             "But received X's shape = [%s], X's size = %lld, "
             "'shape' is [%s], known capacity of 'shape' is %lld.",
             Str(shape).c_str(), static_cast<long long>(numel),
             Str(new_shape).c_str(), static_cast<long long>(capacity));
  } else {
    FDASSERT(numel == capacity,
             "The 'shape' in ReshapeOp is invalid. "
             "The input tensor X'size must be equal to the capacity of "
             "'shape'. "
             "But received X's shape = [%s], X's size = %lld, 'shape' is "
             "[%s], the capacity of 'shape' is %lld.",
             Str(shape).c_str(), static_cast<long long>(numel),
             Str(new_shape).c_str(), static_cast<long long>(capacity));
  }
  shape = output_shape;
  return true;
}
template <typename T> template <typename T>
void CalculateStatisInfo(const void* src_ptr, int size, double* mean, double* max, void CalculateStatisInfo(const void* src_ptr, int size, double* mean,
double* min) { double* max, double* min) {
const T* ptr = static_cast<const T*>(src_ptr); const T* ptr = static_cast<const T*>(src_ptr);
*mean = 0; *mean = 0;
*max = -99999999; *max = -99999999;
@@ -213,10 +267,9 @@ bool FDTensor::ReallocFn(size_t nbytes) {
} }
return buffer_ != nullptr; return buffer_ != nullptr;
#else #else
FDASSERT(false, FDASSERT(false, "The FastDeploy FDTensor allocator didn't compile under "
"The FastDeploy FDTensor allocator didn't compile under " "-DWITH_GPU=ON,"
"-DWITH_GPU=ON," "so this is an unexpected problem happend.");
"so this is an unexpected problem happend.");
#endif #endif
} else { } else {
if (is_pinned_memory) { if (is_pinned_memory) {
@@ -230,10 +283,9 @@ bool FDTensor::ReallocFn(size_t nbytes) {
} }
return buffer_ != nullptr; return buffer_ != nullptr;
#else #else
FDASSERT(false, FDASSERT(false, "The FastDeploy FDTensor allocator didn't compile under "
"The FastDeploy FDTensor allocator didn't compile under " "-DWITH_GPU=ON,"
"-DWITH_GPU=ON," "so this is an unexpected problem happend.");
"so this is an unexpected problem happend.");
#endif #endif
} }
buffer_ = realloc(buffer_, nbytes); buffer_ = realloc(buffer_, nbytes);
@@ -242,7 +294,8 @@ bool FDTensor::ReallocFn(size_t nbytes) {
} }
void FDTensor::FreeFn() { void FDTensor::FreeFn() {
if (external_data_ptr != nullptr) external_data_ptr = nullptr; if (external_data_ptr != nullptr)
external_data_ptr = nullptr;
if (buffer_ != nullptr) { if (buffer_ != nullptr) {
if (device == Device::GPU) { if (device == Device::GPU) {
#ifdef WITH_GPU #ifdef WITH_GPU
@@ -293,11 +346,8 @@ void FDTensor::CopyBuffer(void* dst, const void* src, size_t nbytes,
FDTensor::FDTensor(const std::string& tensor_name) { name = tensor_name; } FDTensor::FDTensor(const std::string& tensor_name) { name = tensor_name; }
FDTensor::FDTensor(const FDTensor& other) FDTensor::FDTensor(const FDTensor& other)
: shape(other.shape), : shape(other.shape), name(other.name), dtype(other.dtype),
name(other.name), device(other.device), external_data_ptr(other.external_data_ptr) {
dtype(other.dtype),
device(other.device),
external_data_ptr(other.external_data_ptr) {
// Copy buffer // Copy buffer
if (other.buffer_ == nullptr) { if (other.buffer_ == nullptr) {
buffer_ = nullptr; buffer_ = nullptr;
@@ -310,12 +360,9 @@ FDTensor::FDTensor(const FDTensor& other)
} }
FDTensor::FDTensor(FDTensor&& other) FDTensor::FDTensor(FDTensor&& other)
: buffer_(other.buffer_), : buffer_(other.buffer_), shape(std::move(other.shape)),
shape(std::move(other.shape)), name(std::move(other.name)), dtype(other.dtype),
name(std::move(other.name)), external_data_ptr(other.external_data_ptr), device(other.device) {
dtype(other.dtype),
external_data_ptr(other.external_data_ptr),
device(other.device) {
other.name = ""; other.name = "";
// Note(zhoushunjie): Avoid double free. // Note(zhoushunjie): Avoid double free.
other.buffer_ = nullptr; other.buffer_ = nullptr;

7
fastdeploy/core/fd_tensor.h Executable file → Normal file
View File

@@ -57,9 +57,7 @@ struct FASTDEPLOY_DECL FDTensor {
void* Data(); void* Data();
bool IsShared() { bool IsShared() { return external_data_ptr != nullptr; }
return external_data_ptr != nullptr;
}
void StopSharing(); void StopSharing();
@@ -116,6 +114,7 @@ struct FASTDEPLOY_DECL FDTensor {
const FDDataType& data_type, const std::string& tensor_name = "", const FDDataType& data_type, const std::string& tensor_name = "",
const Device& new_device = Device::CPU); const Device& new_device = Device::CPU);
bool Reshape(const std::vector<int64_t>& new_shape);
// Debug function // Debug function
// Use this function to print shape, dtype, mean, max, min // Use this function to print shape, dtype, mean, max, min
// prefix will also be printed as tag // prefix will also be printed as tag
@@ -141,7 +140,7 @@ struct FASTDEPLOY_DECL FDTensor {
static void CopyBuffer(void* dst, const void* src, size_t nbytes, static void CopyBuffer(void* dst, const void* src, size_t nbytes,
const Device& device = Device::CPU, const Device& device = Device::CPU,
bool is_pinned_memory = false); bool is_pinned_memory = false);
}; };
} // namespace fastdeploy } // namespace fastdeploy

View File

@@ -14,26 +14,17 @@
#include "fastdeploy/function/concat.h" #include "fastdeploy/function/concat.h"
#include "fastdeploy/utils/utils.h"
#include <cstring> #include <cstring>
#include <limits> #include <limits>
#include <set> #include <set>
#include <sstream> #include <sstream>
#include "fastdeploy/utils/utils.h"
namespace fastdeploy { namespace fastdeploy {
namespace function { namespace function {
// Formats a shape as "[ d0 ,d1 ,... ]". An empty shape is rendered as "[ ]"
// rather than reading a non-existent first element.
std::string Str(const std::vector<int64_t>& shape) {
  std::ostringstream oss;
  oss << "[";
  for (size_t i = 0; i < shape.size(); ++i) {
    // The first entry is preceded by a space, later ones by " ,".
    oss << (i == 0 ? " " : " ,") << shape[i];
  }
  oss << " ]";
  return oss.str();
}
std::vector<int64_t> ComputeAndCheckConcatOutputShape( std::vector<int64_t>
const std::vector<FDTensor>& input, int axis) { ComputeAndCheckConcatOutputShape(const std::vector<FDTensor>& input, int axis) {
const size_t n = input.size(); const size_t n = input.size();
auto out_dims = input[0].shape; auto out_dims = input[0].shape;
size_t in_zero_dims_size = out_dims.size(); size_t in_zero_dims_size = out_dims.size();
@@ -58,8 +49,7 @@ std::vector<int64_t> ComputeAndCheckConcatOutputShape(
return out_dims; return out_dims;
} }
template <typename T> template <typename T> struct ConcatFunctor {
struct ConcatFunctor {
void operator()(const std::vector<FDTensor>& input, int axis, void operator()(const std::vector<FDTensor>& input, int axis,
FDTensor* output) { FDTensor* output) {
size_t num = input.size(); size_t num = input.size();

View File

@@ -0,0 +1,75 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/function/elementwise.h"
#include "fastdeploy/function/eigen.h"
#include "fastdeploy/function/elementwise_base.h"
#include "fastdeploy/function/elementwise_functor.h"
#include "fastdeploy/utils/utils.h"
#include <algorithm>
namespace fastdeploy {
namespace function {
// Instantiate the per-operation kernel structs (AddRawKernel<T>,
// MultiplyRawKernel<T>, SubtractRawKernel<T>, DivideRawKernel<T>) that the
// functions below dispatch to.
DEFINE_ELEMENTWISE_OP(Add);
DEFINE_ELEMENTWISE_OP(Multiply);
DEFINE_ELEMENTWISE_OP(Subtract);
DEFINE_ELEMENTWISE_OP(Divide);
void Add(const FDTensor& x, const FDTensor& y, FDTensor* out) {
FD_VISIT_ALL_TYPES(x.dtype, "AddRawKernel",
([&] { AddRawKernel<data_t>()(x, y, -1, out); }));
}
// Operator form of Add(): returns a freshly created tensor holding x + y.
FDTensor operator+(const FDTensor& x, const FDTensor& y) {
  FDTensor sum;
  Add(x, y, &sum);
  return sum;
}
void Subtract(const FDTensor& x, const FDTensor& y, FDTensor* out) {
FD_VISIT_ALL_TYPES(x.dtype, "SubtractRawKernel",
([&] { SubtractRawKernel<data_t>()(x, y, -1, out); }));
}
// Operator form of Subtract(): returns a freshly created tensor holding x - y.
FDTensor operator-(const FDTensor& x, const FDTensor& y) {
  FDTensor difference;
  Subtract(x, y, &difference);
  return difference;
}
void Multiply(const FDTensor& x, const FDTensor& y, FDTensor* out) {
FD_VISIT_ALL_TYPES(x.dtype, "MultiplyRawKernel",
([&] { MultiplyRawKernel<data_t>()(x, y, -1, out); }));
}
// Operator form of Multiply(): returns a freshly created tensor holding x * y.
FDTensor operator*(const FDTensor& x, const FDTensor& y) {
  FDTensor product;
  Multiply(x, y, &product);
  return product;
}
void Divide(const FDTensor& x, const FDTensor& y, FDTensor* out) {
FD_VISIT_ALL_TYPES(x.dtype, "DivideRawKernel",
([&] { DivideRawKernel<data_t>()(x, y, -1, out); }));
}
// Operator form of Divide(): returns a freshly created tensor holding x / y.
FDTensor operator/(const FDTensor& x, const FDTensor& y) {
  FDTensor quotient;
  Divide(x, y, &quotient);
  return quotient;
}
} // namespace function
} // namespace fastdeploy

View File

@@ -0,0 +1,60 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/core/fd_tensor.h"
namespace fastdeploy {
namespace function {
/** Execute the add operation for input FDTensors. *out = x + y.
@param x The input tensor.
@param y The input tensor.
@param out The output tensor which stores the result.
*/
FASTDEPLOY_DECL void Add(const FDTensor& x, const FDTensor& y, FDTensor* out);
/** Operator form of the add operation.
@param x The input tensor.
@param y The input tensor.
@return A tensor holding x + y.
*/
FASTDEPLOY_DECL FDTensor operator+(const FDTensor& x, const FDTensor& y);
/** Execute the subtract operation for input FDTensors. *out = x - y.
@param x The input tensor.
@param y The input tensor.
@param out The output tensor which stores the result.
*/
FASTDEPLOY_DECL void Subtract(const FDTensor& x, const FDTensor& y,
FDTensor* out);
/** Operator form of the subtract operation.
@param x The input tensor.
@param y The input tensor.
@return A tensor holding x - y.
*/
FASTDEPLOY_DECL FDTensor operator-(const FDTensor& x, const FDTensor& y);
/** Execute the multiply operation for input FDTensors. *out = x * y.
@param x The input tensor.
@param y The input tensor.
@param out The output tensor which stores the result.
*/
FASTDEPLOY_DECL void Multiply(const FDTensor& x, const FDTensor& y,
FDTensor* out);
/** Operator form of the multiply operation.
@param x The input tensor.
@param y The input tensor.
@return A tensor holding x * y.
*/
FASTDEPLOY_DECL FDTensor operator*(const FDTensor& x, const FDTensor& y);
/** Execute the divide operation for input FDTensors. *out = x / y.
@param x The input tensor.
@param y The input tensor.
@param out The output tensor which stores the result.
*/
FASTDEPLOY_DECL void Divide(const FDTensor& x, const FDTensor& y,
FDTensor* out);
/** Operator form of the divide operation.
@param x The input tensor.
@param y The input tensor.
@return A tensor holding x / y.
*/
FASTDEPLOY_DECL FDTensor operator/(const FDTensor& x, const FDTensor& y);
} // namespace function
} // namespace fastdeploy

View File

@@ -0,0 +1,263 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/function/eigen.h"
namespace fastdeploy {
namespace function {
// Defines `name##RawKernel<T>`, a functor computing `out = x <op> y`.
//   * Identical shapes: handled by SameDimsElementwiseCompute with
//     SameDims##name##Functor<T>.
//   * Otherwise: handled by ElementwiseCompute. When x has at least as many
//     dimensions as y, name##Functor<T> is used; when y has more dimensions,
//     Inverse##name##Functor<T> is used instead.
// The macro therefore expects SameDims##name##Functor, name##Functor and
// Inverse##name##Functor to be visible at the point of expansion.
#define DEFINE_ELEMENTWISE_OP(name) \
template <typename T> struct name##RawKernel { \
void operator()(const FDTensor& x, const FDTensor& y, int axis, \
FDTensor* out) { \
if (x.Shape() == y.Shape()) { \
SameDimsElementwiseCompute<SameDims##name##Functor<T>>()(x, y, out); \
} else { \
auto x_dims = x.Shape(); \
auto y_dims = y.Shape(); \
if (x_dims.size() >= y_dims.size()) { \
ElementwiseCompute<name##Functor<T>, T>(x, y, axis, \
name##Functor<T>(), out); \
} else { \
ElementwiseCompute<Inverse##name##Functor<T>, T>( \
x, y, axis, Inverse##name##Functor<T>(), out); \
} \
} \
} \
}
inline void GetMidDims(const std::vector<int64_t>& x_dims,
const std::vector<int64_t>& y_dims, const int axis,
int* pre, int* n, int* post,
int* is_run_common_broadcast) {
*pre = 1;
*n = 1;
*post = 1;
*is_run_common_broadcast = 0;
for (int i = 0; i < axis; ++i) {
(*pre) *= x_dims[i];
}
for (int i = 0; i < y_dims.size(); ++i) {
if (x_dims[i + axis] != y_dims[i]) {
FDASSERT(y_dims[i] == 1 || x_dims[i + axis] == 1,
"Broadcast dimension mismatch. Operands "
"could not be broadcast together with the shape of "
"X = [%s] and the shape of Y = [%s]. Received [%d] "
"in X is not equal to [%d] in Y.",
Str(x_dims).c_str(), Str(y_dims).c_str(), x_dims[i + axis],
y_dims[i]);
*is_run_common_broadcast = 1;
return;
}
(*n) *= y_dims[i];
}
for (int i = axis + y_dims.size(); i < x_dims.size(); ++i) {
(*post) *= x_dims[i];
}
}
inline std::vector<int64_t>
TrimTrailingSingularDims(const std::vector<int64_t>& dims) {
// Remove trailing dimensions of size 1 for y
auto actual_dims_size = dims.size();
for (; actual_dims_size != 0; --actual_dims_size) {
if (dims[actual_dims_size - 1] != 1)
break;
}
if (actual_dims_size == dims.size())
return dims;
std::vector<int64_t> trim_dims;
trim_dims.resize(actual_dims_size);
for (int i = 0; i < actual_dims_size; ++i) {
trim_dims[i] = dims[i];
}
return trim_dims;
}
// Folds a multi-dimensional index into a flat offset for an operand whose
// per-axis extents are `x_dims_array`. Axes with an extent of 1 or less are
// skipped, so the index along those axes does not affect the result.
inline int GetElementwiseIndex(const int64_t* x_dims_array, const int max_dim,
                               const int64_t* index_array) {
  int flat = 0;
  for (int d = 0; d < max_dim; ++d) {
    const int64_t extent = x_dims_array[d];
    if (extent <= 1) {
      continue;
    }
    flat = flat * extent + index_array[d];
  }
  return flat;
}
// Advances `index_array` by one position: the last axis is incremented and
// a carry propagates toward the first axis whenever an index reaches the
// corresponding extent in `out_dims_array`.
inline void UpdateElementwiseIndexArray(const int64_t* out_dims_array,
                                        const int max_dim,
                                        int64_t* index_array) {
  int d = max_dim;
  while (d-- > 0) {
    index_array[d] += 1;
    if (index_array[d] < out_dims_array[d]) {
      return;  // no carry, done
    }
    index_array[d] -= out_dims_array[d];
  }
}
// Expands x_dims and y_dims to `max_dim` entries and derives the broadcast
// output extents.
//   * The operand with more dimensions is copied as is; the other operand is
//     placed at offset `axis` and padded with 1 on both sides.
//   * out_dims_array[i] is max(x, y) when either extent is > 1 or both are 1;
//     otherwise -1 is stored.
// Aligned extents must be equal unless one of them is <= 1; violations are
// reported through FDASSERT.
inline void GetBroadcastDimsArrays(const std::vector<int64_t>& x_dims,
                                   const std::vector<int64_t>& y_dims,
                                   int64_t* x_dims_array, int64_t* y_dims_array,
                                   int64_t* out_dims_array, const int max_dim,
                                   const int axis) {
  FDASSERT(axis >= 0,
           "Axis should be great than or equal to 0, but received axis is %d.",
           axis);
  FDASSERT(axis < max_dim,
           "Axis should be less than %d, but received axis is %d.", max_dim,
           axis);
  if (x_dims.size() > y_dims.size()) {
    std::fill(y_dims_array, y_dims_array + axis, 1);
    if (axis + y_dims.size() < static_cast<size_t>(max_dim)) {
      std::fill(y_dims_array + axis + y_dims.size(), y_dims_array + max_dim, 1);
    }
    std::copy(x_dims.data(), x_dims.data() + x_dims.size(), x_dims_array);
    std::copy(y_dims.data(), y_dims.data() + y_dims.size(),
              y_dims_array + axis);
  } else {
    std::fill(x_dims_array, x_dims_array + axis, 1);
    if (axis + x_dims.size() < static_cast<size_t>(max_dim)) {
      std::fill(x_dims_array + axis + x_dims.size(), x_dims_array + max_dim, 1);
    }
    std::copy(x_dims.data(), x_dims.data() + x_dims.size(),
              x_dims_array + axis);
    std::copy(y_dims.data(), y_dims.data() + y_dims.size(), y_dims_array);
  }
  for (int i = 0; i < max_dim; i++) {
    // Report the padded extents that were actually compared. Indexing the
    // original vectors with i / i + axis can pick the wrong entries or read
    // past their end.
    FDASSERT(x_dims_array[i] == y_dims_array[i] || x_dims_array[i] <= 1 ||
                 y_dims_array[i] <= 1,
             "Broadcast dimension mismatch. Operands "
             "could not be broadcast together with the shape of "
             "X = [%s] and the shape of Y = [%s]. Received [%lld] "
             "in X is not equal to [%lld] in Y.",
             Str(x_dims).c_str(), Str(y_dims).c_str(),
             static_cast<long long>(x_dims_array[i]),
             static_cast<long long>(y_dims_array[i]));
    if ((x_dims_array[i] > 1 || y_dims_array[i] > 1) ||
        (x_dims_array[i] == 1 && y_dims_array[i] == 1)) {
      out_dims_array[i] = (std::max)(x_dims_array[i], y_dims_array[i]);
    } else {
      out_dims_array[i] = -1;
    }
  }
}
// Generic broadcast loop: visits every output element in order, maps the
// running multi-index to offsets into x and y, and stores
// func(x_val, y_val) when `is_xsize_larger` is true, or
// func(y_val, x_val) otherwise.
// `x_dims_array`, `y_dims_array` and `out_dims_array` each hold `max_dim`
// extents; *z is written through z->Data().
template <typename Functor, typename T, typename OutType = T>
void CommonForwardBroadcastCPU(const FDTensor& x, const FDTensor& y,
                               FDTensor* z, int64_t* x_dims_array,
                               int64_t* y_dims_array, int64_t* out_dims_array,
                               int max_dim, Functor func,
                               const bool is_xsize_larger = true) {
  std::vector<int64_t> index_array(max_dim, 0);
  const T* x_data = reinterpret_cast<const T*>(x.Data());
  const T* y_data = reinterpret_cast<const T*>(y.Data());
  FDASSERT(x_data != nullptr, "The input X should not be empty.");
  FDASSERT(y_data != nullptr, "The input Y should not be empty.");
  OutType* out_data = reinterpret_cast<OutType*>(z->Data());

  // Accumulate in 64 bits: with an `int` seed the running product is
  // truncated to int at every step.
  const int64_t out_size =
      std::accumulate(out_dims_array, out_dims_array + max_dim, int64_t{1},
                      std::multiplies<int64_t>());
  int x_index, y_index;
  for (int64_t out_index = 0; out_index < out_size; ++out_index) {
    x_index = GetElementwiseIndex(x_dims_array, max_dim, index_array.data());
    y_index = GetElementwiseIndex(y_dims_array, max_dim, index_array.data());
    if (is_xsize_larger) {
      out_data[out_index] = func(x_data[x_index], y_data[y_index]);
    } else {
      out_data[out_index] = func(y_data[y_index], x_data[x_index]);
    }
    UpdateElementwiseIndexArray(out_dims_array, max_dim, index_array.data());
  }
}
// Broadcast entry point: resolves `axis` (-1 means the absolute difference
// of the two ranks), builds the padded extent arrays, allocates *z with the
// broadcast shape and runs CommonForwardBroadcastCPU.
template <typename Functor, typename T, typename OutType = T>
void CommonElementwiseBroadcastForward(const FDTensor& x, const FDTensor& y,
                                       FDTensor* z,
                                       const std::vector<int64_t>& x_dims,
                                       const std::vector<int64_t>& y_dims,
                                       Functor func, int axis,
                                       const bool is_xsize_larger = true) {
  const int x_rank = x_dims.size();
  const int y_rank = y_dims.size();
  const int max_dim = (std::max)(x_rank, y_rank);
  if (axis == -1) {
    axis = std::abs(x_rank - y_rank);
  }
  FDASSERT(axis >= 0,
           "Axis should be great than or equal to 0, but received axis is %d.",
           axis);
  FDASSERT(axis < max_dim,
           "Axis should be less than %d, but received axis is %d.", max_dim,
           axis);

  std::vector<int64_t> x_extents(max_dim);
  std::vector<int64_t> y_extents(max_dim);
  std::vector<int64_t> out_extents(max_dim);
  GetBroadcastDimsArrays(x_dims, y_dims, x_extents.data(), y_extents.data(),
                         out_extents.data(), max_dim, axis);

  z->Allocate(out_extents, TypeToDataType<OutType>::dtype);
  CommonForwardBroadcastCPU<Functor, T, OutType>(
      x, y, z, x_extents.data(), y_extents.data(), out_extents.data(), max_dim,
      func, is_xsize_larger);
}
// Element-wise computation for operands whose shapes differ.
// Resolves `axis` (-1 means the absolute rank difference), validates it,
// runs GetMidDims on the operand with more dimensions against the other
// operand with trailing 1s trimmed, and then always delegates to
// CommonElementwiseBroadcastForward.
// NOTE(review): the values produced by GetMidDims are not read afterwards in
// this function; only its internal assertion can have an effect.
template <typename Functor, typename T, typename OutType = T>
void ElementwiseCompute(const FDTensor& x, const FDTensor& y, int axis,
                        Functor func, FDTensor* z) {
  auto x_dims = x.Shape();
  auto y_dims = y.Shape();
  const bool is_xsize_larger = x_dims.size() >= y_dims.size();
  const int max_dim = is_xsize_larger ? x_dims.size() : y_dims.size();

  if (axis == -1) {
    const int diff_size = x_dims.size() - y_dims.size();
    axis = std::abs(diff_size);
  }
  FDASSERT(axis >= 0,
           "Axis should be great than or equal to 0, but received axis is %d.",
           axis);
  FDASSERT(axis < max_dim,
           "Axis should be less than %d, but received axis is %d.", max_dim,
           axis);

  int pre = 0;
  int n = 0;
  int post = 0;
  int is_run_common_broadcast = 0;
  const auto& larger_dims = is_xsize_larger ? x_dims : y_dims;
  const auto trimmed_dims =
      TrimTrailingSingularDims(is_xsize_larger ? y_dims : x_dims);
  // When the smaller operand is all 1s, align it past the end of the larger.
  const int axis_trim = trimmed_dims.empty() ? larger_dims.size() : axis;
  GetMidDims(larger_dims, trimmed_dims, axis_trim, &pre, &n, &post,
             &is_run_common_broadcast);

  // special case for common implementation.
  // case 1: x=[2,3,1,5], y=[2,1,4,1]
  // case 2: x=[2,3,4], y=[1,1,4]
  CommonElementwiseBroadcastForward<Functor, T, OutType>(
      x, y, z, x_dims, y_dims, func, axis, is_xsize_larger);
}
} // namespace function
} // namespace fastdeploy

View File

@@ -0,0 +1,126 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/function/eigen.h"
#include "fastdeploy/function/elementwise.h"
#include "fastdeploy/function/elementwise_base.h"
#include <algorithm>
namespace fastdeploy {
namespace function {
// Driver for operands of identical shape: allocates *z with x's shape and
// dtype, then lets `Functor` fill it.
template <typename Functor> struct SameDimsElementwiseCompute {
  void operator()(const FDTensor& x, const FDTensor& y, FDTensor* z) {
    z->Allocate(x.Shape(), x.Dtype());
    Functor compute{};
    compute(x, y, z);
  }
};
// Same-shape addition: evaluates z = x + y on the Eigen expressions
// obtained from EigenVector<T>::Flatten, using the shared Eigen device.
template <typename T> struct SameDimsAddFunctor {
  void operator()(const FDTensor& x, const FDTensor& y, FDTensor* z) {
    auto lhs = EigenVector<T>::Flatten(x);
    auto rhs = EigenVector<T>::Flatten(y);
    auto result = EigenVector<T>::Flatten(*z);
    const auto& device = *EigenDeviceWrapper::GetInstance()->GetDevice();
    result.device(device) = lhs + rhs;
  }
};
// Same-shape subtraction: evaluates z = x - y on the Eigen expressions
// obtained from EigenVector<T>::Flatten, using the shared Eigen device.
template <typename T> struct SameDimsSubtractFunctor {
  void operator()(const FDTensor& x, const FDTensor& y, FDTensor* z) {
    auto lhs = EigenVector<T>::Flatten(x);
    auto rhs = EigenVector<T>::Flatten(y);
    auto result = EigenVector<T>::Flatten(*z);
    const auto& device = *EigenDeviceWrapper::GetInstance()->GetDevice();
    result.device(device) = lhs - rhs;
  }
};
// Same-shape multiplication: evaluates z = x * y on the Eigen expressions
// obtained from EigenVector<T>::Flatten, using the shared Eigen device.
template <typename T> struct SameDimsMultiplyFunctor {
  void operator()(const FDTensor& x, const FDTensor& y, FDTensor* z) {
    auto lhs = EigenVector<T>::Flatten(x);
    auto rhs = EigenVector<T>::Flatten(y);
    auto result = EigenVector<T>::Flatten(*z);
    const auto& device = *EigenDeviceWrapper::GetInstance()->GetDevice();
    result.device(device) = lhs * rhs;
  }
};
// Same-shape division: evaluates z = x / y on the Eigen expressions
// obtained from EigenVector<T>::Flatten, using the shared Eigen device.
template <typename T> struct SameDimsDivideFunctor {
  void operator()(const FDTensor& x, const FDTensor& y, FDTensor* z) {
    auto lhs = EigenVector<T>::Flatten(x);
    auto rhs = EigenVector<T>::Flatten(y);
    auto result = EigenVector<T>::Flatten(*z);
    const auto& device = *EigenDeviceWrapper::GetInstance()->GetDevice();
    result.device(device) = lhs / rhs;
  }
};
// Add
// Scalar functor returning a + b.
template <typename T> struct AddFunctor {
  inline T operator()(const T a, const T b) const {
    const T sum = a + b;
    return sum;
  }
};
// Scalar functor with swapped operands: returns b + a.
template <typename T> struct InverseAddFunctor {
  inline T operator()(const T a, const T b) const {
    const T sum = b + a;
    return sum;
  }
};
// Subtract
// Scalar functor returning a - b.
template <typename T> struct SubtractFunctor {
  inline T operator()(const T a, const T b) const {
    const T difference = a - b;
    return difference;
  }
};
// Scalar functor with swapped operands: returns b - a.
template <typename T> struct InverseSubtractFunctor {
  inline T operator()(const T a, const T b) const {
    const T difference = b - a;
    return difference;
  }
};
// Multiply
// Scalar functor returning a * b.
template <typename T> struct MultiplyFunctor {
  inline T operator()(const T a, const T b) const {
    const T product = a * b;
    return product;
  }
};
// bool specialization: multiplication is expressed as logical AND.
template <> struct MultiplyFunctor<bool> {
  inline bool operator()(const bool a, const bool b) const {
    if (!a) {
      return false;
    }
    return b;
  }
};
// Scalar functor with swapped operands: returns b * a.
template <typename T> struct InverseMultiplyFunctor {
  inline T operator()(const T a, const T b) const {
    const T product = b * a;
    return product;
  }
};
// bool specialization: multiplication is expressed as logical AND.
template <> struct InverseMultiplyFunctor<bool> {
  inline bool operator()(const bool a, const bool b) const {
    if (!b) {
      return false;
    }
    return a;
  }
};
// Divide
// Message reported when an integer division encounters a zero divisor.
#define DIV_ERROR_INFO \
"InvalidArgumentError: Integer division by zero encountered in " \
"(floor) divide. Please check the input value."
// Scalar functor returning a / b for non-integral element types.
template <typename T, typename Enable = void> struct DivideFunctor {
  inline T operator()(const T a, const T b) const {
    const T quotient = a / b;
    return quotient;
  }
};
// Integral specialization: the divisor is checked against zero before
// dividing.
template <typename T>
struct DivideFunctor<
    T, typename std::enable_if<std::is_integral<T>::value>::type> {
  inline T operator()(const T a, const T b) const {
    FDASSERT(b != 0, DIV_ERROR_INFO);
    const T quotient = a / b;
    return quotient;
  }
};
template <typename T, typename Enable = void> struct InverseDivideFunctor {
inline T operator()(const T a, const T b) const { return b / a; }
};
} // namespace function
} // namespace fastdeploy

View File

@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "fastdeploy/utils/utils.h" #include "fastdeploy/utils/utils.h"
#include <sstream>
namespace fastdeploy { namespace fastdeploy {
@@ -55,4 +56,14 @@ std::vector<int64_t> GetStride(const std::vector<int64_t>& dims) {
return result; return result;
} }
// Formats a shape as "[ d0 ,d1 ,... ]". An empty shape is rendered as "[ ]"
// rather than reading a non-existent first element.
std::string Str(const std::vector<int64_t>& shape) {
  std::ostringstream oss;
  oss << "[";
  for (size_t i = 0; i < shape.size(); ++i) {
    // The first entry is preceded by a space, later ones by " ,".
    oss << (i == 0 ? " " : " ,") << shape[i];
  }
  oss << " ]";
  return oss.str();
}
} // namespace fastdeploy } // namespace fastdeploy

View File

@@ -14,15 +14,15 @@
#pragma once #pragma once
#include <stdlib.h>
#include <cstdio> #include <cstdio>
#include <stdlib.h>
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <numeric>
#include <sstream> #include <sstream>
#include <string> #include <string>
#include <vector> #include <vector>
#include <numeric>
#if defined(_WIN32) #if defined(_WIN32)
#ifdef FASTDEPLOY_LIB #ifdef FASTDEPLOY_LIB
@@ -45,8 +45,7 @@ class FASTDEPLOY_DECL FDLogger {
} }
explicit FDLogger(bool verbose, const std::string& prefix = "[FastDeploy]"); explicit FDLogger(bool verbose, const std::string& prefix = "[FastDeploy]");
template <typename T> template <typename T> FDLogger& operator<<(const T& val) {
FDLogger& operator<<(const T& val) {
if (!verbose_) { if (!verbose_) {
return *this; return *this;
} }
@@ -75,37 +74,37 @@ FASTDEPLOY_DECL bool ReadBinaryFromFile(const std::string& file,
#define __REL_FILE__ __FILE__ #define __REL_FILE__ __FILE__
#endif #endif
#define FDERROR \ #define FDERROR \
FDLogger(true, "[ERROR]") << __REL_FILE__ << "(" << __LINE__ \ FDLogger(true, "[ERROR]") \
<< ")::" << __FUNCTION__ << "\t" << __REL_FILE__ << "(" << __LINE__ << ")::" << __FUNCTION__ << "\t"
#define FDWARNING \ #define FDWARNING \
FDLogger(true, "[WARNING]") << __REL_FILE__ << "(" << __LINE__ \ FDLogger(true, "[WARNING]") \
<< ")::" << __FUNCTION__ << "\t" << __REL_FILE__ << "(" << __LINE__ << ")::" << __FUNCTION__ << "\t"
#define FDINFO \ #define FDINFO \
FDLogger(true, "[INFO]") << __REL_FILE__ << "(" << __LINE__ \ FDLogger(true, "[INFO]") << __REL_FILE__ << "(" << __LINE__ \
<< ")::" << __FUNCTION__ << "\t" << ")::" << __FUNCTION__ << "\t"
#define FDASSERT(condition, format, ...) \ #define FDASSERT(condition, format, ...) \
if (!(condition)) { \ if (!(condition)) { \
int n = std::snprintf(nullptr, 0, format, ##__VA_ARGS__); \ int n = std::snprintf(nullptr, 0, format, ##__VA_ARGS__); \
std::vector<char> buffer(n + 1); \ std::vector<char> buffer(n + 1); \
std::snprintf(buffer.data(), n + 1, format, ##__VA_ARGS__); \ std::snprintf(buffer.data(), n + 1, format, ##__VA_ARGS__); \
FDERROR << buffer.data() << std::endl; \ FDERROR << buffer.data() << std::endl; \
std::abort(); \ std::abort(); \
} }
///////// Basic Marco /////////// ///////// Basic Marco ///////////
#define FD_PRIVATE_CASE_TYPE_USING_HINT(NAME, enum_type, type, HINT, ...) \ #define FD_PRIVATE_CASE_TYPE_USING_HINT(NAME, enum_type, type, HINT, ...) \
case enum_type: { \ case enum_type: { \
using HINT = type; \ using HINT = type; \
__VA_ARGS__(); \ __VA_ARGS__(); \
break; \ break; \
} }
#define FD_PRIVATE_CASE_TYPE(NAME, enum_type, type, ...) \ #define FD_PRIVATE_CASE_TYPE(NAME, enum_type, type, ...) \
FD_PRIVATE_CASE_TYPE_USING_HINT(NAME, enum_type, type, data_t, __VA_ARGS__) FD_PRIVATE_CASE_TYPE_USING_HINT(NAME, enum_type, type, data_t, __VA_ARGS__)
// Visit different data type to match the corresponding function of FDTensor // Visit different data type to match the corresponding function of FDTensor
@@ -123,68 +122,70 @@ FASTDEPLOY_DECL bool ReadBinaryFromFile(const std::string& file,
__VA_ARGS__) \ __VA_ARGS__) \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP64, double, \ FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP64, double, \
__VA_ARGS__) \ __VA_ARGS__) \
default: \ default: \
FDASSERT( \ FDASSERT(false, \
false, \ "Invalid enum data type. Expect to accept data " \
"Invalid enum data type. Expect to accept data type BOOL, INT32, " \ "type BOOL, INT32, " \
"INT64, FP32, FP64, but receive type %s.", \ "INT64, FP32, FP64, but receive type %s.", \
Str(__dtype__).c_str()); \ Str(__dtype__).c_str()); \
} \ } \
}() }()
// Dispatches on the runtime data type TYPE through an immediately-invoked
// `[&]` lambda. For INT32, INT64, FP32 and FP64 the callable passed in
// __VA_ARGS__ runs with `data_t` aliased to int32_t, int64_t, float and double
// respectively. Any other value reaches the default arm, which calls
// FDASSERT(false, ...) with a message naming the received type.
#define FD_VISIT_INT_FLOAT_TYPES(TYPE, NAME, ...) \ #define FD_VISIT_INT_FLOAT_TYPES(TYPE, NAME, ...) \
[&] { \ [&] { \
const auto& __dtype__ = TYPE; \ const auto& __dtype__ = TYPE; \
switch (__dtype__) { \ switch (__dtype__) { \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT32, int32_t, \ FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT32, int32_t, \
__VA_ARGS__) \ __VA_ARGS__) \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT64, int64_t, \ FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT64, int64_t, \
__VA_ARGS__) \ __VA_ARGS__) \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP32, float, \ FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP32, float, \
__VA_ARGS__) \ __VA_ARGS__) \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP64, double, \ FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP64, double, \
__VA_ARGS__) \ __VA_ARGS__) \
default: \ default: \
FDASSERT(false, \ FDASSERT(false, \
"Invalid enum data type. Expect to accept data type INT32, " \ "Invalid enum data type. Expect to accept data type INT32, " \
"INT64, FP32, FP64, but receive type %s.", \ "INT64, FP32, FP64, but receive type %s.", \
Str(__dtype__).c_str()); \ Str(__dtype__).c_str()); \
} \ } \
}() }()
// Floating-point-only variant of the dtype dispatcher: the callable in
// __VA_ARGS__ runs with `data_t` aliased to float for FP32 and double for
// FP64. Every other data type falls into the default arm and triggers
// FDASSERT(false, ...) with a message naming the received type.
#define FD_VISIT_FLOAT_TYPES(TYPE, NAME, ...) \ #define FD_VISIT_FLOAT_TYPES(TYPE, NAME, ...) \
[&] { \ [&] { \
const auto& __dtype__ = TYPE; \ const auto& __dtype__ = TYPE; \
switch (__dtype__) { \ switch (__dtype__) { \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP32, float, \ FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP32, float, \
__VA_ARGS__) \ __VA_ARGS__) \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP64, double, \ FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP64, double, \
__VA_ARGS__) \ __VA_ARGS__) \
default: \ default: \
FDASSERT(false, \ FDASSERT(false, \
"Invalid enum data type. Expect to accept data type FP32, " \ "Invalid enum data type. Expect to accept data type FP32, " \
"FP64, but receive type %s.", \ "FP64, but receive type %s.", \
Str(__dtype__).c_str()); \ Str(__dtype__).c_str()); \
} \ } \
}() }()
// Integer-only variant of the dtype dispatcher: the callable in __VA_ARGS__
// runs with `data_t` aliased to int32_t for INT32 and int64_t for INT64.
// Every other data type falls into the default arm and triggers
// FDASSERT(false, ...) with a message naming the received type.
#define FD_VISIT_INT_TYPES(TYPE, NAME, ...) \ #define FD_VISIT_INT_TYPES(TYPE, NAME, ...) \
[&] { \ [&] { \
const auto& __dtype__ = TYPE; \ const auto& __dtype__ = TYPE; \
switch (__dtype__) { \ switch (__dtype__) { \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT32, int32_t, \ FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT32, int32_t, \
__VA_ARGS__) \ __VA_ARGS__) \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT64, int64_t, \ FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT64, int64_t, \
__VA_ARGS__) \ __VA_ARGS__) \
default: \ default: \
FDASSERT(false, \ FDASSERT(false, \
"Invalid enum data type. Expect to accept data type INT32, " \ "Invalid enum data type. Expect to accept data type INT32, " \
"INT64, but receive type %s.", \ "INT64, but receive type %s.", \
Str(__dtype__).c_str()); \ Str(__dtype__).c_str()); \
} \ } \
}() }()
FASTDEPLOY_DECL std::vector<int64_t> GetStride( FASTDEPLOY_DECL std::vector<int64_t>
const std::vector<int64_t>& dims); GetStride(const std::vector<int64_t>& dims);
FASTDEPLOY_DECL std::string Str(const std::vector<int64_t>& shape);
} // namespace fastdeploy } // namespace fastdeploy

View File

@@ -12,12 +12,12 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "fastdeploy/core/fd_tensor.h"
#include "gtest_utils.h"
#include "gtest/gtest.h"
#include <array> #include <array>
#include <cstring> #include <cstring>
#include <vector> #include <vector>
#include "fastdeploy/core/fd_tensor.h"
#include "gtest/gtest.h"
#include "gtest_utils.h"
namespace fastdeploy { namespace fastdeploy {
@@ -86,4 +86,18 @@ TEST(fastdeploy, fd_tensor_assignment) {
ASSERT_EQ(tensor1.Data(), nullptr); ASSERT_EQ(tensor1.Data(), nullptr);
} }
} // namespace fastdeploy TEST(fastdeploy, fd_tensor_reshape) {
// Checks FDTensor::Reshape against expected shapes, starting from an FP32
// tensor allocated as [2, 3, 4, 5].
CheckShape check_shape;
FDTensor x;
x.Allocate({2, 3, 4, 5}, FDDataType::FP32);
// A -1 entry is expected to be filled in from the remaining element count.
x.Reshape({-1, 3, 2, 2, 5});
check_shape(x.Shape(), {2, 3, 2, 2, 5});
// A 0 entry is expected to keep the current extent at the same index
// (here index 0 stays 2), combined with an inferred -1 entry.
x.Reshape({0, -1, 5, 2});
check_shape(x.Shape(), {2, 6, 5, 2});
// Several 0 entries at once: indices 2 and 3 are expected to keep 5 and 2.
x.Reshape({2, 3, 0, 0, 2});
check_shape(x.Shape(), {2, 3, 5, 2, 2});
}
} // namespace fastdeploy

View File

@@ -0,0 +1,451 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/function/elementwise.h"
#include "glog/logging.h"
#include "gtest_utils.h"
#include "gtest/gtest.h"
#include <array>
#include <tuple>
#include <vector>
namespace fastdeploy {
namespace function {
// Produces the operand pair for the same-shape element-wise tests. Both
// vectors hold 24 values, intended to be viewed as [2, 3, 4] tensors, and are
// returned as (x, y).
std::tuple<std::vector<float>, std::vector<float>> CreateSameDimeData() {
  using FloatVec = std::vector<float>;
  // Left operand, shape: [2, 3, 4]
  FloatVec lhs{
      0.8428625,  0.6461913,  0.13740455, 0.11430702, 0.659926,  0.535816,
      0.7429162,  0.8456049,  0.21228176, 0.29970083, 0.8621713, 0.40894133,
      0.12684688, 0.1566195,  0.42884097, 0.8476526,  0.2458633, 0.669046,
      0.87888306, 0.6762589,  0.666453,   0.32523027, 0.4139388, 0.8341406};
  // Right operand, shape: [2, 3, 4]
  FloatVec rhs{
      0.8345295,  0.551608,   0.77101785, 0.386742,   0.12658621, 0.41240612,
      0.20051356, 0.68455917, 0.37947154, 0.2953741,  0.97703844, 0.2931625,
      0.2344262,  0.5054064,  0.40617892, 0.16315177, 0.71458364, 0.3748885,
      0.65257984, 0.83870554, 0.55464447, 0.38836837, 0.472637,   0.5546991};
  return std::tuple<FloatVec, FloatVec>(lhs, rhs);
}
// Produces the operand pair for the first broadcast scenario: a 24-value x
// (viewed as [2, 3, 4]) and a 2-value y (viewed as [2, 1, 1]), returned as
// (x, y).
std::tuple<std::vector<float>, std::vector<float>> CreateBroadcastDim1Data() {
  using FloatVec = std::vector<float>;
  // Full-size operand, shape: [2, 3, 4]
  FloatVec lhs{
      0.8428625,  0.6461913,  0.13740455, 0.11430702, 0.659926,  0.535816,
      0.7429162,  0.8456049,  0.21228176, 0.29970083, 0.8621713, 0.40894133,
      0.12684688, 0.1566195,  0.42884097, 0.8476526,  0.2458633, 0.669046,
      0.87888306, 0.6762589,  0.666453,   0.32523027, 0.4139388, 0.8341406};
  // Broadcast operand, shape: [2, 1, 1]
  FloatVec rhs{0.97375137, 0.11732706};
  return std::tuple<FloatVec, FloatVec>(lhs, rhs);
}
// Produces the operand pair for the second broadcast scenario: a 24-value x
// (viewed as [2, 3, 4]) and a 3-value y (viewed as [1, 3, 1]), returned as
// (x, y).
std::tuple<std::vector<float>, std::vector<float>> CreateBroadcastDim2Data() {
  using FloatVec = std::vector<float>;
  // Full-size operand, shape: [2, 3, 4]
  FloatVec lhs{
      0.8428625,  0.6461913,  0.13740455, 0.11430702, 0.659926,  0.535816,
      0.7429162,  0.8456049,  0.21228176, 0.29970083, 0.8621713, 0.40894133,
      0.12684688, 0.1566195,  0.42884097, 0.8476526,  0.2458633, 0.669046,
      0.87888306, 0.6762589,  0.666453,   0.32523027, 0.4139388, 0.8341406};
  // Broadcast operand, shape: [1, 3, 1]
  FloatVec rhs{0.30803263, 0.41172066, 0.5588573};
  return std::tuple<FloatVec, FloatVec>(lhs, rhs);
}
// Produces the operand pair for the third broadcast scenario: a 24-value x
// (viewed as [2, 3, 4]) and a 4-value y (viewed as [1, 1, 4]), returned as
// (x, y).
std::tuple<std::vector<float>, std::vector<float>> CreateBroadcastDim3Data() {
  using FloatVec = std::vector<float>;
  // Full-size operand, shape: [2, 3, 4]
  FloatVec lhs{
      0.8428625,  0.6461913,  0.13740455, 0.11430702, 0.659926,  0.535816,
      0.7429162,  0.8456049,  0.21228176, 0.29970083, 0.8621713, 0.40894133,
      0.12684688, 0.1566195,  0.42884097, 0.8476526,  0.2458633, 0.669046,
      0.87888306, 0.6762589,  0.666453,   0.32523027, 0.4139388, 0.8341406};
  // Broadcast operand, shape: [1, 1, 4]
  FloatVec rhs{0.62653106, 0.5128424, 0.9891219, 0.32416528};
  return std::tuple<FloatVec, FloatVec>(lhs, rhs);
}
// Produces the operand pair for the two-sided broadcast scenario: an 8-value
// x (viewed as [2, 1, 4]) and a 4-value y (viewed as [2, 2, 1]), returned as
// (x, y).
std::tuple<std::vector<float>, std::vector<float>> CreateBroadcastDim4Data() {
  using FloatVec = std::vector<float>;
  // First operand, shape: [2, 1, 4]
  FloatVec lhs{0.8428625, 0.6461913, 0.13740455, 0.11430702,
               0.659926,  0.535816,  0.7429162,  0.8456049};
  // Second operand, shape: [2, 2, 1]
  FloatVec rhs{0.62653106, 0.5128424, 0.9891219, 0.32416528};
  return std::tuple<FloatVec, FloatVec>(lhs, rhs);
}
// Verifies Add/Subtract/Multiply/Divide and the matching + - * / operators on
// two FP32 tensors that share the shape [2, 3, 4].
TEST(fastdeploy, check_same_dim) {
  CheckShape check_shape;
  CheckData check_data;
  FDTensor x, y, z;

  std::vector<float> x_data;
  std::vector<float> y_data;
  std::tie(x_data, y_data) = CreateSameDimeData();
  x.SetExternalData({2, 3, 4}, FDDataType::FP32, x_data.data());
  y.SetExternalData({2, 3, 4}, FDDataType::FP32, y_data.data());

  // Compares the float payload currently held by z with `expected`.
  auto expect_z = [&](std::vector<float>& expected) {
    check_data(reinterpret_cast<const float*>(z.Data()), expected.data(),
               expected.size());
  };

  // Addition: free function first, then operator+.
  std::vector<float> sum_expected{
      1.677392,   1.1977993,  0.9084224, 0.50104904, 0.7865122,  0.94822216,
      0.94342977, 1.530164,   0.5917533, 0.5950749,  1.8392098,  0.70210385,
      0.36127308, 0.66202587, 0.8350199, 1.0108044,  0.96044695, 1.0439345,
      1.5314629,  1.5149645,  1.2210975, 0.7135986,  0.8865758,  1.3888397};
  Add(x, y, &z);
  check_shape(z.shape, {2, 3, 4});
  expect_z(sum_expected);
  z = x + y;
  expect_z(sum_expected);

  // Subtraction: free function first, then operator-.
  std::vector<float> diff_expected{
      0.008332968, 0.09458327,  -0.6336133,   -0.27243498, 0.5333398,
      0.1234099,   0.5424027,   0.16104573,   -0.16718978, 0.004326731,
      -0.11486715, 0.11577883,  -0.10757932,  -0.3487869,  0.022662044,
      0.6845008,   -0.46872032, 0.29415748,   0.22630322,  -0.16244662,
      0.11180854,  -0.0631381,  -0.058698207, 0.27944148};
  Subtract(x, y, &z);
  check_shape(z.shape, {2, 3, 4});
  expect_z(diff_expected);
  z = x - y;
  expect_z(diff_expected);

  // Multiplication: free function first, then operator*.
  std::vector<float> prod_expected{
      0.70339364, 0.3564443,  0.105941355, 0.044207327, 0.083537534,
      0.2209738,  0.14896478, 0.5788666,   0.08055489,  0.08852386,
      0.8423745,  0.11988626, 0.029736232, 0.079156496, 0.17418616,
      0.13829602, 0.17568989, 0.25081766,  0.57354134,  0.5671821,
      0.36964446, 0.12630916, 0.19564278,  0.46269706};
  Multiply(x, y, &z);
  check_shape(z.shape, {2, 3, 4});
  expect_z(prod_expected);
  z = x * y;
  expect_z(prod_expected);

  // Division: free function first, then operator/.
  std::vector<float> quot_expected{
      1.0099852,  1.1714683,  0.17821188, 0.29556403, 5.2132535,  1.2992436,
      3.7050674,  1.2352546,  0.5594142,  1.0146483,  0.88243335, 1.3949306,
      0.54109514, 0.30988827, 1.0557933,  5.195485,   0.34406513, 1.7846532,
      1.3467824,  0.8063127,  1.201586,   0.8374273,  0.875807,   1.5037713};
  Divide(x, y, &z);
  check_shape(z.shape, {2, 3, 4});
  expect_z(quot_expected);
  z = x / y;
  expect_z(quot_expected);
}
// Verifies the four element-wise functions and their operator forms when a
// [2, 1, 1] tensor is broadcast against a [2, 3, 4] tensor (FP32).
TEST(fastdeploy, check_broadcast_dim1) {
  CheckShape check_shape;
  CheckData check_data;
  FDTensor x, y, z;

  std::vector<float> x_data;
  std::vector<float> y_data;
  std::tie(x_data, y_data) = CreateBroadcastDim1Data();
  x.SetExternalData({2, 3, 4}, FDDataType::FP32, x_data.data());
  y.SetExternalData({2, 1, 1}, FDDataType::FP32, y_data.data());

  // Compares the float payload currently held by z with `expected`.
  auto expect_z = [&](std::vector<float>& expected) {
    check_data(reinterpret_cast<const float*>(z.Data()), expected.data(),
               expected.size());
  };

  // Addition: free function first, then operator+.
  std::vector<float> sum_expected{
      1.816614, 1.619943, 1.111156, 1.088058, 1.633677, 1.509567,
      1.716668, 1.819356, 1.186033, 1.273452, 1.835923, 1.382693,
      0.244174, 0.273947, 0.546168, 0.96498,  0.36319,  0.786373,
      0.99621,  0.793586, 0.78378,  0.442557, 0.531266, 0.951468};
  Add(x, y, &z);
  check_shape(z.shape, {2, 3, 4});
  expect_z(sum_expected);
  z = x + y;
  expect_z(sum_expected);

  // Subtraction: free function first, then operator-.
  std::vector<float> diff_expected{
      -0.130889, -0.32756,  -0.836347, -0.859444, -0.313825, -0.437935,
      -0.230835, -0.128146, -0.76147,  -0.674051, -0.11158,  -0.56481,
      0.00952,   0.039292,  0.311514,  0.730326,  0.128536,  0.551719,
      0.761556,  0.558932,  0.549126,  0.207903,  0.296612,  0.716814};
  Subtract(x, y, &z);
  check_shape(z.shape, {2, 3, 4});
  expect_z(diff_expected);
  z = x - y;
  expect_z(diff_expected);

  // Multiplication: free function first, then operator*.
  std::vector<float> prod_expected{
      0.820738, 0.62923,  0.133798, 0.111307, 0.642604, 0.521752,
      0.723416, 0.823409, 0.20671,  0.291834, 0.83954,  0.398207,
      0.014883, 0.018376, 0.050315, 0.099453, 0.028846, 0.078497,
      0.103117, 0.079343, 0.078193, 0.038158, 0.048566, 0.097867};
  Multiply(x, y, &z);
  check_shape(z.shape, {2, 3, 4});
  expect_z(prod_expected);
  z = x * y;
  expect_z(prod_expected);

  // Division: free function first, then operator/.
  std::vector<float> quot_expected{
      0.865583, 0.66361,  0.141108, 0.117388, 0.677715, 0.55026,
      0.762942, 0.868399, 0.218004, 0.30778,  0.885412, 0.419965,
      1.081139, 1.334897, 3.65509,  7.224699, 2.095538, 5.702402,
      7.490881, 5.763879, 5.680301, 2.771997, 3.528076, 7.109533};
  Divide(x, y, &z);
  check_shape(z.shape, {2, 3, 4});
  expect_z(quot_expected);
  z = x / y;
  expect_z(quot_expected);
}
// Verifies the four element-wise functions and their operator forms when a
// [1, 3, 1] tensor is broadcast against a [2, 3, 4] tensor (FP32).
// Every operation is checked twice: once through the free function
// (Add/Subtract/Multiply/Divide) and once through the overloaded operator, so
// that operator* and operator/ are covered here as in the sibling tests.
TEST(fastdeploy, check_broadcast_dim2) {
  CheckShape check_shape;
  CheckData check_data;
  FDTensor x, y, z;
  auto test_data = CreateBroadcastDim2Data();
  auto x_data = std::get<0>(test_data);
  auto y_data = std::get<1>(test_data);
  x.SetExternalData({2, 3, 4}, FDDataType::FP32, x_data.data());
  y.SetExternalData({1, 3, 1}, FDDataType::FP32, y_data.data());

  // Test Add functions
  std::vector<float> add_result = {
      1.150895, 0.954224, 0.445437, 0.42234,  1.071647, 0.947537,
      1.154637, 1.257326, 0.771139, 0.858558, 1.421029, 0.967799,
      0.43488,  0.464652, 0.736874, 1.155685, 0.657584, 1.080767,
      1.290604, 1.08798,  1.22531,  0.884088, 0.972796, 1.392998};
  Add(x, y, &z);
  check_shape(z.shape, {2, 3, 4});
  check_data(reinterpret_cast<const float*>(z.Data()), add_result.data(),
             add_result.size());
  z = x + y;
  check_data(reinterpret_cast<const float*>(z.Data()), add_result.data(),
             add_result.size());

  // Test subtract
  std::vector<float> sub_result = {
      0.53483,   0.338159,  -0.170628, -0.193726, 0.248205,  0.124095,
      0.331196,  0.433884,  -0.346576, -0.259156, 0.303314,  -0.149916,
      -0.181186, -0.151413, 0.120808,  0.53962,   -0.165857, 0.257325,
      0.467162,  0.264538,  0.107596,  -0.233627, -0.144919, 0.275283};
  Subtract(x, y, &z);
  check_shape(z.shape, {2, 3, 4});
  check_data(reinterpret_cast<const float*>(z.Data()), sub_result.data(),
             sub_result.size());
  z = x - y;
  check_data(reinterpret_cast<const float*>(z.Data()), sub_result.data(),
             sub_result.size());

  // Test multiply
  std::vector<float> mul_result = {
      0.259629, 0.199048, 0.042325, 0.03521,  0.271705, 0.220607,
      0.305874, 0.348153, 0.118635, 0.16749,  0.481831, 0.22854,
      0.039073, 0.048244, 0.132097, 0.261105, 0.101227, 0.27546,
      0.361854, 0.27843,  0.372452, 0.181757, 0.231333, 0.466166};
  Multiply(x, y, &z);
  check_shape(z.shape, {2, 3, 4});
  check_data(reinterpret_cast<const float*>(z.Data()), mul_result.data(),
             mul_result.size());
  // operator* must agree with Multiply for this broadcast pattern.
  z = x * y;
  check_data(reinterpret_cast<const float*>(z.Data()), mul_result.data(),
             mul_result.size());

  // Test divide
  std::vector<float> div_result = {
      2.736277, 2.097801, 0.446071, 0.371087, 1.602849, 1.301407,
      1.804418, 2.053832, 0.37985,  0.536274, 1.54274,  0.731745,
      0.411797, 0.508451, 1.392193, 2.751827, 0.59716,  1.625,
      2.134659, 1.642519, 1.192528, 0.581956, 0.740688, 1.492582};
  Divide(x, y, &z);
  check_shape(z.shape, {2, 3, 4});
  check_data(reinterpret_cast<const float*>(z.Data()), div_result.data(),
             div_result.size());
  // operator/ must agree with Divide for this broadcast pattern.
  z = x / y;
  check_data(reinterpret_cast<const float*>(z.Data()), div_result.data(),
             div_result.size());
}
// Verifies the four element-wise functions and their operator forms when a
// rank-1 tensor of shape [4] is broadcast against a [2, 3, 4] tensor (FP32).
TEST(fastdeploy, check_broadcast_dim3) {
  CheckShape check_shape;
  CheckData check_data;
  FDTensor x, y, z;

  std::vector<float> x_data;
  std::vector<float> y_data;
  std::tie(x_data, y_data) = CreateBroadcastDim3Data();
  x.SetExternalData({2, 3, 4}, FDDataType::FP32, x_data.data());
  // y is deliberately given fewer dimensions than x here.
  y.SetExternalData({4}, FDDataType::FP32, y_data.data());

  // Compares the float payload currently held by z with `expected`.
  auto expect_z = [&](std::vector<float>& expected) {
    check_data(reinterpret_cast<const float*>(z.Data()), expected.data(),
               expected.size());
  };

  // Addition: free function first, then operator+.
  std::vector<float> sum_expected{
      1.469393, 1.159034, 1.126526, 0.438472, 1.286457, 1.048658,
      1.732038, 1.16977,  0.838813, 0.812543, 1.851293, 0.733107,
      0.753378, 0.669462, 1.417963, 1.171818, 0.872394, 1.181888,
      1.868005, 1.000424, 1.292984, 0.838073, 1.403061, 1.158306};
  Add(x, y, &z);
  check_shape(z.shape, {2, 3, 4});
  expect_z(sum_expected);
  z = x + y;
  expect_z(sum_expected);

  // Subtraction: free function first, then operator-.
  std::vector<float> diff_expected{
      0.216331,  0.133349,  -0.851717, -0.209858, 0.033395,  0.022974,
      -0.246206, 0.52144,   -0.414249, -0.213142, -0.126951, 0.084776,
      -0.499684, -0.356223, -0.560281, 0.523487,  -0.380668, 0.156204,
      -0.110239, 0.352094,  0.039922,  -0.187612, -0.575183, 0.509975};
  Subtract(x, y, &z);
  check_shape(z.shape, {2, 3, 4});
  expect_z(diff_expected);
  z = x - y;
  expect_z(diff_expected);

  // Multiplication: free function first, then operator*.
  std::vector<float> prod_expected{
      0.52808,  0.331394, 0.13591,  0.037054, 0.413464, 0.274789,
      0.734835, 0.274116, 0.133001, 0.153699, 0.852793, 0.132565,
      0.079474, 0.080321, 0.424176, 0.27478,  0.154041, 0.343115,
      0.869322, 0.21922,  0.417554, 0.166792, 0.409436, 0.270399};
  Multiply(x, y, &z);
  check_shape(z.shape, {2, 3, 4});
  expect_z(prod_expected);
  z = x * y;
  expect_z(prod_expected);

  // Division: free function first, then operator/.
  std::vector<float> quot_expected{
      1.345284, 1.260019, 0.138916, 0.35262,  1.053301, 1.044797,
      0.751087, 2.608561, 0.338821, 0.584392, 0.871653, 1.261521,
      0.202459, 0.305395, 0.433557, 2.614878, 0.39242,  1.304584,
      0.888549, 2.086155, 1.063719, 0.634172, 0.418491, 2.573195};
  Divide(x, y, &z);
  check_shape(z.shape, {2, 3, 4});
  expect_z(quot_expected);
  z = x / y;
  expect_z(quot_expected);
}
// Verifies the four element-wise functions and their operator forms when both
// operands are broadcast: x is [2, 1, 4], y is [2, 2, 1], and the result is
// expected to be [2, 2, 4] (FP32).
TEST(fastdeploy, check_broadcast_dim4) {
  CheckShape check_shape;
  CheckData check_data;
  FDTensor x, y, z;

  std::vector<float> x_data;
  std::vector<float> y_data;
  std::tie(x_data, y_data) = CreateBroadcastDim4Data();
  x.SetExternalData({2, 1, 4}, FDDataType::FP32, x_data.data());
  y.SetExternalData({2, 2, 1}, FDDataType::FP32, y_data.data());

  // Compares the float payload currently held by z with `expected`.
  auto expect_z = [&](std::vector<float>& expected) {
    check_data(reinterpret_cast<const float*>(z.Data()), expected.data(),
               expected.size());
  };

  // Addition: free function first, then operator+.
  std::vector<float> sum_expected{1.469393, 1.272722, 0.763936, 0.740838,
                                  1.355705, 1.159034, 0.650247, 0.627149,
                                  1.649048, 1.524938, 1.732038, 1.834727,
                                  0.984091, 0.859981, 1.067081, 1.16977};
  Add(x, y, &z);
  check_shape(z.shape, {2, 2, 4});
  expect_z(sum_expected);
  z = x + y;
  expect_z(sum_expected);

  // Subtraction: free function first, then operator-.
  std::vector<float> diff_expected{0.216331,  0.01966,   -0.489127, -0.512224,
                                   0.33002,   0.133349,  -0.375438, -0.398535,
                                   -0.329196, -0.453306, -0.246206, -0.143517,
                                   0.335761,  0.211651,  0.418751,  0.52144};
  Subtract(x, y, &z);
  check_shape(z.shape, {2, 2, 4});
  expect_z(diff_expected);
  z = x - y;
  expect_z(diff_expected);

  // Multiplication: free function first, then operator*.
  std::vector<float> prod_expected{0.52808,  0.404859, 0.086088, 0.071617,
                                   0.432256, 0.331394, 0.070467, 0.058621,
                                   0.652747, 0.529987, 0.734835, 0.836406,
                                   0.213925, 0.173693, 0.240828, 0.274116};
  Multiply(x, y, &z);
  check_shape(z.shape, {2, 2, 4});
  expect_z(prod_expected);
  z = x * y;
  expect_z(prod_expected);

  // Division: free function first, then operator/.
  std::vector<float> quot_expected{1.345284, 1.031379, 0.21931,  0.182444,
                                   1.643512, 1.260019, 0.267927, 0.222889,
                                   0.667184, 0.541709, 0.751087, 0.854905,
                                   2.03577,  1.65291,  2.291782, 2.608561};
  Divide(x, y, &z);
  check_shape(z.shape, {2, 2, 4});
  expect_z(quot_expected);
  z = x / y;
  expect_z(quot_expected);
}
// Verifies a chained operator expression, a * b + c / d - e, where a and b
// are [2, 3, 4] and c, d, e are broadcast from [2, 1, 1], [1, 3, 1] and
// [1, 1, 4] respectively (all FP32).
TEST(fastdeploy, mixed_operation) {
  CheckShape check_shape;
  CheckData check_data;
  FDTensor a, b, c, d, e, output;

  // a and b come from the same-shape fixture; c, d and e reuse the broadcast
  // operand (second tuple element) of the three broadcast fixtures.
  std::vector<float> a_data;
  std::vector<float> b_data;
  std::tie(a_data, b_data) = CreateSameDimeData();
  std::vector<float> c_data = std::get<1>(CreateBroadcastDim1Data());
  std::vector<float> d_data = std::get<1>(CreateBroadcastDim2Data());
  std::vector<float> e_data = std::get<1>(CreateBroadcastDim3Data());

  a.SetExternalData({2, 3, 4}, FDDataType::FP32, a_data.data());
  b.SetExternalData({2, 3, 4}, FDDataType::FP32, b_data.data());
  c.SetExternalData({2, 1, 1}, FDDataType::FP32, c_data.data());
  d.SetExternalData({1, 3, 1}, FDDataType::FP32, d_data.data());
  e.SetExternalData({1, 1, 4}, FDDataType::FP32, e_data.data());

  std::vector<float> expected{
      3.238058,  3.004797,  2.278015,  2.881238, 1.822084,  2.073209,
      1.524921,  2.619779,  1.196421,  1.318079, 1.59565,   1.538118,
      -0.215903, -0.052794, -0.434044, 0.195022, -0.165874, 0.022943,
      -0.130613, 0.527984,  -0.046946, -0.176592, -0.583538, 0.348473};

  output = a * b + c / d - e;
  check_shape(output.shape, {2, 3, 4});
  check_data(reinterpret_cast<const float*>(output.Data()), expected.data(),
             expected.size());
}
} // namespace function
} // namespace fastdeploy