mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-16 05:30:58 +08:00
[Sync][Internal] sync some internal paddle3d codes (#2108)
This commit is contained in:
@@ -462,7 +462,7 @@ endif()
|
||||
if(ENABLE_ENCRYPTION)
|
||||
add_definitions(-DENABLE_ENCRYPTION)
|
||||
list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_ENCRYPTION_SRCS})
|
||||
include(${PROJECT_SOURCE_DIR}/cmake/gflags.cmake)
|
||||
# include(${PROJECT_SOURCE_DIR}/cmake/gflags.cmake)
|
||||
include(${PROJECT_SOURCE_DIR}/cmake/openssl.cmake)
|
||||
list(APPEND DEPEND_LIBS ${OPENSSL_LIBRARIES})
|
||||
endif()
|
||||
|
@@ -21,6 +21,7 @@ add_executable(benchmark_structurev2_table ${PROJECT_SOURCE_DIR}/benchmark_struc
|
||||
add_executable(benchmark_structurev2_layout ${PROJECT_SOURCE_DIR}/benchmark_structurev2_layout.cc)
|
||||
add_executable(benchmark_ppshituv2_rec ${PROJECT_SOURCE_DIR}/benchmark_ppshituv2_rec.cc)
|
||||
add_executable(benchmark_ppshituv2_det ${PROJECT_SOURCE_DIR}/benchmark_ppshituv2_det.cc)
|
||||
add_executable(benchmark_pp3d_cadnn ${PROJECT_SOURCE_DIR}/benchmark_pp3d_cadnn.cc)
|
||||
add_executable(benchmark_pp3d_centerpoint ${PROJECT_SOURCE_DIR}/benchmark_pp3d_centerpoint.cc)
|
||||
|
||||
if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
|
||||
@@ -34,6 +35,7 @@ if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
|
||||
target_link_libraries(benchmark_structurev2_layout ${FASTDEPLOY_LIBS} gflags pthread)
|
||||
target_link_libraries(benchmark_ppshituv2_rec ${FASTDEPLOY_LIBS} gflags pthread)
|
||||
target_link_libraries(benchmark_ppshituv2_det ${FASTDEPLOY_LIBS} gflags pthread)
|
||||
target_link_libraries(benchmark_pp3d_cadnn ${FASTDEPLOY_LIBS} gflags pthread)
|
||||
target_link_libraries(benchmark_pp3d_centerpoint ${FASTDEPLOY_LIBS} gflags pthread)
|
||||
else()
|
||||
target_link_libraries(benchmark ${FASTDEPLOY_LIBS} gflags)
|
||||
@@ -46,6 +48,7 @@ else()
|
||||
target_link_libraries(benchmark_structurev2_layout ${FASTDEPLOY_LIBS} gflags)
|
||||
target_link_libraries(benchmark_ppshituv2_rec ${FASTDEPLOY_LIBS} gflags)
|
||||
target_link_libraries(benchmark_ppshituv2_det ${FASTDEPLOY_LIBS} gflags)
|
||||
target_link_libraries(benchmark_pp3d_cadnn ${FASTDEPLOY_LIBS} gflags)
|
||||
target_link_libraries(benchmark_pp3d_centerpoint ${FASTDEPLOY_LIBS} gflags)
|
||||
endif()
|
||||
# only for Android ADB test
|
||||
|
@@ -41,9 +41,17 @@ fi
|
||||
|
||||
# PP-ShiTuV2
|
||||
./benchmark_ppshituv2_rec --model PP-ShiTuv2-rec --image ppshituv2_wangzai.png --config_path $CONFIG_PATH
|
||||
./benchmark_ppshituv2_det --model PP-ShiTuv2-det --image ppdet_det_img.jpg --config_path $CONFIG_PATH
|
||||
|
||||
# PP-StructureV2
|
||||
./benchmark_structurev2_layout --model PP-Structurev2-layout --image structurev2_layout_val_0002.jpg --config_path $CONFIG_PATH
|
||||
./benchmark_structurev2_table --model PP-Structurev2-SLANet --image structurev2_table.jpg --table_char_dict_path table_structure_dict_ch.txt --config_path $CONFIG_PATH
|
||||
./benchmark --model PP-Structurev2-vi-layoutxlm --shapes "1,512:1,512,4:1,512:1,512" --trt_shapes "1,512:1,512:1,512:1,512,4:1,512,4:1,512,4:1,512:1,512:1,512:1,512:1,512:1,512" --names "x_0:x_1:x_2:x_3" --dtypes "INT64:INT64:INT64:INT64" --disable_mkldnn --custom_tensor_value 0.2 --config_path $CONFIG_PATH
|
||||
|
||||
# Paddle3D
|
||||
./benchmark --model PETRv1_v99 --shapes "1,6,3,320,800:1,6,4,4" --names "images:img2lidars" --dtypes "FP32:FP32" --disable_mkldnn --config_path $CONFIG_PATH
|
||||
./benchmark --model PETRv2_v99 --shapes "1,12,3,320,800:1,12,4,4:1,12" --names "images:img2lidars:timestamps" --dtypes "FP32:FP32:FP32" --disable_mkldnn --config_path $CONFIG_PATH
|
||||
./benchmark_pp3d_centerpoint --model CenterPoint-Pillars-02Voxel --image paddle3d_centerpoint_n008_LIDAR_TOP__1533151603547590.pcd.bin --config_path $CONFIG_PATH
|
||||
./benchmark_pp3d_cadnn --model CADNN_OCRNet-HRNetW18 --image paddle3d_cadnn_kitti_000780.png --config_path $CONFIG_PATH
|
||||
|
||||
set +x
|
||||
|
@@ -43,9 +43,17 @@ fi
|
||||
|
||||
# PP-ShiTuV2
|
||||
./benchmark_ppshituv2_rec --model PP-ShiTuv2-rec --image ppshituv2_wangzai.png --config_path $CONFIG_PATH
|
||||
./benchmark_ppshituv2_det --model PP-ShiTuv2-det --image ppdet_det_img.jpg --config_path $CONFIG_PATH
|
||||
|
||||
# PP-StructureV2
|
||||
./benchmark_structurev2_layout --model PP-Structurev2-layout --image structurev2_layout_val_0002.jpg --config_path $CONFIG_PATH
|
||||
./benchmark_structurev2_table --model PP-Structurev2-SLANet --image structurev2_table.jpg --table_char_dict_path table_structure_dict_ch.txt --config_path $CONFIG_PATH
|
||||
./benchmark --model PP-Structurev2-vi-layoutxlm --shapes "1,512:1,512,4:1,512:1,512" --trt_shapes "1,512:1,512:1,512:1,512,4:1,512,4:1,512,4:1,512:1,512:1,512:1,512:1,512:1,512" --names "x_0:x_1:x_2:x_3" --dtypes "INT64:INT64:INT64:INT64" --disable_mkldnn --custom_tensor_value 0.2 --collect_trt_shape_by_custom_tensor_value --collect_trt_shape_by_device --config_path $CONFIG_PATH
|
||||
|
||||
# Paddle3D
|
||||
./benchmark --model PETRv1_v99 --shapes "1,6,3,320,800:1,6,4,4" --trt_shapes "1,6,3,320,800:1,6,3,320,800:1,6,3,320,800:1,6,4,4:1,6,4,4:1,6,4,4" --names "images:img2lidars" --dtypes "FP32:FP32" --disable_mkldnn --config_path $CONFIG_PATH
|
||||
./benchmark --model PETRv2_v99 --shapes "1,12,3,320,800:1,12,4,4:1,12" --trt_shapes "1,12,3,320,800:1,12,3,320,800:1,12,3,320,800:1,12,4,4:1,12,4,4:1,12,4,4:1,12:1,12:1,12" --names "images:img2lidars:timestamps" --dtypes "FP32:FP32:FP32" --disable_mkldnn --config_path $CONFIG_PATH
|
||||
./benchmark_pp3d_centerpoint --model CenterPoint-Pillars-02Voxel --image paddle3d_centerpoint_n008_LIDAR_TOP__1533151603547590.pcd.bin --config_path $CONFIG_PATH
|
||||
./benchmark_pp3d_cadnn --model CADNN_OCRNet-HRNetW18 --image paddle3d_cadnn_kitti_000780.png --config_path $CONFIG_PATH
|
||||
|
||||
set +x
|
||||
|
81
benchmark/paddlex/benchmark_pp3d_cadnn.cc
Normal file
81
benchmark/paddlex/benchmark_pp3d_cadnn.cc
Normal file
@@ -0,0 +1,81 @@
|
||||
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "flags.h"
|
||||
#include "macros.h"
|
||||
#include "option.h"
|
||||
|
||||
namespace vision = fastdeploy::vision;
|
||||
namespace benchmark = fastdeploy::benchmark;
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
|
||||
// Initialization
|
||||
auto option = fastdeploy::RuntimeOption();
|
||||
if (!CreateRuntimeOption(&option, argc, argv, true)) {
|
||||
return -1;
|
||||
}
|
||||
auto im = cv::imread(FLAGS_image);
|
||||
std::unordered_map<std::string, std::string> config_info;
|
||||
benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
|
||||
&config_info);
|
||||
std::string model_name, params_name, config_name;
|
||||
auto model_format = fastdeploy::ModelFormat::PADDLE;
|
||||
if (!UpdateModelResourceName(&model_name, ¶ms_name, &config_name,
|
||||
&model_format, config_info, false)) {
|
||||
return -1;
|
||||
}
|
||||
auto model_file = FLAGS_model + sep + model_name;
|
||||
auto params_file = FLAGS_model + sep + params_name;
|
||||
std::vector<float> cam_data{7.183351e+02, 0.000000e+00, 6.003891e+02,
|
||||
4.450382e+01, 0.000000e+00, 7.183351e+02,
|
||||
1.815122e+02, -5.951107e-01, 0.000000e+00,
|
||||
0.000000e+00, 1.000000e+00, 2.616315e-03};
|
||||
std::vector<float> lidar_data = {
|
||||
0.0048523, -0.9999298, -0.01081266, -0.00711321,
|
||||
-0.00302069, 0.01079808, -0.99993706, -0.06176636,
|
||||
0.99998367, 0.00488465, -0.00296808, -0.26739058,
|
||||
0., 0., 0., 1.};
|
||||
if (config_info["backend"] == "paddle_trt") {
|
||||
option.paddle_infer_option.collect_trt_shape = true;
|
||||
option.paddle_infer_option.collect_trt_shape_by_device = true;
|
||||
option.paddle_infer_option.trt_min_subgraph_size = 12;
|
||||
option.paddle_infer_option.DisableTrtOps({"squeeze2"});
|
||||
option.trt_option.max_batch_size = 1;
|
||||
}
|
||||
if (config_info["backend"] == "paddle_trt" ||
|
||||
config_info["backend"] == "trt") {
|
||||
// use custom data to perform collect shapes.
|
||||
option.trt_option.SetShape("images", {1, 3, 375, 1242},
|
||||
{1, 3, 375, 1242}, {1, 3, 375, 1242});
|
||||
option.trt_option.SetShape("trans_lidar_to_cam", {1, 4, 4},
|
||||
{1, 4, 4}, {1, 4, 4});
|
||||
option.trt_option.SetShape("trans_cam_to_img", {1, 3, 4},
|
||||
{1, 3, 4}, {1, 3, 4});
|
||||
std::vector<float> image_data;
|
||||
image_data.assign(im.data, im.data + 1*3*375*1242);
|
||||
option.trt_option.SetInputData("trans_lidar_to_cam", lidar_data);
|
||||
option.trt_option.SetInputData("trans_cam_to_img", cam_data);
|
||||
option.trt_option.SetInputData("images", image_data);
|
||||
}
|
||||
auto model_cadnn = vision::perception::Caddn(
|
||||
model_file, params_file, "", option, model_format);
|
||||
vision::PerceptionResult res;
|
||||
// Run profiling
|
||||
BENCHMARK_MODEL(model_cadnn, model_cadnn.Predict(im, cam_data, lidar_data, &res))
|
||||
std::cout << res.Str() << std::endl;
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
@@ -93,7 +93,7 @@ int main(int argc, char* argv[]) {
|
||||
vision::PerceptionResult res;
|
||||
// Run profiling
|
||||
BENCHMARK_MODEL(model_centerpoint, model_centerpoint.Predict(point_dir, &res))
|
||||
// std::cout << res.Str() << std::endl;
|
||||
std::cout << res.Str() << std::endl;
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
|
@@ -41,9 +41,17 @@ fi
|
||||
|
||||
# PP-ShiTuV2
|
||||
./benchmark_ppshituv2_rec --model PP-ShiTuv2-rec --image ppshituv2_wangzai.png --config_path $CONFIG_PATH
|
||||
./benchmark_ppshituv2_det --model PP-ShiTuv2-det --image ppdet_det_img.jpg --config_path $CONFIG_PATH
|
||||
|
||||
# PP-StructureV2
|
||||
./benchmark_structurev2_layout --model PP-Structurev2-layout --image structurev2_layout_val_0002.jpg --config_path $CONFIG_PATH
|
||||
./benchmark_structurev2_table --model PP-Structurev2-SLANet --image structurev2_table.jpg --table_char_dict_path table_structure_dict_ch.txt --config_path $CONFIG_PATH
|
||||
./benchmark --model PP-Structurev2-vi-layoutxlm --shapes "1,512:1,512,4:1,512:1,512" --names "x_0:x_1:x_2:x_3" --dtypes "INT64:INT64:INT64:INT64" --disable_mkldnn --custom_tensor_value 0.2 --config_path $CONFIG_PATH
|
||||
|
||||
# Paddle3D
|
||||
./benchmark --model PETRv1_v99 --config_path $CONFIG_PATH --shapes "1,6,3,320,800:1,6,4,4" --names "images:img2lidars" --dtypes "FP32:FP32" --disable_mkldnn --warmup 5 --repeat 20
|
||||
./benchmark --model PETRv2_v99 --config_path $CONFIG_PATH --shapes "1,12,3,320,800:1,12,4,4:1,12" --names "images:img2lidars:timestamps" --dtypes "FP32:FP32:FP32" --disable_mkldnn --warmup 5 --repeat 20
|
||||
./benchmark_pp3d_centerpoint --model CenterPoint-Pillars-02Voxel --image paddle3d_centerpoint_n008_LIDAR_TOP__1533151603547590.pcd.bin --config_path $CONFIG_PATH
|
||||
./benchmark_pp3d_cadnn --model CADNN_OCRNet-HRNetW18 --image paddle3d_cadnn_kitti_000780.png --config_path $CONFIG_PATH
|
||||
|
||||
set +x
|
@@ -119,3 +119,4 @@ PD_BUILD_OP(centerpoint_postprocess)
|
||||
.SetInferDtypeFn(PD_INFER_DTYPE(fastdeploy::paddle_custom_ops::PostProcessInferDtype));
|
||||
|
||||
#endif // WITH_GPU
|
||||
|
||||
|
@@ -220,7 +220,7 @@ std::vector<paddle::Tensor> postprocess_gpu(
|
||||
|
||||
// nms
|
||||
// in NmsLauncher, rot = - theta - pi / 2
|
||||
const int col_blocks = DIVUP(num_bboxes_for_nms, THREADS_PER_BLOCK_NMS);
|
||||
int col_blocks = DIVUP(num_bboxes_for_nms, THREADS_PER_BLOCK_NMS);
|
||||
auto nms_mask = paddle::empty({num_bboxes_for_nms * col_blocks},
|
||||
paddle::DataType::INT64, paddle::GPUPlace());
|
||||
int64_t *nms_mask_data = nms_mask.data<int64_t>();
|
||||
|
94
fastdeploy/runtime/backends/paddle/ops/grid_sample_3d.cc
Normal file
94
fastdeploy/runtime/backends/paddle/ops/grid_sample_3d.cc
Normal file
@@ -0,0 +1,94 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#if defined(WITH_GPU)
|
||||
|
||||
#include "grid_sample_3d.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#if defined(PADDLEINFERENCE_API_COMPAT_2_4_x)
|
||||
#include "paddle/include/experimental/ext_all.h"
|
||||
#elif defined(PADDLEINFERENCE_API_COMPAT_2_5_x)
|
||||
#include "paddle/include/paddle/extension.h"
|
||||
#else
|
||||
#include "paddle/extension.h"
|
||||
#endif
|
||||
|
||||
namespace fastdeploy {
|
||||
namespace paddle_custom_ops {
|
||||
|
||||
std::vector<paddle::Tensor> GridSample3DCUDAForward(
|
||||
const paddle::Tensor& x, const paddle::Tensor& grid,
|
||||
const std::string& mode, const std::string& padding_mode,
|
||||
bool align_corners);
|
||||
|
||||
std::vector<paddle::Tensor> GridSample3DForward(const paddle::Tensor& x,
|
||||
const paddle::Tensor& grid,
|
||||
const std::string& mode,
|
||||
const std::string& padding_mode,
|
||||
bool align_corners) {
|
||||
return GridSample3DCUDAForward(x, grid, mode, padding_mode, align_corners);
|
||||
}
|
||||
|
||||
std::vector<paddle::Tensor> GridSample3DCUDABackward(
|
||||
const paddle::Tensor& x, const paddle::Tensor& grid,
|
||||
const paddle::Tensor& grad_out, const std::string& mode,
|
||||
const std::string& padding_mode, bool align_corners);
|
||||
|
||||
std::vector<paddle::Tensor> GridSample3DBackward(
|
||||
const paddle::Tensor& x, const paddle::Tensor& grid,
|
||||
const paddle::Tensor& grad_out, const std::string& mode,
|
||||
const std::string& padding_mode, bool align_corners) {
|
||||
return GridSample3DCUDABackward(x, grid, grad_out, mode, padding_mode,
|
||||
align_corners);
|
||||
}
|
||||
|
||||
std::vector<std::vector<int64_t>> GridSample3DInferShape(
|
||||
std::vector<int64_t> x_shape, std::vector<int64_t> grid_shape) {
|
||||
return {
|
||||
{x_shape[0], x_shape[1], grid_shape[1], grid_shape[2], grid_shape[3]}};
|
||||
}
|
||||
|
||||
std::vector<std::vector<int64_t>> GridSample3DInferBackShape(
|
||||
std::vector<int64_t> x_shape, std::vector<int64_t> grid_shape) {
|
||||
return {x_shape};
|
||||
}
|
||||
|
||||
std::vector<paddle::DataType> GridSample3DInferDtype(
|
||||
paddle::DataType x_dtype, paddle::DataType grid_dtype) {
|
||||
return {x_dtype};
|
||||
}
|
||||
|
||||
} // namespace fastdeploy
|
||||
} // namespace paddle_custom_ops
|
||||
|
||||
PD_BUILD_OP(grid_sample_3d)
|
||||
.Inputs({"x", "grid"})
|
||||
.Attrs({"mode: std::string", "padding_mode: std::string",
|
||||
"align_corners: bool"})
|
||||
.Outputs({"out"})
|
||||
.SetKernelFn(PD_KERNEL(fastdeploy::paddle_custom_ops::GridSample3DForward))
|
||||
.SetInferShapeFn(PD_INFER_SHAPE(fastdeploy::paddle_custom_ops::GridSample3DInferShape))
|
||||
.SetInferDtypeFn(PD_INFER_DTYPE(fastdeploy::paddle_custom_ops::GridSample3DInferDtype));
|
||||
|
||||
PD_BUILD_GRAD_OP(grid_sample_3d)
|
||||
.Inputs({"x", "grid", paddle::Grad("out")})
|
||||
.Attrs({"mode: std::string", "padding_mode: std::string",
|
||||
"align_corners: bool"})
|
||||
.Outputs({paddle::Grad("x")})
|
||||
.SetKernelFn(PD_KERNEL(fastdeploy::paddle_custom_ops::GridSample3DBackward))
|
||||
.SetInferShapeFn(PD_INFER_SHAPE(fastdeploy::paddle_custom_ops::GridSample3DInferBackShape));
|
||||
|
||||
#endif
|
657
fastdeploy/runtime/backends/paddle/ops/grid_sample_3d.cu
Normal file
657
fastdeploy/runtime/backends/paddle/ops/grid_sample_3d.cu
Normal file
@@ -0,0 +1,657 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <cuda.h>
|
||||
|
||||
#include "grid_sample_3d.h"
|
||||
|
||||
#if defined(PADDLEINFERENCE_API_COMPAT_2_4_x)
|
||||
#include "paddle/include/experimental/ext_all.h"
|
||||
#elif defined(PADDLEINFERENCE_API_COMPAT_2_5_x)
|
||||
#include "paddle/include/paddle/extension.h"
|
||||
#else
|
||||
#include "paddle/extension.h"
|
||||
#endif
|
||||
|
||||
namespace fastdeploy {
|
||||
namespace paddle_custom_ops {
|
||||
|
||||
#define CHECK_INPUT_GPU(x) PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.")
|
||||
|
||||
static __forceinline__ __device__ bool InBounds3D(int64_t d, int64_t h,
|
||||
int64_t w, int64_t D,
|
||||
int64_t H, int64_t W) {
|
||||
return d >= 0 && d < D && h >= 0 && h < H && w >= 0 && w < W;
|
||||
}
|
||||
|
||||
#define CUDA_KERNEL_LOOP_TYPE(i, n, index_type) \
|
||||
index_type _i_n_d_e_x = blockIdx.x * blockDim.x + threadIdx.x; \
|
||||
for (index_type i = _i_n_d_e_x; _i_n_d_e_x < (n); \
|
||||
_i_n_d_e_x += blockDim.x * gridDim.x, i = _i_n_d_e_x)
|
||||
|
||||
#define CUDA_KERNEL_LOOP(i, n) CUDA_KERNEL_LOOP_TYPE(i, n, int)
|
||||
|
||||
template <typename T>
|
||||
static __forceinline__ __device__ T Unnormalize(T coord, int size,
|
||||
bool align_corners) {
|
||||
if (align_corners) {
|
||||
return ((coord + 1.f) / 2) * (size - 1);
|
||||
} else {
|
||||
return ((coord + 1.f) * size - 1) / 2;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static __forceinline__ __device__ T ClipIndexes(T in, int max_value) {
|
||||
return min(static_cast<T>(max_value), max(in, static_cast<T>(0)));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static __forceinline__ __device__ T ReflectIndexes(T in, int twice_low,
|
||||
int twice_high) {
|
||||
if (twice_low == twice_high) {
|
||||
return static_cast<T>(0);
|
||||
}
|
||||
T min = static_cast<T>(twice_low) / 2;
|
||||
T span = static_cast<T>(twice_high - twice_low) / 2;
|
||||
in = fabs(in - min);
|
||||
T extra = fmod(in, span);
|
||||
int flips = static_cast<int>(floor(in / span));
|
||||
if (flips % 2 == 0) {
|
||||
return extra + min;
|
||||
} else {
|
||||
return span - extra + min;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static __forceinline__ __device__ T ComputePositions(T coord, int size,
|
||||
PaddingMode padding_mode,
|
||||
bool align_corners) {
|
||||
coord = Unnormalize<T>(coord, size, align_corners);
|
||||
if (padding_mode == PaddingMode::border) {
|
||||
coord = ClipIndexes(coord, size - 1);
|
||||
} else if (padding_mode == PaddingMode::reflect) {
|
||||
if (align_corners) {
|
||||
coord = ReflectIndexes(coord, 0, 2 * (size - 1));
|
||||
} else {
|
||||
coord = ReflectIndexes(coord, -1, 2 * size - 1);
|
||||
}
|
||||
coord = ClipIndexes(coord, size - 1);
|
||||
}
|
||||
return coord;
|
||||
}
|
||||
|
||||
template <typename T, typename index_t>
|
||||
__global__ void GridSample3DCudaKernel(
|
||||
const index_t nthreads, index_t out_c, index_t out_d, index_t out_h,
|
||||
index_t out_w, index_t in_d, index_t in_h, index_t in_w, const T* input,
|
||||
const T* grid, T* output, const Mode interpolation_mode,
|
||||
const PaddingMode padding_mode, bool align_corners) {
|
||||
// printf("size: %d, %d, %d, %d, %d, %d \n", out_c, out_d, out_w, out_h, in_d,
|
||||
// in_w);
|
||||
index_t inp_sW = 1;
|
||||
index_t inp_sH = in_w;
|
||||
index_t inp_sD = in_h * in_w;
|
||||
index_t inp_sC = in_d * inp_sD;
|
||||
index_t inp_sN = out_c * inp_sC;
|
||||
|
||||
index_t grid_sCoor = 1;
|
||||
index_t grid_sW = 3;
|
||||
index_t grid_sH = out_w * grid_sW;
|
||||
index_t grid_sD = out_h * grid_sH;
|
||||
index_t grid_sN = out_d * grid_sD;
|
||||
|
||||
index_t out_sW = 1;
|
||||
index_t out_sH = out_w;
|
||||
index_t out_sD = out_h * out_w;
|
||||
index_t out_sC = out_d * out_sD;
|
||||
index_t out_sN = out_c * out_sC;
|
||||
|
||||
CUDA_KERNEL_LOOP_TYPE(index, nthreads, index_t) {
|
||||
const index_t w = index % out_w;
|
||||
const index_t h = (index / out_w) % out_h;
|
||||
const index_t d = (index / (out_h * out_w)) % out_d;
|
||||
const index_t n = index / (out_d * out_h * out_w);
|
||||
const index_t grid_offset =
|
||||
n * grid_sN + d * grid_sD + h * grid_sH + w * grid_sW;
|
||||
// get the corresponding input x, y, z co-ordinates from grid
|
||||
T ix = grid[grid_offset];
|
||||
T iy = grid[grid_offset + grid_sCoor];
|
||||
T iz = grid[grid_offset + 2 * grid_sCoor];
|
||||
ix = ComputePositions(ix, in_w, padding_mode, align_corners);
|
||||
iy = ComputePositions(iy, in_h, padding_mode, align_corners);
|
||||
iz = ComputePositions(iz, in_d, padding_mode, align_corners);
|
||||
// printf("ix: %f, iy: %f, iz: %f \n", ix, iy, iz);
|
||||
if (interpolation_mode == Mode::bilinear) {
|
||||
// get corner pixel values from (x, y, z)
|
||||
// for 4d, we used north-east-south-west
|
||||
// for 5d, we add top-bottom
|
||||
index_t ix_tnw = static_cast<index_t>(std::floor(ix));
|
||||
index_t iy_tnw = static_cast<index_t>(std::floor(iy));
|
||||
index_t iz_tnw = static_cast<index_t>(std::floor(iz));
|
||||
|
||||
index_t ix_tne = ix_tnw + 1;
|
||||
index_t iy_tne = iy_tnw;
|
||||
index_t iz_tne = iz_tnw;
|
||||
|
||||
index_t ix_tsw = ix_tnw;
|
||||
index_t iy_tsw = iy_tnw + 1;
|
||||
index_t iz_tsw = iz_tnw;
|
||||
|
||||
index_t ix_tse = ix_tnw + 1;
|
||||
index_t iy_tse = iy_tnw + 1;
|
||||
index_t iz_tse = iz_tnw;
|
||||
|
||||
index_t ix_bnw = ix_tnw;
|
||||
index_t iy_bnw = iy_tnw;
|
||||
index_t iz_bnw = iz_tnw + 1;
|
||||
|
||||
index_t ix_bne = ix_tnw + 1;
|
||||
index_t iy_bne = iy_tnw;
|
||||
index_t iz_bne = iz_tnw + 1;
|
||||
|
||||
index_t ix_bsw = ix_tnw;
|
||||
index_t iy_bsw = iy_tnw + 1;
|
||||
index_t iz_bsw = iz_tnw + 1;
|
||||
|
||||
index_t ix_bse = ix_tnw + 1;
|
||||
index_t iy_bse = iy_tnw + 1;
|
||||
index_t iz_bse = iz_tnw + 1;
|
||||
|
||||
// get surfaces to each neighbor:
|
||||
T tnw = (ix_bse - ix) * (iy_bse - iy) * (iz_bse - iz);
|
||||
T tne = (ix - ix_bsw) * (iy_bsw - iy) * (iz_bsw - iz);
|
||||
T tsw = (ix_bne - ix) * (iy - iy_bne) * (iz_bne - iz);
|
||||
T tse = (ix - ix_bnw) * (iy - iy_bnw) * (iz_bnw - iz);
|
||||
T bnw = (ix_tse - ix) * (iy_tse - iy) * (iz - iz_tse);
|
||||
T bne = (ix - ix_tsw) * (iy_tsw - iy) * (iz - iz_tsw);
|
||||
T bsw = (ix_tne - ix) * (iy - iy_tne) * (iz - iz_tne);
|
||||
T bse = (ix - ix_tnw) * (iy - iy_tnw) * (iz - iz_tnw);
|
||||
|
||||
auto inp_ptr_NC = input + n * inp_sN;
|
||||
auto out_ptr_NCDHW =
|
||||
output + n * out_sN + d * out_sD + h * out_sH + w * out_sW;
|
||||
for (index_t c = 0; c < out_c;
|
||||
++c, inp_ptr_NC += inp_sC, out_ptr_NCDHW += out_sC) {
|
||||
*out_ptr_NCDHW = static_cast<T>(0);
|
||||
if (InBounds3D(iz_tnw, iy_tnw, ix_tnw, in_d, in_h, in_w)) {
|
||||
*out_ptr_NCDHW +=
|
||||
inp_ptr_NC[iz_tnw * inp_sD + iy_tnw * inp_sH + ix_tnw * inp_sW] *
|
||||
tnw;
|
||||
}
|
||||
if (InBounds3D(iz_tne, iy_tne, ix_tne, in_d, in_h, in_w)) {
|
||||
*out_ptr_NCDHW +=
|
||||
inp_ptr_NC[iz_tne * inp_sD + iy_tne * inp_sH + ix_tne * inp_sW] *
|
||||
tne;
|
||||
}
|
||||
if (InBounds3D(iz_tsw, iy_tsw, ix_tsw, in_d, in_h, in_w)) {
|
||||
*out_ptr_NCDHW +=
|
||||
inp_ptr_NC[iz_tsw * inp_sD + iy_tsw * inp_sH + ix_tsw * inp_sW] *
|
||||
tsw;
|
||||
}
|
||||
if (InBounds3D(iz_tse, iy_tse, ix_tse, in_d, in_h, in_w)) {
|
||||
*out_ptr_NCDHW +=
|
||||
inp_ptr_NC[iz_tse * inp_sD + iy_tse * inp_sH + ix_tse * inp_sW] *
|
||||
tse;
|
||||
}
|
||||
if (InBounds3D(iz_bnw, iy_bnw, ix_bnw, in_d, in_h, in_w)) {
|
||||
*out_ptr_NCDHW +=
|
||||
inp_ptr_NC[iz_bnw * inp_sD + iy_bnw * inp_sH + ix_bnw * inp_sW] *
|
||||
bnw;
|
||||
}
|
||||
if (InBounds3D(iz_bne, iy_bne, ix_bne, in_d, in_h, in_w)) {
|
||||
*out_ptr_NCDHW +=
|
||||
inp_ptr_NC[iz_bne * inp_sD + iy_bne * inp_sH + ix_bne * inp_sW] *
|
||||
bne;
|
||||
}
|
||||
if (InBounds3D(iz_bsw, iy_bsw, ix_bsw, in_d, in_h, in_w)) {
|
||||
*out_ptr_NCDHW +=
|
||||
inp_ptr_NC[iz_bsw * inp_sD + iy_bsw * inp_sH + ix_bsw * inp_sW] *
|
||||
bsw;
|
||||
}
|
||||
if (InBounds3D(iz_bse, iy_bse, ix_bse, in_d, in_h, in_w)) {
|
||||
*out_ptr_NCDHW +=
|
||||
inp_ptr_NC[iz_bse * inp_sD + iy_bse * inp_sH + ix_bse * inp_sW] *
|
||||
bse;
|
||||
}
|
||||
}
|
||||
} else if (interpolation_mode == Mode::nearest) {
|
||||
index_t ix_nearest = static_cast<index_t>(std::round(ix));
|
||||
index_t iy_nearest = static_cast<index_t>(std::round(iy));
|
||||
index_t iz_nearest = static_cast<index_t>(std::round(iz));
|
||||
|
||||
// assign nearest neighor pixel value to output pixel
|
||||
auto inp_ptr_NC = input + n * inp_sN;
|
||||
auto out_ptr_NCDHW =
|
||||
output + n * out_sN + d * out_sD + h * out_sH + w * out_sW;
|
||||
for (index_t c = 0; c < out_c;
|
||||
++c, inp_ptr_NC += inp_sC, out_ptr_NCDHW += out_sC) {
|
||||
if (InBounds3D(iz_nearest, iy_nearest, ix_nearest, in_d, in_h, in_w)) {
|
||||
*out_ptr_NCDHW =
|
||||
inp_ptr_NC[iz_nearest * inp_sD + iy_nearest * inp_sH +
|
||||
ix_nearest * inp_sW];
|
||||
} else {
|
||||
*out_ptr_NCDHW = static_cast<T>(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<paddle::Tensor> GridSample3DCUDAForward(
|
||||
const paddle::Tensor& x, const paddle::Tensor& grid,
|
||||
const std::string& mode, const std::string& padding_mode,
|
||||
bool align_corners) {
|
||||
CHECK_INPUT_GPU(x);
|
||||
CHECK_INPUT_GPU(grid);
|
||||
PaddingMode enum_padding_mode;
|
||||
Mode enum_mode;
|
||||
if (padding_mode == "border") {
|
||||
enum_padding_mode = PaddingMode::border;
|
||||
} else if (padding_mode == "reflection") {
|
||||
enum_padding_mode = PaddingMode::reflect;
|
||||
} else {
|
||||
enum_padding_mode = PaddingMode::zeros;
|
||||
}
|
||||
|
||||
if (mode == "nearest") {
|
||||
enum_mode = Mode::nearest;
|
||||
} else {
|
||||
enum_mode = Mode::bilinear;
|
||||
}
|
||||
const int n = grid.shape()[0];
|
||||
const int out_d = grid.shape()[1];
|
||||
const int out_h = grid.shape()[2];
|
||||
const int out_w = grid.shape()[3];
|
||||
const int c = x.shape()[1];
|
||||
const int in_d = x.shape()[2];
|
||||
const int in_h = x.shape()[3];
|
||||
const int in_w = x.shape()[4];
|
||||
|
||||
auto output = paddle::full({n, c, out_d, out_h, out_w}, 0,
|
||||
paddle::DataType::FLOAT32, paddle::GPUPlace());
|
||||
const int count = static_cast<int>(n * out_d * out_h * out_w);
|
||||
|
||||
int max_threads_per_block = 512;
|
||||
int block_num = (count - 1) / max_threads_per_block + 1;
|
||||
// printf("size: %d, %d, %d, %d, %d, %d \n", n, c, out_d, out_h, count,
|
||||
// block_num);
|
||||
GridSample3DCudaKernel<float, int>
|
||||
<<<block_num, max_threads_per_block, 0, x.stream()>>>(
|
||||
count, c, out_d, out_h, out_w, in_d, in_h, in_w, x.data<float>(),
|
||||
grid.data<float>(), output.data<float>(), enum_mode,
|
||||
enum_padding_mode, align_corners);
|
||||
|
||||
cudaError_t error_check;
|
||||
error_check = cudaGetLastError();
|
||||
if (error_check != cudaSuccess) {
|
||||
printf("%s\n", cudaGetErrorString(error_check));
|
||||
}
|
||||
// printf("size: %d, %d, %d, %d, %d, %d \n", n, c, out_d, out_h, count,
|
||||
// block_num);
|
||||
return {output};
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static __forceinline__ __device__ T UnnormalizeWithMask(T coord, int size,
|
||||
bool align_corners,
|
||||
T* grad_in) {
|
||||
if (align_corners) {
|
||||
*grad_in = static_cast<T>(size - 1) / 2;
|
||||
return ((coord + 1.f) / 2) * (size - 1);
|
||||
} else {
|
||||
*grad_in = static_cast<T>(size) / 2;
|
||||
return ((coord + 1.f) * size - 1) / 2;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static __forceinline__ __device__ T ClipIndexesWithMask(T in, int clip_limit,
|
||||
T* grad_in) {
|
||||
if (in <= static_cast<T>(0)) {
|
||||
*grad_in = static_cast<T>(0);
|
||||
return static_cast<T>(0);
|
||||
} else {
|
||||
T max = static_cast<T>(clip_limit - 1);
|
||||
if (in >= max) {
|
||||
*grad_in = static_cast<T>(0);
|
||||
return max;
|
||||
} else {
|
||||
*grad_in = static_cast<T>(1);
|
||||
return in;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static __forceinline__ __device__ T ReflectIndexesWithMask(T in, int twice_low,
|
||||
int twice_high,
|
||||
T* grad_in) {
|
||||
if (twice_low == twice_high) {
|
||||
*grad_in = static_cast<T>(0);
|
||||
return static_cast<T>(0);
|
||||
}
|
||||
int grad_in_mult_;
|
||||
T min = static_cast<T>(twice_low) / 2;
|
||||
T span = static_cast<T>(twice_high - twice_low) / 2;
|
||||
in = in - min;
|
||||
if (in < static_cast<T>(0)) {
|
||||
grad_in_mult_ = -1;
|
||||
in = -in;
|
||||
} else {
|
||||
grad_in_mult_ = 1;
|
||||
}
|
||||
T extra = fmod(in, span);
|
||||
int flips = static_cast<int>(floor(in / span));
|
||||
if (flips % 2 == 0) {
|
||||
*grad_in = static_cast<T>(grad_in_mult_);
|
||||
return extra + min;
|
||||
} else {
|
||||
*grad_in = static_cast<T>(-grad_in_mult_);
|
||||
return span - extra + min;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static __forceinline__ __device__ T
|
||||
ComputePositionsWithMask(T coord, int size, PaddingMode padding_mode,
|
||||
bool align_corners, T* grad_in) {
|
||||
T grad_clip, grad_refl;
|
||||
coord = UnnormalizeWithMask<T>(coord, size, align_corners, grad_in);
|
||||
if (padding_mode == PaddingMode::border) {
|
||||
coord = ClipIndexesWithMask(coord, size, &grad_clip);
|
||||
*grad_in = (*grad_in) * grad_clip;
|
||||
} else if (padding_mode == PaddingMode::reflect) {
|
||||
if (align_corners) {
|
||||
coord = ReflectIndexesWithMask(coord, 0, 2 * (size - 1), &grad_refl);
|
||||
} else {
|
||||
coord = ReflectIndexesWithMask(coord, -1, 2 * size - 1, &grad_refl);
|
||||
}
|
||||
coord = ClipIndexesWithMask(coord, size, &grad_clip);
|
||||
*grad_in = (*grad_in) * grad_refl * grad_clip;
|
||||
}
|
||||
|
||||
return coord;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static __forceinline__ __device__ void AtomicAdd3D(
|
||||
T* data, int64_t d, int64_t h, int64_t w, int64_t sD, int64_t sH,
|
||||
int64_t sW, int64_t D, int64_t H, int64_t W, T delta) {
|
||||
if (InBounds3D(d, h, w, D, H, W)) {
|
||||
atomicAdd(data + d * sD + h * sH + w * sW, delta);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, typename index_t>
|
||||
__global__ void GridSample3DCudaBackwardKernel(
|
||||
const index_t nthreads, const T* grad_output, const T* input, const T* grid,
|
||||
index_t out_c, index_t out_d, index_t out_h, index_t out_w, index_t in_d,
|
||||
index_t in_h, index_t in_w, T* grad_input, T* grad_grid, const Mode mode,
|
||||
const PaddingMode padding_mode, bool align_corners) {
|
||||
index_t inp_sW = 1;
|
||||
index_t inp_sH = in_w;
|
||||
index_t inp_sD = in_h * in_w;
|
||||
index_t inp_sC = in_d * inp_sD;
|
||||
index_t inp_sN = out_c * inp_sC;
|
||||
|
||||
index_t grid_sCoor = 1;
|
||||
index_t grid_sW = 3;
|
||||
index_t grid_sH = out_w * grid_sW;
|
||||
index_t grid_sD = out_h * grid_sH;
|
||||
index_t grid_sN = out_d * grid_sD;
|
||||
|
||||
index_t gOut_sW = 1;
|
||||
index_t gOut_sH = out_w;
|
||||
index_t gOut_sD = out_h * out_w;
|
||||
index_t gOut_sC = out_d * gOut_sD;
|
||||
index_t gOut_sN = out_c * gOut_sC;
|
||||
|
||||
CUDA_KERNEL_LOOP_TYPE(index, nthreads, index_t) {
|
||||
const index_t w = index % out_w;
|
||||
const index_t h = (index / out_w) % out_h;
|
||||
const index_t d = (index / (out_h * out_w)) % out_d;
|
||||
const index_t n = index / (out_d * out_h * out_w);
|
||||
const auto grid_offset =
|
||||
n * grid_sN + d * grid_sD + h * grid_sH + w * grid_sW;
|
||||
|
||||
// get the corresponding input x, y, z co-ordinates from grid
|
||||
T ix = grid[grid_offset];
|
||||
T iy = grid[grid_offset + grid_sCoor];
|
||||
T iz = grid[grid_offset + 2 * grid_sCoor];
|
||||
|
||||
// multipliers for gradients on ix, iy, and iz
|
||||
T gix_mult, giy_mult, giz_mult;
|
||||
ix = ComputePositionsWithMask(ix, in_w, padding_mode, align_corners,
|
||||
&gix_mult);
|
||||
iy = ComputePositionsWithMask(iy, in_h, padding_mode, align_corners,
|
||||
&giy_mult);
|
||||
iz = ComputePositionsWithMask(iz, in_d, padding_mode, align_corners,
|
||||
&giz_mult);
|
||||
|
||||
if (mode == Mode::bilinear) {
|
||||
// get corner pixel values from (x, y, z)
|
||||
// for 4d, we used north-east-south-west
|
||||
// for 5d, we add top-bottom
|
||||
index_t ix_tnw = static_cast<index_t>(std::floor(ix));
|
||||
index_t iy_tnw = static_cast<index_t>(std::floor(iy));
|
||||
index_t iz_tnw = static_cast<index_t>(std::floor(iz));
|
||||
|
||||
index_t ix_tne = ix_tnw + 1;
|
||||
index_t iy_tne = iy_tnw;
|
||||
index_t iz_tne = iz_tnw;
|
||||
|
||||
index_t ix_tsw = ix_tnw;
|
||||
index_t iy_tsw = iy_tnw + 1;
|
||||
index_t iz_tsw = iz_tnw;
|
||||
|
||||
index_t ix_tse = ix_tnw + 1;
|
||||
index_t iy_tse = iy_tnw + 1;
|
||||
index_t iz_tse = iz_tnw;
|
||||
|
||||
index_t ix_bnw = ix_tnw;
|
||||
index_t iy_bnw = iy_tnw;
|
||||
index_t iz_bnw = iz_tnw + 1;
|
||||
|
||||
index_t ix_bne = ix_tnw + 1;
|
||||
index_t iy_bne = iy_tnw;
|
||||
index_t iz_bne = iz_tnw + 1;
|
||||
|
||||
index_t ix_bsw = ix_tnw;
|
||||
index_t iy_bsw = iy_tnw + 1;
|
||||
index_t iz_bsw = iz_tnw + 1;
|
||||
|
||||
index_t ix_bse = ix_tnw + 1;
|
||||
index_t iy_bse = iy_tnw + 1;
|
||||
index_t iz_bse = iz_tnw + 1;
|
||||
|
||||
// get surfaces to each neighbor:
|
||||
T tnw = (ix_bse - ix) * (iy_bse - iy) * (iz_bse - iz);
|
||||
T tne = (ix - ix_bsw) * (iy_bsw - iy) * (iz_bsw - iz);
|
||||
T tsw = (ix_bne - ix) * (iy - iy_bne) * (iz_bne - iz);
|
||||
T tse = (ix - ix_bnw) * (iy - iy_bnw) * (iz_bnw - iz);
|
||||
T bnw = (ix_tse - ix) * (iy_tse - iy) * (iz - iz_tse);
|
||||
T bne = (ix - ix_tsw) * (iy_tsw - iy) * (iz - iz_tsw);
|
||||
T bsw = (ix_tne - ix) * (iy - iy_tne) * (iz - iz_tne);
|
||||
T bse = (ix - ix_tnw) * (iy - iy_tnw) * (iz - iz_tnw);
|
||||
|
||||
T gix = static_cast<T>(0), giy = static_cast<T>(0),
|
||||
giz = static_cast<T>(0);
|
||||
index_t gOut_offset =
|
||||
n * gOut_sN + d * gOut_sD + h * gOut_sH + w * gOut_sW;
|
||||
index_t inp_offset_NC = n * inp_sN;
|
||||
T* gInp_ptr_NC = grad_input + n * inp_sN;
|
||||
for (index_t c = 0; c < out_c; ++c, gOut_offset += gOut_sC,
|
||||
gInp_ptr_NC += inp_sC, inp_offset_NC += inp_sC) {
|
||||
T gOut = grad_output[gOut_offset];
|
||||
|
||||
AtomicAdd3D(gInp_ptr_NC, iz_tnw, iy_tnw, ix_tnw, inp_sD, inp_sH, inp_sW,
|
||||
in_d, in_h, in_w, tnw * gOut);
|
||||
AtomicAdd3D(gInp_ptr_NC, iz_tne, iy_tne, ix_tne, inp_sD, inp_sH, inp_sW,
|
||||
in_d, in_h, in_w, tne * gOut);
|
||||
AtomicAdd3D(gInp_ptr_NC, iz_tsw, iy_tsw, ix_tsw, inp_sD, inp_sH, inp_sW,
|
||||
in_d, in_h, in_w, tsw * gOut);
|
||||
AtomicAdd3D(gInp_ptr_NC, iz_tse, iy_tse, ix_tse, inp_sD, inp_sH, inp_sW,
|
||||
in_d, in_h, in_w, tse * gOut);
|
||||
AtomicAdd3D(gInp_ptr_NC, iz_bnw, iy_bnw, ix_bnw, inp_sD, inp_sH, inp_sW,
|
||||
in_d, in_h, in_w, bnw * gOut);
|
||||
AtomicAdd3D(gInp_ptr_NC, iz_bne, iy_bne, ix_bne, inp_sD, inp_sH, inp_sW,
|
||||
in_d, in_h, in_w, bne * gOut);
|
||||
AtomicAdd3D(gInp_ptr_NC, iz_bsw, iy_bsw, ix_bsw, inp_sD, inp_sH, inp_sW,
|
||||
in_d, in_h, in_w, bsw * gOut);
|
||||
AtomicAdd3D(gInp_ptr_NC, iz_bse, iy_bse, ix_bse, inp_sD, inp_sH, inp_sW,
|
||||
in_d, in_h, in_w, bse * gOut);
|
||||
|
||||
// calculate grad_grid
|
||||
if (InBounds3D(iz_tnw, iy_tnw, ix_tnw, in_d, in_h, in_w)) {
|
||||
T tnw_val = input[inp_offset_NC + iz_tnw * inp_sD + iy_tnw * inp_sH +
|
||||
ix_tnw * inp_sW];
|
||||
gix -= tnw_val * (iy_bse - iy) * (iz_bse - iz) * gOut;
|
||||
giy -= tnw_val * (ix_bse - ix) * (iz_bse - iz) * gOut;
|
||||
giz -= tnw_val * (ix_bse - ix) * (iy_bse - iy) * gOut;
|
||||
}
|
||||
if (InBounds3D(iz_tne, iy_tne, ix_tne, in_d, in_h, in_w)) {
|
||||
T tne_val = input[inp_offset_NC + iz_tne * inp_sD + iy_tne * inp_sH +
|
||||
ix_tne * inp_sW];
|
||||
gix += tne_val * (iy_bsw - iy) * (iz_bsw - iz) * gOut;
|
||||
giy -= tne_val * (ix - ix_bsw) * (iz_bsw - iz) * gOut;
|
||||
giz -= tne_val * (ix - ix_bsw) * (iy_bsw - iy) * gOut;
|
||||
}
|
||||
if (InBounds3D(iz_tsw, iy_tsw, ix_tsw, in_d, in_h, in_w)) {
|
||||
T tsw_val = input[inp_offset_NC + iz_tsw * inp_sD + iy_tsw * inp_sH +
|
||||
ix_tsw * inp_sW];
|
||||
gix -= tsw_val * (iy - iy_bne) * (iz_bne - iz) * gOut;
|
||||
giy += tsw_val * (ix_bne - ix) * (iz_bne - iz) * gOut;
|
||||
giz -= tsw_val * (ix_bne - ix) * (iy - iy_bne) * gOut;
|
||||
}
|
||||
if (InBounds3D(iz_tse, iy_tse, ix_tse, in_d, in_h, in_w)) {
|
||||
T tse_val = input[inp_offset_NC + iz_tse * inp_sD + iy_tse * inp_sH +
|
||||
ix_tse * inp_sW];
|
||||
gix += tse_val * (iy - iy_bnw) * (iz_bnw - iz) * gOut;
|
||||
giy += tse_val * (ix - ix_bnw) * (iz_bnw - iz) * gOut;
|
||||
giz -= tse_val * (ix - ix_bnw) * (iy - iy_bnw) * gOut;
|
||||
}
|
||||
if (InBounds3D(iz_bnw, iy_bnw, ix_bnw, in_d, in_h, in_w)) {
|
||||
T bnw_val = input[inp_offset_NC + iz_bnw * inp_sD + iy_bnw * inp_sH +
|
||||
ix_bnw * inp_sW];
|
||||
gix -= bnw_val * (iy_tse - iy) * (iz - iz_tse) * gOut;
|
||||
giy -= bnw_val * (ix_tse - ix) * (iz - iz_tse) * gOut;
|
||||
giz += bnw_val * (ix_tse - ix) * (iy_tse - iy) * gOut;
|
||||
}
|
||||
if (InBounds3D(iz_bne, iy_bne, ix_bne, in_d, in_h, in_w)) {
|
||||
T bne_val = input[inp_offset_NC + iz_bne * inp_sD + iy_bne * inp_sH +
|
||||
ix_bne * inp_sW];
|
||||
gix += bne_val * (iy_tsw - iy) * (iz - iz_tsw) * gOut;
|
||||
giy -= bne_val * (ix - ix_tsw) * (iz - iz_tsw) * gOut;
|
||||
giz += bne_val * (ix - ix_tsw) * (iy_tsw - iy) * gOut;
|
||||
}
|
||||
if (InBounds3D(iz_bsw, iy_bsw, ix_bsw, in_d, in_h, in_w)) {
|
||||
T bsw_val = input[inp_offset_NC + iz_bsw * inp_sD + iy_bsw * inp_sH +
|
||||
ix_bsw * inp_sW];
|
||||
gix -= bsw_val * (iy - iy_tne) * (iz - iz_tne) * gOut;
|
||||
giy += bsw_val * (ix_tne - ix) * (iz - iz_tne) * gOut;
|
||||
giz += bsw_val * (ix_tne - ix) * (iy - iy_tne) * gOut;
|
||||
}
|
||||
if (InBounds3D(iz_bse, iy_bse, ix_bse, in_d, in_h, in_w)) {
|
||||
T bse_val = input[inp_offset_NC + iz_bse * inp_sD + iy_bse * inp_sH +
|
||||
ix_bse * inp_sW];
|
||||
gix += bse_val * (iy - iy_tnw) * (iz - iz_tnw) * gOut;
|
||||
giy += bse_val * (ix - ix_tnw) * (iz - iz_tnw) * gOut;
|
||||
giz += bse_val * (ix - ix_tnw) * (iy - iy_tnw) * gOut;
|
||||
}
|
||||
}
|
||||
if (grad_grid != nullptr) {
|
||||
T* gGrid_ptr_NDHW = grad_grid + index * grid_sW;
|
||||
gGrid_ptr_NDHW[0] = gix_mult * gix;
|
||||
gGrid_ptr_NDHW[1] = giy_mult * giy;
|
||||
gGrid_ptr_NDHW[2] = giz_mult * giz;
|
||||
}
|
||||
} else if (mode == Mode::nearest) {
|
||||
auto ix_nearest = static_cast<index_t>(std::round(ix));
|
||||
auto iy_nearest = static_cast<index_t>(std::round(iy));
|
||||
auto iz_nearest = static_cast<index_t>(std::round(iz));
|
||||
|
||||
// assign nearest neighor pixel value to output pixel
|
||||
index_t gOut_offset =
|
||||
n * gOut_sN + d * gOut_sD + h * gOut_sH + w * gOut_sW;
|
||||
T* gInp_ptr_NC = grad_input + n * inp_sN;
|
||||
for (index_t c = 0; c < out_c;
|
||||
++c, gOut_offset += gOut_sC, gInp_ptr_NC += inp_sC) {
|
||||
AtomicAdd3D(gInp_ptr_NC, iz_nearest, iy_nearest, ix_nearest, inp_sD,
|
||||
inp_sH, inp_sW, in_d, in_h, in_w, grad_output[gOut_offset]);
|
||||
}
|
||||
if (grad_grid != nullptr) {
|
||||
T* gGrid_ptr_NDHW = grad_grid + index * grid_sW;
|
||||
gGrid_ptr_NDHW[0] = static_cast<T>(0);
|
||||
gGrid_ptr_NDHW[1] = static_cast<T>(0);
|
||||
gGrid_ptr_NDHW[2] = static_cast<T>(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<paddle::Tensor> GridSample3DCUDABackward(
|
||||
const paddle::Tensor& x, const paddle::Tensor& grid,
|
||||
const paddle::Tensor& grad_out, const std::string& mode,
|
||||
const std::string& padding_mode, bool align_corners) {
|
||||
PaddingMode enum_padding_mode;
|
||||
Mode enum_mode;
|
||||
if (padding_mode == "border") {
|
||||
enum_padding_mode = PaddingMode::border;
|
||||
} else if (padding_mode == "reflection") {
|
||||
enum_padding_mode = PaddingMode::reflect;
|
||||
} else {
|
||||
enum_padding_mode = PaddingMode::zeros;
|
||||
}
|
||||
|
||||
if (mode == "nearest") {
|
||||
enum_mode = Mode::nearest;
|
||||
} else {
|
||||
enum_mode = Mode::bilinear;
|
||||
}
|
||||
|
||||
const int out_d = grid.shape()[1];
|
||||
const int out_h = grid.shape()[2];
|
||||
const int out_w = grid.shape()[3];
|
||||
const int n = x.shape()[0];
|
||||
const int c = x.shape()[1];
|
||||
const int in_d = x.shape()[2];
|
||||
const int in_h = x.shape()[3];
|
||||
const int in_w = x.shape()[4];
|
||||
|
||||
auto grid_grad_output =
|
||||
paddle::empty({n, out_d, out_h, out_w, 3}, paddle::DataType::FLOAT32,
|
||||
paddle::GPUPlace());
|
||||
auto x_grad_output =
|
||||
paddle::full({n, c, in_d, in_h, in_w}, 0, paddle::DataType::FLOAT32,
|
||||
paddle::GPUPlace());
|
||||
|
||||
const int count = static_cast<int>(n * out_d * out_h * out_w);
|
||||
|
||||
int max_threads_per_block = 512;
|
||||
int block_num = (count - 1) / max_threads_per_block + 1;
|
||||
|
||||
GridSample3DCudaBackwardKernel<float, int>
|
||||
<<<block_num, max_threads_per_block, 0, x.stream()>>>(
|
||||
count, grad_out.data<float>(), x.data<float>(), grid.data<float>(), c,
|
||||
out_d, out_h, out_w, in_d, in_h, in_w, x_grad_output.data<float>(),
|
||||
grid_grad_output.data<float>(), enum_mode, enum_padding_mode,
|
||||
align_corners);
|
||||
|
||||
return {x_grad_output};
|
||||
}
|
||||
|
||||
} // namespace fastdeploy
|
||||
} // namespace paddle_custom_ops
|
33
fastdeploy/runtime/backends/paddle/ops/grid_sample_3d.h
Normal file
33
fastdeploy/runtime/backends/paddle/ops/grid_sample_3d.h
Normal file
@@ -0,0 +1,33 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
|
||||
namespace fastdeploy {
|
||||
namespace paddle_custom_ops {
|
||||
|
||||
#define HOST_DEVICE __host__ __device__
|
||||
#define HOST_DEVICE_INLINE HOST_DEVICE __forceinline__
|
||||
|
||||
enum class Mode { bilinear, nearest };
|
||||
|
||||
enum class PaddingMode { zeros, border, reflect };
|
||||
|
||||
} // namespace fastdeploy
|
||||
} // namespace paddle_custom_ops
|
272
fastdeploy/runtime/backends/paddle/ops/iou3d_cpu.cc
Normal file
272
fastdeploy/runtime/backends/paddle/ops/iou3d_cpu.cc
Normal file
@@ -0,0 +1,272 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
/*
|
||||
3D Rotated IoU Calculation (CPU)
|
||||
Written by Shaoshuai Shi
|
||||
All Rights Reserved 2020.
|
||||
*/
|
||||
|
||||
#include "iou3d_cpu.h"
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
namespace fastdeploy {
|
||||
namespace paddle_custom_ops {
|
||||
|
||||
static inline float min(float a, float b) { return a > b ? b : a; }
|
||||
|
||||
static inline float max(float a, float b) { return a > b ? a : b; }
|
||||
|
||||
#if defined(_WIN32)
|
||||
#if defined(EPS)
|
||||
#undef EPS
|
||||
#endif
|
||||
#define EPS 1e-8
|
||||
#else
|
||||
static const float EPS = 1e-8;
|
||||
#endif
|
||||
|
||||
struct Point {
|
||||
float x, y;
|
||||
Point() {}
|
||||
Point(double _x, double _y) { x = _x, y = _y; }
|
||||
|
||||
void set(float _x, float _y) {
|
||||
x = _x;
|
||||
y = _y;
|
||||
}
|
||||
|
||||
Point operator+(const Point &b) const {
|
||||
return Point(x + b.x, y + b.y);
|
||||
}
|
||||
|
||||
Point operator-(const Point &b) const {
|
||||
return Point(x - b.x, y - b.y);
|
||||
}
|
||||
};
|
||||
|
||||
static inline float cross(const Point &a, const Point &b) {
|
||||
return a.x * b.y - a.y * b.x;
|
||||
}
|
||||
|
||||
static inline float cross(const Point &p1, const Point &p2, const Point &p0) {
|
||||
return (p1.x - p0.x) * (p2.y - p0.y) - (p2.x - p0.x) * (p1.y - p0.y);
|
||||
}
|
||||
|
||||
static inline int check_rect_cross(const Point &p1, const Point &p2, const Point &q1,
|
||||
const Point &q2) {
|
||||
int ret = min(p1.x, p2.x) <= max(q1.x, q2.x) &&
|
||||
min(q1.x, q2.x) <= max(p1.x, p2.x) &&
|
||||
min(p1.y, p2.y) <= max(q1.y, q2.y) &&
|
||||
min(q1.y, q2.y) <= max(p1.y, p2.y);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int check_in_box2d(const float *box, const Point &p) {
|
||||
// params: (7) [x, y, z, dx, dy, dz, heading]
|
||||
const float MARGIN = 1e-2;
|
||||
|
||||
float center_x = box[0], center_y = box[1];
|
||||
float angle_cos = cos(-box[6]),
|
||||
angle_sin =
|
||||
sin(-box[6]); // rotate the point in the opposite direction of box
|
||||
float rot_x = (p.x - center_x) * angle_cos + (p.y - center_y) * (-angle_sin);
|
||||
float rot_y = (p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos;
|
||||
|
||||
return (fabs(rot_x) < box[3] / 2 + MARGIN &&
|
||||
fabs(rot_y) < box[4] / 2 + MARGIN);
|
||||
}
|
||||
|
||||
static inline int intersection(const Point &p1, const Point &p0, const Point &q1,
|
||||
const Point &q0, Point &ans) {
|
||||
// fast exclusion
|
||||
if (check_rect_cross(p0, p1, q0, q1) == 0) return 0;
|
||||
|
||||
// check cross standing
|
||||
float s1 = cross(q0, p1, p0);
|
||||
float s2 = cross(p1, q1, p0);
|
||||
float s3 = cross(p0, q1, q0);
|
||||
float s4 = cross(q1, p1, q0);
|
||||
|
||||
if (!(s1 * s2 > 0 && s3 * s4 > 0)) return 0;
|
||||
|
||||
// calculate intersection of two lines
|
||||
float s5 = cross(q1, p1, p0);
|
||||
if (fabs(s5 - s1) > EPS) {
|
||||
ans.x = (s5 * q0.x - s1 * q1.x) / (s5 - s1);
|
||||
ans.y = (s5 * q0.y - s1 * q1.y) / (s5 - s1);
|
||||
|
||||
} else {
|
||||
float a0 = p0.y - p1.y, b0 = p1.x - p0.x, c0 = p0.x * p1.y - p1.x * p0.y;
|
||||
float a1 = q0.y - q1.y, b1 = q1.x - q0.x, c1 = q0.x * q1.y - q1.x * q0.y;
|
||||
float D = a0 * b1 - a1 * b0;
|
||||
|
||||
ans.x = (b0 * c1 - b1 * c0) / D;
|
||||
ans.y = (a1 * c0 - a0 * c1) / D;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline void rotate_around_center(const Point ¢er, const float angle_cos,
|
||||
const float angle_sin, Point &p) {
|
||||
float new_x =
|
||||
(p.x - center.x) * angle_cos + (p.y - center.y) * (-angle_sin) + center.x;
|
||||
float new_y =
|
||||
(p.x - center.x) * angle_sin + (p.y - center.y) * angle_cos + center.y;
|
||||
p.set(new_x, new_y);
|
||||
}
|
||||
|
||||
static inline int point_cmp(const Point &a, const Point &b, const Point ¢er) {
|
||||
return atan2(a.y - center.y, a.x - center.x) >
|
||||
atan2(b.y - center.y, b.x - center.x);
|
||||
}
|
||||
|
||||
static inline float box_overlap(const float *box_a, const float *box_b) {
|
||||
// params: box_a (7) [x, y, z, dx, dy, dz, heading]
|
||||
// params: box_b (7) [x, y, z, dx, dy, dz, heading]
|
||||
|
||||
// float a_x1 = box_a[0], a_y1 = box_a[1], a_x2 = box_a[2], a_y2 =
|
||||
// box_a[3], a_angle = box_a[4];
|
||||
// float b_x1 = box_b[0], b_y1 = box_b[1], b_x2 = box_b[2], b_y2 =
|
||||
// box_b[3], b_angle = box_b[4];
|
||||
float a_angle = box_a[6], b_angle = box_b[6];
|
||||
float a_dx_half = box_a[3] / 2, b_dx_half = box_b[3] / 2,
|
||||
a_dy_half = box_a[4] / 2, b_dy_half = box_b[4] / 2;
|
||||
float a_x1 = box_a[0] - a_dx_half, a_y1 = box_a[1] - a_dy_half;
|
||||
float a_x2 = box_a[0] + a_dx_half, a_y2 = box_a[1] + a_dy_half;
|
||||
float b_x1 = box_b[0] - b_dx_half, b_y1 = box_b[1] - b_dy_half;
|
||||
float b_x2 = box_b[0] + b_dx_half, b_y2 = box_b[1] + b_dy_half;
|
||||
|
||||
Point center_a(box_a[0], box_a[1]);
|
||||
Point center_b(box_b[0], box_b[1]);
|
||||
|
||||
Point box_a_corners[5];
|
||||
box_a_corners[0].set(a_x1, a_y1);
|
||||
box_a_corners[1].set(a_x2, a_y1);
|
||||
box_a_corners[2].set(a_x2, a_y2);
|
||||
box_a_corners[3].set(a_x1, a_y2);
|
||||
|
||||
Point box_b_corners[5];
|
||||
box_b_corners[0].set(b_x1, b_y1);
|
||||
box_b_corners[1].set(b_x2, b_y1);
|
||||
box_b_corners[2].set(b_x2, b_y2);
|
||||
box_b_corners[3].set(b_x1, b_y2);
|
||||
|
||||
// get oriented corners
|
||||
float a_angle_cos = cos(a_angle), a_angle_sin = sin(a_angle);
|
||||
float b_angle_cos = cos(b_angle), b_angle_sin = sin(b_angle);
|
||||
|
||||
for (int k = 0; k < 4; k++) {
|
||||
rotate_around_center(center_a, a_angle_cos, a_angle_sin, box_a_corners[k]);
|
||||
rotate_around_center(center_b, b_angle_cos, b_angle_sin, box_b_corners[k]);
|
||||
}
|
||||
|
||||
box_a_corners[4] = box_a_corners[0];
|
||||
box_b_corners[4] = box_b_corners[0];
|
||||
|
||||
// get intersection of lines
|
||||
Point cross_points[16];
|
||||
Point poly_center;
|
||||
int cnt = 0, flag = 0;
|
||||
|
||||
poly_center.set(0, 0);
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int j = 0; j < 4; j++) {
|
||||
flag = intersection(box_a_corners[i + 1], box_a_corners[i],
|
||||
box_b_corners[j + 1], box_b_corners[j],
|
||||
cross_points[cnt]);
|
||||
if (flag) {
|
||||
poly_center = poly_center + cross_points[cnt];
|
||||
cnt++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check corners
|
||||
for (int k = 0; k < 4; k++) {
|
||||
if (check_in_box2d(box_a, box_b_corners[k])) {
|
||||
poly_center = poly_center + box_b_corners[k];
|
||||
cross_points[cnt] = box_b_corners[k];
|
||||
cnt++;
|
||||
}
|
||||
if (check_in_box2d(box_b, box_a_corners[k])) {
|
||||
poly_center = poly_center + box_a_corners[k];
|
||||
cross_points[cnt] = box_a_corners[k];
|
||||
cnt++;
|
||||
}
|
||||
}
|
||||
|
||||
poly_center.x /= cnt;
|
||||
poly_center.y /= cnt;
|
||||
|
||||
// sort the points of polygon
|
||||
Point temp;
|
||||
for (int j = 0; j < cnt - 1; j++) {
|
||||
for (int i = 0; i < cnt - j - 1; i++) {
|
||||
if (point_cmp(cross_points[i], cross_points[i + 1], poly_center)) {
|
||||
temp = cross_points[i];
|
||||
cross_points[i] = cross_points[i + 1];
|
||||
cross_points[i + 1] = temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// get the overlap areas
|
||||
float area = 0;
|
||||
for (int k = 0; k < cnt - 1; k++) {
|
||||
area += cross(cross_points[k] - cross_points[0],
|
||||
cross_points[k + 1] - cross_points[0]);
|
||||
}
|
||||
|
||||
return fabs(area) / 2.0;
|
||||
}
|
||||
|
||||
static inline float iou_bev(const float *box_a, const float *box_b) {
|
||||
// params: box_a (7) [x, y, z, dx, dy, dz, heading]
|
||||
// params: box_b (7) [x, y, z, dx, dy, dz, heading]
|
||||
float sa = box_a[3] * box_a[4];
|
||||
float sb = box_b[3] * box_b[4];
|
||||
float s_overlap = box_overlap(box_a, box_b);
|
||||
return s_overlap / fmaxf(sa + sb - s_overlap, EPS);
|
||||
}
|
||||
|
||||
int boxes_iou_bev_cpu(paddle::Tensor boxes_a_tensor,
|
||||
paddle::Tensor boxes_b_tensor,
|
||||
paddle::Tensor ans_iou_tensor) {
|
||||
// params boxes_a_tensor: (N, 7) [x, y, z, dx, dy, dz, heading]
|
||||
// params boxes_b_tensor: (M, 7) [x, y, z, dx, dy, dz, heading]
|
||||
// params ans_iou_tensor: (N, M)
|
||||
|
||||
// CHECK_CONTIGUOUS(boxes_a_tensor);
|
||||
// CHECK_CONTIGUOUS(boxes_b_tensor);
|
||||
|
||||
int num_boxes_a = boxes_a_tensor.shape()[0];
|
||||
int num_boxes_b = boxes_b_tensor.shape()[0];
|
||||
const float *boxes_a = boxes_a_tensor.data<float>();
|
||||
const float *boxes_b = boxes_b_tensor.data<float>();
|
||||
float *ans_iou = ans_iou_tensor.data<float>();
|
||||
|
||||
for (int i = 0; i < num_boxes_a; i++) {
|
||||
for (int j = 0; j < num_boxes_b; j++) {
|
||||
ans_iou[i * num_boxes_b + j] = iou_bev(boxes_a + i * 7, boxes_b + j * 7);
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
} // namespace fastdeploy
|
||||
} // namespace paddle_custom_ops
|
35
fastdeploy/runtime/backends/paddle/ops/iou3d_cpu.h
Normal file
35
fastdeploy/runtime/backends/paddle/ops/iou3d_cpu.h
Normal file
@@ -0,0 +1,35 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(PADDLEINFERENCE_API_COMPAT_2_4_x)
|
||||
#include "paddle/include/experimental/ext_all.h"
|
||||
#elif defined(PADDLEINFERENCE_API_COMPAT_2_5_x)
|
||||
#include "paddle/include/paddle/extension.h"
|
||||
#else
|
||||
#include "paddle/extension.h"
|
||||
#endif
|
||||
|
||||
#include "fastdeploy/utils/utils.h"
|
||||
|
||||
namespace fastdeploy {
|
||||
namespace paddle_custom_ops {
|
||||
|
||||
FASTDEPLOY_DECL int boxes_iou_bev_cpu(
|
||||
paddle::Tensor boxes_a_tensor, paddle::Tensor boxes_b_tensor,
|
||||
paddle::Tensor ans_iou_tensor);
|
||||
|
||||
} // namespace fastdeploy
|
||||
} // namespace paddle_custom_ops
|
237
fastdeploy/runtime/backends/paddle/ops/iou3d_nms.cc
Normal file
237
fastdeploy/runtime/backends/paddle/ops/iou3d_nms.cc
Normal file
@@ -0,0 +1,237 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
/*
|
||||
3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others)
|
||||
Written by Shaoshuai Shi
|
||||
All Rights Reserved 2019-2020.
|
||||
*/
|
||||
|
||||
#if defined(WITH_GPU)
|
||||
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime_api.h>
|
||||
|
||||
#include "iou3d_nms.h"
|
||||
|
||||
namespace fastdeploy {
|
||||
namespace paddle_custom_ops {
|
||||
|
||||
#define CHECK_INPUT(x) PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.")
|
||||
// #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
|
||||
static inline int DIVUP(const int m, const int n)
|
||||
{ return ((m) / (n) + ((m) % (n) > 0)); }
|
||||
|
||||
#define CHECK_ERROR(ans) \
|
||||
{ gpuAssert((ans), __FILE__, __LINE__); }
|
||||
inline void gpuAssert(cudaError_t code, const char *file, int line,
|
||||
bool abort = true) {
|
||||
if (code != cudaSuccess) {
|
||||
fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file,
|
||||
line);
|
||||
if (abort) exit(code);
|
||||
}
|
||||
}
|
||||
|
||||
#define D(x) \
|
||||
PD_THROW('\n', x, \
|
||||
"\n--------------------------------- where is the error ? " \
|
||||
"---------------------------------------\n");
|
||||
|
||||
static const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8;
|
||||
|
||||
void boxesoverlapLauncher(const int num_a, const float *boxes_a,
|
||||
const int num_b, const float *boxes_b,
|
||||
float *ans_overlap);
|
||||
void boxesioubevLauncher(const int num_a, const float *boxes_a, const int num_b,
|
||||
const float *boxes_b, float *ans_iou);
|
||||
void nmsLauncher(const float *boxes, unsigned long long *mask, int boxes_num,
|
||||
float nms_overlap_thresh);
|
||||
void nmsNormalLauncher(const float *boxes, unsigned long long *mask,
|
||||
int boxes_num, float nms_overlap_thresh);
|
||||
|
||||
int boxes_overlap_bev_gpu(paddle::Tensor boxes_a, paddle::Tensor boxes_b,
|
||||
paddle::Tensor ans_overlap) {
|
||||
// params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading]
|
||||
// params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading]
|
||||
// params ans_overlap: (N, M)
|
||||
|
||||
CHECK_INPUT(boxes_a);
|
||||
CHECK_INPUT(boxes_b);
|
||||
CHECK_INPUT(ans_overlap);
|
||||
|
||||
int num_a = boxes_a.shape()[0];
|
||||
int num_b = boxes_b.shape()[0];
|
||||
|
||||
const float *boxes_a_data = boxes_a.data<float>();
|
||||
const float *boxes_b_data = boxes_b.data<float>();
|
||||
float *ans_overlap_data = ans_overlap.data<float>();
|
||||
|
||||
boxesoverlapLauncher(num_a, boxes_a_data, num_b, boxes_b_data,
|
||||
ans_overlap_data);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int boxes_iou_bev_gpu(paddle::Tensor boxes_a, paddle::Tensor boxes_b,
|
||||
paddle::Tensor ans_iou) {
|
||||
// params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading]
|
||||
// params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading]
|
||||
// params ans_overlap: (N, M)
|
||||
CHECK_INPUT(boxes_a);
|
||||
CHECK_INPUT(boxes_b);
|
||||
CHECK_INPUT(ans_iou);
|
||||
|
||||
int num_a = boxes_a.shape()[0];
|
||||
int num_b = boxes_b.shape()[0];
|
||||
|
||||
const float *boxes_a_data = boxes_a.data<float>();
|
||||
const float *boxes_b_data = boxes_b.data<float>();
|
||||
float *ans_iou_data = ans_iou.data<float>();
|
||||
|
||||
boxesioubevLauncher(num_a, boxes_a_data, num_b, boxes_b_data, ans_iou_data);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::vector<paddle::Tensor> nms_gpu(const paddle::Tensor &boxes,
|
||||
float nms_overlap_thresh) {
|
||||
// params boxes: (N, 7) [x, y, z, dx, dy, dz, heading]
|
||||
// params keep: (N)
|
||||
CHECK_INPUT(boxes);
|
||||
// CHECK_CONTIGUOUS(keep);
|
||||
auto keep = paddle::empty({boxes.shape()[0]}, paddle::DataType::INT32,
|
||||
paddle::CPUPlace());
|
||||
auto num_to_keep_tensor =
|
||||
paddle::empty({1}, paddle::DataType::INT32, paddle::CPUPlace());
|
||||
int *num_to_keep_data = num_to_keep_tensor.data<int>();
|
||||
|
||||
int boxes_num = boxes.shape()[0];
|
||||
const float *boxes_data = boxes.data<float>();
|
||||
int *keep_data = keep.data<int>();
|
||||
|
||||
int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
|
||||
|
||||
unsigned long long *mask_data = NULL;
|
||||
CHECK_ERROR(cudaMalloc((void **)&mask_data,
|
||||
boxes_num * col_blocks * sizeof(unsigned long long)));
|
||||
nmsLauncher(boxes_data, mask_data, boxes_num, nms_overlap_thresh);
|
||||
|
||||
// unsigned long long mask_cpu[boxes_num * col_blocks];
|
||||
// unsigned long long *mask_cpu = new unsigned long long [boxes_num *
|
||||
// col_blocks];
|
||||
std::vector<unsigned long long> mask_cpu(boxes_num * col_blocks);
|
||||
|
||||
// printf("boxes_num=%d, col_blocks=%d\n", boxes_num, col_blocks);
|
||||
CHECK_ERROR(cudaMemcpy(&mask_cpu[0], mask_data,
|
||||
boxes_num * col_blocks * sizeof(unsigned long long),
|
||||
cudaMemcpyDeviceToHost));
|
||||
|
||||
cudaFree(mask_data);
|
||||
|
||||
// WARN(qiuyanjun): codes below will throw a compile error on windows with
|
||||
// msvc. Thus, we choosed to use std::vectored to store the result instead.
|
||||
// unsigned long long remv_cpu[col_blocks];
|
||||
// memset(remv_cpu, 0, col_blocks * sizeof(unsigned long long));
|
||||
std::vector<unsigned long long> remv_cpu(col_blocks, 0);
|
||||
|
||||
int num_to_keep = 0;
|
||||
|
||||
for (int i = 0; i < boxes_num; i++) {
|
||||
int nblock = i / THREADS_PER_BLOCK_NMS;
|
||||
int inblock = i % THREADS_PER_BLOCK_NMS;
|
||||
|
||||
if (!(remv_cpu[nblock] & (1ULL << inblock))) {
|
||||
keep_data[num_to_keep++] = i;
|
||||
unsigned long long *p = &mask_cpu[0] + i * col_blocks;
|
||||
for (int j = nblock; j < col_blocks; j++) {
|
||||
remv_cpu[j] |= p[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
num_to_keep_data[0] = num_to_keep;
|
||||
|
||||
if (cudaSuccess != cudaGetLastError()) printf("Error!\n");
|
||||
|
||||
return {keep, num_to_keep_tensor};
|
||||
}
|
||||
|
||||
int nms_normal_gpu(paddle::Tensor boxes, paddle::Tensor keep,
|
||||
float nms_overlap_thresh) {
|
||||
// params boxes: (N, 7) [x, y, z, dx, dy, dz, heading]
|
||||
// params keep: (N)
|
||||
|
||||
CHECK_INPUT(boxes);
|
||||
// CHECK_CONTIGUOUS(keep);
|
||||
|
||||
int boxes_num = boxes.shape()[0];
|
||||
const float *boxes_data = boxes.data<float>();
|
||||
// WARN(qiuyanjun): long type for Tensor::data() API is not exported by paddle,
|
||||
// it will raise some link error on windows with msvc. Please check:
|
||||
// https://github.com/PaddlePaddle/Paddle/blob/release/2.5/paddle/phi/api/lib/tensor.cc
|
||||
#if defined(_WIN32)
|
||||
int *keep_data = keep.data<int>();
|
||||
#else
|
||||
long *keep_data = keep.data<long>();
|
||||
#endif
|
||||
|
||||
int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
|
||||
|
||||
unsigned long long *mask_data = NULL;
|
||||
CHECK_ERROR(cudaMalloc((void **)&mask_data,
|
||||
boxes_num * col_blocks * sizeof(unsigned long long)));
|
||||
nmsNormalLauncher(boxes_data, mask_data, boxes_num, nms_overlap_thresh);
|
||||
|
||||
// unsigned long long mask_cpu[boxes_num * col_blocks];
|
||||
// unsigned long long *mask_cpu = new unsigned long long [boxes_num *
|
||||
// col_blocks];
|
||||
std::vector<unsigned long long> mask_cpu(boxes_num * col_blocks);
|
||||
|
||||
// printf("boxes_num=%d, col_blocks=%d\n", boxes_num, col_blocks);
|
||||
CHECK_ERROR(cudaMemcpy(&mask_cpu[0], mask_data,
|
||||
boxes_num * col_blocks * sizeof(unsigned long long),
|
||||
cudaMemcpyDeviceToHost));
|
||||
|
||||
cudaFree(mask_data);
|
||||
|
||||
// WARN(qiuyanjun): codes below will throw a compile error on windows with
|
||||
// msvc. Thus, we choosed to use std::vectored to store the result instead.
|
||||
// unsigned long long remv_cpu[col_blocks];
|
||||
// memset(remv_cpu, 0, col_blocks * sizeof(unsigned long long));
|
||||
std::vector<unsigned long long> remv_cpu(col_blocks, 0);
|
||||
|
||||
int num_to_keep = 0;
|
||||
|
||||
for (int i = 0; i < boxes_num; i++) {
|
||||
int nblock = i / THREADS_PER_BLOCK_NMS;
|
||||
int inblock = i % THREADS_PER_BLOCK_NMS;
|
||||
|
||||
if (!(remv_cpu[nblock] & (1ULL << inblock))) {
|
||||
keep_data[num_to_keep++] = i;
|
||||
unsigned long long *p = &mask_cpu[0] + i * col_blocks;
|
||||
for (int j = nblock; j < col_blocks; j++) {
|
||||
remv_cpu[j] |= p[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
if (cudaSuccess != cudaGetLastError()) printf("Error!\n");
|
||||
|
||||
return num_to_keep;
|
||||
}
|
||||
|
||||
} // namespace fastdeploy
|
||||
} // namespace paddle_custom_ops
|
||||
|
||||
#endif
|
46
fastdeploy/runtime/backends/paddle/ops/iou3d_nms.h
Normal file
46
fastdeploy/runtime/backends/paddle/ops/iou3d_nms.h
Normal file
@@ -0,0 +1,46 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(PADDLEINFERENCE_API_COMPAT_2_4_x)
|
||||
#include "paddle/include/experimental/ext_all.h"
|
||||
#elif defined(PADDLEINFERENCE_API_COMPAT_2_5_x)
|
||||
#include "paddle/include/paddle/extension.h"
|
||||
#else
|
||||
#include "paddle/extension.h"
|
||||
#endif
|
||||
|
||||
#include "fastdeploy/utils/utils.h"
|
||||
|
||||
#if defined(WITH_GPU)
|
||||
namespace fastdeploy {
|
||||
namespace paddle_custom_ops {
|
||||
|
||||
FASTDEPLOY_DECL int boxes_overlap_bev_gpu(
|
||||
paddle::Tensor boxes_a, paddle::Tensor boxes_b,
|
||||
paddle::Tensor ans_overlap);
|
||||
FASTDEPLOY_DECL int boxes_iou_bev_gpu(paddle::Tensor boxes_a,
|
||||
paddle::Tensor boxes_b,
|
||||
paddle::Tensor ans_iou);
|
||||
FASTDEPLOY_DECL std::vector<paddle::Tensor> nms_gpu(
|
||||
const paddle::Tensor& boxes, float nms_overlap_thresh);
|
||||
FASTDEPLOY_DECL int nms_normal_gpu(
|
||||
paddle::Tensor boxes, paddle::Tensor keep, float nms_overlap_thresh);
|
||||
|
||||
} // namespace fastdeploy
|
||||
} // namespace paddle_custom_ops
|
||||
|
||||
#endif
|
||||
|
55
fastdeploy/runtime/backends/paddle/ops/iou3d_nms_api.cc
Normal file
55
fastdeploy/runtime/backends/paddle/ops/iou3d_nms_api.cc
Normal file
@@ -0,0 +1,55 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#if defined(PADDLEINFERENCE_API_COMPAT_2_4_x)
|
||||
#include "paddle/include/experimental/ext_all.h"
|
||||
#elif defined(PADDLEINFERENCE_API_COMPAT_2_5_x)
|
||||
#include "paddle/include/paddle/extension.h"
|
||||
#else
|
||||
#include "paddle/extension.h"
|
||||
#endif
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "iou3d_cpu.h"
|
||||
#include "iou3d_nms.h"
|
||||
|
||||
namespace fastdeploy {
|
||||
namespace paddle_custom_ops {
|
||||
|
||||
std::vector<std::vector<int64_t>> NMSInferShape(
|
||||
std::vector<int64_t> boxes_shape) {
|
||||
int64_t keep_num = 1;
|
||||
return {{boxes_shape[0]}, {keep_num}};
|
||||
}
|
||||
|
||||
std::vector<paddle::DataType> NMSInferDtype(paddle::DataType boxes_dtype) {
|
||||
return {paddle::DataType::INT64, paddle::DataType::INT64};
|
||||
}
|
||||
|
||||
} // namespace fastdeploy
|
||||
} // namespace paddle_custom_ops
|
||||
|
||||
#if defined(WITH_GPU)
|
||||
|
||||
PD_BUILD_OP(nms_gpu)
|
||||
.Inputs({"boxes"})
|
||||
.Outputs({"keep", "num_to_keep"})
|
||||
.Attrs({"nms_overlap_thresh: float"})
|
||||
.SetKernelFn(PD_KERNEL(fastdeploy::paddle_custom_ops::nms_gpu))
|
||||
.SetInferDtypeFn(PD_INFER_DTYPE(fastdeploy::paddle_custom_ops::NMSInferDtype))
|
||||
.SetInferShapeFn(PD_INFER_SHAPE(fastdeploy::paddle_custom_ops::NMSInferShape));
|
||||
|
||||
#endif
|
@@ -1,23 +1,8 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
/*
|
||||
3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others)
|
||||
Written by Shaoshuai Shi
|
||||
All Rights Reserved 2019-2020.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
namespace fastdeploy {
|
||||
@@ -78,20 +63,36 @@ __device__ int check_rect_cross(const Point &p1, const Point &p2,
|
||||
__device__ inline int check_in_box2d(const float *box, const Point &p) {
|
||||
// params: (7) [x, y, z, dx, dy, dz, heading]
|
||||
const float MARGIN = 1e-2;
|
||||
// Align with the setting of mmdet3d
|
||||
// const float MARGIN = 1e-5;
|
||||
|
||||
float center_x = box[0], center_y = box[1];
|
||||
// rotate the point in the opposite direction of box
|
||||
float angle_cos = cos(-box[6]), angle_sin = sin(-box[6]);
|
||||
float angle_cos = cos(-box[6]),
|
||||
angle_sin =
|
||||
sin(-box[6]); // rotate the point in the opposite direction of box
|
||||
float rot_x = (p.x - center_x) * angle_cos + (p.y - center_y) * (-angle_sin);
|
||||
float rot_y = (p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos;
|
||||
|
||||
return (fabs(rot_x) < box[3] / 2 + MARGIN &&
|
||||
fabs(rot_y) < box[4] / 2 + MARGIN);
|
||||
// Align with the implement of mmdet3d
|
||||
// float rot_x =
|
||||
// (p.x - center_x) * angle_cos + (p.y - center_y) * angle_sin + center_x;
|
||||
// float rot_y =
|
||||
// -(p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos +
|
||||
// center_y;
|
||||
// float x1 = center_x - box[3] / 2;
|
||||
// float x2 = center_x + box[3] / 2;
|
||||
// float y1 = center_y - box[4] / 2;
|
||||
// float y2 = center_y + box[4] / 2;
|
||||
// return (rot_x > x1 - MARGIN && rot_x < x2 + MARGIN && rot_y > y1 - MARGIN
|
||||
// &&
|
||||
// rot_y < y2 + MARGIN);
|
||||
}
|
||||
|
||||
__device__ inline int intersection(const Point &p1, const Point &p0,
|
||||
const Point &q1, const Point &q0,
|
||||
Point *ans) {
|
||||
Point &ans) {
|
||||
// fast exclusion
|
||||
if (check_rect_cross(p0, p1, q0, q1) == 0) return 0;
|
||||
|
||||
@@ -106,16 +107,16 @@ __device__ inline int intersection(const Point &p1, const Point &p0,
|
||||
// calculate intersection of two lines
|
||||
float s5 = cross(q1, p1, p0);
|
||||
if (fabs(s5 - s1) > EPS) {
|
||||
ans->x = (s5 * q0.x - s1 * q1.x) / (s5 - s1);
|
||||
ans->y = (s5 * q0.y - s1 * q1.y) / (s5 - s1);
|
||||
ans.x = (s5 * q0.x - s1 * q1.x) / (s5 - s1);
|
||||
ans.y = (s5 * q0.y - s1 * q1.y) / (s5 - s1);
|
||||
|
||||
} else {
|
||||
float a0 = p0.y - p1.y, b0 = p1.x - p0.x, c0 = p0.x * p1.y - p1.x * p0.y;
|
||||
float a1 = q0.y - q1.y, b1 = q1.x - q0.x, c1 = q0.x * q1.y - q1.x * q0.y;
|
||||
float D = a0 * b1 - a1 * b0;
|
||||
|
||||
ans->x = (b0 * c1 - b1 * c0) / D;
|
||||
ans->y = (a1 * c0 - a0 * c1) / D;
|
||||
ans.x = (b0 * c1 - b1 * c0) / D;
|
||||
ans.y = (a1 * c0 - a0 * c1) / D;
|
||||
}
|
||||
|
||||
return 1;
|
||||
@@ -123,12 +124,18 @@ __device__ inline int intersection(const Point &p1, const Point &p0,
|
||||
|
||||
__device__ inline void rotate_around_center(const Point ¢er,
|
||||
const float angle_cos,
|
||||
const float angle_sin, Point *p) {
|
||||
float new_x = (p->x - center.x) * angle_cos +
|
||||
(p->y - center.y) * (-angle_sin) + center.x;
|
||||
const float angle_sin, Point &p) {
|
||||
// float new_x = (p.x - center.x) * angle_cos + (p.y - center.y) *
|
||||
// (-angle_sin) + center.x;
|
||||
// float new_y = (p.x - center.x) * angle_sin + (p.y - center.y) * angle_cos +
|
||||
// center.y;
|
||||
// p.set(new_x, new_y);
|
||||
// Aligh with the implement of mmdet3d
|
||||
float new_x =
|
||||
(p.x - center.x) * angle_cos + (p.y - center.y) * angle_sin + center.x;
|
||||
float new_y =
|
||||
(p->x - center.x) * angle_sin + (p->y - center.y) * angle_cos + center.y;
|
||||
p->set(new_x, new_y);
|
||||
-(p.x - center.x) * angle_sin + (p.y - center.y) * angle_cos + center.y;
|
||||
p.set(new_x, new_y);
|
||||
}
|
||||
|
||||
__device__ inline int point_cmp(const Point &a, const Point &b,
|
||||
@@ -152,6 +159,14 @@ __device__ inline float box_overlap(const float *box_a, const float *box_b) {
|
||||
Point center_a(box_a[0], box_a[1]);
|
||||
Point center_b(box_b[0], box_b[1]);
|
||||
|
||||
#ifdef DEBUG
|
||||
printf(
|
||||
"a: (%.3f, %.3f, %.3f, %.3f, %.3f), b: (%.3f, %.3f, %.3f, %.3f, %.3f)\n",
|
||||
a_x1, a_y1, a_x2, a_y2, a_angle, b_x1, b_y1, b_x2, b_y2, b_angle);
|
||||
printf("center a: (%.3f, %.3f), b: (%.3f, %.3f)\n", center_a.x, center_a.y,
|
||||
center_b.x, center_b.y);
|
||||
#endif
|
||||
|
||||
Point box_a_corners[5];
|
||||
box_a_corners[0].set(a_x1, a_y1);
|
||||
box_a_corners[1].set(a_x2, a_y1);
|
||||
@@ -169,8 +184,17 @@ __device__ inline float box_overlap(const float *box_a, const float *box_b) {
|
||||
float b_angle_cos = cos(b_angle), b_angle_sin = sin(b_angle);
|
||||
|
||||
for (int k = 0; k < 4; k++) {
|
||||
rotate_around_center(center_a, a_angle_cos, a_angle_sin, box_a_corners + k);
|
||||
rotate_around_center(center_b, b_angle_cos, b_angle_sin, box_b_corners + k);
|
||||
#ifdef DEBUG
|
||||
printf("before corner %d: a(%.3f, %.3f), b(%.3f, %.3f) \n", k,
|
||||
box_a_corners[k].x, box_a_corners[k].y, box_b_corners[k].x,
|
||||
box_b_corners[k].y);
|
||||
#endif
|
||||
rotate_around_center(center_a, a_angle_cos, a_angle_sin, box_a_corners[k]);
|
||||
rotate_around_center(center_b, b_angle_cos, b_angle_sin, box_b_corners[k]);
|
||||
#ifdef DEBUG
|
||||
printf("corner %d: a(%.3f, %.3f), b(%.3f, %.3f) \n", k, box_a_corners[k].x,
|
||||
box_a_corners[k].y, box_b_corners[k].x, box_b_corners[k].y);
|
||||
#endif
|
||||
}
|
||||
|
||||
box_a_corners[4] = box_a_corners[0];
|
||||
@@ -186,10 +210,19 @@ __device__ inline float box_overlap(const float *box_a, const float *box_b) {
|
||||
for (int j = 0; j < 4; j++) {
|
||||
flag = intersection(box_a_corners[i + 1], box_a_corners[i],
|
||||
box_b_corners[j + 1], box_b_corners[j],
|
||||
cross_points + cnt);
|
||||
cross_points[cnt]);
|
||||
if (flag) {
|
||||
poly_center = poly_center + cross_points[cnt];
|
||||
cnt++;
|
||||
#ifdef DEBUG
|
||||
printf(
|
||||
"Cross points (%.3f, %.3f): a(%.3f, %.3f)->(%.3f, %.3f), b(%.3f, "
|
||||
"%.3f)->(%.3f, %.3f) \n",
|
||||
cross_points[cnt - 1].x, cross_points[cnt - 1].y,
|
||||
box_a_corners[i].x, box_a_corners[i].y, box_a_corners[i + 1].x,
|
||||
box_a_corners[i + 1].y, box_b_corners[i].x, box_b_corners[i].y,
|
||||
box_b_corners[i + 1].x, box_b_corners[i + 1].y);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -200,11 +233,19 @@ __device__ inline float box_overlap(const float *box_a, const float *box_b) {
|
||||
poly_center = poly_center + box_b_corners[k];
|
||||
cross_points[cnt] = box_b_corners[k];
|
||||
cnt++;
|
||||
#ifdef DEBUG
|
||||
printf("b corners in a: corner_b(%.3f, %.3f)", cross_points[cnt - 1].x,
|
||||
cross_points[cnt - 1].y);
|
||||
#endif
|
||||
}
|
||||
if (check_in_box2d(box_b, box_a_corners[k])) {
|
||||
poly_center = poly_center + box_a_corners[k];
|
||||
cross_points[cnt] = box_a_corners[k];
|
||||
cnt++;
|
||||
#ifdef DEBUG
|
||||
printf("a corners in b: corner_a(%.3f, %.3f)", cross_points[cnt - 1].x,
|
||||
cross_points[cnt - 1].y);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@@ -223,6 +264,14 @@ __device__ inline float box_overlap(const float *box_a, const float *box_b) {
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
printf("cnt=%d\n", cnt);
|
||||
for (int i = 0; i < cnt; i++) {
|
||||
printf("All cross point %d: (%.3f, %.3f)\n", i, cross_points[i].x,
|
||||
cross_points[i].y);
|
||||
}
|
||||
#endif
|
||||
|
||||
// get the overlap areas
|
||||
float area = 0;
|
||||
for (int k = 0; k < cnt - 1; k++) {
|
||||
@@ -242,10 +291,220 @@ __device__ inline float iou_bev(const float *box_a, const float *box_b) {
|
||||
return s_overlap / fmaxf(sa + sb - s_overlap, EPS);
|
||||
}
|
||||
|
||||
__global__ void nms_kernel(const int num_bboxes, const int num_bboxes_for_nms,
|
||||
__global__ void boxes_overlap_kernel(const int num_a, const float *boxes_a,
|
||||
const int num_b, const float *boxes_b,
|
||||
float *ans_overlap) {
|
||||
// params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading]
|
||||
// params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading]
|
||||
const int a_idx = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
|
||||
const int b_idx = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;
|
||||
|
||||
if (a_idx >= num_a || b_idx >= num_b) {
|
||||
return;
|
||||
}
|
||||
const float *cur_box_a = boxes_a + a_idx * 7;
|
||||
const float *cur_box_b = boxes_b + b_idx * 7;
|
||||
float s_overlap = box_overlap(cur_box_a, cur_box_b);
|
||||
ans_overlap[a_idx * num_b + b_idx] = s_overlap;
|
||||
}
|
||||
|
||||
__global__ void boxes_iou_bev_kernel(const int num_a, const float *boxes_a,
|
||||
const int num_b, const float *boxes_b,
|
||||
float *ans_iou) {
|
||||
// params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading]
|
||||
// params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading]
|
||||
const int a_idx = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
|
||||
const int b_idx = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;
|
||||
|
||||
if (a_idx >= num_a || b_idx >= num_b) {
|
||||
return;
|
||||
}
|
||||
|
||||
const float *cur_box_a = boxes_a + a_idx * 7;
|
||||
const float *cur_box_b = boxes_b + b_idx * 7;
|
||||
float cur_iou_bev = iou_bev(cur_box_a, cur_box_b);
|
||||
ans_iou[a_idx * num_b + b_idx] = cur_iou_bev;
|
||||
}
|
||||
|
||||
__global__ void nms_kernel(const int boxes_num, const float nms_overlap_thresh,
|
||||
const float *boxes, unsigned long long *mask) {
|
||||
// params: boxes (N, 7) [x, y, z, dx, dy, dz, heading]
|
||||
// params: mask (N, N/THREADS_PER_BLOCK_NMS)
|
||||
|
||||
const int row_start = blockIdx.y;
|
||||
const int col_start = blockIdx.x;
|
||||
|
||||
// if (row_start > col_start) return;
|
||||
|
||||
const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS,
|
||||
THREADS_PER_BLOCK_NMS);
|
||||
const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS,
|
||||
THREADS_PER_BLOCK_NMS);
|
||||
|
||||
__shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 7];
|
||||
|
||||
if (threadIdx.x < col_size) {
|
||||
block_boxes[threadIdx.x * 7 + 0] =
|
||||
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 0];
|
||||
block_boxes[threadIdx.x * 7 + 1] =
|
||||
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 1];
|
||||
block_boxes[threadIdx.x * 7 + 2] =
|
||||
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 2];
|
||||
block_boxes[threadIdx.x * 7 + 3] =
|
||||
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 3];
|
||||
block_boxes[threadIdx.x * 7 + 4] =
|
||||
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 4];
|
||||
block_boxes[threadIdx.x * 7 + 5] =
|
||||
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 5];
|
||||
block_boxes[threadIdx.x * 7 + 6] =
|
||||
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 6];
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
if (threadIdx.x < row_size) {
|
||||
const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
|
||||
const float *cur_box = boxes + cur_box_idx * 7;
|
||||
|
||||
int i = 0;
|
||||
unsigned long long t = 0;
|
||||
int start = 0;
|
||||
if (row_start == col_start) {
|
||||
start = threadIdx.x + 1;
|
||||
}
|
||||
for (i = start; i < col_size; i++) {
|
||||
if (iou_bev(cur_box, block_boxes + i * 7) > nms_overlap_thresh) {
|
||||
t |= 1ULL << i;
|
||||
}
|
||||
}
|
||||
int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
|
||||
mask[cur_box_idx * col_blocks + col_start] = t;
|
||||
}
|
||||
}
|
||||
|
||||
__device__ inline float iou_normal(float const *const a, float const *const b) {
|
||||
// params: a: [x, y, z, dx, dy, dz, heading]
|
||||
// params: b: [x, y, z, dx, dy, dz, heading]
|
||||
|
||||
float left = fmaxf(a[0] - a[3] / 2, b[0] - b[3] / 2),
|
||||
right = fminf(a[0] + a[3] / 2, b[0] + b[3] / 2);
|
||||
float top = fmaxf(a[1] - a[4] / 2, b[1] - b[4] / 2),
|
||||
bottom = fminf(a[1] + a[4] / 2, b[1] + b[4] / 2);
|
||||
float width = fmaxf(right - left, 0.f), height = fmaxf(bottom - top, 0.f);
|
||||
float interS = width * height;
|
||||
float Sa = a[3] * a[4];
|
||||
float Sb = b[3] * b[4];
|
||||
return interS / fmaxf(Sa + Sb - interS, EPS);
|
||||
}
|
||||
|
||||
__global__ void nms_normal_kernel(const int boxes_num,
|
||||
const float nms_overlap_thresh,
|
||||
const int decode_bboxes_dims, const float *bboxes,
|
||||
const int *index, const int64_t *sorted_index,
|
||||
const float *boxes,
|
||||
unsigned long long *mask) {
|
||||
// params: boxes (N, 7) [x, y, z, dx, dy, dz, heading]
|
||||
// params: mask (N, N/THREADS_PER_BLOCK_NMS)
|
||||
|
||||
const int row_start = blockIdx.y;
|
||||
const int col_start = blockIdx.x;
|
||||
|
||||
// if (row_start > col_start) return;
|
||||
|
||||
const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS,
|
||||
THREADS_PER_BLOCK_NMS);
|
||||
const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS,
|
||||
THREADS_PER_BLOCK_NMS);
|
||||
|
||||
__shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 7];
|
||||
|
||||
if (threadIdx.x < col_size) {
|
||||
block_boxes[threadIdx.x * 7 + 0] =
|
||||
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 0];
|
||||
block_boxes[threadIdx.x * 7 + 1] =
|
||||
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 1];
|
||||
block_boxes[threadIdx.x * 7 + 2] =
|
||||
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 2];
|
||||
block_boxes[threadIdx.x * 7 + 3] =
|
||||
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 3];
|
||||
block_boxes[threadIdx.x * 7 + 4] =
|
||||
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 4];
|
||||
block_boxes[threadIdx.x * 7 + 5] =
|
||||
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 5];
|
||||
block_boxes[threadIdx.x * 7 + 6] =
|
||||
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 6];
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
if (threadIdx.x < row_size) {
|
||||
const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
|
||||
const float *cur_box = boxes + cur_box_idx * 7;
|
||||
|
||||
int i = 0;
|
||||
unsigned long long t = 0;
|
||||
int start = 0;
|
||||
if (row_start == col_start) {
|
||||
start = threadIdx.x + 1;
|
||||
}
|
||||
for (i = start; i < col_size; i++) {
|
||||
if (iou_normal(cur_box, block_boxes + i * 7) > nms_overlap_thresh) {
|
||||
t |= 1ULL << i;
|
||||
}
|
||||
}
|
||||
int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
|
||||
mask[cur_box_idx * col_blocks + col_start] = t;
|
||||
}
|
||||
}
|
||||
|
||||
void boxesoverlapLauncher(const int num_a, const float *boxes_a,
|
||||
const int num_b, const float *boxes_b,
|
||||
float *ans_overlap) {
|
||||
dim3 blocks(
|
||||
DIVUP(num_b, THREADS_PER_BLOCK),
|
||||
DIVUP(num_a, THREADS_PER_BLOCK)); // blockIdx.x(col), blockIdx.y(row)
|
||||
dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);
|
||||
|
||||
boxes_overlap_kernel<<<blocks, threads>>>(num_a, boxes_a, num_b, boxes_b,
|
||||
ans_overlap);
|
||||
#ifdef DEBUG
|
||||
cudaDeviceSynchronize(); // for using printf in kernel function
|
||||
#endif
|
||||
}
|
||||
|
||||
void boxesioubevLauncher(const int num_a, const float *boxes_a, const int num_b,
|
||||
const float *boxes_b, float *ans_iou) {
|
||||
dim3 blocks(
|
||||
DIVUP(num_b, THREADS_PER_BLOCK),
|
||||
DIVUP(num_a, THREADS_PER_BLOCK)); // blockIdx.x(col), blockIdx.y(row)
|
||||
dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);
|
||||
|
||||
boxes_iou_bev_kernel<<<blocks, threads>>>(num_a, boxes_a, num_b, boxes_b,
|
||||
ans_iou);
|
||||
#ifdef DEBUG
|
||||
cudaDeviceSynchronize(); // for using printf in kernel function
|
||||
#endif
|
||||
}
|
||||
|
||||
void nmsLauncher(const float *boxes, unsigned long long *mask, int boxes_num,
|
||||
float nms_overlap_thresh) {
|
||||
dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS),
|
||||
DIVUP(boxes_num, THREADS_PER_BLOCK_NMS));
|
||||
dim3 threads(THREADS_PER_BLOCK_NMS);
|
||||
nms_kernel<<<blocks, threads>>>(boxes_num, nms_overlap_thresh, boxes, mask);
|
||||
}
|
||||
|
||||
void nmsNormalLauncher(const float *boxes, unsigned long long *mask,
|
||||
int boxes_num, float nms_overlap_thresh) {
|
||||
dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS),
|
||||
DIVUP(boxes_num, THREADS_PER_BLOCK_NMS));
|
||||
dim3 threads(THREADS_PER_BLOCK_NMS);
|
||||
nms_normal_kernel<<<blocks, threads>>>(boxes_num, nms_overlap_thresh, boxes,
|
||||
mask);
|
||||
}
|
||||
|
||||
__global__ void nms_kernel_centerpoint(const int num_bboxes,
|
||||
const int num_bboxes_for_nms,
|
||||
const float nms_overlap_thresh,
|
||||
const int decode_bboxes_dims,
|
||||
const float *bboxes, const int *index,
|
||||
const int64_t *sorted_index,
|
||||
int64_t *mask) {
|
||||
// params: boxes (N, 7) [x, y, z, dx, dy, dz, heading]
|
||||
// params: mask (N, N/THREADS_PER_BLOCK_NMS)
|
||||
@@ -304,7 +563,7 @@ __global__ void nms_kernel(const int num_bboxes, const int num_bboxes_for_nms,
|
||||
t |= 1ULL << i;
|
||||
}
|
||||
}
|
||||
const int col_blocks = DIVUP(num_bboxes_for_nms, THREADS_PER_BLOCK_NMS);
|
||||
int col_blocks = DIVUP(num_bboxes_for_nms, THREADS_PER_BLOCK_NMS);
|
||||
mask[cur_box_idx * col_blocks + col_start] = t;
|
||||
}
|
||||
}
|
||||
@@ -317,7 +576,7 @@ void NmsLauncher(const cudaStream_t &stream, const float *bboxes,
|
||||
dim3 blocks(DIVUP(num_bboxes_for_nms, THREADS_PER_BLOCK_NMS),
|
||||
DIVUP(num_bboxes_for_nms, THREADS_PER_BLOCK_NMS));
|
||||
dim3 threads(THREADS_PER_BLOCK_NMS);
|
||||
nms_kernel<<<blocks, threads, 0, stream>>>(
|
||||
nms_kernel_centerpoint<<<blocks, threads, 0, stream>>>(
|
||||
num_bboxes, num_bboxes_for_nms, nms_overlap_thresh, decode_bboxes_dims,
|
||||
bboxes, index, sorted_index, mask);
|
||||
}
|
||||
|
@@ -37,6 +37,7 @@
|
||||
#include "fastdeploy/vision/perception/paddle3d/smoke/smoke.h"
|
||||
#include "fastdeploy/vision/perception/paddle3d/petr/petr.h"
|
||||
#include "fastdeploy/vision/perception/paddle3d/centerpoint/centerpoint.h"
|
||||
#include "fastdeploy/vision/perception/paddle3d/caddn/caddn.h"
|
||||
#include "fastdeploy/vision/detection/ppdet/model.h"
|
||||
#include "fastdeploy/vision/facealign/contrib/face_landmark_1000.h"
|
||||
#include "fastdeploy/vision/facealign/contrib/pfld.h"
|
||||
|
86
fastdeploy/vision/perception/paddle3d/caddn/caddn.cc
Normal file
86
fastdeploy/vision/perception/paddle3d/caddn/caddn.cc
Normal file
@@ -0,0 +1,86 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "fastdeploy/vision/perception/paddle3d/caddn/caddn.h"
|
||||
|
||||
namespace fastdeploy {
|
||||
namespace vision {
|
||||
namespace perception {
|
||||
|
||||
Caddn::Caddn(const std::string& model_file, const std::string& params_file,
|
||||
const std::string& config_file, const RuntimeOption& custom_option,
|
||||
const ModelFormat& model_format)
|
||||
: preprocessor_(config_file) {
|
||||
valid_gpu_backends = {Backend::PDINFER};
|
||||
|
||||
runtime_option = custom_option;
|
||||
runtime_option.model_format = model_format;
|
||||
runtime_option.model_file = model_file;
|
||||
runtime_option.params_file = params_file;
|
||||
initialized = Initialize();
|
||||
}
|
||||
|
||||
bool Caddn::Initialize() {
|
||||
if (!InitRuntime()) {
|
||||
FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Caddn::Predict(const cv::Mat& im, std::vector<float>& input_cam_data,
|
||||
std::vector<float>& input_lidar_data,
|
||||
PerceptionResult* result) {
|
||||
std::vector<PerceptionResult> results;
|
||||
if (!BatchPredict({im}, input_cam_data, input_lidar_data, &results)) {
|
||||
return false;
|
||||
}
|
||||
if (results.size()) {
|
||||
*result = std::move(results[0]);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Caddn::BatchPredict(const std::vector<cv::Mat>& images,
|
||||
std::vector<float>& input_cam_data,
|
||||
std::vector<float>& input_lidar_data,
|
||||
std::vector<PerceptionResult>* results) {
|
||||
std::vector<FDMat> fd_images = WrapMat(images);
|
||||
|
||||
if (!preprocessor_.Run(&fd_images, input_cam_data, input_lidar_data,
|
||||
&reused_input_tensors_)) {
|
||||
FDERROR << "Failed to preprocess the input image." << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
reused_input_tensors_[0].name = "images";
|
||||
reused_input_tensors_[1].name = "trans_cam_to_img";
|
||||
reused_input_tensors_[2].name = "trans_lidar_to_cam";
|
||||
|
||||
if (!Infer(reused_input_tensors_, &reused_output_tensors_)) {
|
||||
FDERROR << "Failed to inference by runtime." << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!postprocessor_.Run(reused_output_tensors_, results)) {
|
||||
FDERROR << "Failed to postprocess the inference results by runtime."
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace perception
|
||||
} // namespace vision
|
||||
} // namespace fastdeploy
|
83
fastdeploy/vision/perception/paddle3d/caddn/caddn.h
Executable file
83
fastdeploy/vision/perception/paddle3d/caddn/caddn.h
Executable file
@@ -0,0 +1,83 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "fastdeploy/fastdeploy_model.h"
|
||||
#include "fastdeploy/vision/perception/paddle3d/caddn/preprocessor.h"
|
||||
#include "fastdeploy/vision/perception/paddle3d/caddn/postprocessor.h"
|
||||
|
||||
namespace fastdeploy {
|
||||
namespace vision {
|
||||
namespace perception {
|
||||
/*! @brief Caddn model object used when to load a Caddn model exported by Caddn.
|
||||
*/
|
||||
class FASTDEPLOY_DECL Caddn : public FastDeployModel {
|
||||
public:
|
||||
/** \brief Set path of model file and the configuration of runtime.
|
||||
*
|
||||
* \param[in] model_file Path of model file, e.g Caddn/model.pdiparams
|
||||
* \param[in] params_file Path of parameter file, e.g Caddn/model.pdiparams, if the model format is ONNX, this parameter will be ignored
|
||||
* \param[in] custom_option RuntimeOption for inference, the default will use cpu, and choose the backend defined in "valid_cpu_backends"
|
||||
* \param[in] model_format Model format of the loaded model, default is Paddle format
|
||||
*/
|
||||
Caddn(const std::string& model_file, const std::string& params_file,
|
||||
const std::string& config_file,
|
||||
const RuntimeOption& custom_option = RuntimeOption(),
|
||||
const ModelFormat& model_format = ModelFormat::PADDLE);
|
||||
|
||||
std::string ModelName() const { return "Paddle3D/Caddn"; }
|
||||
|
||||
/** \brief Predict the perception result for an input image
|
||||
*
|
||||
* \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
|
||||
* \param[in] result The output perception result will be writen to this structure
|
||||
* \return true if the prediction successed, otherwise false
|
||||
*/
|
||||
virtual bool Predict(const cv::Mat& im,
|
||||
std::vector<float>& input_cam_data,
|
||||
std::vector<float>& input_lidar_data,
|
||||
PerceptionResult* results);
|
||||
|
||||
/** \brief Predict the perception results for a batch of input images
|
||||
*
|
||||
* \param[in] imgs, The input image list, each element comes from cv::imread()
|
||||
* \param[in] results The output perception result list
|
||||
* \return true if the prediction successed, otherwise false
|
||||
*/
|
||||
virtual bool BatchPredict(const std::vector<cv::Mat>& images,
|
||||
std::vector<float>& input_cam_data,
|
||||
std::vector<float>& input_lidar_data,
|
||||
std::vector<PerceptionResult>* results);
|
||||
|
||||
/// Get preprocessor reference of Caddn
|
||||
virtual CaddnPreprocessor& GetPreprocessor() {
|
||||
return preprocessor_;
|
||||
}
|
||||
|
||||
/// Get postprocessor reference of Caddn
|
||||
virtual CaddnPostprocessor& GetPostprocessor() {
|
||||
return postprocessor_;
|
||||
}
|
||||
|
||||
protected:
|
||||
bool Initialize();
|
||||
CaddnPreprocessor preprocessor_;
|
||||
CaddnPostprocessor postprocessor_;
|
||||
bool initialized_ = false;
|
||||
};
|
||||
|
||||
} // namespace perception
|
||||
} // namespace vision
|
||||
} // namespace fastdeploy
|
96
fastdeploy/vision/perception/paddle3d/caddn/caddn_pybind.cc
Normal file
96
fastdeploy/vision/perception/paddle3d/caddn/caddn_pybind.cc
Normal file
@@ -0,0 +1,96 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "fastdeploy/pybind/main.h"
|
||||
|
||||
namespace fastdeploy {
|
||||
void BindCaddn(pybind11::module& m) {
|
||||
pybind11::class_<vision::perception::CaddnPreprocessor,
|
||||
vision::ProcessorManager>(m, "CaddnPreprocessor")
|
||||
.def(pybind11::init<std::string>())
|
||||
.def("run",
|
||||
[](vision::perception::CaddnPreprocessor& self,
|
||||
std::vector<pybind11::array>& im_list,
|
||||
std::vector<float>& cam_data, std::vector<float>& lidar_data) {
|
||||
std::vector<vision::FDMat> images;
|
||||
for (size_t i = 0; i < im_list.size(); ++i) {
|
||||
images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
|
||||
}
|
||||
std::vector<FDTensor> outputs;
|
||||
if (!self.Run(&images, cam_data, lidar_data, &outputs)) {
|
||||
throw std::runtime_error(
|
||||
"Failed to preprocess the input data in CaddnPreprocessor.");
|
||||
}
|
||||
for (size_t i = 0; i < outputs.size(); ++i) {
|
||||
outputs[i].StopSharing();
|
||||
}
|
||||
return outputs;
|
||||
});
|
||||
|
||||
pybind11::class_<vision::perception::CaddnPostprocessor>(m,
|
||||
"CaddnPostprocessor")
|
||||
.def(pybind11::init<>())
|
||||
.def("run",
|
||||
[](vision::perception::CaddnPostprocessor& self,
|
||||
std::vector<FDTensor>& inputs) {
|
||||
std::vector<vision::PerceptionResult> results;
|
||||
if (!self.Run(inputs, &results)) {
|
||||
throw std::runtime_error(
|
||||
"Failed to postprocess the runtime result in "
|
||||
"CaddnPostprocessor.");
|
||||
}
|
||||
return results;
|
||||
})
|
||||
.def("run", [](vision::perception::CaddnPostprocessor& self,
|
||||
std::vector<pybind11::array>& input_array) {
|
||||
std::vector<vision::PerceptionResult> results;
|
||||
std::vector<FDTensor> inputs;
|
||||
PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true);
|
||||
if (!self.Run(inputs, &results)) {
|
||||
throw std::runtime_error(
|
||||
"Failed to postprocess the runtime result in "
|
||||
"CaddnPostprocessor.");
|
||||
}
|
||||
return results;
|
||||
});
|
||||
|
||||
pybind11::class_<vision::perception::Caddn, FastDeployModel>(m, "Caddn")
|
||||
.def(pybind11::init<std::string, std::string, std::string, RuntimeOption,
|
||||
ModelFormat>())
|
||||
.def("predict",
|
||||
[](vision::perception::Caddn& self, pybind11::array& data,
|
||||
std::vector<float>& cam_data, std::vector<float>& lidar_data) {
|
||||
auto mat = PyArrayToCvMat(data);
|
||||
vision::PerceptionResult res;
|
||||
self.Predict(mat, cam_data, lidar_data, &res);
|
||||
return res;
|
||||
})
|
||||
.def("batch_predict",
|
||||
[](vision::perception::Caddn& self,
|
||||
std::vector<pybind11::array>& data, std::vector<float>& cam_data,
|
||||
std::vector<float>& lidar_data) {
|
||||
std::vector<cv::Mat> images;
|
||||
for (size_t i = 0; i < data.size(); ++i) {
|
||||
images.push_back(PyArrayToCvMat(data[i]));
|
||||
}
|
||||
std::vector<vision::PerceptionResult> results;
|
||||
self.BatchPredict(images, cam_data, lidar_data, &results);
|
||||
return results;
|
||||
})
|
||||
.def_property_readonly("preprocessor",
|
||||
&vision::perception::Caddn::GetPreprocessor)
|
||||
.def_property_readonly("postprocessor",
|
||||
&vision::perception::Caddn::GetPostprocessor);
|
||||
}
|
||||
} // namespace fastdeploy
|
70
fastdeploy/vision/perception/paddle3d/caddn/postprocessor.cc
Normal file
70
fastdeploy/vision/perception/paddle3d/caddn/postprocessor.cc
Normal file
@@ -0,0 +1,70 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "fastdeploy/vision/perception/paddle3d/caddn/postprocessor.h"
|
||||
|
||||
#include "fastdeploy/vision/utils/utils.h"
|
||||
|
||||
namespace fastdeploy {
|
||||
namespace vision {
|
||||
namespace perception {
|
||||
|
||||
CaddnPostprocessor::CaddnPostprocessor() {}
|
||||
|
||||
bool CaddnPostprocessor::Run(const std::vector<FDTensor>& tensors,
|
||||
std::vector<PerceptionResult>* results) {
|
||||
results->resize(1);
|
||||
(*results)[0].Clear();
|
||||
(*results)[0].Reserve(tensors[0].shape[0]);
|
||||
if (tensors[0].dtype != FDDataType::FP32) {
|
||||
FDERROR << "Only support post process with float32 data." << std::endl;
|
||||
return false;
|
||||
}
|
||||
const float* data_0 = reinterpret_cast<const float*>(tensors[0].Data());
|
||||
auto result = &(*results)[0];
|
||||
for (int i = 0; i < tensors[0].shape[0] * tensors[0].shape[1]; i += 7) {
|
||||
// item 1 ~ 3 : box3d bottom center x, y, z
|
||||
// item 4 ~ 6 : box3d w, h, l
|
||||
// item 7 : box3d yaw angle
|
||||
std::vector<float> vec(data_0 + i, data_0 + i + 7);
|
||||
result->boxes.emplace_back(
|
||||
std::array<float, 7>{0, 0, 0, 0, vec[3], vec[4], vec[5]});
|
||||
result->center.emplace_back(std::array<float, 3>{vec[0], vec[1], vec[2]});
|
||||
result->yaw_angle.push_back(vec[6]);
|
||||
}
|
||||
const float* data_1 = reinterpret_cast<const float*>(tensors[2].Data());
|
||||
for (int i = 0; i < tensors[2].shape[0]; i += 1) {
|
||||
std::vector<float> vec(data_1 + i, data_1 + i + 1);
|
||||
result->scores.push_back(vec[0]);
|
||||
}
|
||||
const float* data_2 = reinterpret_cast<const float*>(tensors[1].Data());
|
||||
for (int i = 0; i < tensors[1].shape[0]; i++) {
|
||||
std::vector<float> vec(data_2 + i, data_2 + i + 1);
|
||||
result->label_ids.push_back(vec[0]);
|
||||
}
|
||||
|
||||
result->valid.push_back(true); // 0 scores
|
||||
result->valid.push_back(true); // 1 label_ids
|
||||
result->valid.push_back(true); // 2 boxes
|
||||
result->valid.push_back(true); // 3 center
|
||||
result->valid.push_back(false); // 4 observation_angle
|
||||
result->valid.push_back(true); // 5 yaw_angle
|
||||
result->valid.push_back(false); // 6 velocity
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace perception
|
||||
} // namespace vision
|
||||
} // namespace fastdeploy
|
48
fastdeploy/vision/perception/paddle3d/caddn/postprocessor.h
Normal file
48
fastdeploy/vision/perception/paddle3d/caddn/postprocessor.h
Normal file
@@ -0,0 +1,48 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
#include "fastdeploy/vision/common/processors/transform.h"
|
||||
#include "fastdeploy/vision/common/result.h"
|
||||
|
||||
namespace fastdeploy {
|
||||
namespace vision {
|
||||
|
||||
namespace perception {
|
||||
/*! @brief Postprocessor object for Caddn serials model.
|
||||
*/
|
||||
class FASTDEPLOY_DECL CaddnPostprocessor {
|
||||
public:
|
||||
/** \brief Create a postprocessor instance for Caddn serials model
|
||||
*/
|
||||
CaddnPostprocessor();
|
||||
|
||||
/** \brief Process the result of runtime and fill to PerceptionResult structure
|
||||
*
|
||||
* \param[in] tensors The inference result from runtime
|
||||
* \param[in] result The output result of detection
|
||||
* \param[in] ims_info The shape info list, record input_shape and output_shape
|
||||
* \return true if the postprocess successed, otherwise false
|
||||
*/
|
||||
bool Run(const std::vector<FDTensor>& tensors,
|
||||
std::vector<PerceptionResult>* results);
|
||||
|
||||
|
||||
protected:
|
||||
float conf_threshold_;
|
||||
};
|
||||
|
||||
} // namespace perception
|
||||
} // namespace vision
|
||||
} // namespace fastdeploy
|
112
fastdeploy/vision/perception/paddle3d/caddn/preprocessor.cc
Normal file
112
fastdeploy/vision/perception/paddle3d/caddn/preprocessor.cc
Normal file
@@ -0,0 +1,112 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "fastdeploy/vision/perception/paddle3d/caddn/preprocessor.h"
|
||||
|
||||
#include "fastdeploy/function/concat.h"
|
||||
#include "yaml-cpp/yaml.h"
|
||||
|
||||
namespace fastdeploy {
|
||||
namespace vision {
|
||||
namespace perception {
|
||||
|
||||
CaddnPreprocessor::CaddnPreprocessor(const std::string& config_file) {
|
||||
config_file_ = config_file;
|
||||
FDASSERT(BuildPreprocessPipeline(),
|
||||
"Failed to create Paddle3DDetPreprocessor.");
|
||||
initialized_ = true;
|
||||
}
|
||||
|
||||
bool CaddnPreprocessor::BuildPreprocessPipeline() {
|
||||
processors_.clear();
|
||||
|
||||
// preprocess
|
||||
processors_.push_back(std::make_shared<BGR2RGB>());
|
||||
|
||||
std::vector<float> alpha = {1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0};
|
||||
std::vector<float> beta = {0.0, 0.0, 0.0};
|
||||
processors_.push_back(std::make_shared<Convert>(alpha, beta));
|
||||
|
||||
processors_.push_back(std::make_shared<Cast>("float"));
|
||||
processors_.push_back(std::make_shared<HWC2CHW>());
|
||||
|
||||
// Fusion will improve performance
|
||||
FuseTransforms(&processors_);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CaddnPreprocessor::Apply(FDMatBatch* image_batch,
|
||||
std::vector<float>& input_cam_data,
|
||||
std::vector<float>& input_lidar_data,
|
||||
std::vector<FDTensor>* outputs) {
|
||||
if (image_batch->mats->empty()) {
|
||||
FDERROR << "The size of input images should be greater than 0."
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
if (!initialized_) {
|
||||
FDERROR << "The preprocessor is not initialized." << std::endl;
|
||||
return false;
|
||||
}
|
||||
// There are 3 outputs, image, cam_data, lidar_data
|
||||
outputs->resize(3);
|
||||
int batch = static_cast<int>(image_batch->mats->size());
|
||||
|
||||
// Allocate memory for cam_data
|
||||
(*outputs)[1].Resize({batch, 3, 4}, FDDataType::FP32);
|
||||
|
||||
// Allocate memory for lidar_data
|
||||
(*outputs)[2].Resize({batch, 4, 4}, FDDataType::FP32);
|
||||
|
||||
auto* cam_data_ptr = reinterpret_cast<float*>((*outputs)[1].MutableData());
|
||||
auto* lidar_data_ptr = reinterpret_cast<float*>((*outputs)[2].MutableData());
|
||||
|
||||
for (size_t i = 0; i < image_batch->mats->size(); ++i) {
|
||||
FDMat* mat = &(image_batch->mats->at(i));
|
||||
for (size_t j = 0; j < processors_.size(); ++j) {
|
||||
if (!(*(processors_[j].get()))(mat)) {
|
||||
FDERROR << "Failed to processs image:" << i << " in "
|
||||
<< processors_[j]->Name() << "." << std::endl;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
memcpy(cam_data_ptr + i * 12, input_cam_data.data(), 12 * sizeof(float));
|
||||
memcpy(lidar_data_ptr + i * 16, input_lidar_data.data(),
|
||||
16 * sizeof(float));
|
||||
}
|
||||
|
||||
FDTensor* tensor = image_batch->Tensor();
|
||||
(*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(),
|
||||
tensor->Data(), tensor->device,
|
||||
tensor->device_id);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CaddnPreprocessor::Run(std::vector<FDMat>* images,
|
||||
std::vector<float>& input_cam_data,
|
||||
std::vector<float>& input_lidar_data,
|
||||
std::vector<FDTensor>* outputs) {
|
||||
FDMatBatch image_batch(images);
|
||||
PreApply(&image_batch);
|
||||
bool ret = Apply(&image_batch, input_cam_data, input_lidar_data, outputs);
|
||||
PostApply();
|
||||
return ret;
|
||||
}
|
||||
|
||||
} // namespace perception
|
||||
} // namespace vision
|
||||
} // namespace fastdeploy
|
69
fastdeploy/vision/perception/paddle3d/caddn/preprocessor.h
Executable file
69
fastdeploy/vision/perception/paddle3d/caddn/preprocessor.h
Executable file
@@ -0,0 +1,69 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
#include "fastdeploy/vision/common/processors/manager.h"
|
||||
#include "fastdeploy/vision/common/processors/transform.h"
|
||||
#include "fastdeploy/vision/common/result.h"
|
||||
|
||||
namespace fastdeploy {
|
||||
namespace vision {
|
||||
|
||||
namespace perception {
|
||||
/*! @brief Preprocessor object for Caddn serials model.
|
||||
*/
|
||||
class FASTDEPLOY_DECL CaddnPreprocessor : public ProcessorManager {
|
||||
public:
|
||||
CaddnPreprocessor() = default;
|
||||
/** \brief Create a preprocessor instance for Caddn model
|
||||
*
|
||||
* \param[in] config_file Path of configuration file for deployment, e.g Caddn/infer_cfg.yml
|
||||
*/
|
||||
explicit CaddnPreprocessor(const std::string& config_file);
|
||||
|
||||
bool Run(std::vector<FDMat>* images,
|
||||
std::vector<float>& input_cam_data,
|
||||
std::vector<float>& input_lidar_data,
|
||||
std::vector<FDTensor>* outputs);
|
||||
|
||||
/** \brief Process the input image and prepare input tensors for runtime
|
||||
*
|
||||
* \param[in] images The input image data list, all the elements are returned by cv::imread()
|
||||
* \param[in] outputs The output tensors which will feed in runtime
|
||||
* \param[in] ims_info The shape info list, record input_shape and output_shape
|
||||
* \return true if the preprocess successed, otherwise false
|
||||
*/
|
||||
bool Apply(FDMatBatch* image_batch, std::vector<FDTensor>* outputs) {
|
||||
FDERROR << "CaddnPreprocessor should input cam and lidar datas" << std::endl;
|
||||
return 0;
|
||||
};
|
||||
bool Apply(FDMatBatch* image_batch,
|
||||
std::vector<float>& input_cam_data,
|
||||
std::vector<float>& input_lidar_data,
|
||||
std::vector<FDTensor>* outputs);
|
||||
|
||||
protected:
|
||||
bool BuildPreprocessPipeline();
|
||||
std::vector<std::shared_ptr<Processor>> processors_;
|
||||
|
||||
bool disable_permute_ = false;
|
||||
|
||||
bool initialized_ = false;
|
||||
|
||||
std::string config_file_;
|
||||
};
|
||||
|
||||
} // namespace perception
|
||||
} // namespace vision
|
||||
} // namespace fastdeploy
|
@@ -24,8 +24,7 @@ CenterpointPreprocessor::CenterpointPreprocessor(
|
||||
|
||||
bool CenterpointPreprocessor::ReadPoint(const std::string &file_path,
|
||||
const int64_t num_point_dim,
|
||||
std::vector<float> &data,
|
||||
int64_t *num_points) {
|
||||
std::vector<float> &data, int64_t *num_points) {
|
||||
std::ifstream file_in(file_path, std::ios::in | std::ios::binary);
|
||||
if (num_point_dim < 4) {
|
||||
FDERROR << "Point dimension must not be less than 4, but received "
|
||||
|
@@ -19,6 +19,7 @@ namespace fastdeploy {
|
||||
void BindSmoke(pybind11::module& m);
|
||||
void BindPetr(pybind11::module& m);
|
||||
void BindCenterpoint(pybind11::module& m);
|
||||
void BindCaddn(pybind11::module& m);
|
||||
|
||||
void BindPerception(pybind11::module& m) {
|
||||
auto perception_module =
|
||||
@@ -26,5 +27,6 @@ void BindPerception(pybind11::module& m) {
|
||||
BindSmoke(perception_module);
|
||||
BindPetr(perception_module);
|
||||
BindCenterpoint(perception_module);
|
||||
BindCaddn(perception_module);
|
||||
}
|
||||
} // namespace fastdeploy
|
||||
|
@@ -16,3 +16,4 @@ from __future__ import absolute_import
|
||||
from .paddle3d.smoke import *
|
||||
from .paddle3d.petr import *
|
||||
from .paddle3d.centerpoint import *
|
||||
from .paddle3d.caddn import *
|
||||
|
108
python/fastdeploy/vision/perception/paddle3d/caddn.py
Normal file
108
python/fastdeploy/vision/perception/paddle3d/caddn.py
Normal file
@@ -0,0 +1,108 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
from .... import FastDeployModel, ModelFormat
|
||||
from .... import c_lib_wrap as C
|
||||
|
||||
|
||||
class CaddnPreprocessor:
|
||||
def __init__(self, config_file):
|
||||
"""Create a preprocessor for Caddn
|
||||
"""
|
||||
self._preprocessor = C.vision.perception.CaddnPreprocessor(config_file)
|
||||
|
||||
def run(self, input_ims, cam_data, lidar_data):
|
||||
"""Preprocess input images for Caddn
|
||||
|
||||
:param: input_ims: (list of numpy.ndarray)The input image
|
||||
:return: list of FDTensor
|
||||
"""
|
||||
return self._preprocessor.run(input_ims, cam_data, lidar_data)
|
||||
|
||||
|
||||
class CaddnPostprocessor:
|
||||
def __init__(self):
|
||||
"""Create a postprocessor for Caddn
|
||||
"""
|
||||
self._postprocessor = C.vision.perception.CaddnPostprocessor()
|
||||
|
||||
def run(self, runtime_results):
|
||||
"""Postprocess the runtime results for Caddn
|
||||
|
||||
:param: runtime_results: (list of FDTensor)The output FDTensor results from runtime
|
||||
:return: list of PerceptionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size)
|
||||
"""
|
||||
return self._postprocessor.run(runtime_results)
|
||||
|
||||
|
||||
class Caddn(FastDeployModel):
|
||||
def __init__(self,
|
||||
model_file,
|
||||
params_file,
|
||||
config_file,
|
||||
runtime_option=None,
|
||||
model_format=ModelFormat.PADDLE):
|
||||
"""Load a Caddn model exported by Caddn.
|
||||
|
||||
:param model_file: (str)Path of model file, e.g ./Caddn.pdmodel
|
||||
:param params_file: (str)Path of parameters file, e.g ./Caddn.pdiparams
|
||||
:param config_file: (str)Path of config file, e.g ./infer_cfg.yaml
|
||||
:param runtime_option: (fastdeploy.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU
|
||||
:param model_format: (fastdeploy.ModelForamt)Model format of the loaded model
|
||||
"""
|
||||
super(Caddn, self).__init__(runtime_option)
|
||||
|
||||
self._model = C.vision.perception.Caddn(
|
||||
model_file, params_file, config_file, self._runtime_option,
|
||||
model_format)
|
||||
assert self.initialized, "Caddn initialize failed."
|
||||
|
||||
def predict(self, input_image, cam_data, lidar_data):
|
||||
"""Detect an input image
|
||||
|
||||
:param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
|
||||
:param: cam_data: (list)The input camera data
|
||||
:param: lidar_data: (list)The input lidar data
|
||||
:return: PerceptionResult
|
||||
"""
|
||||
return self._model.predict(input_image, cam_data, lidar_data)
|
||||
|
||||
def batch_predict(self, images, cam_data, lidar_data):
|
||||
"""Classify a batch of input image
|
||||
|
||||
:param im: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format
|
||||
:param: cam_data: (list)The input camera data
|
||||
:param: lidar_data: (list)The input lidar data
|
||||
:return list of PerceptionResult
|
||||
"""
|
||||
|
||||
return self._model.batch_predict(images, cam_data, lidar_data)
|
||||
|
||||
@property
|
||||
def preprocessor(self):
|
||||
"""Get CaddnPreprocessor object of the loaded model
|
||||
|
||||
:return CaddnPreprocessor
|
||||
"""
|
||||
return self._model.preprocessor
|
||||
|
||||
@property
|
||||
def postprocessor(self):
|
||||
"""Get CaddnPostprocessor object of the loaded model
|
||||
|
||||
:return CaddnPostprocessor
|
||||
"""
|
||||
return self._model.postprocessor
|
Reference in New Issue
Block a user