mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 00:57:33 +08:00
[Backend] cuda normalize and permute, cuda concat, optimized ppcls, ppdet & ppseg (#546)
* cuda normalize and permute, cuda concat * add use cuda option for preprocessor * ppyoloe use cuda normalize * ppseg use cuda normalize * add proclib cuda in processor base * ppcls add use cuda preprocess api * ppcls preprocessor set gpu id * fix pybind * refine ppcls preprocessing use gpu logic * fdtensor device id is -1 by default * refine assert message Co-authored-by: heliqi <1101791222@qq.com>
This commit is contained in:
@@ -85,8 +85,9 @@ struct ConcatFunctor {
|
||||
int64_t col_len = input_cols[j];
|
||||
const T* input_data = reinterpret_cast<const T*>(input[j].Data());
|
||||
for (int64_t k = 0; k < out_rows; ++k) {
|
||||
std::memcpy(output_data + k * out_cols + col_idx,
|
||||
input_data + k * col_len, sizeof(T) * col_len);
|
||||
FDTensor::CopyBuffer(output_data + k * out_cols + col_idx,
|
||||
input_data + k * col_len, sizeof(T) * col_len,
|
||||
input[j].device, input[j].is_pinned_memory);
|
||||
}
|
||||
col_idx += col_len;
|
||||
}
|
||||
@@ -97,7 +98,8 @@ template <typename T>
|
||||
void ConcatKernel(const std::vector<FDTensor>& input, FDTensor* output,
|
||||
int axis) {
|
||||
auto output_shape = ComputeAndCheckConcatOutputShape(input, axis);
|
||||
output->Allocate(output_shape, TypeToDataType<T>::dtype);
|
||||
output->Resize(output_shape, TypeToDataType<T>::dtype, output->name,
|
||||
input[0].device);
|
||||
|
||||
ConcatFunctor<T> functor;
|
||||
functor(input, axis, output);
|
||||
@@ -115,10 +117,9 @@ void Concat(const std::vector<FDTensor>& x, FDTensor* out, int axis) {
|
||||
if (axis < 0) {
|
||||
axis += rank;
|
||||
}
|
||||
FDTensor out_temp;
|
||||
|
||||
FD_VISIT_ALL_TYPES(x[0].dtype, "Concat",
|
||||
([&] { ConcatKernel<data_t>(x, &out_temp, axis); }));
|
||||
*out = std::move(out_temp);
|
||||
([&] { ConcatKernel<data_t>(x, out, axis); }));
|
||||
}
|
||||
|
||||
} // namespace function
|
||||
|
Reference in New Issue
Block a user