FastDeploy/fastdeploy/backends/poros/utils.cc
WJJ1995 f5c94e5471 Support Poros Backend (#188)
2022-10-17 15:28:12 +08:00

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/poros/poros_backend.h"
#ifdef WITH_GPU
#include <cuda_runtime_api.h>
#endif
namespace fastdeploy {
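// Return a readable name for an ATen scalar type, used in the error logs
// below.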
std::string AtType2String(const at::ScalarType& dtype) {
  std::string out;
  switch (dtype) {
    case at::kByte:
      out = "at::kByte";
      break;
    case at::kChar:
      out = "at::kChar";
      break;
    case at::kShort:
      out = "at::kShort";
      break;
    case at::kInt:
      out = "at::kInt";
      break;
    case at::kLong:
      out = "at::kLong";
      break;
    case at::kHalf:
      out = "at::kHalf";
      break;
    case at::kFloat:
      out = "at::kFloat";
      break;
    case at::kDouble:
      out = "at::kDouble";
      break;
    default:
      out = "at::UNKNOWN";
  }
  return out;
}
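// Map a FastDeploy dtype to the corresponding ATen scalar type. Unsupported
// dtypes log an error and fall back to at::kFloat.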
at::ScalarType GetPorosDtype(const FDDataType& fd_dtype) {
  if (fd_dtype == FDDataType::FP32) {
    return at::kFloat;
  } else if (fd_dtype == FDDataType::FP64) {
    return at::kDouble;
  } else if (fd_dtype == FDDataType::INT32) {
    return at::kInt;
  } else if (fd_dtype == FDDataType::INT64) {
    return at::kLong;
  }
  FDERROR << "Unrecognized fastdeploy data type: " << Str(fd_dtype) << "."
          << std::endl;
  return at::kFloat;
}
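// Map an ATen scalar type back to the corresponding FastDeploy dtype.
// Unsupported dtypes log an error and fall back to FDDataType::FP32.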
FDDataType GetFdDtype(const at::ScalarType& poros_dtype) {
  if (poros_dtype == at::kFloat) {
    return FDDataType::FP32;
  } else if (poros_dtype == at::kDouble) {
    return FDDataType::FP64;
  } else if (poros_dtype == at::kInt) {
    return FDDataType::INT32;
  } else if (poros_dtype == at::kLong) {
    return FDDataType::INT64;
  }
  FDERROR << "Unrecognized poros data type: " << AtType2String(poros_dtype)
          << "." << std::endl;
  return FDDataType::FP32;
}
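// Note: the two mappings above are inverses on the four supported dtypes,
// e.g. GetFdDtype(GetPorosDtype(FDDataType::INT64)) == FDDataType::INT64.

// Allocate an at::Tensor with the same shape and dtype as `tensor` on the
// requested device, then copy the FDTensor's buffer into it.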
at::Tensor CreatePorosValue(FDTensor& tensor, bool is_backend_cuda) {
  FDASSERT(tensor.device == Device::GPU || tensor.device == Device::CPU,
           "Only support tensor which device is CPU or GPU for PorosBackend.");
  auto data_type = GetPorosDtype(tensor.dtype);
  size_t numel = tensor.Numel();
  at::Tensor poros_value;
  if (is_backend_cuda) {
    poros_value =
        at::empty(tensor.shape, {at::kCUDA}).to(data_type).contiguous();
  } else {
    poros_value =
        at::empty(tensor.shape, {at::kCPU}).to(data_type).contiguous();
  }
  // The copies below assume the FDTensor buffer is host-resident, so the CUDA
  // path always uses cudaMemcpyHostToDevice.
  if (data_type == at::kFloat) {
    if (is_backend_cuda) {
      cudaMemcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
                 numel * sizeof(float), cudaMemcpyHostToDevice);
    } else {
      memcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
             numel * sizeof(float));
    }
  } else if (data_type == at::kInt) {
    if (is_backend_cuda) {
      cudaMemcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
                 numel * sizeof(int32_t), cudaMemcpyHostToDevice);
    } else {
      memcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
             numel * sizeof(int32_t));
    }
  } else if (data_type == at::kLong) {
    if (is_backend_cuda) {
      cudaMemcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
                 numel * sizeof(int64_t), cudaMemcpyHostToDevice);
    } else {
      memcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
             numel * sizeof(int64_t));
    }
  } else if (data_type == at::kDouble) {
    if (is_backend_cuda) {
      cudaMemcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
                 numel * sizeof(double), cudaMemcpyHostToDevice);
    } else {
      memcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
             numel * sizeof(double));
    }
  } else {
    FDASSERT(false,
             "Unrecognized data type while calling "
             "PorosBackend::CreatePorosValue().");
  }
  return poros_value;
}
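// Copy an at::Tensor produced by Poros back into a host-side FDTensor,
// resizing the FDTensor to match the source shape and dtype.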
void CopyTensorToCpu(const at::Tensor& tensor, FDTensor* fd_tensor,
                     bool is_backend_cuda) {
  const auto data_type = tensor.scalar_type();
  std::vector<int64_t> shape;
  auto sizes = tensor.sizes();
  for (size_t i = 0; i < sizes.size(); i++) {
    shape.push_back(sizes[i]);
  }
  auto fd_dtype = GetFdDtype(data_type);
  fd_tensor->Resize(shape, fd_dtype);
  size_t numel = tensor.numel();
  // at::Tensor -> FDTensor
  if (data_type == at::kFloat) {
    if (is_backend_cuda) {
      cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(float),
                 cudaMemcpyDeviceToHost);
    } else {
      memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(float));
    }
    return;
  } else if (data_type == at::kInt) {
    if (is_backend_cuda) {
      cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int32_t),
                 cudaMemcpyDeviceToHost);
    } else {
      memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int32_t));
    }
    return;
  } else if (data_type == at::kLong) {
    if (is_backend_cuda) {
      cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int64_t),
                 cudaMemcpyDeviceToHost);
    } else {
      memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int64_t));
    }
    return;
  } else if (data_type == at::kDouble) {
    if (is_backend_cuda) {
      cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(double),
                 cudaMemcpyDeviceToHost);
    } else {
      memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(double));
    }
    return;
  }
  // Mirror CreatePorosValue(): fail loudly instead of returning an FDTensor
  // whose buffer was never filled.
  FDASSERT(false,
           "Unrecognized data type while calling "
           "PorosBackend::CopyTensorToCpu().");
}
} // namespace fastdeploy
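// Illustrative usage sketch (comment only; `input`, `out_value`, and the
// Poros-compiled module are hypothetical names, not part of this file):
//
//   fastdeploy::FDTensor input;
//   input.Resize({1, 3, 224, 224}, fastdeploy::FDDataType::FP32);
//   // ... fill input.Data() with host data ...
//   at::Tensor in_value =
//       fastdeploy::CreatePorosValue(input, /*is_backend_cuda=*/false);
//   // ... run the compiled Poros module on in_value, obtaining out_value ...
//   fastdeploy::FDTensor output;
//   fastdeploy::CopyTensorToCpu(out_value, &output,
//                               /*is_backend_cuda=*/false);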