mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 17:17:14 +08:00
Support Poros Backend (#188)
* Add poros backend * Add torch lib * Add python3 lib * set c++ 14 for poros * fixed bugs * fixed grammer bugs * fixed grammer bugs * fixed code bugs * fixed code bugs * fixed CreatePorosValue bug * Add AtType2String for Log * fixed trt_option * fixed poros.cmake path * fixed grammer bug * fixed grammer bug * fixed ambiguous reference * fixed ambiguous reference * fixed reference error * fixed include files * rm ENABLE_TRT_BACKEND in poros * update CMakeLists.txt * fixed CMakeLists.txt * Add libtorch.so in CMakeLists.txt * Fixed CMakeLists.txt * Fixed CMakeLists.txt * Fixed copy bug * Fixed copy bug * Fixed copy bug * Fixed Cmake * Fixed Cmake * debug * debug * debug * debug * debug * debug * debug utils * debug utils * copy to cpu * rm log info * test share mem * test share mem * test share mem * test multi outputs * test multi outputs * test multi outputs * test multi outputs * test multi outputs * test multi outputs * test multi outputs * time cost * time cost * fixed bug * time collect * mem copy * mem copy * rm time log * rm share mem * fixed multi inputs bug * add set_input_dtypes func * add SetInputDtypes * fixed bug * fixed bug * fixed prewarm data order * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * fixed bug * Add compile func * Add compile func * Add compile func * Add is_dynamic option * Add is_dynamic option * Add is_dynamic option * Add is_dynamic option * rm infer log * add cuda11.6 poros lib * fixed bug * fixed bug * fixed multi outputs * fixed multi outputs * fixed multi outputs * fixed multi outputs * fixed multi outputs * fixed multi outputs * fixed multi outputs * fixed multi outputs * fixed multi outputs * fixed multi outputs * fixed multi outputs * rm logs * test * test * test * add test log * add test log * add test log * add test log * support cpu * support cpu * support cpu * support cpu * support member variable definition * rm useless log * fixed name * resolve conflict * resolve conflict * 
resolve conflict * fixed cmake * add GetInputInfos&GetOutputInfos * add GetInputInfos&GetOutputInfos * fixed bug * fixed runtime.py * add compile func * add np * deal with comments * rm to_inter func * add property
This commit is contained in:
186
fastdeploy/backends/poros/utils.cc
Normal file
186
fastdeploy/backends/poros/utils.cc
Normal file
@@ -0,0 +1,186 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "fastdeploy/backends/poros/poros_backend.h"
|
||||
|
||||
#ifdef WITH_GPU
|
||||
#include <cuda_runtime_api.h>
|
||||
#endif
|
||||
|
||||
namespace fastdeploy {
|
||||
|
||||
// Return a printable name for an ATen scalar type (e.g. "at::kFloat").
// Types outside the supported set map to "at::UNKNOWN". Used for logging.
std::string AtType2String(const at::ScalarType& dtype) {
  switch (dtype) {
    case at::kByte:
      return "at::kByte";
    case at::kChar:
      return "at::kChar";
    case at::kShort:
      return "at::kShort";
    case at::kInt:
      return "at::kInt";
    case at::kLong:
      return "at::kLong";
    case at::kHalf:
      return "at::kHalf";
    case at::kFloat:
      return "at::kFloat";
    case at::kDouble:
      return "at::kDouble";
    default:
      return "at::UNKNOWN";
  }
}
|
||||
|
||||
// Map a FastDeploy data type to the corresponding ATen scalar type.
// Supported: FP32, FP64, INT32, INT64. Anything else is reported via
// FDERROR and falls back to at::kFloat.
at::ScalarType GetPorosDtype(const FDDataType& fd_dtype) {
  if (fd_dtype == FDDataType::FP32) {
    return at::kFloat;
  } else if (fd_dtype == FDDataType::FP64) {
    return at::kDouble;
  } else if (fd_dtype == FDDataType::INT32) {
    return at::kInt;
  } else if (fd_dtype == FDDataType::INT64) {
    return at::kLong;
  }
  // Fix: the message previously misspelled "fastdeploy" as "fastdeply".
  FDERROR << "Unrecognized fastdeploy data type:" << Str(fd_dtype) << "."
          << std::endl;
  return at::kFloat;
}
|
||||
|
||||
// Map an ATen scalar type back to the FastDeploy data type enum.
// Supported: kFloat, kDouble, kInt, kLong. Anything else is reported via
// FDERROR and falls back to FDDataType::FP32.
FDDataType GetFdDtype(const at::ScalarType& poros_dtype) {
  switch (poros_dtype) {
    case at::kFloat:
      return FDDataType::FP32;
    case at::kDouble:
      return FDDataType::FP64;
    case at::kInt:
      return FDDataType::INT32;
    case at::kLong:
      return FDDataType::INT64;
    default:
      break;
  }
  FDERROR << "Unrecognized poros data type:" << AtType2String(poros_dtype)
          << "." << std::endl;
  return FDDataType::FP32;
}
|
||||
|
||||
// Build an at::Tensor on the requested backend device (CPU or CUDA) and
// fill it with a copy of the FDTensor's buffer.
// NOTE(review): the copy into a CUDA tensor always uses
// cudaMemcpyHostToDevice, so the FDTensor's data is presumably expected to
// live in host memory even when tensor.device == Device::GPU — confirm
// against callers.
at::Tensor CreatePorosValue(FDTensor& tensor, bool is_backend_cuda) {
  FDASSERT(tensor.device == Device::GPU || tensor.device == Device::CPU,
           "Only support tensor which device is CPU or GPU for PorosBackend.");
  auto data_type = GetPorosDtype(tensor.dtype);
  size_t numel = tensor.Numel();
  // Allocate an empty tensor on the target device, then cast it to the
  // requested dtype and make it contiguous.
  at::Tensor poros_value;
  if (is_backend_cuda) {
    poros_value =
        at::empty(tensor.shape, {at::kCUDA}).to(data_type).contiguous();
  } else {
    poros_value =
        at::empty(tensor.shape, {at::kCPU}).to(data_type).contiguous();
  }
  // Per-element width of the supported dtypes; any other dtype is a hard
  // error, same as the original branch-per-type implementation.
  size_t elem_bytes = 0;
  if (data_type == at::kFloat) {
    elem_bytes = sizeof(float);
  } else if (data_type == at::kInt) {
    elem_bytes = sizeof(int32_t);
  } else if (data_type == at::kLong) {
    elem_bytes = sizeof(int64_t);
  } else if (data_type == at::kDouble) {
    elem_bytes = sizeof(double);
  } else {
    FDASSERT(false,
             "Unrecognized data type while calling "
             "PorosBackend::CreatePorosValue().");
  }
  // Single copy path now that the byte count is known.
  if (is_backend_cuda) {
    cudaMemcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
               numel * elem_bytes, cudaMemcpyHostToDevice);
  } else {
    memcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
           numel * elem_bytes);
  }
  return poros_value;
}
|
||||
|
||||
// Copy an at::Tensor into an FDTensor backed by host memory.
// The FDTensor is first resized to the source shape/dtype; for dtypes other
// than float/int32/int64/double no data is copied after the resize, which
// matches the original implementation's fall-through behavior.
void CopyTensorToCpu(const at::Tensor& tensor, FDTensor* fd_tensor,
                     bool is_backend_cuda) {
  const auto data_type = tensor.scalar_type();
  // Mirror the source shape into the FDTensor.
  std::vector<int64_t> shape;
  for (auto dim : tensor.sizes()) {
    shape.push_back(dim);
  }
  fd_tensor->Resize(shape, GetFdDtype(data_type));
  size_t numel = tensor.numel();
  // Per-element width of the supported dtypes.
  size_t elem_bytes = 0;
  if (data_type == at::kFloat) {
    elem_bytes = sizeof(float);
  } else if (data_type == at::kInt) {
    elem_bytes = sizeof(int32_t);
  } else if (data_type == at::kLong) {
    elem_bytes = sizeof(int64_t);
  } else if (data_type == at::kDouble) {
    elem_bytes = sizeof(double);
  } else {
    // Unsupported dtype: the original performed no copy here.
    return;
  }
  // at::Tensor -> FDTensor
  if (is_backend_cuda) {
    cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * elem_bytes,
               cudaMemcpyDeviceToHost);
  } else {
    memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * elem_bytes);
  }
}
|
||||
|
||||
} // namespace fastdeploy
|
Reference in New Issue
Block a user