mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-11 19:40:25 +08:00
[Backend] cuda normalize and permute, cuda concat, optimized ppcls, ppdet & ppseg (#546)
* cuda normalize and permute, cuda concat
* add use cuda option for preprocessor
* ppyoloe use cuda normalize
* ppseg use cuda normalize
* add proclib cuda in processor base
* ppcls add use cuda preprocess api
* ppcls preprocessor set gpu id
* fix pybind
* refine ppcls preprocessing use gpu logic
* fdtensor device id is -1 by default
* refine assert message

Co-authored-by: heliqi <1101791222@qq.com>
This commit is contained in:
@@ -252,7 +252,8 @@ void FDTensor::FreeFn() {
   }
 }
 
-void FDTensor::CopyBuffer(void* dst, const void* src, size_t nbytes) {
+void FDTensor::CopyBuffer(void* dst, const void* src, size_t nbytes,
+                          const Device& device, bool is_pinned_memory) {
   if (device == Device::GPU) {
 #ifdef WITH_GPU
     FDASSERT(cudaMemcpy(dst, src, nbytes, cudaMemcpyDeviceToDevice) == 0,
@@ -295,7 +296,7 @@ FDTensor::FDTensor(const FDTensor& other)
     size_t nbytes = Nbytes();
     FDASSERT(ReallocFn(nbytes),
              "The FastDeploy FDTensor allocate memory error");
-    CopyBuffer(buffer_, other.buffer_, nbytes);
+    CopyBuffer(buffer_, other.buffer_, nbytes, device, is_pinned_memory);
   }
 }
|
||||
@@ -325,7 +326,7 @@ FDTensor& FDTensor::operator=(const FDTensor& other) {
   } else {
     Resize(other.shape, other.dtype, other.name, other.device);
     size_t nbytes = Nbytes();
-    CopyBuffer(buffer_, other.buffer_, nbytes);
+    CopyBuffer(buffer_, other.buffer_, nbytes, device, is_pinned_memory);
   }
   external_data_ptr = other.external_data_ptr;
 }
|
Reference in New Issue
Block a user