[Backend] cuda normalize and permute, cuda concat, optimized ppcls, ppdet & ppseg (#546)

* cuda normalize and permute, cuda concat

* add use cuda option for preprocessor

* ppyoloe use cuda normalize

* ppseg use cuda normalize

* add proclib cuda in processor base

* ppcls add use cuda preprocess api

* ppcls preprocessor set gpu id

* fix pybind

* refine ppcls preprocessing use gpu logic

* fdtensor device id is -1 by default

* refine assert message

Co-authored-by: heliqi <1101791222@qq.com>
This commit is contained in:
Wang Xinyu
2022-11-14 18:44:00 +08:00
committed by GitHub
parent 8dec2115d5
commit a36f5d3396
20 changed files with 204 additions and 26 deletions

View File

@@ -39,6 +39,9 @@ struct FASTDEPLOY_DECL FDTensor {
// GPU to inference the model
// so we can skip data transfer, which may improve the efficiency
Device device = Device::CPU;
// By default the device id of FDTensor is -1, which means this value is
// invalid, and FDTensor is using the same device id as Runtime.
int device_id = -1;
// Whether the data buffer is in pinned memory, which is allocated
// with cudaMallocHost()
@@ -130,8 +133,9 @@ struct FASTDEPLOY_DECL FDTensor {
~FDTensor() { FreeFn(); }
private:
void CopyBuffer(void* dst, const void* src, size_t nbytes);
static void CopyBuffer(void* dst, const void* src, size_t nbytes,
const Device& device = Device::CPU,
bool is_pinned_memory = false);
};
} // namespace fastdeploy