mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 17:17:14 +08:00
[Backend] cuda normalize and permute, cuda concat, optimized ppcls, ppdet & ppseg (#546)
* cuda normalize and permute, cuda concat * add use cuda option for preprocessor * ppyoloe use cuda normalize * ppseg use cuda normalize * add proclib cuda in processor base * ppcls add use cuda preprocess api * ppcls preprocessor set gpu id * fix pybind * refine ppcls preprocessing use gpu logic * fdtensor device id is -1 by default * refine assert message Co-authored-by: heliqi <1101791222@qq.com>
This commit is contained in:
@@ -39,6 +39,9 @@ struct FASTDEPLOY_DECL FDTensor {
|
||||
// GPU to inference the model
|
||||
// so we can skip data transfer, which may improve the efficiency
|
||||
Device device = Device::CPU;
|
||||
// By default the device id of FDTensor is -1, which means this value is
|
||||
// invalid, and FDTensor is using the same device id as Runtime.
|
||||
int device_id = -1;
|
||||
|
||||
// Whether the data buffer is in pinned memory, which is allocated
|
||||
// with cudaMallocHost()
|
||||
@@ -130,8 +133,9 @@ struct FASTDEPLOY_DECL FDTensor {
|
||||
|
||||
~FDTensor() { FreeFn(); }
|
||||
|
||||
private:
|
||||
void CopyBuffer(void* dst, const void* src, size_t nbytes);
|
||||
static void CopyBuffer(void* dst, const void* src, size_t nbytes,
|
||||
const Device& device = Device::CPU,
|
||||
bool is_pinned_memory = false);
|
||||
};
|
||||
|
||||
} // namespace fastdeploy
|
||||
|
Reference in New Issue
Block a user