Merge pull request #1305 from wwbitejotunn/set_stream_infer-shareExData

[Backend] Set inference with extra stream and prebind output tensor with share ex data
This commit is contained in:
Jack Zhou
2023-02-14 17:08:56 +08:00
committed by GitHub
10 changed files with 177 additions and 35 deletions

View File

@@ -25,6 +25,7 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
if (option.device == Device::GPU) {
config_.EnableUseGpu(option.gpu_mem_init_size, option.device_id);
if (option_.external_stream_) {
FDINFO << "Will use external stream for Paddle Backend." << std::endl;
config_.SetExecStream(option_.external_stream_);
}
if (option.enable_trt) {
@@ -226,23 +227,47 @@ bool PaddleBackend::Infer(std::vector<FDTensor>& inputs,
<< inputs_desc_.size() << ")." << std::endl;
return false;
}
// output share backend memory only support CPU or GPU
if (option_.device == Device::IPU) {
copy_to_fd = true;
}
RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
for (size_t i = 0; i < inputs.size(); ++i) {
auto handle = predictor_->GetInputHandle(inputs[i].name);
ShareTensorFromFDTensor(handle.get(), inputs[i]);
}
std::unordered_set<std::string> prebinded_output_name;
// prebinded output only support for GPU
if (!copy_to_fd) {
for (size_t i = 0; i < (*outputs).size(); ++i) {
auto output_name = (*outputs)[i].name;
// if a output is not prebinded,
// the name of output is expected to be empty.
// We skip here
if (output_name.empty()) {
continue;
}
// Record the prebinded output_name.
// Those outputs do not need PaddleTensorToFDTensor
// after predictor_.Run()
prebinded_output_name.insert(output_name);
auto handle = predictor_->GetOutputHandle(output_name);
ShareOutTensorFromFDTensor(handle.get(), (*outputs)[i]);
}
}
RUNTIME_PROFILE_LOOP_BEGIN(1)
predictor_->Run();
RUNTIME_PROFILE_LOOP_END
// output share backend memory only support CPU or GPU
if (option_.device == Device::IPU) {
copy_to_fd = true;
}
outputs->resize(outputs_desc_.size());
for (size_t i = 0; i < outputs_desc_.size(); ++i) {
// skip prebinded output
if (copy_to_fd == false &&
prebinded_output_name.count(outputs_desc_[i].name)) {
continue;
}
auto handle = predictor_->GetOutputHandle(outputs_desc_[i].name);
if (copy_to_fd) {
(*outputs)[i].is_pinned_memory = option_.enable_pinned_memory;

View File

@@ -35,6 +35,9 @@ paddle_infer::PlaceType ConvertFDDeviceToPlace(Device device);
// Share memory buffer with paddle_infer::Tensor from fastdeploy::FDTensor
void ShareTensorFromFDTensor(paddle_infer::Tensor* tensor, FDTensor& fd_tensor);
void ShareOutTensorFromFDTensor(paddle_infer::Tensor* tensor,
FDTensor& fd_tensor);
// convert paddle_infer::Tensor to fastdeploy::FDTensor
// if copy_to_fd is true, copy memory data to FDTensor
/// else share memory to FDTensor
@@ -89,4 +92,4 @@ class PaddleBackend : public BaseBackend {
std::vector<TensorInfo> inputs_desc_;
std::vector<TensorInfo> outputs_desc_;
};
} // namespace fastdeploy
} // namespace fastdeploy

View File

@@ -61,6 +61,43 @@ void ShareTensorFromFDTensor(paddle_infer::Tensor* tensor,
Str(fd_tensor.dtype).c_str());
}
void ShareOutTensorFromFDTensor(paddle_infer::Tensor* tensor,
FDTensor& fd_tensor) {
std::vector<int> shape(fd_tensor.shape.begin(), fd_tensor.shape.end());
auto place = ConvertFDDeviceToPlace(fd_tensor.device);
if (fd_tensor.dtype == FDDataType::FP32) {
if (place == paddle_infer::PlaceType::kGPU) {
tensor->ShareExternalData(static_cast<float*>(fd_tensor.MutableData()),
shape, place);
} else {
tensor->CopyToCpu(static_cast<float*>(fd_tensor.MutableData()));
}
return;
} else if (fd_tensor.dtype == FDDataType::INT32) {
if (place == paddle_infer::PlaceType::kGPU) {
tensor->ShareExternalData(static_cast<int32_t*>(fd_tensor.MutableData()),
shape, place);
} else {
tensor->CopyToCpu(static_cast<int32_t*>(fd_tensor.MutableData()));
}
return;
} else if (fd_tensor.dtype == FDDataType::INT64) {
if (place == paddle_infer::PlaceType::kGPU) {
tensor->ShareExternalData(static_cast<int64_t*>(fd_tensor.MutableData()),
shape, place);
} else {
tensor->CopyToCpu(static_cast<int64_t*>(fd_tensor.MutableData()));
}
return;
} else if (fd_tensor.dtype == FDDataType::UINT8) {
tensor->ShareExternalData(static_cast<uint8_t*>(fd_tensor.MutableData()),
shape, paddle_infer::PlaceType::kCPU);
return;
}
FDASSERT(false, "Unexpected data type(%s) while infer with PaddleBackend.",
Str(fd_tensor.dtype).c_str());
}
void PaddleTensorToFDTensor(std::unique_ptr<paddle_infer::Tensor>& tensor,
FDTensor* fd_tensor, bool copy_to_fd) {
auto fd_dtype = PaddleDataTypeToFD(tensor->type());

View File

@@ -49,6 +49,10 @@ void BindOption(pybind11::module& m) {
.def_readwrite("poros_option", &RuntimeOption::poros_option)
.def_readwrite("paddle_infer_option", &RuntimeOption::paddle_infer_option)
.def("set_external_stream", &RuntimeOption::SetExternalStream)
.def("set_external_raw_stream",
[](RuntimeOption& self, size_t external_stream) {
self.SetExternalStream(reinterpret_cast<void*>(external_stream));
})
.def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum)
.def("use_paddle_backend", &RuntimeOption::UsePaddleBackend)
.def("use_poros_backend", &RuntimeOption::UsePorosBackend)

View File

@@ -224,6 +224,25 @@ void Runtime::BindInputTensor(const std::string& name, FDTensor& input) {
}
}
void Runtime::BindOutputTensor(const std::string& name, FDTensor& output) {
bool is_exist = false;
for (auto& t : output_tensors_) {
if (t.name == name) {
FDINFO << "The output name [" << name << "] is exist." << std::endl;
is_exist = true;
t.SetExternalData(output.shape, output.dtype, output.MutableData(),
output.device, output.device_id);
break;
}
}
if (!is_exist) {
FDINFO << "The output name [" << name << "] is prebinded added into output tensor list." << std::endl;
FDTensor new_tensor(name);
new_tensor.SetExternalData(output.shape, output.dtype, output.MutableData(),
output.device, output.device_id);
output_tensors_.emplace_back(std::move(new_tensor));
}
}
FDTensor* Runtime::GetOutputTensor(const std::string& name) {
for (auto& t : output_tensors_) {
if (t.name == name) {

View File

@@ -75,6 +75,12 @@ struct FASTDEPLOY_DECL Runtime {
/** \brief Bind FDTensor by name, no copy and share input memory
*/
void BindInputTensor(const std::string& name, FDTensor& input);
/** \brief Bind FDTensor by name, no copy and share output memory.
* Please make share the correctness of tensor shape of output.
*/
void BindOutputTensor(const std::string& name, FDTensor& output);
/** \brief Get output FDTensor by name, no copy and share backend output memory
*/
FDTensor* GetOutputTensor(const std::string& name);

View File

@@ -110,6 +110,7 @@ struct FASTDEPLOY_DECL RuntimeOption {
bool enable_multi_stream = false);
void SetExternalStream(void* external_stream);
/*
* @brief Set number of cpu threads while inference on CPU, by default it will decided by the different backends
*/