Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-13 12:23:55 +08:00)
Fd serving add docker images correlation and docs (#311)
* fd serving add dockerfile
* fix enable_paddle_mkldnn
* delete disable_paddle_mkldnn

Co-authored-by: Jason <jiangjiajun@baidu.com>
@@ -26,6 +26,7 @@
 #include <stdint.h>
 
+#include <algorithm>
 #include <mutex>
 #include <vector>
 
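The added include is presumably <algorithm> for std::transform, which the reworked option parsing below uses to normalize the "precision" value before comparing it. A standalone illustration of that call (the helper name is hypothetical):

#include <algorithm>
#include <cctype>
#include <string>

// Lower-cases a config value in place, mirroring how the parsing below
// normalizes the "precision" parameter (e.g. "TRT_FP16" -> "trt_fp16").
void ToLowerInPlace(std::string& value) {
  std::transform(value.begin(), value.end(), value.begin(), ::tolower);
}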
@@ -169,83 +170,154 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
   // instance when creating that instance's runtime.
   runtime_options_.reset(new fastdeploy::RuntimeOption());
 
-  triton::common::TritonJson::Value optimization;
-  if (ModelConfig().Find("optimization", &optimization)) {
-    triton::common::TritonJson::Value backend;
-    if (optimization.Find("onnxruntime", &backend)) {
-      runtime_options_->UseOrtBackend();
-      std::vector<std::string> param_keys;
-      THROW_IF_BACKEND_MODEL_ERROR(backend.Members(&param_keys));
-      for (const auto& param_key : param_keys) {
-        std::string value_string;
-        if (param_key == "graph_level") {
-          THROW_IF_BACKEND_MODEL_ERROR(
-              backend.MemberAsString(param_key.c_str(), &value_string));
-          THROW_IF_BACKEND_MODEL_ERROR(ParseIntValue(
-              value_string, &runtime_options_->ort_graph_opt_level));
-        } else if (param_key == "inter_op_num_threads") {
-          THROW_IF_BACKEND_MODEL_ERROR(
-              backend.MemberAsString(param_key.c_str(), &value_string));
-          THROW_IF_BACKEND_MODEL_ERROR(ParseIntValue(
-              value_string, &runtime_options_->ort_inter_op_num_threads));
-        } else if (param_key == "execution_mode") {
-          THROW_IF_BACKEND_MODEL_ERROR(
-              backend.MemberAsString(param_key.c_str(), &value_string));
-          THROW_IF_BACKEND_MODEL_ERROR(ParseIntValue(
-              value_string, &runtime_options_->ort_execution_mode));
-        }
-      }
-    } else if (optimization.Find("tensorrt", &backend)) {
-      runtime_options_->UseTrtBackend();
-      std::vector<std::string> param_keys;
-      THROW_IF_BACKEND_MODEL_ERROR(backend.Members(&param_keys));
-      for (const auto& param_key : param_keys) {
-        std::string value_string;
-        if (param_key == "cpu_threads") {
-          THROW_IF_BACKEND_MODEL_ERROR(
-              backend.MemberAsString(param_key.c_str(), &value_string));
-          THROW_IF_BACKEND_MODEL_ERROR(
-              ParseIntValue(value_string, &runtime_options_->cpu_thread_num));
-        }
-        // TODO(liqi): add tensorrt
-      }
-    } else if (optimization.Find("paddle", &backend)) {
-      runtime_options_->UsePaddleBackend();
-      std::vector<std::string> param_keys;
-      THROW_IF_BACKEND_MODEL_ERROR(backend.Members(&param_keys));
-      for (const auto& param_key : param_keys) {
-        std::string value_string;
-        if (param_key == "cpu_threads") {
-          THROW_IF_BACKEND_MODEL_ERROR(
-              backend.MemberAsString(param_key.c_str(), &value_string));
-          THROW_IF_BACKEND_MODEL_ERROR(
-              ParseIntValue(value_string, &runtime_options_->cpu_thread_num));
-        } else if (param_key == "capacity") {
-          THROW_IF_BACKEND_MODEL_ERROR(
-              backend.MemberAsString(param_key.c_str(), &value_string));
-          THROW_IF_BACKEND_MODEL_ERROR(ParseIntValue(
-              value_string, &runtime_options_->pd_mkldnn_cache_size));
-        } else if (param_key == "use_mkldnn") {
-          THROW_IF_BACKEND_MODEL_ERROR(
-              backend.MemberAsString(param_key.c_str(), &value_string));
-          THROW_IF_BACKEND_MODEL_ERROR(ParseBoolValue(
-              value_string, &runtime_options_->pd_enable_mkldnn));
-        }
-      }
-    } else if (optimization.Find("openvino", &backend)) {
-      runtime_options_->UseOpenVINOBackend();
-      std::vector<std::string> param_keys;
-      THROW_IF_BACKEND_MODEL_ERROR(backend.Members(&param_keys));
-      for (const auto& param_key : param_keys) {
-        std::string value_string;
-        if (param_key == "cpu_threads") {
-          THROW_IF_BACKEND_MODEL_ERROR(
-              backend.MemberAsString(param_key.c_str(), &value_string));
-          THROW_IF_BACKEND_MODEL_ERROR(
-              ParseIntValue(value_string, &runtime_options_->cpu_thread_num));
-        }
-        // TODO(liqi): add openvino
-      }
-    }
-  }
+  triton::common::TritonJson::Value optimization;
+  if (not ModelConfig().Find("optimization", &optimization)) {
+    return;
+  }
+
+  triton::common::TritonJson::Value eas;
+  if (not optimization.Find("execution_accelerators", &eas)) {
+    return;
+  }
+
+  // CPU execution providers
+  {
+    triton::common::TritonJson::Value cpu_eas;
+    if (eas.Find("cpu_execution_accelerator", &cpu_eas)) {
+      for (size_t idx = 0; idx < cpu_eas.ArraySize(); idx++) {
+        triton::common::TritonJson::Value ea;
+        THROW_IF_BACKEND_MODEL_ERROR(cpu_eas.IndexAsObject(idx, &ea));
+        std::string name;
+        THROW_IF_BACKEND_MODEL_ERROR(ea.MemberAsString("name", &name));
+        if (name == "onnxruntime") {
+          runtime_options_->UseOrtBackend();
+        } else if (name == "paddle") {
+          runtime_options_->UsePaddleBackend();
+        } else if (name == "openvino") {
+          runtime_options_->UseOpenVINOBackend();
+        } else if (name != "") {
+          TRITONSERVER_Error* error = TRITONSERVER_ErrorNew(
+              TRITONSERVER_ERROR_INVALID_ARG,
+              std::string("unknown cpu_execution_accelerator name '" + name +
+                          "' is provided. Available choices are [onnxruntime, "
+                          "paddle, openvino]")
+                  .c_str());
+          THROW_IF_BACKEND_MODEL_ERROR(error);
+        }
+
+        triton::common::TritonJson::Value params;
+        if (ea.Find("parameters", &params)) {
+          std::vector<std::string> param_keys;
+          THROW_IF_BACKEND_MODEL_ERROR(params.Members(&param_keys));
+          for (const auto& param_key : param_keys) {
+            std::string value_string;
+            THROW_IF_BACKEND_MODEL_ERROR(
+                params.MemberAsString(param_key.c_str(), &value_string));
+            if (param_key == "cpu_threads") {
+              int cpu_thread_num;
+              THROW_IF_BACKEND_MODEL_ERROR(
+                  ParseIntValue(value_string, &cpu_thread_num));
+              runtime_options_->SetCpuThreadNum(cpu_thread_num);
+            // } else if (param_key == "graph_level") {
+            //   THROW_IF_BACKEND_MODEL_ERROR(ParseIntValue(
+            //       value_string, &runtime_options_->ort_graph_opt_level));
+            // } else if (param_key == "inter_op_num_threads") {
+            //   THROW_IF_BACKEND_MODEL_ERROR(ParseIntValue(
+            //       value_string,
+            //       &runtime_options_->ort_inter_op_num_threads));
+            // } else if (param_key == "execution_mode") {
+            //   THROW_IF_BACKEND_MODEL_ERROR(ParseIntValue(
+            //       value_string, &runtime_options_->ort_execution_mode));
+            // } else if (param_key == "capacity") {
+            //   THROW_IF_BACKEND_MODEL_ERROR(ParseIntValue(
+            //       value_string, &runtime_options_->pd_mkldnn_cache_size));
+            } else if (param_key == "use_mkldnn") {
+              bool pd_enable_mkldnn;
+              THROW_IF_BACKEND_MODEL_ERROR(
+                  ParseBoolValue(value_string, &pd_enable_mkldnn));
+              runtime_options_->SetPaddleMKLDNN(pd_enable_mkldnn);
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // GPU execution providers
+  {
+    triton::common::TritonJson::Value gpu_eas;
+    if (eas.Find("gpu_execution_accelerator", &gpu_eas)) {
+      for (size_t idx = 0; idx < gpu_eas.ArraySize(); idx++) {
+        triton::common::TritonJson::Value ea;
+        THROW_IF_BACKEND_MODEL_ERROR(gpu_eas.IndexAsObject(idx, &ea));
+        std::string name;
+        THROW_IF_BACKEND_MODEL_ERROR(ea.MemberAsString("name", &name));
+
+        if (name == "onnxruntime") {
+          runtime_options_->UseOrtBackend();
+        } else if (name == "paddle") {
+          runtime_options_->UsePaddleBackend();
+        } else if (name == "tensorrt") {
+          runtime_options_->UseTrtBackend();
+        }
+
+        if (name == "min_shape" or name == "max_shape" or name == "opt_shape") {
+          triton::common::TritonJson::Value params;
+          if (ea.Find("parameters", &params)) {
+            std::vector<std::string> input_names;
+            THROW_IF_BACKEND_MODEL_ERROR(params.Members(&input_names));
+            for (const auto& input_name : input_names) {
+              std::vector<int32_t> shape;
+              FDParseShape(params, input_name, &shape);
+              if (name == "min_shape") {
+                runtime_options_->trt_min_shape[input_name] = shape;
+              } else if (name == "max_shape") {
+                runtime_options_->trt_max_shape[input_name] = shape;
+              } else {
+                runtime_options_->trt_opt_shape[input_name] = shape;
+              }
+            }
+          }
+        } else {
+          triton::common::TritonJson::Value params;
+          if (ea.Find("parameters", &params)) {
+            std::vector<std::string> param_keys;
+            THROW_IF_BACKEND_MODEL_ERROR(params.Members(&param_keys));
+            for (const auto& param_key : param_keys) {
+              std::string value_string;
+              THROW_IF_BACKEND_MODEL_ERROR(
+                  params.MemberAsString(param_key.c_str(), &value_string));
+              // if (param_key == "graph_level") {
+              //   THROW_IF_BACKEND_MODEL_ERROR(ParseIntValue(
+              //       value_string, &runtime_options_->ort_graph_opt_level));
+              // } else if (param_key == "inter_op_num_threads") {
+              //   THROW_IF_BACKEND_MODEL_ERROR(ParseIntValue(
+              //       value_string,
+              //       &runtime_options_->ort_inter_op_num_threads));
+              // } else if (param_key == "execution_mode") {
+              //   THROW_IF_BACKEND_MODEL_ERROR(ParseIntValue(
+              //       value_string, &runtime_options_->ort_execution_mode));
+              // }
+              if (param_key == "precision") {
+                std::transform(value_string.begin(), value_string.end(),
+                               value_string.begin(), ::tolower);
+                if (value_string == "trt_fp16") {
+                  runtime_options_->EnableTrtFP16();
+                } else if (value_string == "trt_int8") {
+                  // TODO(liqi): use EnableTrtINT8
+                  runtime_options_->trt_enable_int8 = true;
+                }
+              // } else if( param_key == "max_batch_size") {
+              //   THROW_IF_BACKEND_MODEL_ERROR(ParseUnsignedLongLongValue(
+              //       value_string, &runtime_options_->trt_max_batch_size));
+              // } else if( param_key == "workspace_size") {
+              //   THROW_IF_BACKEND_MODEL_ERROR(ParseUnsignedLongLongValue(
+              //       value_string,
+              //       &runtime_options_->trt_max_workspace_size));
+              } else if (param_key == "cache_file") {
+                runtime_options_->SetTrtCacheFile(value_string);
+              }
+            }
+          }
+        }
+      }
+    }
+  }
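For context, the new parsing consumes Triton's standard optimization.execution_accelerators block from a model's config.pbtxt. A hypothetical sketch of a config this code would accept (the accelerator and parameter names come from the code above; the input name, shape format, and values are illustrative assumptions, and the space-separated dims for FDParseShape are a guess):

optimization {
  execution_accelerators {
    cpu_execution_accelerator : [
      {
        name : "paddle"                                      # UsePaddleBackend()
        parameters { key: "cpu_threads" value: "4" }         # ParseIntValue -> SetCpuThreadNum
        parameters { key: "use_mkldnn" value: "true" }       # ParseBoolValue -> SetPaddleMKLDNN
      }
    ],
    gpu_execution_accelerator : [
      {
        name : "tensorrt"                                    # UseTrtBackend()
        parameters { key: "precision" value: "trt_fp16" }    # EnableTrtFP16()
        parameters { key: "cache_file" value: "./trt_cache" }
      },
      {
        name : "min_shape"                                   # per-input TensorRT shape hints
        parameters { key: "INPUT_0" value: "1 3 224 224" }   # assumed dim encoding for FDParseShape
      }
    ]
  }
}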
@@ -285,11 +357,11 @@ TRITONSERVER_Error* ModelState::LoadModel(
               "not provided.'")
               .c_str());
     }
-    runtime_options_->model_format = fastdeploy::Frontend::PADDLE;
+    runtime_options_->model_format = fastdeploy::ModelFormat::PADDLE;
     runtime_options_->model_file = *model_path;
     runtime_options_->params_file = *params_path;
   } else {
-    runtime_options_->model_format = fastdeploy::Frontend::ONNX;
+    runtime_options_->model_format = fastdeploy::ModelFormat::ONNX;
     runtime_options_->model_file = *model_path;
   }
 }
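The same RuntimeOption fields and setters can be exercised outside Triton through FastDeploy's standalone runtime. A minimal sketch, assuming the fastdeploy::Runtime / Runtime::Init API and placeholder model paths; the option fields and setter calls mirror the ones in this diff:

#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption opt;
  // Renamed in this commit: Frontend::PADDLE -> ModelFormat::PADDLE.
  opt.model_format = fastdeploy::ModelFormat::PADDLE;
  opt.model_file = "model/inference.pdmodel";     // placeholder path
  opt.params_file = "model/inference.pdiparams";  // placeholder path
  opt.UsePaddleBackend();     // what the backend does for name == "paddle"
  opt.SetCpuThreadNum(4);     // "cpu_threads" parameter
  opt.SetPaddleMKLDNN(true);  // "use_mkldnn" parameter

  fastdeploy::Runtime runtime;
  if (!runtime.Init(opt)) {
    return -1;  // runtime failed to initialize
  }
  return 0;
}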