Mirror of https://github.com/PaddlePaddle/FastDeploy.git
[XPU] Add gm_default_size -> Backend::LITE (#1934)
* add gm_default_size
* add gm_default_size
---------
Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
@@ -99,13 +99,16 @@ void FD_C_RuntimeOptionWrapperUseKunlunXin(
     __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
     int kunlunxin_id, int l3_workspace_size, FD_C_Bool locked,
     FD_C_Bool autotune, const char* autotune_file, const char* precision,
-    FD_C_Bool adaptive_seqlen, FD_C_Bool enable_multi_stream) {
+    FD_C_Bool adaptive_seqlen, FD_C_Bool enable_multi_stream,
+    int64_t gm_default_size) {
   auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
                                                    fd_c_runtime_option_wrapper);
   runtime_option->UseKunlunXin(kunlunxin_id, l3_workspace_size, bool(locked),
                                bool(autotune), std::string(autotune_file),
-                               std::string(precision), bool(adaptive_seqlen),
-                               bool(enable_multi_stream));
+                               std::string(precision),
+                               bool(adaptive_seqlen),
+                               bool(enable_multi_stream),
+                               gm_default_size);
 }
 
 void FD_C_RuntimeOptionWrapperUseSophgo(
@@ -131,7 +131,8 @@ FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseKunlunXin(
     __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
     int kunlunxin_id, int l3_workspace_size, FD_C_Bool locked,
     FD_C_Bool autotune, const char* autotune_file, const char* precision,
-    FD_C_Bool adaptive_seqlen, FD_C_Bool enable_multi_stream);
+    FD_C_Bool adaptive_seqlen, FD_C_Bool enable_multi_stream,
+    int64_t gm_default_size);
 
 /** Use Sophgo to inference
  *
@@ -108,11 +108,12 @@ public class RuntimeOption {
   UseKunlunXin(int kunlunxin_id = 0, int l3_workspace_size = 0xfffc00,
                bool locked = false, bool autotune = true,
                string autotune_file = "", string precision = "int16",
-               bool adaptive_seqlen = false, bool enable_multi_stream = false) {
+               bool adaptive_seqlen = false, bool enable_multi_stream = false,
+               int64_t gm_default_size = 0) {
     FD_C_RuntimeOptionWrapperUseKunlunXin(
         fd_runtime_option_wrapper, kunlunxin_id, l3_workspace_size, locked,
         autotune, autotune_file, precision, adaptive_seqlen,
-        enable_multi_stream);
+        enable_multi_stream, gm_default_size);
   }
 
   /// Use Sophgo to inference
@@ -366,7 +367,8 @@ public class RuntimeOption {
   private static extern void FD_C_RuntimeOptionWrapperUseKunlunXin(
       IntPtr fd_runtime_option_wrapper, int kunlunxin_id, int l3_workspace_size,
       bool locked, bool autotune, string autotune_file, string precision,
-      bool adaptive_seqlen, bool enable_multi_stream);
+      bool adaptive_seqlen, bool enable_multi_stream,
+      Int64 gm_default_size);
 
   [DllImport("fastdeploy.dll",
              EntryPoint = "FD_C_RuntimeOptionWrapperUseSophgo")]
@@ -96,6 +96,7 @@ void LiteBackend::ConfigureKunlunXin(const LiteBackendOption& option) {
                                  option.kunlunxin_autotune_file);
   config_.set_xpu_multi_encoder_method(option.kunlunxin_precision,
                                        option.kunlunxin_adaptive_seqlen);
+  config_.set_xpu_gm_workspace_method(option.kunlunxin_gm_default_size);
   if (option.kunlunxin_enable_multi_stream) {
     config_.enable_xpu_multi_stream();
   }
@@ -72,6 +72,8 @@ struct LiteBackendOption {
   std::string kunlunxin_autotune_file = "";
   /// kunlunxin_precision
   std::string kunlunxin_precision = "int16";
+  /// kunlunxin_gm_default_size
+  int kunlunxin_gm_default_size = 0;
   /// kunlunxin_adaptive_seqlen
   bool kunlunxin_adaptive_seqlen = false;
   /// kunlunxin_enable_multi_stream
@@ -53,6 +53,8 @@ void BindLiteOption(pybind11::module& m) {
                      &LiteBackendOption::kunlunxin_autotune_file)
       .def_readwrite("kunlunxin_precision",
                      &LiteBackendOption::kunlunxin_precision)
+      .def_readwrite("kunlunxin_gm_default_size",
+                     &LiteBackendOption::kunlunxin_gm_default_size)
       .def_readwrite("kunlunxin_adaptive_seqlen",
                      &LiteBackendOption::kunlunxin_adaptive_seqlen)
       .def_readwrite("kunlunxin_enable_multi_stream",
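For reference, a minimal Python sketch of what this binding makes possible, not code from this commit: it assumes the C++ RuntimeOption's public paddle_lite_option member is reachable on the underlying pybind object (as it is for other backend options); only the kunlunxin_gm_default_size field itself comes from this diff, and the workspace value is an arbitrary example.

import fastdeploy as fd

option = fd.RuntimeOption()
option.use_kunlunxin(0)  # select KunlunXin XPU 0 with default settings

# Hypothetical direct access to the Lite backend option; the attribute path
# (_option.paddle_lite_option) is an assumption, the field name is from the
# binding added above. 0 (the default) keeps the previous behavior.
option._option.paddle_lite_option.kunlunxin_gm_default_size = 16 * 1024 * 1024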
@@ -84,7 +84,8 @@ void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
                                  const std::string& autotune_file,
                                  const std::string& precision,
                                  bool adaptive_seqlen,
-                                 bool enable_multi_stream) {
+                                 bool enable_multi_stream,
+                                 int64_t gm_default_size) {
   device = Device::KUNLUNXIN;
   paddle_lite_option.device = device;
   paddle_lite_option.device_id = kunlunxin_id;
@@ -95,6 +96,7 @@ void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
   paddle_lite_option.kunlunxin_precision = precision;
   paddle_lite_option.kunlunxin_adaptive_seqlen = adaptive_seqlen;
   paddle_lite_option.kunlunxin_enable_multi_stream = enable_multi_stream;
+  paddle_lite_option.kunlunxin_gm_default_size = gm_default_size;
 }
 
 void RuntimeOption::UseAscend() {
@@ -112,7 +112,8 @@ struct FASTDEPLOY_DECL RuntimeOption {
                     const std::string& autotune_file = "",
                     const std::string& precision = "int16",
                     bool adaptive_seqlen = false,
-                    bool enable_multi_stream = false);
+                    bool enable_multi_stream = false,
+                    int64_t gm_default_size = 0);
 
   void SetExternalStream(void* external_stream);
 
@@ -226,7 +226,8 @@ class RuntimeOption:
                       autotune_file="",
                       precision="int16",
                       adaptive_seqlen=False,
-                      enable_multi_stream=False):
+                      enable_multi_stream=False,
+                      gm_default_size=0):
         """Inference with KunlunXin XPU
 
         :param device_id: (int)The index of KunlunXin XPU will be used for inference, default 0
@@ -244,7 +245,8 @@ class RuntimeOption:
         """
         return self._option.use_kunlunxin(device_id, l3_workspace_size, locked,
                                           autotune, autotune_file, precision,
-                                          adaptive_seqlen, enable_multi_stream)
+                                          adaptive_seqlen, enable_multi_stream,
+                                          gm_default_size)
 
     def use_cpu(self):
         """Inference with CPU
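A minimal end-user sketch based on the updated Python signature above (not part of the commit): the keyword call is taken from the new parameter list, while the workspace value is illustrative only; the value is forwarded unchanged through RuntimeOption::UseKunlunXin and LiteBackendOption to Paddle Lite's set_xpu_gm_workspace_method, and the default of 0 leaves existing callers unaffected.

import fastdeploy as fd

option = fd.RuntimeOption()
# Run on KunlunXin XPU 0; gm_default_size is the knob added by this commit.
# 16 MiB here is an arbitrary example value, not a recommendation.
option.use_kunlunxin(device_id=0, gm_default_size=16 * 1024 * 1024)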