[XPU] Add gm_default_size -> Backend::LITE (#1934)

* add gm_default_size

* add gm_default_size

---------

Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
This commit is contained in:
linkk08
2023-05-16 14:31:22 +08:00
committed by GitHub
parent 33e07410da
commit 9ed8d18fcc
9 changed files with 28 additions and 12 deletions

View File

@@ -99,13 +99,16 @@ void FD_C_RuntimeOptionWrapperUseKunlunXin(
     __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
     int kunlunxin_id, int l3_workspace_size, FD_C_Bool locked,
     FD_C_Bool autotune, const char* autotune_file, const char* precision,
-    FD_C_Bool adaptive_seqlen, FD_C_Bool enable_multi_stream) {
+    FD_C_Bool adaptive_seqlen, FD_C_Bool enable_multi_stream,
+    int64_t gm_default_size) {
   auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
                                                    fd_c_runtime_option_wrapper);
   runtime_option->UseKunlunXin(kunlunxin_id, l3_workspace_size, bool(locked),
                                bool(autotune), std::string(autotune_file),
-                               std::string(precision), bool(adaptive_seqlen),
-                               bool(enable_multi_stream));
+                               std::string(precision),
+                               bool(adaptive_seqlen),
+                               bool(enable_multi_stream),
+                               gm_default_size);
 }

 void FD_C_RuntimeOptionWrapperUseSophgo(
void FD_C_RuntimeOptionWrapperUseSophgo( void FD_C_RuntimeOptionWrapperUseSophgo(

View File

@@ -131,7 +131,8 @@ FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseKunlunXin(
     __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
     int kunlunxin_id, int l3_workspace_size, FD_C_Bool locked,
     FD_C_Bool autotune, const char* autotune_file, const char* precision,
-    FD_C_Bool adaptive_seqlen, FD_C_Bool enable_multi_stream);
+    FD_C_Bool adaptive_seqlen, FD_C_Bool enable_multi_stream,
+    int64_t gm_default_size);

 /** Use Sophgo to inference
  *
/** Use Sophgo to inference /** Use Sophgo to inference
* *

View File

@@ -108,11 +108,12 @@ public class RuntimeOption {
   UseKunlunXin(int kunlunxin_id = 0, int l3_workspace_size = 0xfffc00,
                bool locked = false, bool autotune = true,
                string autotune_file = "", string precision = "int16",
-               bool adaptive_seqlen = false, bool enable_multi_stream = false) {
+               bool adaptive_seqlen = false, bool enable_multi_stream = false,
+               int64_t gm_default_size = 0) {
     FD_C_RuntimeOptionWrapperUseKunlunXin(
         fd_runtime_option_wrapper, kunlunxin_id, l3_workspace_size, locked,
         autotune, autotune_file, precision, adaptive_seqlen,
-        enable_multi_stream);
+        enable_multi_stream, gm_default_size);
   }

   /// Use Sophgo to inference
@@ -366,7 +367,8 @@ public class RuntimeOption {
   private static extern void FD_C_RuntimeOptionWrapperUseKunlunXin(
       IntPtr fd_runtime_option_wrapper, int kunlunxin_id, int l3_workspace_size,
       bool locked, bool autotune, string autotune_file, string precision,
-      bool adaptive_seqlen, bool enable_multi_stream);
+      bool adaptive_seqlen, bool enable_multi_stream,
+      Int64 gm_default_size);

   [DllImport("fastdeploy.dll",
              EntryPoint = "FD_C_RuntimeOptionWrapperUseSophgo")]

View File

@@ -96,6 +96,7 @@ void LiteBackend::ConfigureKunlunXin(const LiteBackendOption& option) {
                                    option.kunlunxin_autotune_file);
   config_.set_xpu_multi_encoder_method(option.kunlunxin_precision,
                                        option.kunlunxin_adaptive_seqlen);
+  config_.set_xpu_gm_workspace_method(option.kunlunxin_gm_default_size);
   if (option.kunlunxin_enable_multi_stream) {
     config_.enable_xpu_multi_stream();
   }

View File

@@ -72,6 +72,8 @@ struct LiteBackendOption {
   std::string kunlunxin_autotune_file = "";
   /// kunlunxin_precision
   std::string kunlunxin_precision = "int16";
+  /// kunlunxin_gm_default_size
+  int kunlunxin_gm_default_size = 0;
   /// kunlunxin_adaptive_seqlen
   bool kunlunxin_adaptive_seqlen = false;
   /// kunlunxin_enable_multi_stream

View File

@@ -53,6 +53,8 @@ void BindLiteOption(pybind11::module& m) {
                      &LiteBackendOption::kunlunxin_autotune_file)
       .def_readwrite("kunlunxin_precision",
                      &LiteBackendOption::kunlunxin_precision)
+      .def_readwrite("kunlunxin_gm_default_size",
+                     &LiteBackendOption::kunlunxin_gm_default_size)
       .def_readwrite("kunlunxin_adaptive_seqlen",
                      &LiteBackendOption::kunlunxin_adaptive_seqlen)
       .def_readwrite("kunlunxin_enable_multi_stream",

View File

@@ -84,7 +84,8 @@ void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
                                  const std::string& autotune_file,
                                  const std::string& precision,
                                  bool adaptive_seqlen,
-                                 bool enable_multi_stream) {
+                                 bool enable_multi_stream,
+                                 int64_t gm_default_size) {
   device = Device::KUNLUNXIN;
   paddle_lite_option.device = device;
   paddle_lite_option.device_id = kunlunxin_id;
@@ -95,6 +96,7 @@ void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
   paddle_lite_option.kunlunxin_precision = precision;
   paddle_lite_option.kunlunxin_adaptive_seqlen = adaptive_seqlen;
   paddle_lite_option.kunlunxin_enable_multi_stream = enable_multi_stream;
+  paddle_lite_option.kunlunxin_gm_default_size = gm_default_size;
 }

 void RuntimeOption::UseAscend() {

View File

@@ -112,7 +112,8 @@ struct FASTDEPLOY_DECL RuntimeOption {
                     const std::string& autotune_file = "",
                     const std::string& precision = "int16",
                     bool adaptive_seqlen = false,
-                    bool enable_multi_stream = false);
+                    bool enable_multi_stream = false,
+                    int64_t gm_default_size = 0);

   void SetExternalStream(void* external_stream);

View File

@@ -226,7 +226,8 @@ class RuntimeOption:
                       autotune_file="",
                       precision="int16",
                       adaptive_seqlen=False,
-                      enable_multi_stream=False):
+                      enable_multi_stream=False,
+                      gm_default_size=0):
         """Inference with KunlunXin XPU

         :param device_id: (int)The index of KunlunXin XPU will be used for inference, default 0
@@ -244,7 +245,8 @@ class RuntimeOption:
         """
         return self._option.use_kunlunxin(device_id, l3_workspace_size, locked,
                                           autotune, autotune_file, precision,
-                                          adaptive_seqlen, enable_multi_stream)
+                                          adaptive_seqlen, enable_multi_stream,
+                                          gm_default_size)

     def use_cpu(self):
         """Inference with CPU