diff --git a/c_api/fastdeploy_capi/runtime/runtime_option.cc b/c_api/fastdeploy_capi/runtime/runtime_option.cc
index f0694a271..cba327869 100644
--- a/c_api/fastdeploy_capi/runtime/runtime_option.cc
+++ b/c_api/fastdeploy_capi/runtime/runtime_option.cc
@@ -99,13 +99,16 @@ void FD_C_RuntimeOptionWrapperUseKunlunXin(
     __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
     int kunlunxin_id, int l3_workspace_size, FD_C_Bool locked,
     FD_C_Bool autotune, const char* autotune_file, const char* precision,
-    FD_C_Bool adaptive_seqlen, FD_C_Bool enable_multi_stream) {
+    FD_C_Bool adaptive_seqlen, FD_C_Bool enable_multi_stream,
+    int64_t gm_default_size) {
   auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
                                                    fd_c_runtime_option_wrapper);
   runtime_option->UseKunlunXin(kunlunxin_id, l3_workspace_size, bool(locked),
                                bool(autotune), std::string(autotune_file),
-                               std::string(precision), bool(adaptive_seqlen),
-                               bool(enable_multi_stream));
+                               std::string(precision),
+                               bool(adaptive_seqlen),
+                               bool(enable_multi_stream),
+                               gm_default_size);
 }
 
 void FD_C_RuntimeOptionWrapperUseSophgo(
diff --git a/c_api/fastdeploy_capi/runtime/runtime_option.h b/c_api/fastdeploy_capi/runtime/runtime_option.h
index c07bb9080..6f27e3686 100644
--- a/c_api/fastdeploy_capi/runtime/runtime_option.h
+++ b/c_api/fastdeploy_capi/runtime/runtime_option.h
@@ -131,7 +131,8 @@ FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseKunlunXin(
     __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
     int kunlunxin_id, int l3_workspace_size, FD_C_Bool locked,
     FD_C_Bool autotune, const char* autotune_file, const char* precision,
-    FD_C_Bool adaptive_seqlen, FD_C_Bool enable_multi_stream);
+    FD_C_Bool adaptive_seqlen, FD_C_Bool enable_multi_stream,
+    int64_t gm_default_size);
 
 /** Use Sophgo to inference
  *
diff --git a/csharp/fastdeploy/runtime_option.cs b/csharp/fastdeploy/runtime_option.cs
index f25c9dbd7..249fd30df 100644
--- a/csharp/fastdeploy/runtime_option.cs
+++ b/csharp/fastdeploy/runtime_option.cs
@@ -108,11 +108,12 @@ public class RuntimeOption {
   UseKunlunXin(int kunlunxin_id = 0, int l3_workspace_size = 0xfffc00,
                bool locked = false, bool autotune = true,
                string autotune_file = "", string precision = "int16",
-               bool adaptive_seqlen = false, bool enable_multi_stream = false) {
+               bool adaptive_seqlen = false, bool enable_multi_stream = false,
+               long gm_default_size = 0) {
     FD_C_RuntimeOptionWrapperUseKunlunXin(
         fd_runtime_option_wrapper, kunlunxin_id, l3_workspace_size, locked,
-        autotune, autotune_file, precision, adaptive_seqlen,
-        enable_multi_stream);
+        autotune, autotune_file, precision, adaptive_seqlen,
+        enable_multi_stream, gm_default_size);
   }
 
   /// Use Sophgo to inference
@@ -366,7 +367,8 @@ public class RuntimeOption {
   private static extern void FD_C_RuntimeOptionWrapperUseKunlunXin(
       IntPtr fd_runtime_option_wrapper, int kunlunxin_id, int l3_workspace_size,
       bool locked, bool autotune, string autotune_file, string precision,
-      bool adaptive_seqlen, bool enable_multi_stream);
+      bool adaptive_seqlen, bool enable_multi_stream,
+      Int64 gm_default_size);
 
   [DllImport("fastdeploy.dll",
              EntryPoint = "FD_C_RuntimeOptionWrapperUseSophgo")]
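Because the C function takes no default arguments, existing C call sites (and the C# P/Invoke declaration) have to pass the new trailing value explicitly; the managed C# wrapper defaults it to 0. Below is a minimal sketch of an updated C-API call site. It assumes the usual FD_C_CreateRuntimeOptionWrapper / FD_C_DestroyRuntimeOptionWrapper helpers from the C API and that the fastdeploy_capi headers are on the include path; the argument values simply repeat the documented defaults.

```cpp
#include "fastdeploy_capi/runtime/runtime_option.h"

int main() {
  FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
  // FD_C_Bool flags are written as casts so the sketch stays neutral about
  // how the typedef is defined in the C API headers.
  FD_C_RuntimeOptionWrapperUseKunlunXin(
      option,
      /*kunlunxin_id=*/0,
      /*l3_workspace_size=*/0xfffc00,
      /*locked=*/(FD_C_Bool)0,
      /*autotune=*/(FD_C_Bool)1,
      /*autotune_file=*/"",
      /*precision=*/"int16",
      /*adaptive_seqlen=*/(FD_C_Bool)0,
      /*enable_multi_stream=*/(FD_C_Bool)0,
      /*gm_default_size=*/0);  // new trailing argument; 0 mirrors the default
                               // used by the C# and Python wrappers
  FD_C_DestroyRuntimeOptionWrapper(option);
  return 0;
}
```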
diff --git a/fastdeploy/runtime/backends/lite/configure_hardware.cc b/fastdeploy/runtime/backends/lite/configure_hardware.cc
index c8a2af83a..0b7eae287 100644
--- a/fastdeploy/runtime/backends/lite/configure_hardware.cc
+++ b/fastdeploy/runtime/backends/lite/configure_hardware.cc
@@ -96,6 +96,7 @@ void LiteBackend::ConfigureKunlunXin(const LiteBackendOption& option) {
                                 option.kunlunxin_autotune_file);
   config_.set_xpu_multi_encoder_method(option.kunlunxin_precision,
                                        option.kunlunxin_adaptive_seqlen);
+  config_.set_xpu_gm_workspace_method(option.kunlunxin_gm_default_size);
   if (option.kunlunxin_enable_multi_stream) {
     config_.enable_xpu_multi_stream();
   }
diff --git a/fastdeploy/runtime/backends/lite/option.h b/fastdeploy/runtime/backends/lite/option.h
index dd76bf7df..410ec6034 100755
--- a/fastdeploy/runtime/backends/lite/option.h
+++ b/fastdeploy/runtime/backends/lite/option.h
@@ -72,6 +72,8 @@ struct LiteBackendOption {
   std::string kunlunxin_autotune_file = "";
   /// kunlunxin_precision
   std::string kunlunxin_precision = "int16";
+  /// kunlunxin_gm_default_size
+  int kunlunxin_gm_default_size = 0;
   /// kunlunxin_adaptive_seqlen
   bool kunlunxin_adaptive_seqlen = false;
   /// kunlunxin_enable_multi_stream
diff --git a/fastdeploy/runtime/backends/lite/option_pybind.cc b/fastdeploy/runtime/backends/lite/option_pybind.cc
index 0a01854ad..c5747c685 100644
--- a/fastdeploy/runtime/backends/lite/option_pybind.cc
+++ b/fastdeploy/runtime/backends/lite/option_pybind.cc
@@ -53,6 +53,8 @@ void BindLiteOption(pybind11::module& m) {
                      &LiteBackendOption::kunlunxin_autotune_file)
       .def_readwrite("kunlunxin_precision",
                      &LiteBackendOption::kunlunxin_precision)
+      .def_readwrite("kunlunxin_gm_default_size",
+                     &LiteBackendOption::kunlunxin_gm_default_size)
       .def_readwrite("kunlunxin_adaptive_seqlen",
                      &LiteBackendOption::kunlunxin_adaptive_seqlen)
       .def_readwrite("kunlunxin_enable_multi_stream",
diff --git a/fastdeploy/runtime/runtime_option.cc b/fastdeploy/runtime/runtime_option.cc
index 4bbc8f721..563339237 100644
--- a/fastdeploy/runtime/runtime_option.cc
+++ b/fastdeploy/runtime/runtime_option.cc
@@ -84,7 +84,8 @@ void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
                                  const std::string& autotune_file,
                                  const std::string& precision,
                                  bool adaptive_seqlen,
-                                 bool enable_multi_stream) {
+                                 bool enable_multi_stream,
+                                 int64_t gm_default_size) {
   device = Device::KUNLUNXIN;
   paddle_lite_option.device = device;
   paddle_lite_option.device_id = kunlunxin_id;
@@ -95,6 +96,7 @@ void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
   paddle_lite_option.kunlunxin_precision = precision;
   paddle_lite_option.kunlunxin_adaptive_seqlen = adaptive_seqlen;
   paddle_lite_option.kunlunxin_enable_multi_stream = enable_multi_stream;
+  paddle_lite_option.kunlunxin_gm_default_size = gm_default_size;
 }
 
 void RuntimeOption::UseAscend() {
diff --git a/fastdeploy/runtime/runtime_option.h b/fastdeploy/runtime/runtime_option.h
index ea6d61f20..8e99a88bf 100755
--- a/fastdeploy/runtime/runtime_option.h
+++ b/fastdeploy/runtime/runtime_option.h
@@ -112,7 +112,8 @@ struct FASTDEPLOY_DECL RuntimeOption {
                     const std::string& autotune_file = "",
                     const std::string& precision = "int16",
                     bool adaptive_seqlen = false,
-                    bool enable_multi_stream = false);
+                    bool enable_multi_stream = false,
+                    int64_t gm_default_size = 0);
 
   void SetExternalStream(void* external_stream);
 
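On the C++ side, gm_default_size is appended with a default of 0, so existing UseKunlunXin call sites keep compiling unchanged; the value is stored in LiteBackendOption and forwarded to set_xpu_gm_workspace_method when the Paddle Lite backend is configured. A minimal sketch against the updated declaration in runtime_option.h follows; the 128 MB figure is illustrative only, not a recommended setting.

```cpp
#include "fastdeploy/runtime/runtime_option.h"

int main() {
  fastdeploy::RuntimeOption option;
  // The first eight arguments repeat the declared defaults; only the last
  // one, gm_default_size, is new in this patch.
  option.UseKunlunXin(/*kunlunxin_id=*/0,
                      /*l3_workspace_size=*/0xfffc00,
                      /*locked=*/false,
                      /*autotune=*/true,
                      /*autotune_file=*/"",
                      /*precision=*/"int16",
                      /*adaptive_seqlen=*/false,
                      /*enable_multi_stream=*/false,
                      /*gm_default_size=*/128 * 1024 * 1024);
  return 0;
}
```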
diff --git a/python/fastdeploy/runtime.py b/python/fastdeploy/runtime.py
index c17abc094..fbd75e2a7 100755
--- a/python/fastdeploy/runtime.py
+++ b/python/fastdeploy/runtime.py
@@ -226,7 +226,8 @@ class RuntimeOption:
                       autotune_file="",
                       precision="int16",
                       adaptive_seqlen=False,
-                      enable_multi_stream=False):
+                      enable_multi_stream=False,
+                      gm_default_size=0):
         """Inference with KunlunXin XPU
 
         :param device_id: (int)The index of KunlunXin XPU will be used for inference, default 0
@@ -244,7 +245,8 @@ class RuntimeOption:
         """
         return self._option.use_kunlunxin(device_id, l3_workspace_size, locked,
                                           autotune, autotune_file, precision,
-                                          adaptive_seqlen, enable_multi_stream)
+                                          adaptive_seqlen, enable_multi_stream,
+                                          gm_default_size)
 
     def use_cpu(self):
         """Inference with CPU