diff --git a/c_api/fastdeploy_capi/runtime/runtime_option.cc b/c_api/fastdeploy_capi/runtime/runtime_option.cc
index f0694a271..cba327869 100644
--- a/c_api/fastdeploy_capi/runtime/runtime_option.cc
+++ b/c_api/fastdeploy_capi/runtime/runtime_option.cc
@@ -99,13 +99,16 @@ void FD_C_RuntimeOptionWrapperUseKunlunXin(
     __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
     int kunlunxin_id, int l3_workspace_size, FD_C_Bool locked,
     FD_C_Bool autotune, const char* autotune_file, const char* precision,
-    FD_C_Bool adaptive_seqlen, FD_C_Bool enable_multi_stream) {
+    FD_C_Bool adaptive_seqlen, FD_C_Bool enable_multi_stream,
+    int64_t gm_default_size) {
   auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
                                                    fd_c_runtime_option_wrapper);
   runtime_option->UseKunlunXin(kunlunxin_id, l3_workspace_size, bool(locked),
                                bool(autotune), std::string(autotune_file),
-                               std::string(precision), bool(adaptive_seqlen),
-                               bool(enable_multi_stream));
+                               std::string(precision),
+                               bool(adaptive_seqlen),
+                               bool(enable_multi_stream),
+                               gm_default_size);
 }
 
 void FD_C_RuntimeOptionWrapperUseSophgo(
diff --git a/c_api/fastdeploy_capi/runtime/runtime_option.h b/c_api/fastdeploy_capi/runtime/runtime_option.h
index c07bb9080..6f27e3686 100644
--- a/c_api/fastdeploy_capi/runtime/runtime_option.h
+++ b/c_api/fastdeploy_capi/runtime/runtime_option.h
@@ -131,7 +131,8 @@ FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseKunlunXin(
     __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
     int kunlunxin_id, int l3_workspace_size, FD_C_Bool locked,
     FD_C_Bool autotune, const char* autotune_file, const char* precision,
-    FD_C_Bool adaptive_seqlen, FD_C_Bool enable_multi_stream);
+    FD_C_Bool adaptive_seqlen, FD_C_Bool enable_multi_stream,
+    int64_t gm_default_size);
 
 /** Use Sophgo to inference
  *
diff --git a/csharp/fastdeploy/runtime_option.cs b/csharp/fastdeploy/runtime_option.cs
index f25c9dbd7..249fd30df 100644
--- a/csharp/fastdeploy/runtime_option.cs
+++ b/csharp/fastdeploy/runtime_option.cs
@@ -108,11 +108,12 @@ public class RuntimeOption {
   UseKunlunXin(int kunlunxin_id = 0, int l3_workspace_size = 0xfffc00,
                bool locked = false, bool autotune = true,
                string autotune_file = "", string precision = "int16",
-               bool adaptive_seqlen = false, bool enable_multi_stream = false) {
+               bool adaptive_seqlen = false, bool enable_multi_stream = false,
+               long gm_default_size = 0) {
     FD_C_RuntimeOptionWrapperUseKunlunXin(
         fd_runtime_option_wrapper, kunlunxin_id, l3_workspace_size, locked,
-        autotune, autotune_file, precision, adaptive_seqlen,
-        enable_multi_stream);
+        autotune, autotune_file, precision, adaptive_seqlen,
+        enable_multi_stream, gm_default_size);
   }
 
   /// Use Sophgo to inference
@@ -366,7 +367,8 @@ public class RuntimeOption {
   private static extern void FD_C_RuntimeOptionWrapperUseKunlunXin(
       IntPtr fd_runtime_option_wrapper, int kunlunxin_id, int l3_workspace_size,
       bool locked, bool autotune, string autotune_file, string precision,
-      bool adaptive_seqlen, bool enable_multi_stream);
+      bool adaptive_seqlen, bool enable_multi_stream,
+      Int64 gm_default_size);
 
   [DllImport("fastdeploy.dll",
              EntryPoint = "FD_C_RuntimeOptionWrapperUseSophgo")]
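Because the C function takes no default arguments, existing C call sites (and the C# P/Invoke declaration) have to pass the new trailing value explicitly; the managed C# wrapper defaults it to 0. Below is a minimal sketch of an updated C-API call site. It assumes the usual FD_C_CreateRuntimeOptionWrapper / FD_C_DestroyRuntimeOptionWrapper helpers from the C API and that the fastdeploy_capi headers are on the include path; the argument values simply repeat the documented defaults.

```cpp
#include "fastdeploy_capi/runtime/runtime_option.h"

int main() {
  FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
  // FD_C_Bool flags are written as casts so the sketch stays neutral about
  // how the typedef is defined in the C API headers.
  FD_C_RuntimeOptionWrapperUseKunlunXin(
      option,
      /*kunlunxin_id=*/0,
      /*l3_workspace_size=*/0xfffc00,
      /*locked=*/(FD_C_Bool)0,
      /*autotune=*/(FD_C_Bool)1,
      /*autotune_file=*/"",
      /*precision=*/"int16",
      /*adaptive_seqlen=*/(FD_C_Bool)0,
      /*enable_multi_stream=*/(FD_C_Bool)0,
      /*gm_default_size=*/0);  // new trailing argument; 0 mirrors the default
                               // used by the C# and Python wrappers
  FD_C_DestroyRuntimeOptionWrapper(option);
  return 0;
}
```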
diff --git a/fastdeploy/runtime/backends/lite/configure_hardware.cc b/fastdeploy/runtime/backends/lite/configure_hardware.cc
index c8a2af83a..0b7eae287 100644
--- a/fastdeploy/runtime/backends/lite/configure_hardware.cc
+++ b/fastdeploy/runtime/backends/lite/configure_hardware.cc
@@ -96,6 +96,7 @@ void LiteBackend::ConfigureKunlunXin(const LiteBackendOption& option) {
                                 option.kunlunxin_autotune_file);
   config_.set_xpu_multi_encoder_method(option.kunlunxin_precision,
                                        option.kunlunxin_adaptive_seqlen);
+  config_.set_xpu_gm_workspace_method(option.kunlunxin_gm_default_size);
   if (option.kunlunxin_enable_multi_stream) {
     config_.enable_xpu_multi_stream();
   }
diff --git a/fastdeploy/runtime/backends/lite/option.h b/fastdeploy/runtime/backends/lite/option.h
index dd76bf7df..410ec6034 100755
--- a/fastdeploy/runtime/backends/lite/option.h
+++ b/fastdeploy/runtime/backends/lite/option.h
@@ -72,6 +72,8 @@ struct LiteBackendOption {
   std::string kunlunxin_autotune_file = "";
   /// kunlunxin_precision
   std::string kunlunxin_precision = "int16";
+  /// kunlunxin_gm_default_size
+  int kunlunxin_gm_default_size = 0;
   /// kunlunxin_adaptive_seqlen
   bool kunlunxin_adaptive_seqlen = false;
   /// kunlunxin_enable_multi_stream
diff --git a/fastdeploy/runtime/backends/lite/option_pybind.cc b/fastdeploy/runtime/backends/lite/option_pybind.cc
index 0a01854ad..c5747c685 100644
--- a/fastdeploy/runtime/backends/lite/option_pybind.cc
+++ b/fastdeploy/runtime/backends/lite/option_pybind.cc
@@ -53,6 +53,8 @@ void BindLiteOption(pybind11::module& m) {
                      &LiteBackendOption::kunlunxin_autotune_file)
       .def_readwrite("kunlunxin_precision",
                      &LiteBackendOption::kunlunxin_precision)
+      .def_readwrite("kunlunxin_gm_default_size",
+                     &LiteBackendOption::kunlunxin_gm_default_size)
       .def_readwrite("kunlunxin_adaptive_seqlen",
                      &LiteBackendOption::kunlunxin_adaptive_seqlen)
       .def_readwrite("kunlunxin_enable_multi_stream",
diff --git a/fastdeploy/runtime/runtime_option.cc b/fastdeploy/runtime/runtime_option.cc
index 4bbc8f721..563339237 100644
--- a/fastdeploy/runtime/runtime_option.cc
+++ b/fastdeploy/runtime/runtime_option.cc
@@ -84,7 +84,8 @@ void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
                                  const std::string& autotune_file,
                                  const std::string& precision,
                                  bool adaptive_seqlen,
-                                 bool enable_multi_stream) {
+                                 bool enable_multi_stream,
+                                 int64_t gm_default_size) {
   device = Device::KUNLUNXIN;
   paddle_lite_option.device = device;
   paddle_lite_option.device_id = kunlunxin_id;
@@ -95,6 +96,7 @@ void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
   paddle_lite_option.kunlunxin_precision = precision;
   paddle_lite_option.kunlunxin_adaptive_seqlen = adaptive_seqlen;
   paddle_lite_option.kunlunxin_enable_multi_stream = enable_multi_stream;
+  paddle_lite_option.kunlunxin_gm_default_size = gm_default_size;
 }
 
 void RuntimeOption::UseAscend() {
diff --git a/fastdeploy/runtime/runtime_option.h b/fastdeploy/runtime/runtime_option.h
index ea6d61f20..8e99a88bf 100755
--- a/fastdeploy/runtime/runtime_option.h
+++ b/fastdeploy/runtime/runtime_option.h
@@ -112,7 +112,8 @@ struct FASTDEPLOY_DECL RuntimeOption {
                     const std::string& autotune_file = "",
                     const std::string& precision = "int16",
                     bool adaptive_seqlen = false,
-                    bool enable_multi_stream = false);
+                    bool enable_multi_stream = false,
+                    int64_t gm_default_size = 0);
 
   void SetExternalStream(void* external_stream);
 
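On the C++ side, gm_default_size is appended with a default of 0, so existing UseKunlunXin call sites keep compiling unchanged; the value is stored in LiteBackendOption and forwarded to set_xpu_gm_workspace_method when the Paddle Lite backend is configured. A minimal sketch against the updated declaration in runtime_option.h follows; the 128 MB figure is illustrative only, not a recommended setting.

```cpp
#include "fastdeploy/runtime/runtime_option.h"

int main() {
  fastdeploy::RuntimeOption option;
  // The first eight arguments repeat the declared defaults; only the last
  // one, gm_default_size, is new in this patch.
  option.UseKunlunXin(/*kunlunxin_id=*/0,
                      /*l3_workspace_size=*/0xfffc00,
                      /*locked=*/false,
                      /*autotune=*/true,
                      /*autotune_file=*/"",
                      /*precision=*/"int16",
                      /*adaptive_seqlen=*/false,
                      /*enable_multi_stream=*/false,
                      /*gm_default_size=*/128 * 1024 * 1024);
  return 0;
}
```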
diff --git a/python/fastdeploy/runtime.py b/python/fastdeploy/runtime.py
index c17abc094..fbd75e2a7 100755
--- a/python/fastdeploy/runtime.py
+++ b/python/fastdeploy/runtime.py
@@ -226,7 +226,8 @@ class RuntimeOption:
                       autotune_file="",
                       precision="int16",
                       adaptive_seqlen=False,
-                      enable_multi_stream=False):
+                      enable_multi_stream=False,
+                      gm_default_size=0):
         """Inference with KunlunXin XPU
 
         :param device_id: (int)The index of KunlunXin XPU will be used for inference, default 0
@@ -244,7 +245,8 @@ class RuntimeOption:
         """
         return self._option.use_kunlunxin(device_id, l3_workspace_size, locked,
                                           autotune, autotune_file, precision,
-                                          adaptive_seqlen, enable_multi_stream)
+                                          adaptive_seqlen, enable_multi_stream,
+                                          gm_default_size)
 
     def use_cpu(self):
         """Inference with CPU