diff --git a/fastdeploy/splitwise/internal_adapter_utils.py b/fastdeploy/splitwise/internal_adapter_utils.py
index dc0ddf544..13fe7513e 100644
--- a/fastdeploy/splitwise/internal_adapter_utils.py
+++ b/fastdeploy/splitwise/internal_adapter_utils.py
@@ -61,6 +61,7 @@ class InternalAdapter:
                 "max_batch_size": int(available_batch_size),
                 "max_input_token_num": self.cfg.max_num_batched_tokens,
                 "unhandled_request_num": self.engine.scheduler.get_unhandled_request_num(),
+                "available_batch": int(self.engine.resource_manager.available_batch()),
             }
             return server_info
 
diff --git a/fastdeploy/worker/gpu_model_runner.py b/fastdeploy/worker/gpu_model_runner.py
index 59f44edb5..fdb73609d 100644
--- a/fastdeploy/worker/gpu_model_runner.py
+++ b/fastdeploy/worker/gpu_model_runner.py
@@ -1268,11 +1268,6 @@ class GPUModelRunner(ModelRunnerBase):
             We plan to replace it with 'ModelForwardBatch'.
             intermediate_tensors:
         """
-        # 1. Prepare inputs of model and sampler.
-        skip_idx_list = self._get_skip_idx(model_forward_batch)
-        self._prepare_inputs()
-        self.sampler.pre_process(skip_idx_list)
-
         # NOTE(wufeisheng): If `not_need_stop`` is False, it means the current worker is in an idle state.
         # This logic is not used in TP (Tensor Parallelism) mode. However, in EP (Expert Parallelism) mode,
         # when there is data on other runner, the current runner is required to execute part of the model.
@@ -1280,6 +1275,11 @@
             self._execute_empty_input()
             return None
 
+        # 1. Prepare inputs of model and sampler.
+        skip_idx_list = self._get_skip_idx(model_forward_batch)
+        self._prepare_inputs()
+        self.sampler.pre_process(skip_idx_list)
+
         # 2. Padding inputs for cuda graph
         self.padding_cudagraph_inputs()
 
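
Note: below is a minimal, hypothetical sketch of the control-flow change in gpu_model_runner.py, not the FastDeploy implementation (the class name RunnerSketch, the has_work flag, and the _forward helper are all illustrative stand-ins). It shows why the reorder matters: the idle-worker early return now runs before any per-batch preparation, so an idle EP rank no longer pays for _get_skip_idx, _prepare_inputs, and sampler.pre_process on every step.

# Hypothetical sketch mirroring the reordered control flow of
# GPUModelRunner.execute_model after this diff; names are illustrative.
class RunnerSketch:
    def __init__(self, has_work: bool):
        self.has_work = has_work  # stand-in for the engine-backed signal

    def not_need_stop(self) -> bool:
        # True while this worker still has requests to serve.
        return self.has_work

    def execute_model(self, batch=None):
        # Idle path first: in EP mode an idle rank still participates in
        # the collective by executing an empty input, then returns early,
        # skipping all per-batch preparation below.
        if not self.not_need_stop():
            self._execute_empty_input()
            return None

        # Busy path: prepare model/sampler inputs only when there is work.
        self._prepare_inputs(batch)
        return self._forward(batch)

    def _execute_empty_input(self):
        print("idle: executed empty input")

    def _prepare_inputs(self, batch):
        print(f"prepared inputs for {batch!r}")

    def _forward(self, batch):
        return f"output({batch!r})"


# Usage: an idle worker returns None without touching input preparation,
# while a busy worker prepares inputs and runs the forward pass.
assert RunnerSketch(has_work=False).execute_model("req-1") is None
assert RunnerSketch(has_work=True).execute_model("req-1") == "output('req-1')"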