diff --git a/fastdeploy/worker/gcu_model_runner.py b/fastdeploy/worker/gcu_model_runner.py index e08d6760e..63bf6e7b3 100644 --- a/fastdeploy/worker/gcu_model_runner.py +++ b/fastdeploy/worker/gcu_model_runner.py @@ -772,6 +772,8 @@ class GCUModelRunner(ModelRunnerBase): post_process( sampler_output=sampler_output, model_output=model_output_data, + share_inputs=self.share_inputs, + block_size=self.parallel_config.block_size, speculative_decoding=self.speculative_decoding, skip_save_output=True, ) @@ -989,6 +991,8 @@ class GCUModelRunner(ModelRunnerBase): post_process( sampler_output=sampler_output, model_output=model_output_data, + share_inputs=self.share_inputs, + block_size=self.parallel_config.block_size, save_each_rank=self.parallel_config.use_ep, speculative_decoding=self.speculative_decoding, skip_save_output=skip_save_output,