mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-04 16:22:57 +08:00
fix xpu offline demo garbled output (#2763)
This commit is contained in:
@@ -142,7 +142,8 @@ def xpu_process_output(
|
||||
|
||||
|
||||
def xpu_post_process(sampled_token_ids: paddle.Tensor,
|
||||
model_output: ModelOutputData) -> None:
|
||||
model_output: ModelOutputData,
|
||||
skip_save_output: bool) -> None:
|
||||
"""
|
||||
|
||||
"""
|
||||
@@ -185,6 +186,7 @@ def xpu_post_process(sampled_token_ids: paddle.Tensor,
|
||||
)
|
||||
# 3. Transmit the model's output and stop generation signal via message queue.
|
||||
# In the future, we will abandon this approach.
|
||||
if not skip_save_output:
|
||||
save_output(
|
||||
sampled_token_ids,
|
||||
model_output.not_need_stop,
|
||||
@@ -658,7 +660,7 @@ class XPUModelRunner(ModelRunnerBase):
|
||||
self._dummy_prefill_inputs(num_tokens, batch_size)
|
||||
|
||||
while True:
|
||||
self.execute_model(None)
|
||||
self.execute_model(None, True)
|
||||
|
||||
if int((self.share_inputs['seq_lens_this_time'] > 0).sum()) == 0:
|
||||
break
|
||||
@@ -666,6 +668,7 @@ class XPUModelRunner(ModelRunnerBase):
|
||||
def execute_model(
|
||||
self,
|
||||
model_forward_batch: Optional[List[Request]] = None,
|
||||
is_dummy_run: bool = False,
|
||||
) -> Optional[ModelRunnerOutput]:
|
||||
"""
|
||||
The Entrance of model execute.
|
||||
@@ -721,7 +724,8 @@ class XPUModelRunner(ModelRunnerBase):
|
||||
accept_num=None,
|
||||
)
|
||||
xpu_post_process(sampled_token_ids=sampled_token_ids,
|
||||
model_output=model_output_data)
|
||||
model_output=model_output_data,
|
||||
skip_save_output=is_dummy_run)
|
||||
|
||||
# 7. Update 'infer_seed' and step_paddle()
|
||||
self.share_inputs["infer_seed"].add_(self.infer_seed_increment)
|
||||
|
@@ -86,7 +86,6 @@ class XpuWorker(WorkerBase):
|
||||
You may limit the usage of GPU memory
|
||||
by adjusting the `gpu_memory_utilization` parameter.
|
||||
"""
|
||||
# logger.warn("XPU current could not determine available memory")
|
||||
from fastdeploy.model_executor.ops.xpu import \
|
||||
xpu_get_free_global_memory, xpu_get_total_global_memory, xpu_get_used_global_memory
|
||||
|
||||
|
Reference in New Issue
Block a user