[xpu] support ep (#4067)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled

This commit is contained in:
zhupengyang
2025-09-15 13:53:11 +08:00
committed by GitHub
parent 29ed617f0f
commit 9409665713
18 changed files with 1179 additions and 329 deletions

View File

@@ -872,16 +872,36 @@ class XPUModelRunner(ModelRunnerBase):
self._dummy_prefill_inputs(num_tokens, batch_size)
while True:
self.execute_model(None, True)
self.execute_model(is_dummy_run=True)
if int((self.share_inputs["seq_lens_this_time"] > 0).sum()) == 0:
break
def _set_debug_level(
    self, debug_level: int = 0x1, model_forward_batch: Optional[List[Request]] = None, is_dummy_run: bool = False
) -> None:
    """
    Enable or disable the XPU debug level for the upcoming model step.

    Args:
        debug_level: XPU debug level to set when there is work to trace
            (observed values in this project: 0x1, 0xA1, 0x1B1). A value
            of 0 disables debugging.
        model_forward_batch: requests scheduled for this step; ``None`` or
            an empty list means this rank has no local work.
        is_dummy_run: True for warm-up/dummy executions, which are never
            traced.

    Note:
        Under expert parallelism every rank must enter the ``all_reduce``
        below, even when its local batch is empty — `debug_level` and
        `is_dummy_run` are uniform across ranks, but per-rank request
        counts are not. Returning early on an empty local batch while
        other ranks enter the collective would deadlock the EP group.
    """
    local_request_num = 0 if model_forward_batch is None else len(model_forward_batch)

    # These conditions are identical on every rank, so returning early
    # here cannot desynchronize the EP collective below.
    if debug_level == 0 or is_dummy_run:
        paddle.device.xpu.set_debug_level(0)
        return

    if self.parallel_config.use_ep:
        # All EP ranks participate unconditionally; the reduced sum tells
        # every rank whether any rank has work this step.
        global_request_num = paddle.to_tensor(local_request_num, dtype="int32")
        paddle.distributed.all_reduce(global_request_num, group=self.parallel_config.ep_group)
        logger.info(f"local_rank: {self.local_rank}, request_num: {global_request_num.item()}")
        if global_request_num.item() > 0:
            paddle.device.xpu.set_debug_level(debug_level)
        else:
            paddle.device.xpu.set_debug_level(0)
        return

    # Non-EP path: only the local batch matters.
    if local_request_num == 0:
        paddle.device.xpu.set_debug_level(0)
    else:
        paddle.device.xpu.set_debug_level(debug_level)
def execute_model(
self,
model_forward_batch: Optional[List[Request]] = None,
is_dummy_run: bool = False,
num_running_requests: int = None,
is_dummy_run: bool = False,
) -> Optional[ModelRunnerOutput]:
"""
The Entrance of model execute.
@@ -892,6 +912,9 @@ class XPUModelRunner(ModelRunnerBase):
num_running_requests: batch_size
intermediate_tensors:
"""
# 0. set debug level
# self._set_debug_level(0x1, model_forward_batch, is_dummy_run)
# 1. Prepare inputs of model and decoder.
self._prepare_inputs(is_dummy_run=is_dummy_run)