Revert "[BugFix] fix ep (#3290)" (#3317)

This reverts commit 86ff68be4b.
This commit is contained in:
Jiang-Jia-Jun
2025-08-11 16:17:58 +08:00
committed by GitHub
parent c000cff744
commit ca4e4ab911
2 changed files with 3 additions and 4 deletions

View File

@@ -780,7 +780,7 @@ class GPUModelRunner(ModelRunnerBase):
output_padding_offset,
) = pre_process(
self.share_inputs["input_ids"],
-getattr(self.share_inputs, "seq_lens_this_time", self.seq_lens_this_time_buffer),
+self.share_inputs["seq_lens_this_time"],
self.speculative_decoding,
(self.share_inputs["draft_tokens"] if self.speculative_decoding else None),
self.share_inputs["seq_lens_encoder"],
@@ -864,7 +864,7 @@ class GPUModelRunner(ModelRunnerBase):
max_len_tensor_cpu=self.share_inputs["max_len_tensor_cpu"],
seq_lens_encoder=self.share_inputs["seq_lens_encoder"],
seq_lens_decoder=self.share_inputs["seq_lens_decoder"],
-seq_lens_this_time=getattr(self.share_inputs, "seq_lens_this_time", self.seq_lens_this_time_buffer),
+seq_lens_this_time=self.share_inputs["seq_lens_this_time"],
batch_id_per_token=self.share_inputs["batch_id_per_token"],
cu_seqlens_q=self.share_inputs["cu_seqlens_q"],
cu_seqlens_k=self.share_inputs["cu_seqlens_k"],

View File

@@ -244,7 +244,7 @@ class PaddleDisWorkerProc:
"""
while True:
self.worker_healthy_live_signal.value[self.local_rank % self.max_chips_per_node] = int(time.time())
num_running_requests = 0
if self.fd_config.parallel_config.tensor_parallel_rank == 0 and self.task_queue.num_tasks() > 0:
tasks, read_finish = self.task_queue.get_tasks()
@@ -271,7 +271,6 @@ class PaddleDisWorkerProc:
self.nnode = int((self.parallel_config.tensor_parallel_size + 7) // 8)
mp_num_per_node = self.parallel_config.tensor_parallel_size // self.nnode
req_ids = []
num_running_requests = 0
while True:
if self.local_rank == 0:
if self.model_weights_status.value[0] != 0: