[Others] clean code (#5235)

commit 73886204d4 (parent 2d69d91ab8)
Author: 周周周
Date: 2025-11-28 15:40:49 +08:00
Committed by: GitHub


@@ -646,7 +646,6 @@ class GPUModelRunner(ModelRunnerBase):
             )
             self.share_inputs["first_token_ids"][idx : idx + 1] = self.share_inputs["input_ids"][idx : idx + 1, :1]
-            self.share_inputs["ori_seq_lens_encoder"][idx : idx + 1] = length
             if request.get("seed") is not None:
                 self.share_inputs["infer_seed"][idx : idx + 1] = request.get("seed")
@@ -864,7 +863,6 @@ class GPUModelRunner(ModelRunnerBase):
self.share_inputs["stop_flags"][idx : idx + 1] = False
self.share_inputs["first_token_ids"][idx : idx + 1] = self.share_inputs["input_ids"][idx : idx + 1, :1]
self.share_inputs["ori_seq_lens_encoder"][idx : idx + 1] = length
if request.get("seed") is not None:
self.share_inputs["infer_seed"][idx : idx + 1] = request.get("seed")
@@ -1017,7 +1015,6 @@ class GPUModelRunner(ModelRunnerBase):
self.share_inputs["stop_flags"][idx : idx + 1] = False
self.share_inputs["temperature"][idx : idx + 1] = 1
self.share_inputs["first_token_ids"][idx : idx + 1] = self.share_inputs["input_ids"][idx : idx + 1, :1]
self.share_inputs["ori_seq_lens_encoder"][idx : idx + 1] = input_length
self.share_inputs["encoder_block_lens"][idx : idx + 1] = block_num
self.share_inputs["block_tables"][idx : idx + 1, :block_num] = np.arange(
@@ -1101,7 +1098,6 @@ class GPUModelRunner(ModelRunnerBase):
self.share_inputs["used_list_len"] = paddle.full([max_num_seqs], 0, dtype="int32")
self.share_inputs["infer_seed"] = paddle.full([max_num_seqs, 1], 0, dtype="int64").cpu()
self.share_inputs["first_token_ids"] = paddle.full([max_num_seqs, 1], -1, dtype="int64")
self.share_inputs["ori_seq_lens_encoder"] = paddle.full([max_num_seqs, 1], 0, dtype="int32")
self.share_inputs["system_lens"] = paddle.full([max_num_seqs, 1], 0, dtype="int32")
self.share_inputs["system_ids"] = paddle.full([max_num_seqs, 1], -1, dtype="int32")
@@ -1783,8 +1779,8 @@ class GPUModelRunner(ModelRunnerBase):
     def _dummy_run(
         self,
-        num_tokens: paddle.Tensor,
-        batch_size: paddle.Tensor,
+        num_tokens: int,
+        batch_size: int,
         expected_decode_len: int = 1,
         in_capturing: bool = False,
         capture_prefill: bool = False,
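This hunk only fixes type hints: judging by the new annotations, _dummy_run sizes the warm-up batch with host-side Python ints, so annotating num_tokens and batch_size as paddle.Tensor was wrong. A minimal stand-in showing the corrected signature; only the signature is taken from the diff, the body is not:

```python
class GPUModelRunner:  # stand-in for the real class
    def _dummy_run(
        self,
        num_tokens: int,    # host-side int: how many dummy tokens to feed
        batch_size: int,    # host-side int: how many dummy sequences
        expected_decode_len: int = 1,
        in_capturing: bool = False,
        capture_prefill: bool = False,
    ) -> None:
        ...  # builds fake inputs and runs the model (see the hunks below)
```

Annotations are not enforced at runtime, but the old paddle.Tensor hints would mislead readers and static checkers at every call site that passes plain ints.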
@@ -1830,21 +1826,20 @@ class GPUModelRunner(ModelRunnerBase):
             # 3. Run model
             if self.enable_mm:
                 model_output = self.model(
-                    self.share_inputs["ids_remove_padding"],
+                    self.forward_meta.ids_remove_padding,
                     self.share_inputs["image_features"],
                     self.forward_meta,
                 )
             else:
                 model_output = self.model(
-                    ids_remove_padding=self.share_inputs["ids_remove_padding"],
-                    forward_meta=self.forward_meta,
+                    self.forward_meta.ids_remove_padding,
+                    self.forward_meta,
                 )
             if self.use_cudagraph:
                 model_output = model_output[: self.real_token_num]
             if self.is_pooling_model:
-                hidden_states = model_output
-                self._dummy_pooler_run(hidden_states, model_output)
+                self._dummy_pooler_run(model_output, model_output)
                 break
             else:
                 hidden_states = rebuild_padding(
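Two things change in the dummy-run model call. First, both the multimodal and text-only branches now read the flattened token ids from self.forward_meta.ids_remove_padding and pass arguments positionally, instead of mixing a share_inputs lookup with keyword arguments. Second, the single-use hidden_states alias in the pooling branch is dropped; since the alias made both arguments identical anyway, model_output is now passed for both parameters. A minimal sketch of the resulting calling convention; the ForwardMeta stand-in below is an assumption and models only the one field used here:

```python
from dataclasses import dataclass
from typing import Any, Optional

@dataclass
class ForwardMeta:  # stand-in: the real class carries many more fields
    ids_remove_padding: Any  # flattened token ids with padding removed

def run_model(model, forward_meta: ForwardMeta,
              image_features: Optional[Any] = None,
              enable_mm: bool = False):
    # Both branches take the token ids from forward_meta and call the model
    # positionally, mirroring the post-#5235 hunk above.
    if enable_mm:
        return model(forward_meta.ids_remove_padding, image_features, forward_meta)
    return model(forward_meta.ids_remove_padding, forward_meta)
```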
@@ -2154,14 +2149,14 @@ class GPUModelRunner(ModelRunnerBase):
         # 3. Execute model
         if self.enable_mm:
             model_output = self.model(
-                self.share_inputs["ids_remove_padding"],
+                self.forward_meta.ids_remove_padding,
                 self.share_inputs["image_features"],
                 self.forward_meta,
             )
         else:
             model_output = self.model(
-                ids_remove_padding=self.share_inputs["ids_remove_padding"],
-                forward_meta=self.forward_meta,
+                self.forward_meta.ids_remove_padding,
+                self.forward_meta,
             )
         if self.use_cudagraph:
             model_output = model_output[: self.real_token_num]
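The same rewrite is applied to the real execution path, so the dummy run and the real run now invoke the model identically, which matters when the dummy run is what gets captured into a CUDA graph. Assuming forward_meta is built from the same buffers as share_inputs (the diff implies but does not show this), a hedged sanity check one could use during such a migration:

```python
def check_alias(runner) -> None:
    # Hypothetical assertion, not in the commit: verify the new read path
    # aliases the old buffer before the share_inputs lookup is deleted for good.
    assert runner.forward_meta.ids_remove_padding is runner.share_inputs["ids_remove_padding"], (
        "forward_meta.ids_remove_padding must alias share_inputs['ids_remove_padding']"
    )
```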
@@ -2169,8 +2164,7 @@ class GPUModelRunner(ModelRunnerBase):
                 prompt_logprobs_list = self._get_prompt_logprobs_list(model_output)
             if self.is_pooling_model:
-                hidden_states = model_output
-                pooler_output = self._pool(hidden_states, num_running_requests)
+                pooler_output = self._pool(model_output, num_running_requests)
             model_output_data = ModelOutputData(
                 next_tokens=self.share_inputs["next_tokens"],