Sync v2.0 version of code to GitHub repo

Author: Jiang-Jia-Jun
Date: 2025-06-29 23:29:37 +00:00
parent d151496038
commit 92c2cfa2e7
597 changed files with 78776 additions and 22905 deletions


@@ -20,75 +20,141 @@ from typing import Optional
from dataclasses import dataclass

import paddle
@dataclass
class PreProcessOutputData:
    """ """


@dataclass
class ModelOutputData:
    """
    OutputData by execute_model
    """

    """
    Tokens generated in the previous step
    """
    next_tokens: paddle.Tensor

    """
    Flags indicating whether decoding should stop
    """
    stop_flags: paddle.Tensor

    """
    Index of the current decoding step
    """
    step_idx: int

    """
    Maximum decoding length
    """
    max_dec_len: int

    """
    Previous ids used for decoding
    """
    pre_ids: paddle.Tensor

    """
    Sequence lengths for this step
    """
    seq_lens_this_time: paddle.Tensor

    """
    Lengths of the stop sequences
    """
    stop_seqs_len: paddle.Tensor

    """
    Eos token ID
    """
    eos_token_id: paddle.Tensor

    """
    Indicates if stopping conditions should be ignored
    """
    not_need_stop: bool

    """
    Sequence lengths of the encoder
    """
    seq_lens_encoder: paddle.Tensor

    """
    Sequence lengths of the decoder
    """
    seq_lens_decoder: paddle.Tensor

    """
    Indicates if this is a blocking step
    """
    is_block_step: bool

    """
    Indicates whether output is sent via the message queue
    """
    output_via_mq: bool

    """
    The ID of the message queue
    """
    msg_queue_id: int

    """
    The model parallel rank
    """
    mp_rank: int

    """
    Whether expert parallelism (EP) is used
    """
    use_ep: bool

    """
    Input ids
    """
    input_ids: paddle.Tensor

    """
    Stop nums for every sequence
    """
    stop_nums: paddle.Tensor

    """
    For speculative decoding:
    full hidden states before lm_head
    """
    full_hidden_states: paddle.Tensor

    """
    Draft tokens for every sequence
    """
    draft_tokens: paddle.Tensor

    """
    Draft token num for every sequence
    """
    actual_draft_token_num: paddle.Tensor

    """
    Accepted tokens in the current step
    """
    accept_tokens: paddle.Tensor

    """
    The number of accepted tokens in the current step
    """
    accept_num: paddle.Tensor
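
For context, here is a minimal sketch (not part of this commit) of how a consumer might unpack the speculative-decoding fields above. It assumes accept_tokens is padded to shape [num_seqs, max_draft_len] and accept_num has shape [num_seqs]; the helper name gather_accepted_tokens is hypothetical.

import paddle

def gather_accepted_tokens(accept_tokens: paddle.Tensor,
                           accept_num: paddle.Tensor) -> list[list[int]]:
    """Collect the accepted token ids per sequence from padded tensors.

    Assumed shapes (not confirmed by the diff):
    accept_tokens: [num_seqs, max_draft_len], accept_num: [num_seqs].
    """
    accepted = []
    for i in range(accept_tokens.shape[0]):
        # accept_num[i] gives how many drafted tokens sequence i accepted
        n = int(accept_num[i].item())
        accepted.append(accept_tokens[i, :n].tolist())
    return accepted

# Example with toy values:
tokens = paddle.to_tensor([[5, 7, 9], [3, 0, 0]])
nums = paddle.to_tensor([2, 1])
print(gather_accepted_tokens(tokens, nums))  # [[5, 7], [3]]
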
@dataclass
class ModelRunnerOutput:
    """
    [WIP] ModelRunnerOutput is serialized and sent to the scheduler process.
    """

    """
    [num_reqs]
    """
    req_ids: list[str]

    """
    req_id -> index
    """
    req_id_to_index: dict[str, int]

    """
    [num_reqs, num_generated_tokens]
    """
    sampled_token_ids: list[list[int]]

    """
    [num_reqs, num_spec_tokens]
    """
    spec_token_ids: Optional[list[list[int]]]

    # TODO(gongshaotian): supplement other outputs info
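
A hedged usage sketch, not from this commit: req_id_to_index maps a request id to its row in the per-request lists, so a scheduler-side consumer can look up one request's sampled tokens. The request ids and token values below are made up for illustration.

output = ModelRunnerOutput(
    req_ids=["req-0", "req-1"],
    req_id_to_index={"req-0": 0, "req-1": 1},
    sampled_token_ids=[[101, 102], [205]],
    spec_token_ids=None,
)
idx = output.req_id_to_index["req-1"]
print(output.sampled_token_ids[idx])  # [205]
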