Files
FastDeploy/tests/operators/test_top_p_candidates.py
co63oc c5671d7c09
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
Publish Job / publish_pre_check (push) Has been cancelled
Publish Job / print_publish_pre_check_outputs (push) Has been cancelled
Publish Job / FD-Clone-Linux (push) Has been cancelled
Publish Job / Show Code Archive Output (push) Has been cancelled
Publish Job / BUILD_SM8090 (push) Has been cancelled
Publish Job / BUILD_SM8689 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8090 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8689 (push) Has been cancelled
Publish Job / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
Publish Job / Run FastDeploy LogProb Tests (push) Has been cancelled
Publish Job / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
Publish Job / Run Base Tests (push) Has been cancelled
Publish Job / Run Accuracy Tests (push) Has been cancelled
Publish Job / Run Stable Tests (push) Has been cancelled
CI Images Build / FD-Clone-Linux (push) Has been cancelled
CI Images Build / Show Code Archive Output (push) Has been cancelled
CI Images Build / CI Images Build (push) Has been cancelled
CI Images Build / BUILD_SM8090 (push) Has been cancelled
CI Images Build / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
CI Images Build / Run FastDeploy LogProb Tests (push) Has been cancelled
CI Images Build / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
CI Images Build / Run Base Tests (push) Has been cancelled
CI Images Build / Run Accuracy Tests (push) Has been cancelled
CI Images Build / Run Stable Tests (push) Has been cancelled
CI Images Build / Publish Docker Images Pre Check (push) Has been cancelled
[MTP][Unit Test]add test_top_p_candidates (#4046)
* add test_top_p_candidates

* fix

* fix

* fix
2025-09-22 17:06:38 +08:00

130 lines
5.0 KiB
Python

import unittest
import numpy as np
import paddle
from fastdeploy.model_executor.ops.gpu import top_p_candidates
def top_p_candidates_dynamic_top_p(probs, top_p_per_bid, candidates_len, output_padding_offset, max_seq_len):
    """
    Simulate TopPCandidates, supporting dynamic selection of Top-P values based on bid.

    For every row of ``probs`` the entries are visited in descending order and
    collected until either ``candidates_len`` entries have been taken or the
    running sum of the collected values reaches the Top-P value of the row's bid.

    Args:
        probs: 2-D array, shape [token_num, vocab_size]
            Probability distribution over the vocabulary for each token.
        top_p_per_bid: list or array, shape [num_bid]
            Top-P values for each logical block (bid), e.g., [0.7, 0.9, 0.5].
        candidates_len: int
            Maximum number of candidate tokens to return for each token.
        output_padding_offset: array, shape [token_num]
            Offset for each token, used to compute the original token ID
            (ori_token_id = token index + offset).
        max_seq_len: int
            Used to compute bid = ori_token_id // max_seq_len.

    Returns:
        A 5-tuple, in this order (scores come first, then token IDs):
        verify_scores: List[List[float]], candidate scores for each token,
            padded with 0.0 up to ``candidates_len``.
        verify_tokens: List[List[int]], candidate token IDs for each token,
            padded with 0 up to ``candidates_len``.
        actual_candidate_lens: List[int], number of real (unpadded) candidates per token.
        ori_token_ids: list with the original token ID computed for each token.
        bid_list: list with the (clamped) bid used for each token.
    """
    # Unpacking into two names keeps the implicit "probs must be 2-D" check.
    token_num, _ = probs.shape
    verify_tokens = []
    verify_scores = []
    actual_candidate_lens = []
    ori_token_ids = []
    bid_list = []
    top_p_per_bid = np.array(top_p_per_bid)
    num_bid = len(top_p_per_bid)
    for token_id in range(token_num):
        # --- Compute ori_token_id and bid ---
        offset = output_padding_offset[token_id]
        ori_token_id = token_id + offset
        bid = ori_token_id // max_seq_len
        # A bid outside [0, num_bid - 1] is clamped to the nearest valid bid
        # instead of raising.
        if bid < 0:
            bid = 0
        if bid >= num_bid:
            bid = num_bid - 1
        # Dynamically retrieve the top_p value for the given bid.
        token_top_p = top_p_per_bid[bid]
        ori_token_ids.append(ori_token_id)
        bid_list.append(bid)

        # Probability distribution of the current token, sorted in descending order.
        token_probs = probs[token_id, :]
        sorted_indices = np.argsort(token_probs)[::-1]
        sorted_probs = token_probs[sorted_indices]

        accumulated_prob = 0.0
        selected_indices = []
        selected_probs = []
        for sort_idx, (prob, token_idx) in enumerate(zip(sorted_probs, sorted_indices)):
            if sort_idx >= candidates_len:
                break  # Return at most candidates_len candidates.
            accumulated_prob += prob
            selected_indices.append(int(token_idx))
            selected_probs.append(float(prob))
            if accumulated_prob >= token_top_p:
                break  # The cumulative probability satisfies the Top-P criterion.

        # If the Top-P threshold was never met, the tokens selected so far are returned.
        actual_len = len(selected_indices)
        actual_candidate_lens.append(actual_len)
        # Pad the missing slots with token ID 0 and score 0.0.
        pad_len = candidates_len - actual_len
        verify_tokens.append(selected_indices + [0] * pad_len)
        verify_scores.append(selected_probs + [0.0] * pad_len)
    return verify_scores, verify_tokens, actual_candidate_lens, ori_token_ids, bid_list
def top_p_candidates_ref(probs, top_p, output_padding_offset, candidates_len, max_seq_len):
    """Reference wrapper that takes its arguments in the same order as the ``top_p_candidates`` call.

    Forwards to ``top_p_candidates_dynamic_top_p`` (which expects
    ``candidates_len`` before ``output_padding_offset``) and keeps only its
    first three results.
    """
    first, second, third = top_p_candidates_dynamic_top_p(
        probs, top_p, candidates_len, output_padding_offset, max_seq_len
    )[:3]
    return [first, second, third]
class TestTopPCandidates(unittest.TestCase):
    """Compares the ``top_p_candidates`` op with the Python reference implementation."""

    def test_top_p_candidates(self):
        """Feed identical seeded inputs to the op and the reference; the first three outputs must match."""
        paddle.seed(42)
        token_num, vocab_size = 5, 100
        candidates_len = 5
        max_seq_len = 120

        # NOTE(review): both tensors are raw standard-normal samples, not
        # normalized probabilities / thresholds in [0, 1] - confirm this is intended.
        probs = paddle.randn([token_num, vocab_size])
        top_p = paddle.randn([token_num])

        bs = 5
        tokens = [1] * bs
        # Every batch entry contributes `ts` tokens that share the current
        # offset; the offset then grows by the slots that entry leaves unused.
        output_padding_offset = []
        opo_offset = 0
        for ts in tokens:
            output_padding_offset.extend([opo_offset] * ts)
            opo_offset += max_seq_len - ts
        output_padding_offset = paddle.to_tensor(output_padding_offset).astype(paddle.int32)

        call_args = (probs, top_p, output_padding_offset, candidates_len, max_seq_len)
        ret1 = top_p_candidates(*call_args)
        ret2 = top_p_candidates_ref(*call_args)
        for out_idx in range(3):
            np.testing.assert_allclose(ret1[out_idx].numpy(), ret2[out_idx])
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()