# Mirror of https://github.com/PaddlePaddle/FastDeploy.git
# Unit test for the top_p_candidates GPU op (commit: "add test_top_p_candidates").
import unittest

import numpy as np

import paddle

from fastdeploy.model_executor.ops.gpu import top_p_candidates

def top_p_candidates_dynamic_top_p(probs, top_p_per_bid, candidates_len, output_padding_offset, max_seq_len):
    """Numpy simulation of TopPCandidates with a per-bid Top-P threshold.

    Each token is mapped back to its logical batch slot (bid) through its
    padding offset, and that slot's Top-P value is used to cut off the
    candidate list.

    Args:
        probs: numpy.ndarray of shape [token_num, vocab_size]; per-token
            probability distribution over the vocabulary.
        top_p_per_bid: list or numpy.ndarray of shape [num_bid]; the Top-P
            threshold for each logical block, e.g. [0.7, 0.9, 0.5].
        candidates_len: int; maximum number of candidates returned per token.
        output_padding_offset: numpy.ndarray of shape [token_num]; offset
            added to the token index to recover the original token id.
        max_seq_len: int; used to derive bid = ori_token_id // max_seq_len.

    Returns:
        Tuple of five lists:
            verify_scores: List[List[float]] — candidate probabilities, zero-padded.
            verify_tokens: List[List[int]] — candidate token ids, zero-padded.
            actual_candidate_lens: List[int] — number of real candidates per token.
            ori_token_ids: List[int] — recovered original token id per token.
            bid_list: List[int] — clamped bid per token.
    """
    num_tokens = probs.shape[0]
    thresholds = np.array(top_p_per_bid)
    num_bid = len(thresholds)

    verify_tokens = []
    verify_scores = []
    actual_candidate_lens = []
    ori_token_ids = []
    bid_list = []

    for tid in range(num_tokens):
        # Recover the original token id and its logical block index.
        ori_id = tid + output_padding_offset[tid]
        blk = ori_id // max_seq_len
        # Out-of-range bids are clamped into [0, num_bid - 1] rather than
        # raising; the kernel is expected to behave the same way.
        if blk < 0:
            blk = 0
        if blk >= num_bid:
            blk = num_bid - 1

        threshold = thresholds[blk]  # Top-P value selected dynamically by bid.
        ori_token_ids.append(ori_id)
        bid_list.append(blk)

        # Rank this token's vocabulary by descending probability.
        row = probs[tid, :]
        order = np.argsort(row)[::-1]

        chosen_ids = []
        chosen_scores = []
        running = 0.0
        # Take at most candidates_len entries, stopping early once the
        # cumulative mass reaches the Top-P threshold.
        for rank in range(min(candidates_len, len(order))):
            idx = order[rank]
            running += row[idx]
            chosen_ids.append(int(idx))
            chosen_scores.append(float(row[idx]))
            if running >= threshold:
                break

        count = len(chosen_ids)
        actual_candidate_lens.append(count)
        # Zero-pad ids and scores out to candidates_len.
        verify_tokens.append(chosen_ids + [0] * (candidates_len - count))
        verify_scores.append(chosen_scores + [0.0] * (candidates_len - count))

    return verify_scores, verify_tokens, actual_candidate_lens, ori_token_ids, bid_list


def top_p_candidates_ref(probs, top_p, output_padding_offset, candidates_len, max_seq_len):
    """Reference wrapper mirroring the GPU op's signature and output order.

    Delegates to top_p_candidates_dynamic_top_p (which takes candidates_len
    before output_padding_offset) and keeps only the three outputs the op
    produces: scores, tokens, and actual candidate lengths.
    """
    scores, tokens, lens, _, _ = top_p_candidates_dynamic_top_p(
        probs, top_p, candidates_len, output_padding_offset, max_seq_len
    )
    return [scores, tokens, lens]


class TestTopPCandidates(unittest.TestCase):
    """Checks the GPU top_p_candidates op against the numpy reference."""

    def test_top_p_candidates(self):
        paddle.seed(42)

        token_num = 5
        vocab_size = 100
        candidates_len = 5
        max_seq_len = 120
        bs = 5
        tokens_per_batch = [1] * bs

        probs = paddle.randn([token_num, vocab_size])
        top_p = paddle.randn([token_num])

        # Build the padding-offset table: each token carries the cumulative
        # padding of all preceding batch slots.
        offsets = []
        cursor = 0
        for bid in range(bs):
            n = tokens_per_batch[bid]
            offsets.extend([cursor] * n)
            cursor += max_seq_len - n
        output_padding_offset = paddle.to_tensor(offsets).astype(paddle.int32)

        got = top_p_candidates(probs, top_p, output_padding_offset, candidates_len, max_seq_len)
        want = top_p_candidates_ref(probs, top_p, output_padding_offset, candidates_len, max_seq_len)

        # Compare scores, tokens, and actual candidate lengths.
        for actual, expected in zip(got, want):
            np.testing.assert_allclose(actual.numpy(), expected)


# Allow running this test module directly with `python <file>`.
if __name__ == "__main__":
    unittest.main()