mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
Sync v2.0 version of code to github repo
This commit is contained in:
@@ -0,0 +1,64 @@
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Smoke test for the XPU `get_padding_offset` custom op.

Builds a right-padded batch of token ids, runs the op, and checks every
output against hand-computed references.
"""

import numpy as np
import paddle

from fastdeploy.model_executor.ops.xpu import get_padding_offset

np.random.seed(2023)

# Three sequences of lengths 4, 3 and 6, each right-padded to max_len.
max_len = 10
seq_lens = np.array([4, 3, 6], "int32").reshape(-1, 1)
# cum_offset[i]: total padding contributed by sequences 0..i.
cum_offset = np.cumsum((max_len - seq_lens).flatten(), -1, "int32")
token_num = np.sum(seq_lens)
bs = seq_lens.shape[0]
input_ids = np.zeros([bs, max_len], "int64")
for i in range(bs):
    ids_len = seq_lens[i, 0]
    input_ids[i, 0:ids_len] = np.random.randint(1, 10, seq_lens[i, 0], "int64")

x_remove_padding, cum_offsets_out, padding_offset, cu_seqlens_q, cu_seqlens_k = get_padding_offset(
    paddle.to_tensor(input_ids),
    paddle.to_tensor(cum_offset),
    paddle.to_tensor(token_num),
    paddle.to_tensor(seq_lens),
)

print("input_ids:\n", input_ids)
print("cum_offset:\n", cum_offset)
print("token_num:\n", token_num)
print("seq_lens:\n", seq_lens)
print("x_remove_padding:\n", x_remove_padding)
print("cum_offsets_out:\n", cum_offsets_out)
print("padding_offset:\n", padding_offset)
print("cu_seqlens_q:\n", cu_seqlens_q)
print("cu_seqlens_k:\n", cu_seqlens_k)

# References derived by hand from the fixed RNG seed and seq_lens above.
ref_x_remove_padding = np.array([8, 7, 8, 2, 4, 5, 5, 7, 6, 1, 7, 2, 6],
                                "int64")
ref_cum_offsets_out = np.array([0, 6, 13], "int32")
ref_padding_offset = np.array([0, 0, 0, 0, 6, 6, 6, 13, 13, 13, 13, 13, 13],
                              "int32")
ref_cu_seqlens_q = np.array([0, 4, 7, 13], "int32")
ref_cu_seqlens_k = np.array([0, 4, 7, 13], "int32")

# BUG FIX: the original asserted `sum(ref - out) == 0`, which also passes
# when positive and negative element-wise errors cancel out (and it mixed a
# NumPy array with a Paddle tensor). Compare element-wise instead.
assert np.array_equal(ref_x_remove_padding,
                      x_remove_padding.numpy()), 'Check x_remove_padding failed.'
assert np.array_equal(ref_cum_offsets_out,
                      cum_offsets_out.numpy()), 'Check cum_offsets_out failed.'
assert np.array_equal(ref_padding_offset,
                      padding_offset.numpy()), 'Check padding_offset failed.'
assert np.array_equal(ref_cu_seqlens_q,
                      cu_seqlens_q.numpy()), 'Check cu_seqlens_q failed.'
assert np.array_equal(ref_cu_seqlens_k,
                      cu_seqlens_k.numpy()), 'Check cu_seqlens_k failed.'
|
@@ -0,0 +1,254 @@
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Smoke test for the XPU `get_token_penalty_multi_scores` custom op.

Two cases: a tiny hand-written batch, then a larger fixed batch; both are
checked against precomputed reference logits.
"""

import numpy as np
import paddle

from fastdeploy.model_executor.ops.xpu import get_token_penalty_multi_scores

paddle.seed(2023)


def _apply_and_dump():
    """Run the op in place on the module-level tensors and print all inputs."""
    print("logits\n", logits)
    get_token_penalty_multi_scores(
        pre_ids,
        logits,
        penalty_scores,
        frequency_scores,
        presence_scores,
        temperatures,
        bad_tokens,
        cur_len,
        min_len,
        eos_token_id,
    )
    print("pre_ids\n", pre_ids)
    print("logits\n", logits)
    print("penalty_scores\n", penalty_scores)
    print("frequency_scores\n", frequency_scores)
    print("presence_scores\n", presence_scores)
    print("temperatures\n", temperatures)
    print("bad_tokens\n", bad_tokens)
    print("cur_len\n", cur_len)
    print("min_len\n", min_len)
    print("eos_token_id\n", eos_token_id)


def _check_logits(ref):
    """Assert the in-place-modified logits match the reference within 1e-6."""
    diff_logits = np.sum(np.abs(ref - logits.numpy()))
    print("diff_logits\n", diff_logits)
    assert diff_logits < 1e-6, 'Check failed.'


# ---- case 1: small hand-written batch -------------------------------------
pre_ids = paddle.to_tensor(
    [[1, 9, 3, 4, 5, 6, 7, -1, -1, -1], [1, 9, 7, 6, 5, 4, -1, -1, -1, -1]],
    "int64")
logits = paddle.to_tensor([[0.1, 0.9, 0.3, 0.4, 0.5, 0.6, 0.7, 0.1, 0.1, 0.1],
                           [0.1, 0.9, 0.7, 0.6, 0.5, 0.4, 0.1, 0.1, 0.1, 0.1]],
                          "float32")
penalty_scores = paddle.to_tensor([1.0, 1.0], "float32")
frequency_scores = paddle.to_tensor([0.1, 0.1], "float32")
presence_scores = paddle.to_tensor([0.0, 0.0], "float32")
temperatures = paddle.to_tensor([0.5, 0.25], "float32")
bad_tokens = paddle.to_tensor([0, 1], "int64")
cur_len = paddle.to_tensor([7, 6], "int64")
min_len = paddle.to_tensor([1, 8], "int64")
eos_token_id = paddle.to_tensor([2, 9], "int64")

_apply_and_dump()

ref_logits = np.array(
    [[-10000000000, -10000000000, 0.6, 0.6, 0.8, 1, 1.2, 0, 0.2, 0],
     [-10000000000, -10000000000, -40000, 2.4, 1.6, 1.2, 0, 0, 0.4,
      -40000.39843750]],
    "float32",
)
_check_logits(ref_logits)

# ---- case 2: larger fixed batch --------------------------------------------
pre_ids = paddle.to_tensor(
    [[
        2, 3, 3, 5, 8, 9, 3, 9, 1, 8, 9, 2, 3, 8, 8, 9, 9, 1, 4, 2, 6, 2, 6, 8,
        7, 2, 2, 3, 8, 1, 5, 7, 9, 2, 2, 9, 1, 4, 9, 8, 5, 8, 5, 7, 3, 6, 4, 4,
        9, 9, 8, 5, 5, 2, 2, 9, 4, 8, 1, 9, 6, 9, 2, 2, 7, 2, 2, 9, 4, 6, 4, 6,
        1, 4, 1, 9, 1, 8, 8, 5, 7, 9, 4, 2, 5, 1, 1, 4, 1, 5, 5, 4, 4, 2, 1, 8,
        7, 1, 2, 9, 6, 7, 9, 6, 7, 7, 4, 9, 9, 7, 5, 1, 8, 9, 8, 8, 5, 4, 6, 4,
        7, 5, 5, 7, 6, 9, 3, 9
    ],
     [
         7, 8, 1, 3, 1, 7, 6, 3, 5, 3, 8, 3, 1, 9, 7, 1, 1, 9, 5, 4, 9, 6, 1,
         9, 3, 8, 3, 9, 9, 6, 4, 2, 8, 5, 3, 1, 6, 9, 1, 3, 9, 8, 1, 7, 5, 1,
         5, 1, 8, 7, 4, 5, 9, 8, 7, 4, 7, 3, 6, 4, 6, 6, 5, 5, 2, 9, 9, 5, 8,
         8, 4, 8, 2, 8, 1, 3, 9, 1, 8, 5, 8, 3, 8, 8, 2, 7, 3, 7, 5, 7, 2, 6,
         3, 5, 1, 4, 6, 1, 9, 8, 2, 2, 3, 6, 7, 6, 2, 6, 5, 1, 5, 6, 2, 1, 6,
         4, 7, 7, 3, 8, 5, 1, 9, 1, 2, 8, 6, 8
     ]])
logits = paddle.to_tensor(
    [[
        0.16274983, 0.61470598, 0.94366980, 0.82005417, 0.50752640, 0.38316748,
        0.92648441, 0.24050158, 0.05461595, 0.42218581, 0.36270225, 0.15464807,
        0.13614719, 0.67509544, 0.40315166, 0.10671722, 0.24832056, 0.76091218,
        0.11598995, 0.10962527, 0.04688513, 0.81536716, 0.72259802, 0.60476679,
        0.16701800, 0.84160781, 0.79649884, 0.78021604, 0.75329530, 0.98587888,
        0.13421868, 0.16027625, 0.15269397, 0.06228730, 0.73856270, 0.34721911,
        0.73683006, 0.78178608, 0.32068327, 0.79906309, 0.44214272, 0.63330448,
        0.08016958, 0.63367140, 0.19788943, 0.55346787, 0.11142531, 0.90518415,
        0.21236691, 0.81587470, 0.83752930, 0.70979482, 0.35684183, 0.28715104,
        0.87162822, 0.17679396, 0.98725849, 0.76129991, 0.04090235, 0.37181064,
        0.63317049, 0.24689502, 0.21126501, 0.57617670, 0.74346697, 0.40613672,
        0.56907010, 0.68556929, 0.29032683, 0.17866278, 0.35165095, 0.97015840,
        0.70785582, 0.54259878, 0.14712237, 0.90483177, 0.02094105, 0.36411613,
        0.02495066, 0.88874054, 0.88895452, 0.86216462, 0.58062190, 0.95583254,
        0.20553111, 0.29870346, 0.69652933, 0.36861244, 0.85316223, 0.50240189,
        0.17566244, 0.61080140, 0.88203174, 0.98675215, 0.24344546, 0.17213407,
        0.78160852, 0.25165486, 0.48188508, 0.82812423, 0.10199814, 0.90475923,
        0.66907483, 0.71910626, 0.40660757, 0.59460294, 0.70212913, 0.90841550,
        0.00329034, 0.11290466, 0.89654654, 0.69114941, 0.29473618, 0.62027222,
        0.37333879, 0.98911142, 0.46510187, 0.65914583, 0.73022646, 0.12790845,
        0.12817244, 0.43015456, 0.75011456, 0.43562204, 0.48086026, 0.75587070,
        0.98481447, 0.77367836
    ],
     [
         0.12336024, 0.74152875, 0.09191196, 0.99301219, 0.44764417,
         0.01848883, 0.78326035, 0.99228370, 0.81447607, 0.02627683,
         0.51033205, 0.98703283, 0.15247856, 0.77640921, 0.60799915,
         0.87518770, 0.76818430, 0.86542630, 0.31795895, 0.04829503,
         0.85567141, 0.30271924, 0.67515039, 0.59728831, 0.78710967,
         0.75111693, 0.56837374, 0.49085775, 0.91510201, 0.59545547,
         0.99482232, 0.59036905, 0.58267909, 0.28770933, 0.53237396,
         0.95318258, 0.93987304, 0.61142951, 0.26737869, 0.52285451,
         0.03479086, 0.61631846, 0.66777998, 0.15736090, 0.00447258,
         0.37035006, 0.15281211, 0.95372260, 0.25963321, 0.61036694,
         0.15020694, 0.19171195, 0.55252832, 0.00391038, 0.31052542,
         0.96495175, 0.42586124, 0.05630261, 0.99728668, 0.01856293,
         0.83201504, 0.10701843, 0.56434178, 0.38009524, 0.51095045,
         0.13202040, 0.07133843, 0.75313550, 0.17111187, 0.80716974,
         0.00172165, 0.83906764, 0.73240769, 0.85843354, 0.11042888,
         0.07912333, 0.33689004, 0.22334915, 0.59059596, 0.52789515,
         0.29831955, 0.39515004, 0.55602801, 0.83818001, 0.05865780,
         0.25654668, 0.76624149, 0.35190639, 0.04158346, 0.59157544,
         0.30779791, 0.94609004, 0.10759670, 0.65575141, 0.37828529,
         0.29571742, 0.76361233, 0.72476572, 0.18568406, 0.85430276,
         0.02057583, 0.76195669, 0.65507215, 0.69129735, 0.25084621,
         0.75223947, 0.06064088, 0.20287007, 0.35887691, 0.75043523,
         0.47575447, 0.40021798, 0.44464844, 0.67975360, 0.40443239,
         0.71052992, 0.21782248, 0.50568426, 0.89037591, 0.06661721,
         0.28788096, 0.70773387, 0.42428264, 0.80419677, 0.42710736,
         0.87317258, 0.88229448, 0.79217333
     ]])
penalty_scores = paddle.to_tensor([1.0, 1.0], "float32")
frequency_scores = paddle.to_tensor([0.1, 0.1], "float32")
presence_scores = paddle.to_tensor([0.0, 0.0], "float32")
temperatures = paddle.to_tensor([0.5, 0.25], "float32")
bad_tokens = paddle.to_tensor([0, 1], "int64")
cur_len = paddle.to_tensor([7, 6], "int64")
min_len = paddle.to_tensor([1, 8], "int64")
eos_token_id = paddle.to_tensor([2, 9], "int64")

_apply_and_dump()

ref_logits = np.array(
    [[
        -10000000000., -10000000000., 1.88733959, 1.64010835, 1.01505280,
        0.76633495, 1.85296881, 0.48100317, 0.10923190, 0.84437162, 0.72540450,
        0.30929613, 0.27229437, 1.35019088, 0.80630332, 0.21343444, 0.49664113,
        1.52182436, 0.23197991, 0.21925054, 0.09377026, 1.63073432, 1.44519603,
        1.20953357, 0.33403599, 1.68321562, 1.59299767, 1.56043208, 1.50659060,
        1.97175777, 0.26843736, 0.32055250, 0.30538794, 0.12457460, 1.47712541,
        0.69443822, 1.47366011, 1.56357217, 0.64136654, 1.59812617, 0.88428545,
        1.26660895, 0.16033916, 1.26734281, 0.39577886, 1.10693574, 0.22285062,
        1.81036830, 0.42473382, 1.63174939, 1.67505860, 1.41958964, 0.71368366,
        0.57430208, 1.74325645, 0.35358793, 1.97451699, 1.52259982, 0.08180470,
        0.74362129, 1.26634097, 0.49379003, 0.42253003, 1.15235341, 1.48693395,
        0.81227344, 1.13814020, 1.37113857, 0.58065367, 0.35732555, 0.70330191,
        1.94031680, 1.41571164, 1.08519757, 0.29424474, 1.80966353, 0.04188210,
        0.72823226, 0.04990132, 1.77748108, 1.77790904, 1.72432923, 1.16124380,
        1.91166508, 0.41106221, 0.59740692, 1.39305866, 0.73722488, 1.70632446,
        1.00480378, 0.35132489, 1.22160280, 1.76406348, 1.97350430, 0.48689091,
        0.34426814, 1.56321704, 0.50330973, 0.96377015, 1.65624845, 0.20399629,
        1.80951846, 1.33814967, 1.43821251, 0.81321514, 1.18920588, 1.40425825,
        1.81683099, 0.00658068, 0.22580932, 1.79309309, 1.38229883, 0.58947235,
        1.24054444, 0.74667758, 1.97822285, 0.93020374, 1.31829166, 1.46045291,
        0.25581691, 0.25634488, 0.86030912, 1.50022912, 0.87124407, 0.96172053,
        1.51174140, 1.96962893, 1.54735672
    ],
     [
         -10000000000., -10000000000., -40000., 3.97204876, 1.79057670,
         0.07395532, 3.13304138, 3.96913481, 3.25790429, -40000., 2.04132819,
         3.94813132, 0.60991424, 3.10563684, 2.43199658, 3.50075078,
         3.07273722, 3.46170521, 1.27183580, 0.19318011, 3.42268562,
         1.21087694, 2.70060158, 2.38915324, 3.14843869, 3.00446773,
         2.27349496, 1.96343100, 3.66040802, 2.38182187, 3.97928929,
         2.36147618, 2.33071637, 1.15083730, 2.12949586, 3.81273031,
         3.75949216, 2.44571805, 1.06951475, 2.09141803, 0.13916343,
         2.46527386, 2.67111993, 0.62944359, 0.01789032, 1.48140025,
         0.61124843, 3.81489038, 1.03853285, 2.44146776, 0.60082775,
         0.76684779, 2.21011329, 0.01564152, 1.24210167, 3.85980701,
         1.70344496, 0.22521044, 3.98914671, 0.07425172, 3.32806015,
         0.42807373, 2.25736713, 1.52038097, 2.04380178, 0.52808160,
         0.28535372, 3.01254201, 0.68444747, 3.22867894, 0.00688660,
         3.35627055, 2.92963076, 3.43373418, 0.44171551, 0.31649333,
         1.34756017, 0.89339662, 2.36238384, 2.11158061, 1.19327819,
         1.58060014, 2.22411203, 3.35272002, 0.23463120, 1.02618670,
         3.06496596, 1.40762556, 0.16633384, 2.36630177, 1.23119164,
         3.78436017, 0.43038681, 2.62300563, 1.51314116, 1.18286967,
         3.05444932, 2.89906287, 0.74273622, 3.41721106, 0.08230332,
         3.04782677, 2.62028861, 2.76518941, 1.00338483, 3.00895786,
         0.24256352, 0.81148028, 1.43550766, 3.00174093, 1.90301788,
         1.60087192, 1.77859378, 2.71901441, 1.61772954, 2.84211969,
         0.87128991, 2.02273703, 3.56150365, 0.26646885, 1.15152383,
         2.83093548, 1.69713056, 3.21678710, 1.70842946, 3.49269032,
         3.52917790, 3.16869330
     ]],
    "float32",
)
_check_logits(ref_logits)
|
@@ -0,0 +1,76 @@
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Smoke test for the XPU `set_value_by_flags_and_idx` custom op.

The op writes each batch entry's current token into `pre_ids_all` at
`step_idx`, skipping entries whose stop flag is set; only batch 0 should
change here.
"""

import numpy as np
import paddle

from fastdeploy.model_executor.ops.xpu import set_value_by_flags_and_idx

paddle.seed(2023)

pre_ids_all = paddle.to_tensor(
    [[1, 9, 3, 4, 5, 6, 7, -1, -1, -1], [1, 9, 7, 6, 5, 4, -1, -1, -1, -1]],
    "int64")
input_ids = paddle.to_tensor([[1, 9, 3, 4, 5, 6, 7, -1, -1, -1, -1, -1, -1],
                              [1, 9, 7, 6, 5, 4, -1, -1, -1, -1, -1, -1, -1]],
                             "int64")
seq_lens_this_time = paddle.to_tensor([1, 1], "int32")
seq_lens_encoder = paddle.to_tensor([1, 1], "int32")
seq_lens_decoder = paddle.to_tensor([1, 1], "int32")
step_idx = paddle.to_tensor([1, 1], "int64")
# Batch 1 is stopped, so the op must leave its row untouched.
stop_flags = paddle.to_tensor([0, 1], "bool")

print("pre_ids_all\n", pre_ids_all)
set_value_by_flags_and_idx(pre_ids_all, input_ids, seq_lens_this_time,
                           seq_lens_encoder, seq_lens_decoder, step_idx,
                           stop_flags)
print("pre_ids_all\n", pre_ids_all)
print("input_ids\n", input_ids)
print("seq_lens_this_time\n", seq_lens_this_time)
print("seq_lens_encoder\n", seq_lens_encoder)
print("seq_lens_decoder\n", seq_lens_decoder)
print("step_idx\n", step_idx)
print("stop_flags\n", stop_flags)

# Expected: row 0 has position 1 overwritten with token 1; row 1 unchanged.
ref_pre_ids_all = np.array(
    [[1, 1, 3, 4, 5, 6, 7, -1, -1, -1],
     [1, 9, 7, 6, 5, 4, -1, -1, -1, -1]],
    "int64",
)
mismatch = np.sum(np.abs(ref_pre_ids_all - pre_ids_all.numpy()))
print("diff_pre_ids_all\n", mismatch)
assert mismatch == 0, 'Check failed.'
|
170
custom_ops/xpu_ops/test/python/ops/test_step.py
Normal file
170
custom_ops/xpu_ops/test/python/ops/test_step.py
Normal file
@@ -0,0 +1,170 @@
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Smoke test for the XPU `step_paddle` custom op.

Builds a full synthetic scheduler state (sequence lengths, KV-cache block
tables, free/used block lists) and runs one scheduling step, dumping the
mutated state for inspection.
"""

import numpy as np
import paddle

from fastdeploy.model_executor.ops.xpu import step_paddle

np.random.seed(2023)

# Batch / cache geometry for the synthetic state.
max_bs = 128
bs = max_bs
max_seq_len = 8192
block_size = 64
block_bs = 8
block_ratio = 0.75

# Every request is mid-decode: one new token this step, decoder length 2+2i.
stop_flags = np.random.randint(0, 2, [max_bs]).astype("bool")
seq_lens_this_time = np.zeros([bs], "int32")
seq_lens_encoder = np.zeros([max_bs], "int32")
seq_lens_decoder = np.zeros([max_bs], "int32")
for i in range(bs):
    seq_lens_decoder[i] = 2 + i * 2
    seq_lens_this_time[i] = 1
ori_seq_lens_encoder = np.zeros([max_bs], "int32")
ori_seq_lens_encoder[:] = seq_lens_decoder[:] // 2
# Tokens decoded so far per request.
# (Removed a dead `step_idx = np.zeros(...)` store that was immediately
# overwritten by this line in the original.)
step_idx = (seq_lens_decoder - ori_seq_lens_encoder).astype("int64")

# KV-cache block pool: the free list hands out block ids from the top down.
max_block_num = block_bs * max_seq_len // block_size
num_free = int(max_block_num * (1 - block_ratio))
# Compute the arange bound from the plain int (passing a size-1 ndarray as
# the `stop` of np.arange relies on deprecated scalar coercion in NumPy).
free_list = np.arange(max_block_num - 1,
                      max_block_num - num_free - 1,
                      -1,
                      dtype="int32")
free_list_len = np.full([1], num_free, "int32")

encoder_block_lens = np.zeros([max_bs], "int32")
used_list_len = np.zeros([max_bs], "int32")
block_tables = np.full([max_bs, 128], -1, "int32")
encoder_block_id = 0
for i in range(bs):
    # Blocks holding the prompt (encoder) part, allocated contiguously.
    enc_block_num = (ori_seq_lens_encoder[i] + block_size - 1) // block_size
    encoder_block_lens[i] = enc_block_num
    # Extra blocks consumed by decoding, taken from the tail of free_list.
    dec_block_num = (seq_lens_decoder[i] + block_size -
                     1) // block_size - enc_block_num
    used_list_len[i] = dec_block_num
    block_tables[i, :enc_block_num] = np.arange(
        encoder_block_id, encoder_block_id + enc_block_num, 1, "int32")
    encoder_block_id += enc_block_num
    if dec_block_num > 0:
        block_tables[i, enc_block_num:enc_block_num +
                     dec_block_num] = free_list[free_list_len[0] - 1 -
                                                dec_block_num:free_list_len[0]
                                                - 1]
        free_list[free_list_len[0] - 1 - dec_block_num:free_list_len[0] -
                  1] = -1
        free_list_len[0] -= dec_block_num
        assert free_list_len[0] >= 0

# Randomly mark some requests as already swapped out ("block step").
is_block_step = np.zeros([max_bs], "bool")
is_block_step[:bs] = np.random.randint(0, 2, [bs]).astype("bool")
step_block_list = np.full([max_bs], -1, "int32")
step_lens = np.full([1], 0, "int32")
for i in range(bs):
    if is_block_step[i]:
        step_block_list[step_lens[0]] = i
        step_lens[0] += 1

recover_lens = np.full([1], 0, "int32")
recover_block_list = np.full([max_bs], -1, "int32")

need_block_len = np.full([1], 0, "int32")
need_block_list = np.full([max_bs], -1, "int32")

input_ids = np.random.randint(0, 1000, [max_bs, max_seq_len], "int64")
pre_ids = np.random.randint(0, 1000, [max_bs, max_seq_len], "int64")

next_tokens = np.random.randint(0, 1000, [max_bs], "int64")
encoder_decoder_block_num = 1
first_token_ids = np.random.randint(0, 1000, [max_bs], "int64")

# Move everything onto the device; step_paddle mutates these in place.
stop_flags = paddle.to_tensor(stop_flags)
seq_lens_this_time = paddle.to_tensor(seq_lens_this_time)
seq_lens_encoder = paddle.to_tensor(seq_lens_encoder)
seq_lens_decoder = paddle.to_tensor(seq_lens_decoder)
ori_seq_lens_encoder = paddle.to_tensor(ori_seq_lens_encoder)
block_tables = paddle.to_tensor(block_tables)
encoder_block_lens = paddle.to_tensor(encoder_block_lens)
is_block_step = paddle.to_tensor(is_block_step)
step_block_list = paddle.to_tensor(step_block_list)
step_lens = paddle.to_tensor(step_lens)
recover_lens = paddle.to_tensor(recover_lens)
recover_block_list = paddle.to_tensor(recover_block_list)
need_block_list = paddle.to_tensor(need_block_list)
need_block_len = paddle.to_tensor(need_block_len)
used_list_len = paddle.to_tensor(used_list_len)
free_list_len = paddle.to_tensor(free_list_len)
free_list = paddle.to_tensor(free_list)
input_ids = paddle.to_tensor(input_ids)
pre_ids = paddle.to_tensor(pre_ids)
step_idx = paddle.to_tensor(step_idx)
next_tokens = paddle.to_tensor(next_tokens)
first_token_ids = paddle.to_tensor(first_token_ids)

step_paddle(stop_flags, seq_lens_this_time, ori_seq_lens_encoder,
            seq_lens_encoder, seq_lens_decoder, block_tables,
            encoder_block_lens, is_block_step, step_block_list, step_lens,
            recover_block_list, recover_lens, need_block_list, need_block_len,
            used_list_len, free_list, free_list_len, input_ids, pre_ids,
            step_idx, next_tokens, first_token_ids, block_size,
            encoder_decoder_block_num)

print("-" * 50 + "after step op" + "-" * 50)
print("stop_flags: ", stop_flags)
print("seq_lens_this_time: ", seq_lens_this_time)
print("seq_lens_encoder: ", seq_lens_encoder)
print("seq_lens_decoder: ", seq_lens_decoder)
print("ori_seq_lens_encoder: ", ori_seq_lens_encoder)
print("block_tables: ", block_tables)
print("encoder_block_lens: ", encoder_block_lens)
print("is_block_step: ", is_block_step)
print("step_block_list: ", step_block_list)
print("step_lens: ", step_lens)
print("recover_lens: ", recover_lens)
print("recover_block_list: ", recover_block_list)
print("need_block_list: ", need_block_list)
print("need_block_len: ", need_block_len)
print("used_list_len: ", used_list_len)
print("free_list_len: ", free_list_len)
print("free_list: ", free_list)
print("input_ids: ", input_ids)
print("pre_ids: ", pre_ids)
print("step_idx: ", step_idx)
print("next_tokens: ", next_tokens)
print("first_token_ids: ", first_token_ids)
|
@@ -0,0 +1,139 @@
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Smoke test for the XPU `set_stop_value_multi_ends` custom op.

Runs the op twice — beam_search=False then beam_search=True — with freshly
drawn random state each time, and checks all mutated tensors against
precomputed references.
"""

import numpy as np
import paddle

from fastdeploy.model_executor.ops.xpu import set_stop_value_multi_ends

np.random.seed(1)

bs = 64


def _run_case(use_beam_search, ref_topk_ids, ref_next_tokens, ref_stop_flags):
    """Build fresh inputs (consuming the shared RNG stream in the original
    order), run the op in place, dump state, and verify against refs."""
    topk_ids = paddle.arange(0, bs, dtype="int64")
    next_tokens = paddle.full([bs], 0, dtype="int64")
    stop_flags = paddle.to_tensor(np.random.randint(0, 2, [bs]), "bool")
    seq_lens = paddle.to_tensor(np.random.randint(0, 5, [bs]), "int32")
    end_ids = paddle.to_tensor([0, 1, 2, 3, 4, 5], "int64")
    print("topk_ids\n", topk_ids)
    print("next_tokens\n", next_tokens)
    print("stop_flags\n", stop_flags)
    set_stop_value_multi_ends(topk_ids, stop_flags, seq_lens, end_ids,
                              next_tokens, use_beam_search)
    print("topk_ids\n", topk_ids)
    print("next_tokens\n", next_tokens)
    print("stop_flags\n", stop_flags)
    print("seq_lens\n", seq_lens)
    print("end_ids\n", end_ids)

    diff_topk_ids = np.sum(np.abs(ref_topk_ids - topk_ids.numpy()))
    print("diff_topk_ids\n", diff_topk_ids)
    assert diff_topk_ids == 0, 'Check failed.'
    diff_next_tokens = np.sum(np.abs(ref_next_tokens - next_tokens.numpy()))
    print("diff_next_tokens\n", diff_next_tokens)
    assert diff_next_tokens == 0, 'Check failed.'
    diff_stop_flags = np.sum(
        np.abs(
            ref_stop_flags.astype(np.int32) -
            stop_flags.numpy().astype(np.int32)))
    print("diff_stop_flags\n", diff_stop_flags)
    assert diff_stop_flags == 0, 'Check failed.'


# test beam_search=False
_run_case(
    False,
    np.array(
        [
            0, 0, 2, 3, -1, 0, 0, 0, 0, 9, 10, 0, 12, 0, -1, 15, 16, 0, 18,
            19, 20, 0, 22, 23, 0, 25, 26, 27, -1, 29, 30, 31, 0, 0, 0, -1,
            -1, 37, 38, 39, -1, -1, 0, 0, 0, 0, 46, -1, 0, 49, 50, 0, 52, 53,
            0, -1, 0, 57, -1, 59, 60, 0, 0, 63
        ],
        "int64",
    ),
    np.array(
        [
            0, 0, 2, 3, 0, 0, 0, 0, 0, 9, 10, 0, 12, 0, 0, 15, 16, 0, 18, 19,
            20, 0, 22, 23, 0, 25, 26, 27, 0, 29, 30, 31, 0, 0, 0, 0, 0, 37,
            38, 39, 0, 0, 0, 0, 0, 0, 46, 0, 0, 49, 50, 0, 52, 53, 0, 0, 0,
            57, 0, 59, 60, 0, 0, 63
        ],
        "int64",
    ),
    np.array(
        [
            True, True, True, True, True, True, True, True, True, False,
            False, True, False, True, True, False, False, True, False, False,
            False, True, False, False, True, False, False, False, True,
            False, False, False, True, True, True, True, True, False, False,
            False, True, True, True, True, True, True, False, True, True,
            False, False, True, False, False, True, True, True, False, True,
            False, False, True, True, False
        ],
        "bool",
    ),
)

# test beam_search=True
_run_case(
    True,
    np.array(
        [
            0, 1, 2, 3, 4, 0, 6, 7, -1, 9, 10, 0, -1, 13, 14, 15, 0, 17, 18,
            19, 20, 0, 22, 23, 24, 25, -1, -1, 28, 29, 0, 0, -1, 33, 34, 35,
            36, 37, 0, -1, 0, 41, -1, 0, 44, 45, 46, 0, 0, 49, 0, 0, 0, 53,
            0, 0, 0, 0, 58, -1, 60, 61, -1, 63
        ],
        "int64",
    ),
    np.array(
        [
            0, 1, 2, 3, 4, 0, 6, 7, 0, 9, 10, 0, 0, 13, 14, 15, 0, 17, 18,
            19, 20, 0, 22, 23, 24, 25, 0, 0, 28, 29, 0, 0, 0, 33, 34, 35, 36,
            37, 0, 0, 0, 41, 0, 0, 44, 45, 46, 0, 0, 49, 0, 0, 0, 53, 0, 0,
            0, 0, 58, 0, 60, 61, 0, 63
        ],
        "int64",
    ),
    np.array(
        [
            False, False, False, False, False, True, False, False, True,
            False, False, True, True, False, False, False, True, False,
            False, False, False, True, False, False, False, False, True,
            True, False, False, True, True, True, False, False, False,
            False, False, True, True, True, False, True, True, False, False,
            False, True, True, False, True, True, True, False, True, True,
            True, True, False, True, False, False, True, False
        ],
        "bool",
    ),
)
|
@@ -0,0 +1,45 @@
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle
|
||||
|
||||
from fastdeploy.model_executor.ops.xpu import get_token_penalty
|
||||
|
||||
paddle.seed(2023)  # fix the paddle RNG so runs are reproducible

bs = 1
length = 12
length_id = 6

# Exercise get_token_penalty on both dtypes the original test covered.
# pre_ids is all ones, so token id 1 is the repeated history every time;
# the random draw order (logits then penalty_scores, fp16 pass first)
# matches the original unrolled code exactly.
for logits_dtype in ("float16", "float32"):
    pre_ids = paddle.ones([bs, length_id], dtype="int64")
    logits = paddle.randn([bs, length], dtype=logits_dtype)
    penalty_scores = paddle.randn([bs], dtype=logits_dtype)
    out = get_token_penalty(pre_ids, logits, penalty_scores)
    print(pre_ids)
    print(logits)
    print(penalty_scores)
    print(out)
|
106
custom_ops/xpu_ops/test/python/ops/test_update_inputs.py
Normal file
106
custom_ops/xpu_ops/test/python/ops/test_update_inputs.py
Normal file
@@ -0,0 +1,106 @@
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
|
||||
from fastdeploy.model_executor.ops.xpu import update_inputs
|
||||
|
||||
np.random.seed(2023)  # deterministic inputs so the golden outputs below hold

bs = 48
max_bs = 64
max_input_length = 6144

# Build a mixed batch: even slots are filled like prefill (encoder) requests,
# odd slots like single-token decode requests. RNG calls happen in the same
# order as the original script so the seeded values are identical.
stop_flags = np.random.randint(0, 2, max_bs).astype("bool")
not_need_stop = np.array([1], "bool")
seq_lens_this_time = np.zeros([bs], "int32")
seq_lens_encoder = np.zeros([max_bs], "int32")
seq_lens_decoder = np.zeros([max_bs], "int32")
for slot in range(bs):
    if slot % 2:
        seq_lens_decoder[slot] = slot
        seq_lens_this_time[slot] = 1
    else:
        seq_lens_encoder[slot] = slot
        seq_lens_this_time[slot] = slot
input_ids_np = np.random.randint(1, 10, [max_bs, max_input_length], "int64")
stop_nums = np.array([max_bs], "int64")
next_tokens = np.random.randint(1, 10, [max_bs], "int64")
is_block_step = np.random.randint(0, 2, [max_bs]).astype("bool")

# Convert everything to paddle tensors; not_need_stop lives on the CPU.
stop_flags = paddle.to_tensor(stop_flags)
not_need_stop = paddle.to_tensor(not_need_stop, place=paddle.CPUPlace())
seq_lens_this_time = paddle.to_tensor(seq_lens_this_time)
seq_lens_encoder = paddle.to_tensor(seq_lens_encoder)
seq_lens_decoder = paddle.to_tensor(seq_lens_decoder)
input_ids = paddle.to_tensor(input_ids_np)
stop_nums = paddle.to_tensor(stop_nums)
next_tokens = paddle.to_tensor(next_tokens)
is_block_step = paddle.to_tensor(is_block_step)

for tag, value in (
    ("stop_flags", stop_flags),
    ("not_need_stop", not_need_stop),
    ("seq_lens_this_time", seq_lens_this_time),
    ("seq_lens_encoder", seq_lens_encoder),
    ("seq_lens_decoder", seq_lens_decoder),
    ("input_ids", input_ids),
    ("stop_nums", stop_nums),
    ("next_tokens", next_tokens),
    ("is_block_step", is_block_step),
):
    print(f"{tag}:\n", value)

# update_inputs mutates its tensor arguments in place.
update_inputs(stop_flags, not_need_stop, seq_lens_this_time, seq_lens_encoder,
              seq_lens_decoder, input_ids, stop_nums, next_tokens,
              is_block_step)

print("-" * 50)
for tag, value in (
    ("stop_flags", stop_flags),
    ("not_need_stop", not_need_stop),
    ("seq_lens_this_time", seq_lens_this_time),
    ("seq_lens_encoder", seq_lens_encoder),
    ("seq_lens_decoder", seq_lens_decoder),
    ("input_ids", input_ids),
    ("stop_nums", stop_nums),
    ("next_tokens", next_tokens),
):
    print(f"{tag}:\n", value)

# Golden outputs captured from a known-good run with the seed above.
ref_not_need_stop_out = np.array([True])
ref_seq_lens_this_time_out = np.array([
    0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1
], "int32")
ref_seq_lens_encoder_out = np.array([
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
], "int32")
ref_seq_lens_decoder_out = np.array([
    0, 0, 2, 0, 0, 6, 0, 8, 8, 10, 0, 12, 12, 0, 0, 0, 0, 0, 0, 0, 20, 22, 0,
    24, 24, 0, 26, 28, 0, 0, 0, 32, 32, 0, 34, 0, 0, 38, 0, 40, 0, 0, 42, 0, 0,
    46, 46, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
], "int32")
# Expected column-0 tokens after the op (golden values from the same run).
input_ids_np[:, 0] = np.array([
    6, 5, 9, 8, 6, 2, 8, 1, 3, 1, 3, 6, 9, 8, 1, 9, 1, 8, 8, 6, 7, 6, 5, 3, 5,
    9, 3, 6, 3, 9, 8, 8, 8, 8, 4, 8, 7, 4, 2, 3, 5, 8, 4, 2, 5, 6, 8, 9, 6, 7,
    4, 2, 4, 6, 2, 3, 4, 9, 7, 2, 1, 8, 7, 8
], "int64")

assert not_need_stop.numpy(
) == ref_not_need_stop_out, 'Check not_need_stop failed.'
assert np.array_equal(
    seq_lens_this_time.numpy(),
    ref_seq_lens_this_time_out), 'Check seq_lens_this_time failed.'
assert np.array_equal(
    seq_lens_encoder.numpy(),
    ref_seq_lens_encoder_out), 'Check seq_lens_encoder failed.'
assert np.array_equal(
    seq_lens_decoder.numpy(),
    ref_seq_lens_decoder_out), 'Check seq_lens_decoder failed.'
assert np.array_equal(input_ids.numpy(), input_ids_np), 'Check input_ids failed.'
|
@@ -0,0 +1,94 @@
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
|
||||
from fastdeploy.model_executor.ops.xpu import weight_quantize_xpu
|
||||
|
||||
np.random.seed(2025)  # fix the RNG so the generated test weights are reproducible
|
||||
|
||||
|
||||
def np_clip_and_round(x, abs_max=127):
    """Round *x* to the nearest integer, saturate to [-abs_max, abs_max], cast to int8."""
    rounded = np.around(x)  # round-half-to-even, like np.around always does
    return np.clip(rounded, -abs_max, abs_max).astype("int8")
|
||||
|
||||
|
||||
def np_quant_weight_int4(weight_np):
    """NumPy reference for symmetric per-channel int4 weight quantization.

    Args:
        weight_np: float32 array of shape [k, n].

    Returns:
        Tuple of (packed int4 weights as int8 of shape [n, k//2],
        per-channel float32 scales of length n).
    """
    assert weight_np.dtype == np.float32  # incoming layout is [k, n]
    wt = weight_np.T  # [n, k]
    # Per-output-channel absolute max -> shape [n, 1].
    abs_max = np.abs(wt).max(axis=1, keepdims=True)
    # NOTE(review): an all-zero channel would divide by zero here — assumed
    # not to occur for the random test data.
    q = np_clip_and_round(wt / abs_max * 7.0, 7)  # int8 values in [-7, 7], [n, k]
    # Pack two int4 values per byte: odd columns in the high nibble,
    # even columns in the low nibble -> [n, k//2].
    packed = (q[:, 1::2] & 0xF) << 4 | (q[:, ::2] & 0xF)
    scales = abs_max.astype(weight_np.dtype).reshape(-1)
    return packed, scales.astype(np.float32)
|
||||
|
||||
|
||||
def np_quant_weight(weight_np, algo='weight_only_int8'):
    """NumPy reference for weight-only quantization.

    Args:
        weight_np: float32 array of shape [k, n].
        algo: 'weight_only_int8' (default) or 'weight_only_int4'.

    Returns:
        Tuple of (quantized weights as int8, per-channel float32 scales).
        The int4 path delegates to :func:`np_quant_weight_int4`.
    """
    assert weight_np.dtype == np.float32

    if algo == 'weight_only_int4':
        return np_quant_weight_int4(weight_np)

    wt = weight_np.T  # [n, k]
    abs_max = np.abs(wt).max(axis=1, keepdims=True)  # [n, 1]
    q = np_clip_and_round(wt / abs_max * 127.0)
    scales = abs_max.astype(weight_np.dtype).reshape(-1)
    return q, scales.astype(np.float32)
|
||||
|
||||
|
||||
def int8_to_bin_np(value):
    """Return the 8-bit two's-complement binary string of *value* via NumPy."""
    as_int8 = np.int8(value)
    return np.binary_repr(as_int8, width=8)
|
||||
|
||||
|
||||
def int8_to_bin(value):
    """Return the 8-bit two's-complement binary string of *value* (pure Python)."""
    if value < -128 or value > 127:
        raise ValueError("int8 值必须在 -128 到 127 之间")
    # '08b' = 8 binary digits, zero-padded; masking with 0xFF maps negative
    # values onto their two's-complement bit pattern.
    return format(value & 0xFF, '08b')
|
||||
|
||||
|
||||
# 1) preparation
algo = "weight_only_int8"
# GEMM-sized weight matrix to exercise the XPU quantization kernel.
# (The original script also assigned k, n = 128, 256 first, but that value
# was dead — immediately overwritten — so it is removed.)
k, n = 8192, 57344

w_np = (np.random.random((k, n)).astype(np.float32) - 0.5) * 10

# 2) np calculation (reference implementation)
qw_np, wscale_np = np_quant_weight(w_np, algo)

# 3) xpu calculation
dtype = 'float32'
x_pd = paddle.to_tensor(w_np, dtype=dtype)
qw_pd, wscale_pd = weight_quantize_xpu(x_pd, algo, -1, -1)
# Transpose back to the reference's [n, k] layout before comparing.
qw_pd_trans = paddle.transpose(qw_pd, [1, 0])

# comparison: summary statistics plus an exact element-wise difference
print(f"wscale_pd, mean={wscale_pd.mean()}, std={wscale_pd.std()}")
print(f"wscale_np, mean={wscale_np.mean()}, std={wscale_np.std()}")
print(
    f"qw_np, mean={qw_np.astype(np.float32).mean()}, std={qw_np.astype(np.float32).std()}"
)
print(
    f"qw_pd_trans, mean={qw_pd_trans.astype('float32').mean()}, std={qw_pd_trans.astype('float32').std()}"
)
sum_diff = np.sum(
    np.abs(qw_pd_trans.astype("float32").numpy() - qw_np.astype("float32")))
print(f"sum_diff: {sum_diff}")
|
Reference in New Issue
Block a user