# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

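"""Unit tests for the FastDeploy GPU ``min_p_sampling`` op.

Min-p filtering keeps a token only if its probability is at least
``min_p * max(probs)`` within its row and zeroes everything below that
threshold. Each test compares the GPU kernel's output against a
pure-Paddle CPU reference implementing the same rule.
"""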

import unittest

import numpy as np
import paddle
import paddle.nn.functional as F

from fastdeploy.model_executor.ops.gpu import min_p_sampling


class TestMinPSampling(unittest.TestCase):
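    """End-to-end checks of the GPU min-p kernel against a CPU reference."""
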
    def setUp(self):
        self.sample_time = 1000000
        self.vocab_size = 1000
        self.min_p_value = 0.5
        self.batch_size = 3
        self.batch_min_p_values = [0.1, 0.0, 0.9]
        self.additional_batch_min_p_values = [0.1, 0.0, 0.3]

    # CPU reference: min_p may hold a single value or one value per row.
    def min_p_sampling_cpu(self, min_p):
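        """Zero out every probability below ``min_p * max(probs)`` for its row."""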
        # Two dominant tokens (logits 10 and 8) above a linearly decaying tail.
        logits = paddle.ones(shape=[1, self.vocab_size], dtype="float32")
        logits[0][0] = 10
        logits[0][1] = 8
        low_prob_tensor = paddle.linspace(2.0, 0.0, self.vocab_size - 2)
        logits[0][2:] = low_prob_tensor

        probs = F.softmax(logits)
        # Threshold each row at min_p * max(probs); broadcasting over the
        # single shared row also covers the batched min_p case.
        max_probabilities = paddle.amax(probs, axis=-1, keepdim=True)
        adjusted_min_p = max_probabilities * min_p.reshape([-1, 1])
        invalid_token_mask = probs < adjusted_min_p
        probs = paddle.where(invalid_token_mask, paddle.full_like(probs, 0.0), probs)
        return probs

    # Same distribution, filtered by the FastDeploy GPU kernel (min_p = 0.5 case).
    def fastdeploy_min_p_sampling(self, min_p):
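        """Filter the same distribution with the FastDeploy GPU kernel."""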
        logits = paddle.ones(shape=[1, self.vocab_size], dtype="float32")
        logits[0][0] = 10
        logits[0][1] = 8
        low_prob_tensor = paddle.linspace(2.0, 0.0, self.vocab_size - 2)
        logits[0][2:] = low_prob_tensor

        probs = F.softmax(logits)
        probs = min_p_sampling(probs, min_p)
        return probs

    # Batched variant, e.g. min_p = [0.1, 0.0, 0.9], via the FastDeploy kernel.
    def fastdeploy_batch_min_p_sampling(self, batch_size, min_p_values):
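        """Build ``batch_size`` identical rows and filter them on GPU with one min-p value per row."""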
        logits = paddle.ones(shape=[batch_size, self.vocab_size], dtype="float32")
        for b in range(batch_size):
            logits[b][0] = 10
            logits[b][1] = 8
            logits[b][2:] = paddle.linspace(2.0, 0.0, self.vocab_size - 2)

        probs = F.softmax(logits, axis=-1)
        min_p_arr = paddle.to_tensor(min_p_values, dtype="float32")

        probs = min_p_sampling(probs, min_p_arr)

        return probs

    def compare_results(self, probs, probs_cpu, atol=1e-6, rtol=1e-6):
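        """Assert that GPU and CPU results match elementwise within tolerances."""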
        probs_np = probs.numpy()
        probs_cpu_np = probs_cpu.numpy()
        try:
            np.testing.assert_allclose(
                probs_np,
                probs_cpu_np,
                rtol=rtol,
                atol=atol,
            )
            print("The results are the same between fastdeploy_min_p_sampling and min_p_sampling_cpu")
        except AssertionError as e:
            raise AssertionError(
                f"The results differ between fastdeploy_min_p_sampling and min_p_sampling_cpu:\n{str(e)}"
            ) from e

    def test_single_min_p_sampling(self):
        min_p = paddle.to_tensor([self.min_p_value], dtype="float32")
        probs = self.fastdeploy_min_p_sampling(min_p)
        probs_cpu = self.min_p_sampling_cpu(min_p)
        self.compare_results(probs, probs_cpu)

    def test_batch_min_p_sampling(self):
        batch_min_p = paddle.to_tensor(self.batch_min_p_values, dtype="float32")
        batch_probs = self.fastdeploy_batch_min_p_sampling(self.batch_size, batch_min_p)
        batch_probs_cpu = self.min_p_sampling_cpu(batch_min_p)
        self.compare_results(batch_probs, batch_probs_cpu)

    def test_additional_batch_min_p_sampling(self):
        additional_batch_min_p = paddle.to_tensor(self.additional_batch_min_p_values, dtype="float32")
        additional_batch_probs = self.fastdeploy_batch_min_p_sampling(self.batch_size, additional_batch_min_p)
        additional_batch_probs_cpu = self.min_p_sampling_cpu(additional_batch_min_p)
        self.compare_results(additional_batch_probs, additional_batch_probs_cpu)


if __name__ == "__main__":
    # The op is GPU-only, so skip the suite entirely on CPU-only builds.
    if paddle.is_compiled_with_cuda():
        unittest.main()