mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 00:33:03 +08:00
Sync v2.0 version of code to github repo
This commit is contained in:
@@ -11,16 +11,16 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
""" UT for dequant_int8 kernel """
|
||||
|
||||
import os
|
||||
import paddle
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
|
||||
|
||||
class Test(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
"""
|
||||
Initialize.
|
||||
@@ -33,10 +33,10 @@ class Test(unittest.TestCase):
|
||||
"""
|
||||
Check that dequant_int8 gives matching output in static-graph and dynamic-graph modes.
|
||||
"""
|
||||
if dynamic_mode:
|
||||
os.environ["ELLM_DYNAMIC_MODE"] = "1"
|
||||
if not dynamic_mode:
|
||||
paddle.enable_static()
|
||||
else:
|
||||
os.environ["ELLM_DYNAMIC_MODE"] = "0"
|
||||
paddle.disable_static()
|
||||
from fastdeploy.model_executor.ops.gpu import dequant_int8
|
||||
|
||||
input_tensor = paddle.cast(paddle.ones([128, 128]), "int32")
|
||||
@@ -46,10 +46,14 @@ class Test(unittest.TestCase):
|
||||
|
||||
def test(self):
|
||||
op_out = self.dequant_int8_test()
|
||||
exe = paddle.static.Executor()
|
||||
exe.run(paddle.static.default_startup_program())
|
||||
op_out = exe.run(fetch_list=[op_out])[0]
|
||||
func_out = self.dequant_int8_test(True)
|
||||
np.testing.assert_allclose(
|
||||
op_out.numpy(), func_out.numpy(), rtol=1e-04, atol=1e-04
|
||||
)
|
||||
np.testing.assert_allclose(op_out,
|
||||
func_out.numpy(),
|
||||
rtol=1e-04,
|
||||
atol=1e-04)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
Reference in New Issue
Block a user