[LLM] First commit the llm deployment code

2025-10-04 16:22:57 +08:00 · 2025-06-09 19:20:15 +08:00
parent 980c0a1d2c
commit 684703fd72
11814 changed files with 127294 additions and 1293102 deletions
--- a/test/operators/test_deqant_int8_cpp_extension.py
+++ b/test/operators/test_deqant_int8_cpp_extension.py
@@ -0,0 +1,56 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+""" UT for dequant_int8 kernel """
+
+import os
+import paddle
+import unittest
+import numpy as np
+
+
+class Test(unittest.TestCase):  # compares dequant_int8 output across both ELLM_DYNAMIC_MODE settings
+    def setUp(self):
+        """
+        Seed paddle with 2024, then print the CUDA device properties and paddle's git commit.
+        """
+        paddle.seed(2024)
+        print(paddle.device.cuda.get_device_properties())
+        print(paddle.__git_commit__)
+
+    def dequant_int8_test(self, dynamic_mode=False):
+        """
+        Set ELLM_DYNAMIC_MODE to "1" (dynamic_mode) or "0", call dequant_int8 on all-ones inputs, return its output.
+        """
+        if dynamic_mode:
+            os.environ["ELLM_DYNAMIC_MODE"] = "1"
+        else:
+            os.environ["ELLM_DYNAMIC_MODE"] = "0"
+        from fastdeploy.model_executor.ops.gpu import dequant_int8  # NOTE(review): imported after the env var is set; presumably so the flag is picked up -- confirm
+
+        input_tensor = paddle.cast(paddle.ones([128, 128]), "int32")  # 128x128 tensor of ones cast to int32
+        scale_tensor = paddle.cast(paddle.ones([128]), "float32")  # 128-element tensor of ones cast to float32
+        out = dequant_int8(input_tensor, scale_tensor, "float16")  # third argument is presumably the output dtype -- TODO confirm
+        return out
+
+    def test(self):  # outputs for dynamic_mode=False and dynamic_mode=True must agree within rtol/atol of 1e-4
+        op_out = self.dequant_int8_test()
+        func_out = self.dequant_int8_test(True)
+        np.testing.assert_allclose(
+            op_out.numpy(), func_out.numpy(), rtol=1e-04, atol=1e-04
+        )
+
+
+if __name__ == "__main__":  # run the test cases when this file is executed as a script
+    unittest.main()