Sync v2.0 version of code to github repo

2025-10-05 00:33:03 +08:00 · 2025-06-29 23:29:37 +00:00
parent d151496038
commit 92c2cfa2e7
597 changed files with 78776 additions and 22905 deletions
--- a/test/operators/test_deqant_int8_cpp_extension.py
+++ b/test/operators/test_deqant_int8_cpp_extension.py
@@ -11,16 +11,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """ UT for air_topp_sampling kernel """

-import os
-import paddle
 import unittest
+
 import numpy as np
+import paddle


 class Test(unittest.TestCase):
+
    def setUp(self):
        """
        Initialize.
@@ -33,10 +33,10 @@ class Test(unittest.TestCase):
        """
        Run dequant_int8 in static-graph or dynamic-graph mode, as selected by dynamic_mode.
        """
-        if dynamic_mode:
-            os.environ["ELLM_DYNAMIC_MODE"] = "1"
+        if not dynamic_mode:
+            paddle.enable_static()
        else:
-            os.environ["ELLM_DYNAMIC_MODE"] = "0"
+            paddle.disable_static()
        from fastdeploy.model_executor.ops.gpu import dequant_int8

        input_tensor = paddle.cast(paddle.ones([128, 128]), "int32")
@@ -46,10 +46,14 @@ class Test(unittest.TestCase):

    def test(self):
        op_out = self.dequant_int8_test()
+        exe = paddle.static.Executor()
+        exe.run(paddle.static.default_startup_program())
+        op_out = exe.run(fetch_list=[op_out])[0]
        func_out = self.dequant_int8_test(True)
-        np.testing.assert_allclose(
-            op_out.numpy(), func_out.numpy(), rtol=1e-04, atol=1e-04
-        )
+        np.testing.assert_allclose(op_out,
+                                   func_out.numpy(),
+                                   rtol=1e-04,
+                                   atol=1e-04)


 if __name__ == "__main__":