diff --git a/tests/entrypoints/test_chat.py b/tests/entrypoints/test_chat.py
new file mode 100644
index 000000000..0078cd8a1
--- /dev/null
+++ b/tests/entrypoints/test_chat.py
@@ -0,0 +1,63 @@
+"""
+# Copyright (c) 2025  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+import os
+import unittest
+import weakref
+
+from fastdeploy.entrypoints.llm import LLM
+
+MODEL_NAME = os.getenv("MODEL_PATH") + "/ERNIE-4.5-0.3B-Paddle"  # raises TypeError at import if MODEL_PATH is unset
+
+
+class TestChat(unittest.TestCase):
+    """Smoke test: LLM.chat must return one output per input conversation."""
+
+    PROMPTS = [  # four conversations, each a list holding one user-role message
+        [{"content": "The color of tomato is ", "role": "user"}],
+        [{"content": "The equation 2+3= ", "role": "user"}],
+        [{"content": "The equation 4-1= ", "role": "user"}],
+        [{"content": "PaddlePaddle is ", "role": "user"}],
+    ]
+
+    @classmethod
+    def setUpClass(cls):  # build one LLM for the whole class; any failure skips the class
+        try:
+            llm = LLM(
+                model=MODEL_NAME,
+                max_num_batched_tokens=4096,
+                tensor_parallel_size=1,
+                engine_worker_queue_port=int(os.getenv("FD_ENGINE_QUEUE_PORT")),  # TypeError if unset -> skip
+                cache_queue_port=int(os.getenv("FD_CACHE_QUEUE_PORT")),  # TypeError if unset -> skip
+            )
+            cls.llm = weakref.proxy(llm)  # NOTE(review): weak proxy only; confirm LLM is kept alive elsewhere
+        except Exception as e:  # deliberately broad: every setup failure is reported as a skip
+            print(f"Setting up LLM failed: {e}")
+            raise unittest.SkipTest(f"LLM initialization failed: {e}")
+
+    @classmethod
+    def tearDownClass(cls):
+        """Drop the class-level ``llm`` attribute after all tests have run."""
+        if hasattr(cls, "llm"):
+            del cls.llm  # removes the proxy attribute only; the proxy holds no strong reference
+
+    def test_chat(self):  # sampling_params=None; expect one output per conversation
+        outputs = self.llm.chat(messages=self.PROMPTS, sampling_params=None)
+        self.assertEqual(len(self.PROMPTS), len(outputs))
+
+
+if __name__ == "__main__":  # allow running this file directly as a script
+    unittest.main()
diff --git a/tests/entrypoints/test_generation.py b/tests/entrypoints/test_generation.py
index 345c393b2..617a635ef 100644
--- a/tests/entrypoints/test_generation.py
+++ b/tests/entrypoints/test_generation.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 """
 
+import copy
 import os
 import unittest
 import weakref
@@ -120,6 +121,48 @@ class TestGeneration(unittest.TestCase):
         outputs = self.llm.generate(prompts=self.PROMPTS, sampling_params=None)
         self.assertEqual(len(self.PROMPTS), len(outputs))
 
+    def test_consistency_single_prompt_tokens_chat(self):
+        """Check chat() gives equal outputs for bare token ids and the matching ``prompt_token_ids`` dict."""
+        sampling_params = SamplingParams(temperature=1.0, top_p=0.0)  # presumably makes sampling repeatable - confirm
+
+        for prompt_token_ids in self.TOKEN_IDS:
+            with self.subTest(prompt_token_ids=prompt_token_ids):  # report each token-id list separately
+                output1 = self.llm.chat(messages=[prompt_token_ids], sampling_params=sampling_params)
+                output2 = self.llm.chat(
+                    [{"prompt": "", "prompt_token_ids": prompt_token_ids}], sampling_params=sampling_params
+                )
+                self.assert_outputs_equal(output1, output2)
+
+    def test_multiple_sampling_params_chat(self):
+        """Exercise chat() with a per-prompt list, a mismatched list, a single and no SamplingParams."""
+        sampling_params = [
+            SamplingParams(temperature=0.01, top_p=0.95),
+            SamplingParams(temperature=0.3, top_p=0.95),
+            SamplingParams(temperature=0.7, top_p=0.95),
+            SamplingParams(temperature=0.99, top_p=0.95),
+        ]
+
+        prompts = copy.copy(self.PROMPTS)  # shallow copy; NOTE(review): nested items stay shared - confirm OK
+        # A list of SamplingParams is matched one-to-one with the prompts
+        outputs = self.llm.chat(messages=prompts, sampling_params=sampling_params)
+        self.assertEqual(len(self.PROMPTS), len(outputs))
+
+        prompts = copy.copy(self.PROMPTS)
+        # Size mismatch (only three SamplingParams are passed) must raise ValueError
+        with self.assertRaises(ValueError):
+            self.llm.chat(messages=prompts, sampling_params=sampling_params[:3])
+
+        prompts = copy.copy(self.PROMPTS)
+        # A single SamplingParams object should be applied to every prompt
+        single_sampling_params = SamplingParams(temperature=0.3, top_p=0.95)
+        outputs = self.llm.chat(messages=prompts, sampling_params=single_sampling_params)
+        self.assertEqual(len(self.PROMPTS), len(outputs))
+
+        prompts = copy.copy(self.PROMPTS)
+        # With sampling_params=None the default parameters should be applied
+        outputs = self.llm.chat(messages=prompts, sampling_params=None)
+        self.assertEqual(len(self.PROMPTS), len(outputs))
+
 
 if __name__ == "__main__":
     unittest.main()