diff --git a/fastdeploy/entrypoints/openai/response_processors.py b/fastdeploy/entrypoints/openai/response_processors.py
index ad54d2030..6e7138d07 100644
--- a/fastdeploy/entrypoints/openai/response_processors.py
+++ b/fastdeploy/entrypoints/openai/response_processors.py
@@ -115,7 +115,9 @@ class ChatResponseProcessor:
                     )
                     yield response
                 elif decode_type == 2:  # audio
-                    if self.eoa_token_id is not None and self.eoa_token_id in token_ids:
+                    if self.eoa_token_id is not None and any(
+                        token_id >= self.eoa_token_id for token_id in token_ids
+                    ):
                         continue
                     if req_id in self._audio_buffer:
                         self._audio_buffer[req_id].append(token_ids)
diff --git a/tests/entrypoints/openai/test_response_processors.py b/tests/entrypoints/openai/test_response_processors.py
index bfdc0010d..4e785af33 100644
--- a/tests/entrypoints/openai/test_response_processors.py
+++ b/tests/entrypoints/openai/test_response_processors.py
@@ -60,9 +60,9 @@ class TestChatResponseProcessor(unittest.IsolatedAsyncioTestCase):
         """multimodal disabled, requests go directly through data_processor"""
         processor = ChatResponseProcessor(self.mock_data_processor)
         request_outputs = [
-            {"request_id": "req1", "outputs": {"decode_type": 2, "token_ids": [[11, 22]]}},
+            {"request_id": "req1", "outputs": {"decode_type": 2, "token_ids": [11, 22]}},
             {"request_id": "req1", "outputs": {"decode_type": 0, "token_ids": [1]}},
-            {"request_id": "req1", "outputs": {"decode_type": 2, "token_ids": [[11, 22]]}},
+            {"request_id": "req1", "outputs": {"decode_type": 2, "token_ids": [11, 22]}},
             {"request_id": "req1", "outputs": {"decode_type": 0, "token_ids": [2]}},
         ]
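
A minimal standalone sketch of the behavioural difference introduced by the first hunk. The `eoa_token_id` value and the sample token lists below are made up for illustration; only the two checks mirror the patch. The old check skipped an audio chunk only when the exact EOA token id was present, while the new check also skips chunks containing any token id at or above it.

# Hypothetical illustration; eoa_token_id and the token lists are assumptions.
eoa_token_id = 1000

def old_check(token_ids):
    # Old behaviour: skip the chunk only when the exact EOA id is present.
    return eoa_token_id is not None and eoa_token_id in token_ids

def new_check(token_ids):
    # New behaviour: skip the chunk when any token id reaches the EOA id,
    # which also catches ids greater than the EOA marker itself.
    return eoa_token_id is not None and any(t >= eoa_token_id for t in token_ids)

print(old_check([998, 999]), new_check([998, 999]))    # False False
print(old_check([999, 1000]), new_check([999, 1000]))  # True  True
print(old_check([999, 1001]), new_check([999, 1001]))  # False True  -- id above the EOA id now also ends the audio segment

The second hunk updates the test fixture to pass flat token id lists (`[11, 22]`) rather than nested lists (`[[11, 22]]`), matching the per-token comparison in the new check.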