diff --git a/fastdeploy/entrypoints/openai/response_processors.py b/fastdeploy/entrypoints/openai/response_processors.py
index ad54d2030..6e7138d07 100644
--- a/fastdeploy/entrypoints/openai/response_processors.py
+++ b/fastdeploy/entrypoints/openai/response_processors.py
@@ -115,7 +115,9 @@ class ChatResponseProcessor:
                     )
                     yield response
                 elif decode_type == 2:  # audio
-                    if self.eoa_token_id is not None and self.eoa_token_id in token_ids:
+                    if self.eoa_token_id is not None and any(
+                        token_id >= self.eoa_token_id for token_id in token_ids
+                    ):
                         continue
                     if req_id in self._audio_buffer:
                         self._audio_buffer[req_id].append(token_ids)
diff --git a/tests/entrypoints/openai/test_response_processors.py b/tests/entrypoints/openai/test_response_processors.py
index bfdc0010d..4e785af33 100644
--- a/tests/entrypoints/openai/test_response_processors.py
+++ b/tests/entrypoints/openai/test_response_processors.py
@@ -60,9 +60,9 @@ class TestChatResponseProcessor(unittest.IsolatedAsyncioTestCase):
         """multimodal disabled, requests go directly through data_processor"""
         processor = ChatResponseProcessor(self.mock_data_processor)
         request_outputs = [
-            {"request_id": "req1", "outputs": {"decode_type": 2, "token_ids": [[11, 22]]}},
+            {"request_id": "req1", "outputs": {"decode_type": 2, "token_ids": [11, 22]}},
             {"request_id": "req1", "outputs": {"decode_type": 0, "token_ids": [1]}},
-            {"request_id": "req1", "outputs": {"decode_type": 2, "token_ids": [[11, 22]]}},
+            {"request_id": "req1", "outputs": {"decode_type": 2, "token_ids": [11, 22]}},
             {"request_id": "req1", "outputs": {"decode_type": 0, "token_ids": [2]}},
         ]
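
A minimal standalone sketch of the behavioural difference introduced by the first hunk. The `eoa_token_id` value and the sample token lists below are made up for illustration; only the two checks mirror the patch. The old check skipped an audio chunk only when the exact EOA token id was present, while the new check also skips chunks containing any token id at or above it.

# Hypothetical illustration; eoa_token_id and the token lists are assumptions.
eoa_token_id = 1000

def old_check(token_ids):
    # Old behaviour: skip the chunk only when the exact EOA id is present.
    return eoa_token_id is not None and eoa_token_id in token_ids

def new_check(token_ids):
    # New behaviour: skip the chunk when any token id reaches the EOA id,
    # which also catches ids greater than the EOA marker itself.
    return eoa_token_id is not None and any(t >= eoa_token_id for t in token_ids)

print(old_check([998, 999]), new_check([998, 999]))    # False False
print(old_check([999, 1000]), new_check([999, 1000]))  # True  True
print(old_check([999, 1001]), new_check([999, 1001]))  # False True  -- id above the EOA id now also ends the audio segment

The second hunk updates the test fixture to pass flat token id lists (`[11, 22]`) rather than nested lists (`[[11, 22]]`), matching the per-token comparison in the new check.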