[Feature] Add AsyncTokenizerClient&ChatResponseProcessor with remote encode&decode support. (#3674)
* [Feature] add AsyncTokenizerClient
* add decode_image
* Add response_processors with remote decode support.
* [Feature] add tokenizer_base_url startup argument
* Revert comment removal and restore original content.
* [Feature] Non-streaming requests now support remote image decoding.
* Fix parameter type issue in decode_image call.
* Keep completion_token_ids when return_token_ids = False.
* add copyright
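A minimal sketch of how the remote-decode path is exercised, mirroring the tests added below. The decoder URL, the token-id values, and the offline decode_image stub are placeholder assumptions; ChatResponseProcessor, decoder_client, decode_image, and process_response_chat come from the diff itself.

import asyncio
from unittest.mock import AsyncMock, MagicMock

from fastdeploy.entrypoints.openai.response_processors import ChatResponseProcessor


async def demo():
    # Stand-in data processor; the server wires in the real tokenizer-backed processor.
    data_processor = MagicMock()
    data_processor.process_response_dict = MagicMock(side_effect=lambda response_dict, **_: response_dict)

    processor = ChatResponseProcessor(
        data_processor=data_processor,
        enable_mm_output=True,                     # emit multipart text/image chunks
        eoi_token_id=101032,                       # end-of-image token id (placeholder value)
        eos_token_id=2,                            # end-of-sequence token id (placeholder value)
        decoder_base_url="http://localhost:8188",  # remote image decoder endpoint (placeholder URL)
    )
    # Stub the remote decode call so the sketch runs offline; the real decoder_client is
    # presumably backed by the service configured via decoder_base_url.
    processor.decoder_client.decode_image = AsyncMock(return_value={"http_url": "http://image.url/demo.png"})

    request_outputs = [
        {"request_id": "r0", "outputs": {"decode_type": 0, "token_ids": [1], "text": "hi"}},
        {"request_id": "r0", "outputs": {"decode_type": 1, "token_ids": [[11, 22]]}},  # image tokens
        {"request_id": "r0", "outputs": {"decode_type": 0, "token_ids": [101032], "text": "done"}},
    ]
    async for chunk in processor.process_response_chat(
        request_outputs, stream=True, enable_thinking=False, include_stop_str_in_output=False
    ):
        print(chunk)


asyncio.run(demo())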
tests/entrypoints/openai/test_response_processors.py (new file, 134 lines)
@@ -0,0 +1,134 @@
"""
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""

import unittest
from unittest.mock import AsyncMock, MagicMock

from fastdeploy.entrypoints.openai.response_processors import ChatResponseProcessor


class TestChatResponseProcessor(unittest.IsolatedAsyncioTestCase):

    def setUp(self):
        self.mock_data_processor = MagicMock()
        self.mock_data_processor.process_response_dict = MagicMock(
            side_effect=lambda response_dict, **_: {"processed": True, "raw": response_dict}
        )

    async def asyncSetUp(self):
        self.processor_mm = ChatResponseProcessor(
            data_processor=self.mock_data_processor,
            enable_mm_output=True,
            eoi_token_id=101032,
            eos_token_id=2,
            decoder_base_url="http://fake-decoder",
        )
        self.processor_mm.decoder_client.decode_image = AsyncMock(
            return_value={"http_url": "http://image.url/test.png"}
        )

    async def test_text_only_mode(self):
        """When multimodal output is disabled, responses go straight through data_processor."""
        processor = ChatResponseProcessor(self.mock_data_processor)
        request_outputs = [{"outputs": {"text": "hello"}}]

        results = [
            r
            async for r in processor.process_response_chat(
                request_outputs, stream=False, enable_thinking=False, include_stop_str_in_output=False
            )
        ]

        self.mock_data_processor.process_response_dict.assert_called_once()
        self.assertEqual(results[0]["processed"], True)
        self.assertEqual(results[0]["raw"]["outputs"]["text"], "hello")

    async def test_streaming_text_and_image(self):
        """Streaming mode: text → image → text."""
        request_outputs = [
            {"request_id": "req1", "outputs": {"decode_type": 0, "token_ids": [1], "text": "hi"}},
            {"request_id": "req1", "outputs": {"decode_type": 1, "token_ids": [[11, 22]]}},
            {"request_id": "req1", "outputs": {"decode_type": 0, "token_ids": [101032], "text": "done"}},
        ]

        results = [
            r
            async for r in self.processor_mm.process_response_chat(
                request_outputs, stream=True, enable_thinking=False, include_stop_str_in_output=False
            )
        ]

        # First yield: text
        text_part = results[0]["outputs"]["multipart"][0]
        self.assertEqual(text_part["type"], "text")
        self.assertEqual(text_part["text"], "hi")

        # Second yield: image (the buffered token_ids were stitched together)
        image_part = results[1]["outputs"]["multipart"][0]
        self.assertEqual(image_part["type"], "image")
        self.assertEqual(image_part["url"], "http://image.url/test.png")
        self.assertEqual(results[1]["outputs"]["token_ids"], [[11, 22]])

        # Third yield: text
        text_part = results[2]["outputs"]["multipart"][0]
        self.assertEqual(text_part["type"], "text")
        self.assertEqual(text_part["text"], "done")

    async def test_streaming_buffer_accumulation(self):
        """Streaming mode: decode_type=1 only accumulates into the buffer, nothing is yielded."""
        request_outputs = [{"request_id": "req2", "outputs": {"decode_type": 1, "token_ids": [[33, 44]]}}]

        results = [
            r
            async for r in self.processor_mm.process_response_chat(
                request_outputs, stream=True, enable_thinking=False, include_stop_str_in_output=False
            )
        ]

        self.assertEqual(results, [])
        self.assertEqual(self.processor_mm._mm_buffer, [[33, 44]])

    async def test_non_streaming_accumulate_and_emit(self):
        """Non-streaming mode: the multipart (text + image) output is only emitted once eos_token_id arrives."""
        request_outputs = [
            {"request_id": "req3", "outputs": {"decode_type": 0, "token_ids": [10], "text": "hello"}},
            {"request_id": "req3", "outputs": {"decode_type": 1, "token_ids": [[55, 66]]}},
            {"request_id": "req3", "outputs": {"decode_type": 0, "token_ids": [2], "text": "bye"}},  # eos_token_id
        ]

        results = [
            r
            async for r in self.processor_mm.process_response_chat(
                request_outputs, stream=False, enable_thinking=False, include_stop_str_in_output=False
            )
        ]

        # Only the last output yields a result
        self.assertEqual(len(results), 1)
        multipart = results[0]["outputs"]["multipart"]

        self.assertEqual(multipart[0]["type"], "text")
        self.assertEqual(multipart[0]["text"], "hello")

        self.assertEqual(multipart[1]["type"], "image")
        self.assertEqual(multipart[1]["url"], "http://image.url/test.png")

        self.assertEqual(multipart[2]["type"], "text")
        self.assertEqual(multipart[2]["text"], "bye")


if __name__ == "__main__":
    unittest.main()
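Since the file ends with a __main__ guard, the new tests can be run directly (assuming fastdeploy is importable from the repository root), for example:

python tests/entrypoints/openai/test_response_processors.py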