Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-09-26 20:41:53 +08:00

* [Feature] add AsyncTokenizerClient
* add decode_image
* Add response_processors with remote decode support.
* [Feature] add tokenizer_base_url startup argument
* Revert comment removal and restore original content.
* [Feature] Non-streaming requests now support remote image decoding.
* Fix parameter type issue in decode_image call.
* Keep completion_token_ids when return_token_ids = False.
* add copyright
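The features listed above add a remote image decoding path to the OpenAI-compatible chat responses. As a rough usage sketch (not an authoritative example; it simply mirrors the fixtures used in the test file below, with a placeholder decoder URL and the remote call stubbed out), a ChatResponseProcessor configured with enable_mm_output and decoder_base_url buffers image tokens (decode_type=1) and asks its decoder_client.decode_image coroutine to turn them into an HTTP image URL:

import asyncio
from unittest.mock import AsyncMock, MagicMock

from fastdeploy.entrypoints.openai.response_processors import ChatResponseProcessor


async def main():
    # Stand-in data processor, mirroring the mock used in the test below.
    data_processor = MagicMock()
    data_processor.process_response_dict = MagicMock(
        side_effect=lambda response_dict, **_: response_dict
    )

    processor = ChatResponseProcessor(
        data_processor=data_processor,
        enable_mm_output=True,                     # emit multipart (text + image) parts
        eoi_token_id=101032,                       # end-of-image token id, as in the test below
        eos_token_id=2,                            # end-of-sequence token id
        decoder_base_url="http://localhost:8080",  # placeholder for the remote image decoder
    )
    # Stub the remote decode call so the sketch runs without a decoder service.
    processor.decoder_client.decode_image = AsyncMock(
        return_value={"http_url": "http://image.url/example.png"}
    )

    # decode_type 0 carries text tokens; decode_type 1 carries image tokens, which are
    # buffered and handed to the remote decoder once the image is complete.
    request_outputs = [
        {"request_id": "demo", "outputs": {"decode_type": 0, "token_ids": [1], "text": "hi"}},
        {"request_id": "demo", "outputs": {"decode_type": 1, "token_ids": [[11, 22]]}},
        {"request_id": "demo", "outputs": {"decode_type": 0, "token_ids": [101032], "text": "done"}},
    ]

    async for chunk in processor.process_response_chat(
        request_outputs, stream=True, enable_thinking=False, include_stop_str_in_output=False
    ):
        print(chunk["outputs"]["multipart"])


if __name__ == "__main__":
    asyncio.run(main())

In a real deployment the decoder_client would call the service behind decoder_base_url; the AsyncMock stub above exists only so the sketch runs standalone.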
135 lines
5.3 KiB
Python
"""
|
||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
||
#
|
||
# Licensed under the Apache License, Version 2.0 (the "License"
|
||
# you may not use this file except in compliance with the License.
|
||
# You may obtain a copy of the License at
|
||
#
|
||
# http://www.apache.org/licenses/LICENSE-2.0
|
||
#
|
||
# Unless required by applicable law or agreed to in writing, software
|
||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
# See the License for the specific language governing permissions and
|
||
# limitations under the License.
|
||
"""
|
||
|
||
import unittest
from unittest.mock import AsyncMock, MagicMock

from fastdeploy.entrypoints.openai.response_processors import ChatResponseProcessor


class TestChatResponseProcessor(unittest.IsolatedAsyncioTestCase):

    def setUp(self):
        self.mock_data_processor = MagicMock()
        self.mock_data_processor.process_response_dict = MagicMock(
            side_effect=lambda response_dict, **_: {"processed": True, "raw": response_dict}
        )

    async def asyncSetUp(self):
        self.processor_mm = ChatResponseProcessor(
            data_processor=self.mock_data_processor,
            enable_mm_output=True,
            eoi_token_id=101032,
            eos_token_id=2,
            decoder_base_url="http://fake-decoder",
        )
        # Stub the remote image decoder so no HTTP request is made during tests.
        self.processor_mm.decoder_client.decode_image = AsyncMock(
            return_value={"http_url": "http://image.url/test.png"}
        )

    async def test_text_only_mode(self):
        """With multimodal output disabled, responses go directly through data_processor."""
        processor = ChatResponseProcessor(self.mock_data_processor)
        request_outputs = [{"outputs": {"text": "hello"}}]

        results = [
            r
            async for r in processor.process_response_chat(
                request_outputs, stream=False, enable_thinking=False, include_stop_str_in_output=False
            )
        ]

        self.mock_data_processor.process_response_dict.assert_called_once()
        self.assertEqual(results[0]["processed"], True)
        self.assertEqual(results[0]["raw"]["outputs"]["text"], "hello")

    async def test_streaming_text_and_image(self):
        """Streaming mode: text → image → text."""
        request_outputs = [
            {"request_id": "req1", "outputs": {"decode_type": 0, "token_ids": [1], "text": "hi"}},
            {"request_id": "req1", "outputs": {"decode_type": 1, "token_ids": [[11, 22]]}},
            {"request_id": "req1", "outputs": {"decode_type": 0, "token_ids": [101032], "text": "done"}},
        ]

        results = [
            r
            async for r in self.processor_mm.process_response_chat(
                request_outputs, stream=True, enable_thinking=False, include_stop_str_in_output=False
            )
        ]

        # First yield: text
        text_part = results[0]["outputs"]["multipart"][0]
        self.assertEqual(text_part["type"], "text")
        self.assertEqual(text_part["text"], "hi")

        # Second yield: image (the buffered image token_ids are decoded together)
        image_part = results[1]["outputs"]["multipart"][0]
        self.assertEqual(image_part["type"], "image")
        self.assertEqual(image_part["url"], "http://image.url/test.png")
        self.assertEqual(results[1]["outputs"]["token_ids"], [[11, 22]])

        # Third yield: text
        text_part = results[2]["outputs"]["multipart"][0]
        self.assertEqual(text_part["type"], "text")
        self.assertEqual(text_part["text"], "done")

    async def test_streaming_buffer_accumulation(self):
        """Streaming mode: decode_type=1 only accumulates into the buffer, nothing is yielded."""
        request_outputs = [{"request_id": "req2", "outputs": {"decode_type": 1, "token_ids": [[33, 44]]}}]

        results = [
            r
            async for r in self.processor_mm.process_response_chat(
                request_outputs, stream=True, enable_thinking=False, include_stop_str_in_output=False
            )
        ]

        self.assertEqual(results, [])
        self.assertEqual(self.processor_mm._mm_buffer, [[33, 44]])

    async def test_non_streaming_accumulate_and_emit(self):
        """Non-streaming mode: multipart (text + image) is emitted only once eos_token_id arrives."""
        request_outputs = [
            {"request_id": "req3", "outputs": {"decode_type": 0, "token_ids": [10], "text": "hello"}},
            {"request_id": "req3", "outputs": {"decode_type": 1, "token_ids": [[55, 66]]}},
            {"request_id": "req3", "outputs": {"decode_type": 0, "token_ids": [2], "text": "bye"}},  # eos_token_id
        ]

        results = [
            r
            async for r in self.processor_mm.process_response_chat(
                request_outputs, stream=False, enable_thinking=False, include_stop_str_in_output=False
            )
        ]

        # Only the final output yields a result
        self.assertEqual(len(results), 1)
        multipart = results[0]["outputs"]["multipart"]

        self.assertEqual(multipart[0]["type"], "text")
        self.assertEqual(multipart[0]["text"], "hello")

        self.assertEqual(multipart[1]["type"], "image")
        self.assertEqual(multipart[1]["url"], "http://image.url/test.png")

        self.assertEqual(multipart[2]["type"], "text")
        self.assertEqual(multipart[2]["text"], "bye")


if __name__ == "__main__":
    unittest.main()