From 1531004085c266f28d59c957d3752ddb6992f464 Mon Sep 17 00:00:00 2001
From: guozhuangzhuang <61482277+zhuangzhuang12@users.noreply.github.com>
Date: Wed, 22 Oct 2025 14:59:05 +0800
Subject: [PATCH] fix image token output (#4487)

* fix

* fix

* fix

* add test case

* add test case

* add test case
---
 NOTE(review): the line structure of this patch was reconstructed from a
 whitespace-collapsed copy. The absolute indentation of the
 common_engine.py hunk is inferred from the if/elif/else nesting and must
 be confirmed against the target file before applying. Test docstrings and
 comments were translated to English, so the recorded blob ids of the two
 new test files no longer match their content.

 fastdeploy/engine/common_engine.py |  2 ++
 tests/engine/test_decode_token.py  | 51 ++++++++++++++++++++++++++++++
 tests/engine/test_send_tokens.py   | 47 +++++++++++++++++++++++++++
 3 files changed, 100 insertions(+)
 create mode 100644 tests/engine/test_decode_token.py
 create mode 100644 tests/engine/test_send_tokens.py

diff --git a/fastdeploy/engine/common_engine.py b/fastdeploy/engine/common_engine.py
index 9f8eb51cf..8e7ae5709 100644
--- a/fastdeploy/engine/common_engine.py
+++ b/fastdeploy/engine/common_engine.py
@@ -718,6 +718,8 @@ class EngineSevice:
                             content.outputs.token_ids = token_ids
                             content.outputs.text = delta_text
                             new_contents.append(content)
+                        elif content.finished:
+                            new_contents.append(content)
                         else:
                             llm_logger.warning(
                                 f"current tokens need to accumulate, req_id: {request_id} {content.outputs.token_ids}"
diff --git a/tests/engine/test_decode_token.py b/tests/engine/test_decode_token.py
new file mode 100644
index 000000000..cc76f245d
--- /dev/null
+++ b/tests/engine/test_decode_token.py
@@ -0,0 +1,51 @@
+import unittest
+from unittest.mock import MagicMock, patch
+
+
+class DummyDataProcessor:
+    def __init__(self):
+        self.decode_status = {}
+
+    def ids2tokens(self, token_ids, req_id):
+        return "", [], None
+
+
+class TestDecodeToken(unittest.TestCase):
+    @patch("fastdeploy.engine.common_engine.EngineSevice.__init__", return_value=None)
+    def setUp(self, mock_init):
+        from fastdeploy.engine.common_engine import EngineSevice
+
+        self.obj = EngineSevice(None)
+        self.obj.data_processor = DummyDataProcessor()
+
+    @patch("fastdeploy.engine.common_engine.envs.FD_ENABLE_RETURN_TEXT", True)
+    def test_decode_token_with_text(self):
+        """Test: env flag enabled + non-empty delta_text returned"""
+        self.obj.data_processor.ids2tokens = MagicMock(return_value=("hello", [10, 11, 12, 13], None))
+        self.obj.data_processor.decode_status = {"req_1": (1, 3)}
+
+        delta_text, token_ids = self.obj._decode_token([1, 2, 3], "req_1", is_end=False)
+
+        assert delta_text == "hello"
+        assert token_ids == [11, 12]
+
+    @patch("fastdeploy.engine.common_engine.envs.FD_ENABLE_RETURN_TEXT", True)
+    def test_decode_token_empty_text(self):
+        """Test: env flag enabled + empty delta_text returned"""
+        self.obj.data_processor.ids2tokens = MagicMock(return_value=("", [10, 11, 12], None))
+        self.obj.data_processor.decode_status = {"req_1": (0, 2)}
+
+        delta_text, token_ids = self.obj._decode_token([1, 2], "req_1", is_end=False)
+
+        assert delta_text == ""
+        assert token_ids == []
+
+    @patch("fastdeploy.engine.common_engine.envs.FD_ENABLE_RETURN_TEXT", True)
+    def test_decode_token_with_is_end(self):
+        """Test: the decode_status entry is removed when is_end=True"""
+        self.obj.data_processor.ids2tokens = MagicMock(return_value=("bye", [1, 2, 3, 4], None))
+        self.obj.data_processor.decode_status = {"req_2": (0, 2)}
+
+        delta_text, token_ids = self.obj._decode_token([1, 2, 3], "req_2", is_end=True)
+
+        assert "req_2" not in self.obj.data_processor.decode_status
diff --git a/tests/engine/test_send_tokens.py b/tests/engine/test_send_tokens.py
new file mode 100644
index 000000000..b090cb357
--- /dev/null
+++ b/tests/engine/test_send_tokens.py
@@ -0,0 +1,47 @@
+import time
+from unittest import TestCase
+from unittest.mock import MagicMock, patch
+
+
+class TestZmqSendGeneratedTokens(TestCase):
+    @patch("time.sleep", return_value=None)
+    @patch("fastdeploy.engine.common_engine.EngineSevice.__init__", return_value=None)
+    def setUp(self, mock_init, mock_sleep):
+        from fastdeploy.engine.common_engine import EngineSevice
+
+        self.obj = EngineSevice(None)
+        self.obj.running = True
+
+        # mock the dependent components
+        self.obj.scheduler = MagicMock()
+        self.obj.send_response_server = MagicMock()
+        self.obj._decode_token = MagicMock()
+        self.obj._decode_token.return_value = ("decoded_text", [101, 102])
+        self.obj.llm_logger = MagicMock()
+
+    def test_zmq_send_generated_tokens_normal_case(self):
+        mock_output = MagicMock()
+        mock_output.outputs.decode_type = 0
+        mock_output.outputs.token_ids = [1, 2, 3]
+        mock_output.finished = True
+
+        self.obj.scheduler.get_results.side_effect = [
+            {"req_1": [mock_output]},
+            {},
+        ]
+
+        def stop_running():
+            time.sleep(0.01)
+            self.obj.running = False
+
+        import threading
+
+        threading.Thread(target=stop_running).start()
+
+        self.obj._zmq_send_generated_tokens()
+
+        self.obj.send_response_server.send_response.assert_called_once()
+        args, kwargs = self.obj.send_response_server.send_response.call_args
+        assert args[0] == "req_1"
+        assert isinstance(args[1], list)
+        assert args[1][0].outputs.text == "decoded_text"