[CI]【Hackathon 9th Sprint No.22】功能模块 fastdeploy/input/ernie4_5_vl_processor/ernie4_5_vl_processor.py 单测补充 (#5263)

* test: improve ernie4_5_vl_processor.py test coverage

* update

* improve coverage

* update

* fix: correct test expectation for thinking_mode false in test_ernie_vl_processor

* remove test_process_request_dict_comprehensive test case

---------

Co-authored-by: CSWYF3634076 <wangyafeng@baidu.com>
This commit is contained in:
kesmeey
2025-12-15 14:00:53 +08:00
committed by GitHub
parent 722de5ace1
commit 4bd991aa17

View File

@@ -1,4 +1,4 @@
"""
"""
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -69,11 +69,32 @@ class TestErnie4_5VLProcessorProcessResponseDictStreaming(unittest.TestCase):
return request
def mock_pack_outputs(outputs):
return outputs
# Ensure input_ids is numpy array if it exists
result = outputs.copy() if isinstance(outputs, dict) else outputs
if isinstance(result, dict):
if "input_ids" in result and isinstance(result["input_ids"], list):
result["input_ids"] = np.array(result["input_ids"])
if "token_type_ids" in result and isinstance(result["token_type_ids"], list):
result["token_type_ids"] = np.array(result["token_type_ids"])
if "position_ids" in result and isinstance(result["position_ids"], list):
result["position_ids"] = np.array(result["position_ids"])
return result
def mock_prompt_token_ids2outputs(request):
return {
"input_ids": np.array([1, 1, 1]),
"token_type_ids": np.array([0, 0, 0]),
"position_ids": np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2]]),
"images": [],
"grid_thw": [],
"image_type_ids": [],
"cur_position": 3,
}
self.processor._apply_default_parameters = mock_apply_default_parameters
self.processor._check_mm_limits = mock_check_mm_limits
self.processor.ernie4_5_processor.request2ids = mock_request2ids
self.processor.ernie4_5_processor.prompt_token_ids2outputs = mock_prompt_token_ids2outputs
self.processor.pack_outputs = mock_pack_outputs
# Mock reasoning parser
@@ -109,6 +130,308 @@ class TestErnie4_5VLProcessorProcessResponseDictStreaming(unittest.TestCase):
self.processor.process_request_dict(request, max_model_len=512)
self.assertEqual(request["enable_thinking"], True)
def test_init(self):
"""Test __init__ method"""
with patch("fastdeploy.input.ernie4_5_vl_processor.ernie4_5_vl_processor.data_processor_logger"):
mock_dp = MagicMock()
mock_dp.image_patch_id = 1001
mock_dp.spatial_conv_size = 14
mock_dp.tokenizer = MagicMock()
mock_dp.tokenizer.pad_token_id = 0
mock_dp.eval = MagicMock()
with patch("fastdeploy.input.ernie4_5_vl_processor.ernie4_5_vl_processor.DataProcessor") as mock_dp_class:
mock_dp_class.return_value = mock_dp
with patch(
"fastdeploy.input.ernie4_5_vl_processor.ernie4_5_vl_processor.GenerationConfig"
) as mock_gen_config:
mock_gen_config.from_pretrained.return_value = MagicMock()
with patch("paddleformers.trl.llm_utils.get_eos_token_id") as mock_get_eos:
mock_get_eos.return_value = [1, 2]
# Test normal initialization
mock_reasoning_parser_class = MagicMock()
processor = Ernie4_5_VLProcessor(
"model_path",
limit_mm_per_prompt={"image": 2, "video": 1},
mm_processor_kwargs={"spatial_conv_size": 14},
reasoning_parser_obj=lambda tokenizer: mock_reasoning_parser_class,
tool_parser_obj=MagicMock(),
enable_processor_cache=True,
)
self.assertEqual(processor.image_patch_id, 1001)
self.assertEqual(processor.spatial_conv_size, 14)
self.assertIsNotNone(processor.tokenizer)
self.assertIsNotNone(processor.generation_config)
self.assertEqual(processor.eos_token_ids, [1, 2])
self.assertEqual(processor.limit_mm_per_prompt["image"], 2)
self.assertEqual(processor.limit_mm_per_prompt["video"], 1)
mock_dp.eval.assert_called_once()
# Test with generation config exception
mock_gen_config.from_pretrained.side_effect = Exception("Config not found")
processor2 = Ernie4_5_VLProcessor("model_path")
self.assertIsNone(processor2.generation_config)
# Test with reasoning_parser_obj
mock_reasoning_parser = MagicMock()
processor3 = Ernie4_5_VLProcessor(
"model_path", reasoning_parser_obj=lambda tokenizer: mock_reasoning_parser
)
self.assertIsNotNone(processor3.reasoning_parser)
def test_parse_processor_kwargs(self):
"""Test _parse_processor_kwargs with various inputs"""
with patch.object(Ernie4_5_VLProcessor, "__init__", return_value=None):
processor = Ernie4_5_VLProcessor("model_path")
processor._parse_processor_kwargs = Ernie4_5_VLProcessor._parse_processor_kwargs.__get__(
processor, Ernie4_5_VLProcessor
)
# Test with valid kwargs
valid_kwargs = {
"spatial_conv_size": 14,
"temporal_conv_size": 2,
"image_min_pixels": 1000,
"image_max_pixels": 10000,
}
result = processor._parse_processor_kwargs(valid_kwargs)
self.assertEqual(result, valid_kwargs)
# Test with invalid type (implementation catches exception and returns empty dict)
invalid_kwargs = {"spatial_conv_size": "invalid"} # Should be int
result = Ernie4_5_VLProcessor._parse_processor_kwargs(processor, invalid_kwargs)
self.assertEqual(result, {})
# Test with non-dict input (implementation catches exception and returns empty dict)
result = Ernie4_5_VLProcessor._parse_processor_kwargs(processor, "not a dict")
self.assertEqual(result, {})
# Test exception handling with None
with patch("fastdeploy.input.ernie4_5_vl_processor.ernie4_5_vl_processor.data_processor_logger"):
result = processor._parse_processor_kwargs(None)
self.assertEqual(result, {})
def test_parse_limits(self):
"""Test _parse_limits with various inputs"""
with patch.object(Ernie4_5_VLProcessor, "__init__", return_value=None):
processor = Ernie4_5_VLProcessor("model_path")
processor._parse_limits = Ernie4_5_VLProcessor._parse_limits.__get__(processor, Ernie4_5_VLProcessor)
# Test with valid limits
valid_limits = {"image": 5, "video": 3}
result = processor._parse_limits(valid_limits)
self.assertEqual(result["image"], 5)
self.assertEqual(result["video"], 3)
self.assertEqual(result["audio"], 1) # Default value
# Test with empty input (None)
result = processor._parse_limits(None)
self.assertEqual(result["image"], 1)
self.assertEqual(result["video"], 1)
self.assertEqual(result["audio"], 1)
# Test with invalid type (implementation catches exception and returns default limits)
result = Ernie4_5_VLProcessor._parse_limits(processor, "not a dict")
self.assertEqual(result["image"], 1)
self.assertEqual(result["video"], 1)
self.assertEqual(result["audio"], 1)
def test_check_mm_limits(self):
"""Test _check_mm_limits with various inputs"""
with patch.object(Ernie4_5_VLProcessor, "__init__", return_value=None):
processor = Ernie4_5_VLProcessor("model_path")
processor._check_mm_limits = Ernie4_5_VLProcessor._check_mm_limits.__get__(processor, Ernie4_5_VLProcessor)
# Test with dict input (should not raise)
processor.limit_mm_per_prompt = {"image": 2, "video": 1}
mm_data = {"image": [1, 2], "video": [1]}
processor._check_mm_limits(mm_data)
# Test with messages input (should not raise)
messages = [
{"role": "user", "content": [{"type": "image", "data": "img1"}]},
{"role": "user", "content": [{"type": "video", "data": "vid1"}]},
]
processor._check_mm_limits(messages)
# Test when limit is exceeded (should raise ValueError)
processor.limit_mm_per_prompt = {"image": 1, "video": 1}
mm_data = {"image": [1, 2, 3], "video": []} # 3 images, limit is 1
with self.assertRaises(ValueError) as context:
processor._check_mm_limits(mm_data)
self.assertIn("Too many image items", str(context.exception))
def test_process_request(self):
"""Test process_request method"""
from fastdeploy.engine.request import Request
# Mock the process_request_dict method
self.processor.process_request_dict = MagicMock()
# Create a mock Request object
mock_request = MagicMock(spec=Request)
mock_request.to_dict.return_value = {"messages": [{"role": "user", "content": "Hello"}]}
# Mock Request.from_dict to return a mock request
with patch.object(Request, "from_dict") as mock_from_dict:
mock_result_request = MagicMock(spec=Request)
mock_from_dict.return_value = mock_result_request
self.processor.process_request(mock_request, max_model_len=100, chat_template_kwargs={"key": "value"})
# Verify to_dict was called
mock_request.to_dict.assert_called_once()
# Verify process_request_dict was called
self.processor.process_request_dict.assert_called_once()
# Verify from_dict was called
mock_from_dict.assert_called_once()
def test_get_pad_id(self):
"""Test get_pad_id method"""
with patch.object(Ernie4_5_VLProcessor, "__init__", return_value=None):
processor = Ernie4_5_VLProcessor("model_path")
processor.tokenizer = MagicMock()
processor.tokenizer.pad_token_id = 100
processor.get_pad_id = Ernie4_5_VLProcessor.get_pad_id.__get__(processor, Ernie4_5_VLProcessor)
result = processor.get_pad_id()
self.assertEqual(result, 100)
def test_load_tokenizer(self):
"""Test _load_tokenizer method"""
with patch.object(Ernie4_5_VLProcessor, "__init__", return_value=None):
processor = Ernie4_5_VLProcessor("model_path")
mock_tokenizer = MagicMock()
processor.ernie4_5_processor = MagicMock()
processor.ernie4_5_processor.tokenizer = mock_tokenizer
processor._load_tokenizer = Ernie4_5_VLProcessor._load_tokenizer.__get__(processor, Ernie4_5_VLProcessor)
processor._load_tokenizer()
self.assertEqual(processor.tokenizer, mock_tokenizer)
def test_append_completion_tokens(self):
"""Test append_completion_tokens method"""
with patch.object(Ernie4_5_VLProcessor, "__init__", return_value=None):
processor = Ernie4_5_VLProcessor("model_path")
processor.append_completion_tokens = Ernie4_5_VLProcessor.append_completion_tokens.__get__(
processor, Ernie4_5_VLProcessor
)
multimodal_inputs = {
"input_ids": [1, 2, 3],
"token_type_ids": [0, 0, 0],
"position_ids": [[0, 0, 0], [1, 1, 1], [2, 2, 2]],
"cur_position": 3,
}
completion_token_ids = [10, 11, 12]
processor.append_completion_tokens(multimodal_inputs, completion_token_ids)
self.assertEqual(multimodal_inputs["input_ids"], [1, 2, 3, 10, 11, 12])
self.assertEqual(multimodal_inputs["token_type_ids"], [0, 0, 0, 0, 0, 0])
self.assertEqual(len(multimodal_inputs["position_ids"]), 6)
self.assertEqual(multimodal_inputs["cur_position"], 6)
def test_pack_outputs(self):
"""Test pack_outputs with and without images"""
with patch.object(Ernie4_5_VLProcessor, "__init__", return_value=None):
processor = Ernie4_5_VLProcessor("model_path")
processor.image_patch_id = 1001
processor.pack_outputs = Ernie4_5_VLProcessor.pack_outputs.__get__(processor, Ernie4_5_VLProcessor)
# Test with images
outs_with_images = {
"input_ids": [1, 2, 3],
"token_type_ids": [0, 0, 0],
"position_ids": [[0, 0, 0], [1, 1, 1], [2, 2, 2]],
"images": [np.array([[1, 2], [3, 4]])],
"grid_thw": [np.array([[1, 2, 2]])],
"image_type_ids": [0],
}
result = processor.pack_outputs(outs_with_images)
self.assertIsNotNone(result["images"])
self.assertIsNotNone(result["grid_thw"])
self.assertIsNotNone(result["image_type_ids"])
self.assertEqual(result["image_patch_id"], 1001)
self.assertIsInstance(result["input_ids"], np.ndarray)
self.assertIsInstance(result["token_type_ids"], np.ndarray)
self.assertIsInstance(result["position_ids"], np.ndarray)
# Test without images
outs_without_images = {
"input_ids": [1, 2, 3],
"token_type_ids": [0, 0, 0],
"position_ids": [[0, 0, 0], [1, 1, 1], [2, 2, 2]],
"images": [],
"grid_thw": [],
"image_type_ids": [],
}
result = processor.pack_outputs(outs_without_images)
self.assertIsNone(result["images"])
self.assertIsNone(result["grid_thw"])
self.assertIsNone(result["image_type_ids"])
def test_process_response_dict(self):
"""Test process_response_dict with different parameters"""
with patch.object(Ernie4_5_VLProcessor, "__init__", return_value=None):
processor = Ernie4_5_VLProcessor("model_path")
processor.process_response_dict = Ernie4_5_VLProcessor.process_response_dict.__get__(
processor, Ernie4_5_VLProcessor
)
# Test with stream=True
processor.process_response_dict_streaming = MagicMock(return_value={"text": "response"})
response_dict = {"ids": [1, 2, 3]}
result = processor.process_response_dict(response_dict, stream=True)
processor.process_response_dict_streaming.assert_called_once()
self.assertEqual(result, {"text": "response"})
# Test with stream=False
processor.process_response_dict_normal = MagicMock(return_value={"text": "response"})
response_dict = {"ids": [1, 2, 3]}
result = processor.process_response_dict(response_dict, stream=False)
processor.process_response_dict_normal.assert_called_once()
self.assertEqual(result, {"text": "response"})
def test_apply_default_parameters(self):
"""Test _apply_default_parameters with dict and object request"""
with patch.object(Ernie4_5_VLProcessor, "__init__", return_value=None):
processor = Ernie4_5_VLProcessor("model_path")
processor.generation_config = MagicMock()
processor.generation_config.top_p = 0.8
processor.generation_config.temperature = 0.9
processor._apply_default_parameters = Ernie4_5_VLProcessor._apply_default_parameters.__get__(
processor, Ernie4_5_VLProcessor
)
# Test with dict request
request = {}
result = processor._apply_default_parameters(request)
self.assertEqual(result["top_p"], 0.8)
self.assertEqual(result["temperature"], 0.9)
# Test with object request
class MockRequest:
def __init__(self):
self.top_p = None
self.temperature = None
def get(self, key):
return getattr(self, key, None)
def set(self, key, value):
setattr(self, key, value)
request = MockRequest()
result = processor._apply_default_parameters(request)
self.assertEqual(result.top_p, 0.8)
class TestDataProcessorTargetMethods(unittest.TestCase):
def setUp(self):