Fix rollout_model init (#2881)

This commit is contained in:
Yuanle Liu
2025-07-17 13:36:21 +08:00
committed by GitHub
parent 1f15ca21e4
commit dbb9e2506b
9 changed files with 76 additions and 312 deletions

View File

@@ -47,14 +47,12 @@ from fastdeploy.platforms import current_platform
if not current_platform.is_dcu():
from fastdeploy.spec_decode import MTPProposer, NgramProposer
from fastdeploy.input.ernie_tokenizer import ErnieBotTokenizer
from fastdeploy.input.mm_processor import DataProcessor
from fastdeploy.model_executor.forward_meta import ForwardMeta
from fastdeploy.model_executor.models.ernie4_5_vl.modeling_resampler import \
ScatterOp
from fastdeploy.worker.model_runner_base import ModelRunnerBase
from fastdeploy.worker.output import ModelOutputData, ModelRunnerOutput
from fastdeploy.worker.utils import check_safetensors_model
class GPUModelRunner(ModelRunnerBase):
@@ -81,16 +79,7 @@ class GPUModelRunner(ModelRunnerBase):
# VL model config:
if self.enable_mm:
model_path = os.path.dirname(self.parallel_config.model_name_or_path)
self.is_safetensors_model = check_safetensors_model(
self.parallel_config.model_name_or_path)
if not self.is_safetensors_model:
self.tokenizer_path = self.image_preprocessor_path = model_path
else:
self.tokenizer_path = self.parallel_config.model_name_or_path
self.image_preprocessor_path = self.parallel_config.model_name_or_path
self.vision_model_name_or_path = os.path.join(
model_path, "DFNRopeVisionTransformer")
self._init_image_preprocess()
self.amp_black = [
"reduce_sum",
@@ -734,8 +723,6 @@ class GPUModelRunner(ModelRunnerBase):
f"Starting to load model {self.model_config.architectures[0]}")
time_before_load = time.perf_counter()
# 1. Load original model
if self.enable_mm:
self.load_mm_config_and_image_preprocess()
self.model = get_model_from_loader(fd_config=self.fd_config)
# 1.1 Load RL dynamic model
if self.fd_config.load_config.dynamic_load_weight:
@@ -1440,8 +1427,8 @@ class GPUModelRunner(ModelRunnerBase):
def _init_image_preprocess(self) -> None:
processor = DataProcessor(
tokenizer_name=self.tokenizer_path,
image_preprocessor_name=str(self.image_preprocessor_path),
tokenizer_name=self.parallel_config.model_name_or_path,
image_preprocessor_name=str(self.parallel_config.model_name_or_path),
)
processor.eval()
image_preprocess = processor.image_preprocessor
@@ -1459,31 +1446,6 @@ class GPUModelRunner(ModelRunnerBase):
-1)
self.image_preprocess = image_preprocess
def load_mm_config_and_image_preprocess(self) -> None:
tokenizer = ErnieBotTokenizer.from_pretrained(
self.tokenizer_path,
model_max_length=self.parallel_config.max_model_len,
padding_side="right",
use_fast=False,
)
tokenizer.ignored_index = -100
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.unk_token
self.fd_config.model_config.tensor_parallel_degree = self.parallel_config.tensor_parallel_size
self.fd_config.model_config.tensor_parallel_rank = self.parallel_config.tensor_parallel_rank
vision_config = self.fd_config.model_config.vision_config
vision_config.dtype = self.fd_config.model_config.dtype
vision_config.tensor_parallel_degree = self.parallel_config.tensor_parallel_size
vision_config.tensor_parallel_rank = self.parallel_config.tensor_parallel_rank
self.fd_config.model_config.im_patch_id = tokenizer.get_vocab()[
"<|IMAGE_PLACEHOLDER|>"
]
self.fd_config.model_config.think_end_id = tokenizer.get_vocab()["</think>"]
self.fd_config.model_config.sequence_parallel = self.parallel_config.sequence_parallel
self.model_config = self.fd_config.model_config
self._init_image_preprocess()
def _preprocess_mm_task(self, one: dict) -> None:
"""process batch"""