Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-04 08:16:42 +08:00
Fix rollout_model init (#2881)
@@ -47,14 +47,12 @@ from fastdeploy.platforms import current_platform
if not current_platform.is_dcu():
    from fastdeploy.spec_decode import MTPProposer, NgramProposer

from fastdeploy.input.ernie_tokenizer import ErnieBotTokenizer
from fastdeploy.input.mm_processor import DataProcessor
from fastdeploy.model_executor.forward_meta import ForwardMeta
from fastdeploy.model_executor.models.ernie4_5_vl.modeling_resampler import \
    ScatterOp
from fastdeploy.worker.model_runner_base import ModelRunnerBase
from fastdeploy.worker.output import ModelOutputData, ModelRunnerOutput
from fastdeploy.worker.utils import check_safetensors_model


class GPUModelRunner(ModelRunnerBase):
@@ -81,16 +79,7 @@ class GPUModelRunner(ModelRunnerBase):

        # VL model config:
        if self.enable_mm:
            model_path = os.path.dirname(self.parallel_config.model_name_or_path)
            self.is_safetensors_model = check_safetensors_model(
                self.parallel_config.model_name_or_path)
            if not self.is_safetensors_model:
                self.tokenizer_path = self.image_preprocessor_path = model_path
            else:
                self.tokenizer_path = self.parallel_config.model_name_or_path
                self.image_preprocessor_path = self.parallel_config.model_name_or_path
            self.vision_model_name_or_path = os.path.join(
                model_path, "DFNRopeVisionTransformer")
            self._init_image_preprocess()

        self.amp_black = [
            "reduce_sum",
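For reference, the VL path setup in the block above can be read as a standalone sketch. The helper name resolve_mm_paths below is hypothetical; check_safetensors_model and the "DFNRopeVisionTransformer" subdirectory come from the code itself:

import os

from fastdeploy.worker.utils import check_safetensors_model


def resolve_mm_paths(model_name_or_path: str):
    """Hypothetical helper mirroring the tokenizer/image-preprocessor path logic above."""
    model_path = os.path.dirname(model_name_or_path)
    if not check_safetensors_model(model_name_or_path):
        # Non-safetensors checkpoints: tokenizer and image preprocessor are
        # looked up in the parent directory of the weights.
        tokenizer_path = image_preprocessor_path = model_path
    else:
        # Safetensors checkpoints: everything resolves from the model path itself.
        tokenizer_path = image_preprocessor_path = model_name_or_path
    vision_model_name_or_path = os.path.join(model_path,
                                             "DFNRopeVisionTransformer")
    return tokenizer_path, image_preprocessor_path, vision_model_name_or_path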
@@ -734,8 +723,6 @@ class GPUModelRunner(ModelRunnerBase):
            f"Starting to load model {self.model_config.architectures[0]}")
        time_before_load = time.perf_counter()
        # 1. Load original model
        if self.enable_mm:
            self.load_mm_config_and_image_preprocess()
        self.model = get_model_from_loader(fd_config=self.fd_config)
        # 1.1 Load RL dynamic model
        if self.fd_config.load_config.dynamic_load_weight:
@@ -1440,8 +1427,8 @@ class GPUModelRunner(ModelRunnerBase):

    def _init_image_preprocess(self) -> None:
        processor = DataProcessor(
            tokenizer_name=self.tokenizer_path,
            image_preprocessor_name=str(self.image_preprocessor_path),
            tokenizer_name=self.parallel_config.model_name_or_path,
            image_preprocessor_name=str(self.parallel_config.model_name_or_path),
        )
        processor.eval()
        image_preprocess = processor.image_preprocessor
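This hunk appears to switch the DataProcessor arguments from the separately tracked self.tokenizer_path / self.image_preprocessor_path to the model path itself. A minimal sketch of how the method presumably reads after the change (the resize handling shown in the next hunk is elided):

    def _init_image_preprocess(self) -> None:
        # Both names now come straight from the model path, so the separate
        # tokenizer_path / image_preprocessor_path bookkeeping set up in the
        # __init__ hunk above is presumably no longer needed.
        processor = DataProcessor(
            tokenizer_name=self.parallel_config.model_name_or_path,
            image_preprocessor_name=str(self.parallel_config.model_name_or_path),
        )
        processor.eval()
        image_preprocess = processor.image_preprocessor
        ...
        self.image_preprocess = image_preprocess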
@@ -1459,31 +1446,6 @@ class GPUModelRunner(ModelRunnerBase):
            -1)
        self.image_preprocess = image_preprocess

    def load_mm_config_and_image_preprocess(self) -> None:
        tokenizer = ErnieBotTokenizer.from_pretrained(
            self.tokenizer_path,
            model_max_length=self.parallel_config.max_model_len,
            padding_side="right",
            use_fast=False,
        )
        tokenizer.ignored_index = -100
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.unk_token

        self.fd_config.model_config.tensor_parallel_degree = self.parallel_config.tensor_parallel_size
        self.fd_config.model_config.tensor_parallel_rank = self.parallel_config.tensor_parallel_rank
        vision_config = self.fd_config.model_config.vision_config
        vision_config.dtype = self.fd_config.model_config.dtype
        vision_config.tensor_parallel_degree = self.parallel_config.tensor_parallel_size
        vision_config.tensor_parallel_rank = self.parallel_config.tensor_parallel_rank
        self.fd_config.model_config.im_patch_id = tokenizer.get_vocab()[
            "<|IMAGE_PLACEHOLDER|>"
        ]
        self.fd_config.model_config.think_end_id = tokenizer.get_vocab()["</think>"]
        self.fd_config.model_config.sequence_parallel = self.parallel_config.sequence_parallel
        self.model_config = self.fd_config.model_config
        self._init_image_preprocess()

    def _preprocess_mm_task(self, one: dict) -> None:
        """process batch"""
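The load_mm_config_and_image_preprocess method shown above wires multimodal fields of model_config from the tokenizer vocabulary. As a rough, self-contained illustration of that lookup (the model path is a placeholder; the token strings are taken from the code above):

from fastdeploy.input.ernie_tokenizer import ErnieBotTokenizer

# Placeholder path; in the runner this comes from the parallel config.
tokenizer = ErnieBotTokenizer.from_pretrained(
    "/path/to/ernie-vl-model",
    padding_side="right",
    use_fast=False,
)
vocab = tokenizer.get_vocab()
im_patch_id = vocab["<|IMAGE_PLACEHOLDER|>"]  # token id used for image patches
think_end_id = vocab["</think>"]              # token id marking end of thinking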