[Feature] Optim PaddleOCR-VL (#4873)

* [Feature] Optim PaddleOCR-VL

* fix bug
This commit is contained in:
ming1753
2025-11-07 14:56:44 +08:00
committed by GitHub
parent bbe0820555
commit cba185f1fe
12 changed files with 535 additions and 112 deletions

View File

@@ -22,7 +22,6 @@ import paddle
import paddle.nn as nn
from paddleformers.transformers import PretrainedModel
from fastdeploy import envs
from fastdeploy.config import FDConfig
from fastdeploy.model_executor.forward_meta import ForwardMeta
from fastdeploy.model_executor.graph_optimization.decorator import (
@@ -136,12 +135,8 @@ class PaddleOCRVLForConditionalGeneration(ModelForCasualLM):
)
# Persistent buffers for CUDA graphs.
if envs.FD_ENABLE_MAX_PREFILL:
max_length = fd_config.scheduler_config.max_num_seqs * fd_config.model_config.max_model_len
else:
max_length = fd_config.model_config.max_model_len
self._input_embeddings = paddle.zeros(
[max_length, fd_config.model_config.hidden_size],
self._decoder_input_embeddings = paddle.zeros(
[fd_config.scheduler_config.max_num_seqs, fd_config.model_config.hidden_size],
dtype=fd_config.model_config.dtype,
)
@@ -247,12 +242,19 @@ class PaddleOCRVLForConditionalGeneration(ModelForCasualLM):
input_embeddings = self.get_input_embeddings(
ids_remove_padding=ids_remove_padding, image_features=image_features
)
self._input_embeddings.copy_(input_embeddings, False)
hidden_states = self.model(
input_embeddings=self._input_embeddings,
forward_meta=forward_meta,
)
if forward_meta.step_use_cudagraph:
self._decoder_input_embeddings.copy_(input_embeddings, False)
hidden_states = self.model(
input_embeddings=self._decoder_input_embeddings,
forward_meta=forward_meta,
)
else:
hidden_states = self.model(
input_embeddings=input_embeddings,
forward_meta=forward_meta,
)
return hidden_states