mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
Optimizing the performance of think length limit using custom operators (#4279)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
Publish Job / publish_pre_check (push) Has been cancelled
Publish Job / print_publish_pre_check_outputs (push) Has been cancelled
Publish Job / FD-Clone-Linux (push) Has been cancelled
Publish Job / Show Code Archive Output (push) Has been cancelled
Publish Job / BUILD_SM8090 (push) Has been cancelled
Publish Job / BUILD_SM8689 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8090 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8689 (push) Has been cancelled
Publish Job / Run FD Image Build (push) Has been cancelled
Publish Job / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
Publish Job / Run FastDeploy LogProb Tests (push) Has been cancelled
Publish Job / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
Publish Job / Run Base Tests (push) Has been cancelled
Publish Job / Run Accuracy Tests (push) Has been cancelled
Publish Job / Run Stable Tests (push) Has been cancelled
CI Images Build / FD-Clone-Linux (push) Has been cancelled
CI Images Build / Show Code Archive Output (push) Has been cancelled
CI Images Build / CI Images Build (push) Has been cancelled
CI Images Build / BUILD_SM8090 (push) Has been cancelled
CI Images Build / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
CI Images Build / Run FastDeploy LogProb Tests (push) Has been cancelled
CI Images Build / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
CI Images Build / Run Base Tests (push) Has been cancelled
CI Images Build / Run Accuracy Tests (push) Has been cancelled
CI Images Build / Run Stable Tests (push) Has been cancelled
CI Images Build / Publish Docker Images Pre Check (push) Has been cancelled
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
Publish Job / publish_pre_check (push) Has been cancelled
Publish Job / print_publish_pre_check_outputs (push) Has been cancelled
Publish Job / FD-Clone-Linux (push) Has been cancelled
Publish Job / Show Code Archive Output (push) Has been cancelled
Publish Job / BUILD_SM8090 (push) Has been cancelled
Publish Job / BUILD_SM8689 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8090 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8689 (push) Has been cancelled
Publish Job / Run FD Image Build (push) Has been cancelled
Publish Job / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
Publish Job / Run FastDeploy LogProb Tests (push) Has been cancelled
Publish Job / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
Publish Job / Run Base Tests (push) Has been cancelled
Publish Job / Run Accuracy Tests (push) Has been cancelled
Publish Job / Run Stable Tests (push) Has been cancelled
CI Images Build / FD-Clone-Linux (push) Has been cancelled
CI Images Build / Show Code Archive Output (push) Has been cancelled
CI Images Build / CI Images Build (push) Has been cancelled
CI Images Build / BUILD_SM8090 (push) Has been cancelled
CI Images Build / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
CI Images Build / Run FastDeploy LogProb Tests (push) Has been cancelled
CI Images Build / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
CI Images Build / Run Base Tests (push) Has been cancelled
CI Images Build / Run Accuracy Tests (push) Has been cancelled
CI Images Build / Run Stable Tests (push) Has been cancelled
CI Images Build / Publish Docker Images Pre Check (push) Has been cancelled
* delete impl * delete min_length&max_length * support limit thinking content strategy * fix * fix * fix * update * fix set_value_by_flags_and_idx * fix * fix * fix * fix * update * fix * fix * fix typo * fix ci * fix * fix * support mtp * fix * fix * update * update
This commit is contained in:
@@ -28,7 +28,6 @@ from paddle.distributed import fleet
|
||||
from fastdeploy import envs
|
||||
from fastdeploy.config import (
|
||||
CacheConfig,
|
||||
DecodingConfig,
|
||||
DeviceConfig,
|
||||
EarlyStopConfig,
|
||||
ErnieArchitectures,
|
||||
@@ -41,7 +40,6 @@ from fastdeploy.config import (
|
||||
SpeculativeConfig,
|
||||
StructuredOutputsConfig,
|
||||
)
|
||||
from fastdeploy.input.ernie4_5_tokenizer import Ernie4_5Tokenizer
|
||||
from fastdeploy.inter_communicator import EngineWorkerQueue as TaskQueue
|
||||
from fastdeploy.inter_communicator import ExistTaskStatus, IPCSignal, ModelWeightsStatus
|
||||
from fastdeploy.model_executor.layers.quantization import parse_quant_config
|
||||
@@ -117,25 +115,9 @@ def init_distributed_environment(seed: int = 20) -> Tuple[int, int]:
|
||||
def update_fd_config_for_mm(fd_config: FDConfig) -> None:
|
||||
architectures = fd_config.model_config.architectures
|
||||
if fd_config.model_config.enable_mm and ErnieArchitectures.contains_ernie_arch(architectures):
|
||||
tokenizer = Ernie4_5Tokenizer.from_pretrained(
|
||||
fd_config.model_config.model,
|
||||
model_max_length=fd_config.model_config.max_model_len,
|
||||
padding_side="right",
|
||||
use_fast=False,
|
||||
)
|
||||
tokenizer.ignored_index = -100
|
||||
if tokenizer.pad_token is None:
|
||||
tokenizer.pad_token = tokenizer.unk_token
|
||||
|
||||
fd_config.model_config.tensor_parallel_degree = fd_config.parallel_config.tensor_parallel_size
|
||||
fd_config.model_config.tensor_parallel_rank = fd_config.parallel_config.tensor_parallel_rank
|
||||
vision_config = fd_config.model_config.vision_config
|
||||
vision_config.dtype = fd_config.model_config.dtype
|
||||
# vision_config.tensor_parallel_degree = fd_config.parallel_config.tensor_parallel_size
|
||||
# vision_config.tensor_parallel_rank = fd_config.parallel_config.tensor_parallel_rank
|
||||
fd_config.model_config.im_patch_id = tokenizer.get_vocab()["<|IMAGE_PLACEHOLDER|>"]
|
||||
fd_config.model_config.think_end_id = tokenizer.get_vocab()["</think>"]
|
||||
fd_config.model_config.sequence_parallel = fd_config.parallel_config.sequence_parallel
|
||||
fd_config.model_config.vision_config.dtype = fd_config.model_config.dtype
|
||||
|
||||
|
||||
class PaddleDisWorkerProc:
|
||||
@@ -577,6 +559,8 @@ def parse_args():
|
||||
)
|
||||
parser.add_argument("--ori_vocab_size", type=int, default=None)
|
||||
parser.add_argument("--think_end_id", type=int, default=-1)
|
||||
parser.add_argument("--image_patch_id", type=int, default=-1)
|
||||
parser.add_argument("--line_break_id", type=int, default=-1)
|
||||
|
||||
parser.add_argument(
|
||||
"--quantization",
|
||||
@@ -707,7 +691,6 @@ def initialize_fd_config(args, ranks: int = 1, local_rank: int = 0) -> FDConfig:
|
||||
paddle.set_default_dtype(args.dtype)
|
||||
model_config = ModelConfig(vars(args))
|
||||
device_config = DeviceConfig(vars(args))
|
||||
decoding_config = DecodingConfig(vars(args))
|
||||
speculative_config = SpeculativeConfig(args.speculative_config)
|
||||
parallel_config = ParallelConfig(vars(args))
|
||||
cache_config = CacheConfig(vars(args))
|
||||
@@ -808,7 +791,6 @@ def initialize_fd_config(args, ranks: int = 1, local_rank: int = 0) -> FDConfig:
|
||||
speculative_config=speculative_config,
|
||||
device_config=device_config,
|
||||
load_config=load_config,
|
||||
decoding_config=decoding_config,
|
||||
quant_config=quant_config,
|
||||
graph_opt_config=graph_opt_config,
|
||||
early_stop_config=early_stop_config,
|
||||
|
||||
Reference in New Issue
Block a user