diff --git a/fastdeploy/model_executor/layers/linear.py b/fastdeploy/model_executor/layers/linear.py
index 253636349..39abb8c60 100644
--- a/fastdeploy/model_executor/layers/linear.py
+++ b/fastdeploy/model_executor/layers/linear.py
@@ -500,7 +500,7 @@ class QKVParallelLinear(ColumnParallelLinear):
             k_bias = get_tensor(state_dict.pop(k_bias_key))
             v_bias = get_tensor(state_dict.pop(v_bias_key))
             qkv_bias = paddle.concat([q_bias, k_bias, v_bias], axis=-1)
-        self.bias.set_value(qkv_bias)
+            self.bias.set_value(qkv_bias)
 
 
 class RowParallelLinear(LinearBase):
diff --git a/fastdeploy/worker/worker_process.py b/fastdeploy/worker/worker_process.py
index c44cac5c5..8a12988c4 100644
--- a/fastdeploy/worker/worker_process.py
+++ b/fastdeploy/worker/worker_process.py
@@ -17,7 +17,7 @@
 import argparse
 import json
 import time
-from typing import List
+from typing import Tuple
 
 import numpy as np
 import paddle
@@ -74,7 +74,7 @@ def get_worker(fd_config: FDConfig, local_rank: int, rank: int) -> WorkerBase:
         return GcuWorker(fd_config=fd_config, local_rank=local_rank, rank=rank)
 
 
-def init_distributed_environment(seed: int = 20) -> List[int]:
+def init_distributed_environment(seed: int = 20) -> Tuple[int, int]:
     """Initialize Paddle Fleet and get rank of worker"""
     # Global rank
     ranks = dist.get_world_size()
@@ -122,9 +122,9 @@ def update_fd_config_for_mm(fd_config: FDConfig) -> None:
 
 
 class PaddleDisWorkerProc:
     """
-    Paddle Distrubuted wrapper for fastdeploy.worker.Worker,
+    Paddle Distributed wrapper for fastdeploy.worker.Worker,
     for handling single-node multi-GPU tensor parallel.
-    The wrapper internally executea an event loop that continuously executes requests
+    The wrapper internally executes an event loop that continuously executes requests
     in the task queue. Control flow is transmitted by IPC.
     """
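
A note on the `linear.py` hunk: the removed and added lines differ only in indentation, moving `self.bias.set_value(qkv_bias)` into the branch that builds `qkv_bias`. The sketch below is a minimal, hypothetical reconstruction of that bias-loading path (the `with_bias` guard, the key names, and the helper class are assumptions, not FastDeploy's actual code); it only illustrates why the call has to live inside the branch: left outside, any checkpoint that skips the branch fails on an undefined `qkv_bias`.

```python
import paddle


class QKVBiasLoaderSketch:
    """Hypothetical stand-in for QKVParallelLinear's bias loading."""

    def __init__(self, hidden_size: int, with_bias: bool = True):
        self.with_bias = with_bias
        # Fused QKV bias parameter; shape assumes equal q/k/v widths.
        self.bias = paddle.create_parameter(shape=[3 * hidden_size], dtype="float32")

    def load_bias(self, state_dict: dict) -> None:
        if self.with_bias:
            q_bias = state_dict.pop("q_proj.bias")  # key names are illustrative
            k_bias = state_dict.pop("k_proj.bias")
            v_bias = state_dict.pop("v_proj.bias")
            qkv_bias = paddle.concat([q_bias, k_bias, v_bias], axis=-1)
            # Post-fix position: set_value runs only when qkv_bias was built.
            self.bias.set_value(qkv_bias)
        # Pre-fix, set_value sat here, outside the branch, so a call that skips
        # the branch (no bias tensors to load) would hit an undefined qkv_bias.
```

The `worker_process.py` changes are cosmetic by comparison: the new `Tuple[int, int]` annotation on `init_distributed_environment` documents a fixed-size pair of ranks rather than a homogeneous list, and the docstring typos (`Distrubuted`, `executea`) are corrected.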