From 17c88f429ff9ab6bdb8e06120dacd116ddadbc3d Mon Sep 17 00:00:00 2001
From: Yuanle Liu
Date: Wed, 3 Dec 2025 13:20:51 +0800
Subject: [PATCH] fix skip_quant (#5342)

* fix skip_quant

* fix
---
 fastdeploy/model_executor/layers/linear.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fastdeploy/model_executor/layers/linear.py b/fastdeploy/model_executor/layers/linear.py
index 7b1dc794a..00fec6ba7 100644
--- a/fastdeploy/model_executor/layers/linear.py
+++ b/fastdeploy/model_executor/layers/linear.py
@@ -508,7 +508,7 @@ class QKVParallelLinear(ColumnParallelLinear):
     QKVParallelLinear Layer.
     """
 
-    def __init__(self, fd_config, prefix, with_bias=False, add_bias=True):
+    def __init__(self, fd_config, prefix, with_bias=False, add_bias=True, skip_quant=False):
         """
         Initialize the QKV Linear layer with given parameters.
 
@@ -542,6 +542,7 @@ class QKVParallelLinear(ColumnParallelLinear):
             output_size=output_size,
             with_bias=with_bias,
             add_bias=add_bias,
+            skip_quant=skip_quant,
         )
 
     def _get_shard_size_mapping(self, loaded_shard_id: str):
@@ -720,7 +721,6 @@ class RowParallelLinear(LinearBase):
             skip_quant (bool): Whether to skip quantization. Defaults to False.
         """
         self.fd_config = fd_config
-        self.skip_quant = False
         self.nranks = fd_config.parallel_config.tensor_parallel_size
         self.tp_group = fd_config.parallel_config.tp_group
         self.hidden_size = fd_config.model_config.hidden_size
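
The intent of the two hunks is easy to miss in unified-diff form: QKVParallelLinear previously had no skip_quant parameter, so the flag could never reach ColumnParallelLinear for the fused QKV projection, and RowParallelLinear hard-coded self.skip_quant = False in its constructor body even though its docstring documents a skip_quant argument. The following is a minimal, self-contained sketch of that control flow; the class names mirror the FastDeploy layers, but the bodies are toy stand-ins (the real constructors take fd_config and prefix and build tensor-parallel and quantization state), so read it as an illustration of the fix, not the library's API.

    # linear_skip_quant_sketch.py
    #
    # Toy stand-ins, NOT FastDeploy's real classes; this only sketches how the
    # skip_quant flag is now threaded through the constructors.

    class LinearBase:
        def __init__(self, with_bias=False, add_bias=True, skip_quant=False):
            # In the real base layer, skip_quant decides whether a quant method
            # is attached to the layer; here we just record the flag.
            self.skip_quant = skip_quant


    class ColumnParallelLinear(LinearBase):
        pass


    class QKVParallelLinear(ColumnParallelLinear):
        # After the patch, __init__ accepts skip_quant and forwards it to the
        # parent; before, the parameter did not exist, so the fused QKV
        # projection could not opt out of quantization.
        def __init__(self, with_bias=False, add_bias=True, skip_quant=False):
            super().__init__(with_bias=with_bias, add_bias=add_bias, skip_quant=skip_quant)


    class RowParallelLinear(LinearBase):
        def __init__(self, with_bias=False, add_bias=True, skip_quant=False):
            # Before the patch, the body hard-coded `self.skip_quant = False`,
            # shadowing the documented skip_quant argument; the patch removes
            # that assignment so the argument is honoured.
            super().__init__(with_bias=with_bias, add_bias=add_bias, skip_quant=skip_quant)


    if __name__ == "__main__":
        qkv = QKVParallelLinear(skip_quant=True)
        row = RowParallelLinear(skip_quant=True)
        assert qkv.skip_quant and row.skip_quant  # both constructors now honour the flag

With the patch applied, a model definition that constructs either layer with skip_quant=True should have that choice respected by the base-class quantization setup instead of being silently dropped.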