add input_processor plugin (#3657)

* add input_processor plugin
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
2025-10-05 00:33:03 +08:00 · 2025-08-28 22:53:57 +08:00
parent 02b3644903
commit 4957908275
18 changed files with 232 additions and 146 deletions
--- a/fastdeploy/model_executor/layers/linear.py
+++ b/fastdeploy/model_executor/layers/linear.py
@@ -721,7 +721,8 @@ class RowParallelLinear(LinearBase):
            add_bias=add_bias,
            skip_quant=skip_quant,
        )
-
+        if add_bias:
+            assert with_bias, "with_bias must be True when add_bias is True."
        assert self.quant_method is not None
        self.quant_method.create_weights(
            self,
@@ -753,7 +754,8 @@ class RowParallelLinear(LinearBase):

        if self.reduce_results and self.nranks > 1:
            tensor_model_parallel_all_reduce(out, self.tp_group)
-
+        if not self.fd_config.quant_config and self.add_bias:
+            out = paddle.add(out, self.bias)
        return out