[Metax] support default_v1 loader & thinking model (#4956)

Co-authored-by: plusNew001 <95567040+plusNew001@users.noreply.github.com>
2025-12-24 13:28:13 +08:00 · 2025-11-12 16:32:26 +08:00
parent bde6e2f931
commit 9d9f5df8d0
4 changed files with 23 additions and 5 deletions
--- a/fastdeploy/model_executor/layers/quantization/weight_only.py
+++ b/fastdeploy/model_executor/layers/quantization/weight_only.py
@@ -326,6 +326,9 @@ class WeightOnlyLinearMethod(QuantMethodBase):
                    arch=self.quant_config.weight_only_linear_arch,
                )

+                if current_platform.is_maca():
+                    quanted_weight_tensor = paddle.transpose(quanted_weight_tensor, [1, 0])
+
            free_tensor(layer.weight)

            layer.weight = layer.create_parameter(