[Model] tp+ep support v1_loader (#5600)

* [Model] tp+ep support v1_loader
* fix
* fix mtp_linear
* fix mtp_linear
* fix
* fix
* fix v0 loader
* fix
* Add get_tensor for EP
* fix linear weight_loader
* fix typo
* fix
2025-12-24 13:28:13 +08:00 · 2025-12-18 15:27:12 +08:00
parent 5300e73f8b
commit a30a5b4216
8 changed files with 48 additions and 20 deletions
--- a/fastdeploy/model_executor/layers/quantization/block_wise_fp8.py
+++ b/fastdeploy/model_executor/layers/quantization/block_wise_fp8.py
@@ -138,7 +138,9 @@ class BlockWiseFP8LinearMethod(QuantMethodBase):
                weight_shape = layer.weight_shape
                weight_scale_inv_shape = weight_scale_inv_shape
                extra_weight_attrs["output_dim"] = (
-                    not extra_weight_attrs["output_dim"] if extra_weight_attrs["output_dim"] is not None else None
+                    not extra_weight_attrs["output_dim"]
+                    if extra_weight_attrs.get("output_dim", None) is not None
+                    else None
                )

            layer.weight_dtype = "float8_e4m3fn"