qwen3_moe (#3084)

2025-10-05 08:37:06 +08:00 · 2025-08-06 14:45:27 +08:00
parent 91dc87f1c5
commit 20839abccf
30 changed files with 1361 additions and 1087 deletions
--- a/fastdeploy/model_executor/models/utils.py
+++ b/fastdeploy/model_executor/models/utils.py
@@ -72,7 +72,11 @@ def default_weight_loader(fd_config: FDConfig) -> None:
                    loaded_weight = loaded_weight[..., shard_offset:shard_size]
                else:
                    loaded_weight = loaded_weight[shard_offset:shard_size, ...]
+
            loaded_weight = get_tensor(loaded_weight)
+            # mlp.gate.weight is precision-sensitive, so we cast it to float32 for computation
+            if param.dtype != loaded_weight.dtype:
+                loaded_weight = loaded_weight.cast(param.dtype)

            assert param.shape == loaded_weight.shape, (
                f" Attempted to load weight ({loaded_weight.shape}) " f"into parameter ({param.shape})"