mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
[V1 Loader] Ernie kv cache quant support v1 loader (#3899)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
Publish Job / publish_pre_check (push) Has been cancelled
Publish Job / print_publish_pre_check_outputs (push) Has been cancelled
Publish Job / FD-Clone-Linux (push) Has been cancelled
Publish Job / Show Code Archive Output (push) Has been cancelled
Publish Job / BUILD_SM8090 (push) Has been cancelled
Publish Job / BUILD_SM8689 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8090 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8689 (push) Has been cancelled
Publish Job / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
Publish Job / Run FastDeploy LogProb Tests (push) Has been cancelled
Publish Job / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
Publish Job / Run Base Tests (push) Has been cancelled
Publish Job / Run Accuracy Tests (push) Has been cancelled
Publish Job / Run Stable Tests (push) Has been cancelled
CI Images Build / FD-Clone-Linux (push) Has been cancelled
CI Images Build / Show Code Archive Output (push) Has been cancelled
CI Images Build / CI Images Build (push) Has been cancelled
CI Images Build / BUILD_SM8090 (push) Has been cancelled
CI Images Build / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
CI Images Build / Run FastDeploy LogProb Tests (push) Has been cancelled
CI Images Build / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
CI Images Build / Run Base Tests (push) Has been cancelled
CI Images Build / Run Accuracy Tests (push) Has been cancelled
CI Images Build / Run Stable Tests (push) Has been cancelled
CI Images Build / Publish Docker Images Pre Check (push) Has been cancelled
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
Publish Job / publish_pre_check (push) Has been cancelled
Publish Job / print_publish_pre_check_outputs (push) Has been cancelled
Publish Job / FD-Clone-Linux (push) Has been cancelled
Publish Job / Show Code Archive Output (push) Has been cancelled
Publish Job / BUILD_SM8090 (push) Has been cancelled
Publish Job / BUILD_SM8689 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8090 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8689 (push) Has been cancelled
Publish Job / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
Publish Job / Run FastDeploy LogProb Tests (push) Has been cancelled
Publish Job / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
Publish Job / Run Base Tests (push) Has been cancelled
Publish Job / Run Accuracy Tests (push) Has been cancelled
Publish Job / Run Stable Tests (push) Has been cancelled
CI Images Build / FD-Clone-Linux (push) Has been cancelled
CI Images Build / Show Code Archive Output (push) Has been cancelled
CI Images Build / CI Images Build (push) Has been cancelled
CI Images Build / BUILD_SM8090 (push) Has been cancelled
CI Images Build / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
CI Images Build / Run FastDeploy LogProb Tests (push) Has been cancelled
CI Images Build / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
CI Images Build / Run Base Tests (push) Has been cancelled
CI Images Build / Run Accuracy Tests (push) Has been cancelled
CI Images Build / Run Stable Tests (push) Has been cancelled
CI Images Build / Publish Docker Images Pre Check (push) Has been cancelled
* support c8 for ernie * add unittest * support vl * fix c8
This commit is contained in:
@@ -539,6 +539,10 @@ class Ernie4_5_MoeForCausalLM(ModelForCasualLM):
|
||||
("qkv_proj", "v_proj", None, "v"),
|
||||
("up_gate_proj", "gate_proj", None, "gate"),
|
||||
("up_gate_proj", "up_proj", None, "up"),
|
||||
("attn.cache_k_scale", "cachek_matmul.activation_scale", None, None),
|
||||
("attn.cache_v_scale", "cachev_matmul.activation_scale", None, None),
|
||||
("attn.cache_k_zp", "cachek_matmul.activation_zero_point", None, None),
|
||||
("attn.cache_v_zp", "cachev_matmul.activation_zero_point", None, None),
|
||||
]
|
||||
|
||||
expert_params_mapping = []
|
||||
@@ -563,6 +567,7 @@ class Ernie4_5_MoeForCausalLM(ModelForCasualLM):
|
||||
all_param_mapping = general_params_mapping + expert_params_mapping
|
||||
|
||||
params_dict = dict(self.named_parameters())
|
||||
|
||||
process_weights_after_loading_fn = process_weights_after_loading(dict(self.named_sublayers()))
|
||||
|
||||
for loaded_weight_name, loaded_weight in weights_iterator:
|
||||
@@ -591,7 +596,9 @@ class Ernie4_5_MoeForCausalLM(ModelForCasualLM):
|
||||
else:
|
||||
weight_loader(param, loaded_weight, shard_id)
|
||||
|
||||
model_sublayer_name = re.sub(r"\.(up_gate_proj_weight|down_proj_weight|weight)$", "", model_param_name)
|
||||
model_sublayer_name = re.sub(
|
||||
r"\.(up_gate_proj_weight|down_proj_weight|weight|cache_k_scale|cache_v_scale)$", "", model_param_name
|
||||
)
|
||||
process_weights_after_loading_fn(model_sublayer_name, param)
|
||||
|
||||
if self.tie_word_embeddings:
|
||||
|
@@ -616,6 +616,10 @@ class Ernie4_5_VLMoeForConditionalGeneration(ModelForCasualLM):
|
||||
("resampler_model", "ernie.resampler_model", None, None),
|
||||
("vision_model", "ernie.vision_model", None, None),
|
||||
("gate_correction_bias", "moe_statics.e_score_correction_bias", None, None),
|
||||
("attn.cache_k_scale", "cachek_matmul.activation_scale", None, None),
|
||||
("attn.cache_v_scale", "cachev_matmul.activation_scale", None, None),
|
||||
("attn.cache_k_zp", "cachek_matmul.activation_zero_point", None, None),
|
||||
("attn.cache_v_zp", "cachev_matmul.activation_zero_point", None, None),
|
||||
# for torch model
|
||||
("resampler_model", "model.resampler_model", None, None),
|
||||
("qkv_proj", "q_proj", None, "q"),
|
||||
@@ -679,7 +683,9 @@ class Ernie4_5_VLMoeForConditionalGeneration(ModelForCasualLM):
|
||||
weight_loader(param, loaded_weight, expert_id=expert_id, shard_id=shard_id)
|
||||
else:
|
||||
weight_loader(param, loaded_weight, shard_id)
|
||||
model_sublayer_name = re.sub(r"\.(up_gate_proj_weight|down_proj_weight|weight)$", "", model_param_name)
|
||||
model_sublayer_name = re.sub(
|
||||
r"\.(up_gate_proj_weight|down_proj_weight|weight|cache_k_scale|cache_v_scale)$", "", model_param_name
|
||||
)
|
||||
process_weights_after_loading_fn(model_sublayer_name, param)
|
||||
if self.tie_word_embeddings:
|
||||
# because we use lazy guard and is not initialized by default
|
||||
|
Reference in New Issue
Block a user