# NOTE: hosting-page banner residue ("Spaces: Running on Zero"); not part of the module source.
# Copyright 2023-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch | |
from transformers import BloomPreTrainedModel | |
from .peft_types import PeftType | |
# needed for prefix-tuning of bloom model
def bloom_model_postprocess_past_key_value(past_key_values):
    """Rearrange prefix key/value tensors into per-layer ``(key, value)`` pairs.

    The tensors in ``past_key_values`` are concatenated along dim 0 into one
    5-D tensor whose axes are unpacked as ``(total_layers, batch_size,
    num_attention_heads, num_virtual_tokens, head_dim)``. The first half of
    the leading axis is treated as keys, the second half as values.

    Args:
        past_key_values: A sequence of tensors accepted by ``torch.cat``; the
            concatenated result must be 5-D.

    Returns:
        A tuple with one ``(key, value)`` pair per index of the halved leading
        axis. Each key has shape ``(batch_size * num_attention_heads,
        head_dim, num_virtual_tokens)`` and each value has shape
        ``(batch_size * num_attention_heads, num_virtual_tokens, head_dim)``.
    """
    past_key_values = torch.cat(past_key_values)
    total_layers, batch_size, num_attention_heads, num_virtual_tokens, head_dim = past_key_values.shape
    half = total_layers // 2

    # Keys: swap axes 2 and 3, then fold batch and heads together with
    # head_dim placed ahead of the virtual-token axis.
    keys = past_key_values[:half]
    keys = keys.transpose(2, 3).reshape(half, batch_size * num_attention_heads, head_dim, num_virtual_tokens)

    # Values: only fold batch and heads together; token/head_dim order is kept.
    values = past_key_values[half:]
    values = values.reshape(half, batch_size * num_attention_heads, num_virtual_tokens, head_dim)

    # zip iterates both tensors along dim 0, pairing key i with value i.
    return tuple(zip(keys, values))
# needed for prefix-tuning of StarCoder models
def starcoder_model_postprocess_past_key_value(past_key_values):
    """Collapse each prefix tensor into a 3-D tensor.

    For every tensor in ``past_key_values`` the function takes index 0 along
    the third axis, moves the original leading axis to just before the last
    axis, and merges those final two axes into one.

    Args:
        past_key_values: An iterable of tensors that are 5-D (indexing
            ``[:, :, 0]`` followed by a 4-axis ``permute`` requires this).

    Returns:
        A tuple with one reshaped tensor per input tensor, in input order.
    """
    result = []
    for layer in past_key_values:
        selected = layer[:, :, 0]  # drop the third axis by taking its first entry
        reordered = selected.permute([1, 2, 0, 3])  # old axis 0 now sits next to the last axis
        result.append(reordered.reshape(*reordered.shape[:-2], -1))  # merge the last two axes
    return tuple(result)
# Model-type string -> function that post-processes prefix-tuning past key values.
TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING = {
    "gpt_bigcode": starcoder_model_postprocess_past_key_value,
}
if hasattr(BloomPreTrainedModel, "_convert_to_standard_cache"):
    # special handling for bloom architecture was fixed in:
    # https://github.com/huggingface/transformers/pull/31445
    # the _convert_to_standard_cache method is removed in the PR and thus serves as an indicator
    TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING["bloom"] = bloom_model_postprocess_past_key_value
# Model-type string -> list of module-name strings. Going by the constant's
# name these are the LN-tuning target modules per transformers architecture;
# the consumers are outside this file.
TRANSFORMERS_MODELS_TO_LNTUNING_TARGET_MODULES_MAPPING = {
    "llama": ["input_layernorm", "post_attention_layernorm", "norm"],
    "bloom": ["input_layernorm", "post_attention_layernorm", "ln_f"],
    "llava": [
        "multi_modal_projector",
        "input_layernorm",
        "post_attention_layernorm",
        "norm",
        "embed_tokens",
        "lm_head",
    ],
    "t5": ["layer_norm", "final_layer_norm"],
    "mt5": ["layer_norm", "final_layer_norm"],
    "bart": ["self_attn_layer_norm", "encoder_attn_layer_norm", "final_layer_norm"],
    "gpt2": ["ln_1", "ln_2", "ln_f"],
    "blip-2": ["layernorm", "LayerNorm", "final_layer_norm", "self_attn_layer_norm"],
    "gptj": ["ln_1", "ln_f"],
    "falcon": ["input_layernorm", "post_attention_layernorm", "ln_f"],
    "mistral": ["input_layernorm", "post_attention_layernorm", "norm"],
    "phi": ["input_layernorm", "final_layernorm"],
    "gemma": ["input_layernorm", "post_attention_layernorm", "norm"],
    "gemma2": [
        "input_layernorm",
        "post_attention_layernorm",
        "pre_feedforward_layernorm",
        "post_feedforward_layernorm",
        "norm",
    ],
    "qwen2": ["post_attention_layernorm"],
}
# Model-type string -> list of module-name strings. Going by the constant's
# name these are the LoRA target modules per transformers architecture; the
# consumers are outside this file.
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING = {
    "t5": ["q", "v"],
    "mt5": ["q", "v"],
    "bart": ["q_proj", "v_proj"],
    "gpt2": ["c_attn"],
    "bloom": ["query_key_value"],
    "blip-2": ["q", "v", "q_proj", "v_proj"],
    "opt": ["q_proj", "v_proj"],
    "gptj": ["q_proj", "v_proj"],
    "gpt_neox": ["query_key_value"],
    "gpt_neo": ["q_proj", "v_proj"],
    "bert": ["query", "value"],
    "roberta": ["query", "value"],
    "xlm-roberta": ["query", "value"],
    "electra": ["query", "value"],
    "deberta-v2": ["query_proj", "value_proj"],
    "deberta": ["in_proj"],
    "layoutlm": ["query", "value"],
    "llama": ["q_proj", "v_proj"],
    "chatglm": ["query_key_value"],
    "gpt_bigcode": ["c_attn"],
    "mpt": ["Wqkv"],
    "RefinedWebModel": ["query_key_value"],
    "RefinedWeb": ["query_key_value"],
    "falcon": ["query_key_value"],
    "btlm": ["c_proj", "c_attn"],
    "codegen": ["qkv_proj"],
    "mistral": ["q_proj", "v_proj"],
    "mixtral": ["q_proj", "v_proj"],
    "stablelm": ["q_proj", "v_proj"],
    "phi": ["q_proj", "v_proj", "fc1", "fc2"],
    "gemma": ["q_proj", "v_proj"],
    "gemma2": ["q_proj", "v_proj"],
    "qwen2": ["q_proj", "v_proj"],
}
# Model-type string -> list of module-name strings. Going by the constant's
# name these are the IA3 target modules per transformers architecture; the
# consumers are outside this file.
TRANSFORMERS_MODELS_TO_IA3_TARGET_MODULES_MAPPING = {
    "t5": ["k", "v", "wo"],
    "mt5": ["k", "v", "wi_1"],
    "gpt2": ["c_attn", "mlp.c_proj"],
    "bloom": ["query_key_value", "mlp.dense_4h_to_h"],
    "roberta": ["key", "value", "output.dense"],
    "opt": ["q_proj", "k_proj", "fc2"],
    "gptj": ["q_proj", "v_proj", "fc_out"],
    "gpt_neox": ["query_key_value", "dense_4h_to_h"],
    "gpt_neo": ["q_proj", "v_proj", "c_proj"],
    "bart": ["q_proj", "v_proj", "fc2"],
    "gpt_bigcode": ["c_attn", "mlp.c_proj"],
    "llama": ["k_proj", "v_proj", "down_proj"],
    "mistral": ["k_proj", "v_proj", "down_proj"],
    "mixtral": ["k_proj", "v_proj", "w2"],
    "bert": ["key", "value", "output.dense"],
    "deberta-v2": ["key_proj", "value_proj", "output.dense"],
    "deberta": ["in_proj", "output.dense"],
    "RefinedWebModel": ["query_key_value", "dense_4h_to_h"],
    "RefinedWeb": ["query_key_value", "dense_4h_to_h"],
    "falcon": ["query_key_value", "dense_4h_to_h"],
    "phi": ["q_proj", "v_proj", "fc2"],
    "gemma": ["q_proj", "v_proj", "down_proj"],
    "gemma2": ["q_proj", "v_proj", "down_proj"],
    "qwen2": ["q_proj", "v_proj", "down_proj"],
}
# Model-type string -> list of module-name strings. Going by the constant's
# name these are the modules IA3 treats as feedforward per transformers
# architecture; the consumers are outside this file. "mt5" maps to an empty
# list.
TRANSFORMERS_MODELS_TO_IA3_FEEDFORWARD_MODULES_MAPPING = {
    "t5": ["wo"],
    "mt5": [],
    "gpt2": ["mlp.c_proj"],
    "bloom": ["mlp.dense_4h_to_h"],
    "roberta": ["output.dense"],
    "opt": ["fc2"],
    "gptj": ["fc_out"],
    "gpt_neox": ["dense_4h_to_h"],
    "gpt_neo": ["c_proj"],
    "bart": ["fc2"],
    "gpt_bigcode": ["mlp.c_proj"],
    "llama": ["down_proj"],
    "mistral": ["down_proj"],
    "mixtral": ["w2"],
    "bert": ["output.dense"],
    "deberta-v2": ["output.dense"],
    "deberta": ["output.dense"],
    "RefinedWeb": ["dense_4h_to_h"],
    "RefinedWebModel": ["dense_4h_to_h"],
    "falcon": ["dense_4h_to_h"],
    "phi": ["fc2"],
    "gemma": ["down_proj"],
    "gemma2": ["down_proj"],
    "qwen2": ["down_proj"],
}
# Model-type string -> list of module-name strings. Going by the constant's
# name these are the AdaLoRA target modules per transformers architecture;
# the consumers are outside this file. The commented-out entries are kept
# from the original.
TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING = {
    "t5": ["q", "k", "v", "o", "wi", "wo"],
    "mt5": ["q", "k", "v", "o", "wi_0", "wi_1", "wo"],
    "bart": ["q_proj", "k_proj", "v_proj", "out_proj", "fc1", "fc2"],
    "gpt2": ["c_attn"],
    "bloom": ["query_key_value"],
    "opt": ["q_proj", "k_proj", "v_proj", "out_proj", "fc1", "fc2"],
    "gptj": ["q_proj", "v_proj"],
    "gpt_neox": ["query_key_value"],
    "gpt_neo": ["q_proj", "v_proj"],
    "llama": ["q_proj", "v_proj"],
    "bert": ["query", "value"],
    "roberta": ["query", "key", "value", "dense"],
    # "xlm-roberta": ["query", "value"],
    # "electra": ["query", "value"],
    "deberta-v2": ["query_proj", "key_proj", "value_proj", "dense"],
    "gpt_bigcode": ["c_attn"],
    "deberta": ["in_proj"],
    # "layoutlm": ["query", "value"],
    "qwen2": ["q_proj", "v_proj"],
}
# Model-type string -> list of module-name strings. Going by the constant's
# name these are the VeRA target modules per transformers architecture; the
# consumers are outside this file.
TRANSFORMERS_MODELS_TO_VERA_TARGET_MODULES_MAPPING = {
    "t5": ["q", "v"],
    "mt5": ["q", "v"],
    "bart": ["q_proj", "v_proj"],
    "gpt2": ["c_attn"],
    "bloom": ["query_key_value"],
    "blip-2": ["q", "v", "q_proj", "v_proj"],
    "opt": ["q_proj", "v_proj"],
    "gptj": ["q_proj", "v_proj"],
    "gpt_neox": ["query_key_value"],
    "gpt_neo": ["q_proj", "v_proj"],
    "bert": ["query", "value"],
    "roberta": ["query", "value"],
    "xlm-roberta": ["query", "value"],
    "electra": ["query", "value"],
    "deberta-v2": ["query_proj", "value_proj"],
    "deberta": ["in_proj"],
    "layoutlm": ["query", "value"],
    "llama": ["q_proj", "v_proj"],
    "chatglm": ["query_key_value"],
    "gpt_bigcode": ["c_attn"],
    "mpt": ["Wqkv"],
    "RefinedWebModel": ["query_key_value"],
    "RefinedWeb": ["query_key_value"],
    "falcon": ["query_key_value"],
    "btlm": ["c_proj", "c_attn"],
    "codegen": ["qkv_proj"],
    "mistral": ["q_proj", "v_proj"],
    "mixtral": ["q_proj", "v_proj"],
    "stablelm": ["q_proj", "v_proj"],
    "phi": ["q_proj", "v_proj"],
    "gemma": ["q_proj", "v_proj"],
    "gemma2": ["q_proj", "v_proj"],
    "qwen2": ["q_proj", "v_proj"],
}
# Model-type string -> list of module-name strings. Going by the constant's
# name these are the FourierFT target modules per transformers architecture;
# the consumers are outside this file.
TRANSFORMERS_MODELS_TO_FOURIERFT_TARGET_MODULES_MAPPING = {
    "t5": ["q", "v"],
    "mt5": ["q", "v"],
    "bart": ["q_proj", "v_proj"],
    "gpt2": ["mlp.c_proj"],
    "bloom": ["query_key_value"],
    "blip-2": ["q", "v", "q_proj", "v_proj"],
    "opt": ["q_proj", "v_proj"],
    "gptj": ["q_proj", "v_proj"],
    "gpt_neox": ["query_key_value"],
    "gpt_neo": ["q_proj", "v_proj"],
    "bert": ["query", "value"],
    "roberta": ["query", "value"],
    "xlm-roberta": ["query", "value"],
    "electra": ["query", "value"],
    "deberta-v2": ["query_proj", "value_proj"],
    "deberta": ["in_proj"],
    "layoutlm": ["query", "value"],
    "llama": ["q_proj", "v_proj"],
    "chatglm": ["query_key_value"],
    "gpt_bigcode": ["mlp.c_proj"],
    "mpt": ["Wqkv"],
    "RefinedWebModel": ["query_key_value"],
    "RefinedWeb": ["query_key_value"],
    "falcon": ["query_key_value"],
    "codegen": ["qkv_proj"],
    "mistral": ["q_proj", "v_proj"],
    "mixtral": ["q_proj", "v_proj"],
    "stablelm": ["q_proj", "v_proj"],
    "phi": ["q_proj", "v_proj", "fc1", "fc2"],
    "gemma": ["q_proj", "v_proj"],
    "gemma2": ["q_proj", "v_proj"],
    "qwen2": ["q_proj", "v_proj"],
}
# Model-type string -> list of module-name strings. Going by the constant's
# name these are the VB-LoRA target modules per transformers architecture;
# the consumers are outside this file.
TRANSFORMERS_MODELS_TO_VBLORA_TARGET_MODULES_MAPPING = {
    "t5": ["q", "k", "v", "o", "wi", "wo"],
    "mt5": ["q", "k", "v", "o", "wi_0", "wi_1", "wo"],
    "bart": ["q_proj", "k_proj", "v_proj", "out_proj", "fc1", "fc2"],
    "gpt2": ["c_attn"],
    "bloom": ["query_key_value"],
    "opt": ["q_proj", "k_proj", "v_proj", "out_proj", "fc1", "fc2"],
    "gptj": ["q_proj", "v_proj"],
    "gpt_neox": ["query_key_value"],
    "gpt_neo": ["q_proj", "v_proj"],
    "llama": ["q_proj", "v_proj"],
    "bert": ["query", "value"],
    "roberta": ["query", "value"],
    "deberta-v2": ["query_proj", "key_proj", "value_proj", "dense"],
    "gpt_bigcode": ["c_attn"],
    "deberta": ["in_proj"],
    "qwen2": ["q_proj", "v_proj"],
}
# PeftType member -> string prefix. Presumably the prefix of that method's
# adapter parameter names — confirm against callers. LORA and ADALORA share
# the "lora_" prefix.
PEFT_TYPE_TO_PREFIX_MAPPING = {
    PeftType.IA3: "ia3_",
    PeftType.LORA: "lora_",
    PeftType.ADALORA: "lora_",
    PeftType.LOHA: "hada_",
    PeftType.LOKR: "lokr_",
    PeftType.OFT: "oft_",
    PeftType.POLY: "poly_",
    PeftType.BOFT: "boft_",
    PeftType.LN_TUNING: "ln_tuning_",
    PeftType.VERA: "vera_lambda_",
    PeftType.FOURIERFT: "fourierft_",
    PeftType.HRA: "hra_",
    PeftType.VBLORA: "vblora_",
    PeftType.BONE: "bone_",
}
# Module-level string/list constants; their consumers are outside this file.
WEIGHTS_NAME = "adapter_model.bin"
SAFETENSORS_WEIGHTS_NAME = "adapter_model.safetensors"
CONFIG_NAME = "adapter_config.json"
EMBEDDING_LAYER_NAMES = ["embed_tokens", "lm_head"]
SEQ_CLS_HEAD_NAMES = ["score", "classifier"]
INCLUDE_LINEAR_LAYERS_SHORTHAND = "all-linear"
TOKENIZER_CONFIG_NAME = "tokenizer_config.json"
DUMMY_TARGET_MODULES = "dummy-target-modules"
DUMMY_MODEL_CONFIG = {"model_type": "custom"}
# If users specify more than this number of target modules, we apply an optimization to try to reduce the target modules
# to a minimal set of suffixes, which makes loading faster. We only apply this when exceeding a certain size since
# otherwise there is no point in optimizing and there is a small chance of bugs in the optimization algorithm, so no
# point in taking unnecessary risks. See #2045 for more context.
MIN_TARGET_MODULES_FOR_OPTIMIZATION = 20