zpn committed on
Commit 11d9e72 · verified · 1 Parent(s): d0abaff

Create training_config.yml

Files changed (1):
  1. training_config.yml (+78, -0)
training_config.yml ADDED
@@ -0,0 +1,78 @@
+config:
+  (): colpali_engine.trainer.colmodel_training.ColModelTrainingConfig
+  output_dir: !path ../../../models/biqwen2_5_1ep_colpali_train_5neg_128bs_7b_2e4
+  processor:
+    (): colpali_engine.utils.transformers_wrappers.AllPurposeWrapper
+    class_to_instanciate: !ext colpali_engine.models.BiQwen2_5_Processor
+    pretrained_model_name_or_path: "Qwen/Qwen2.5-VL-7B-Instruct" # "./models/paligemma-3b-mix-448"
+    # num_image_tokens: 2048
+    # max_length: 50
+    num_negatives: 5
+
+  model:
+    (): colpali_engine.utils.transformers_wrappers.AllPurposeWrapper
+    class_to_instanciate: !ext colpali_engine.models.BiQwen2_5
+    pretrained_model_name_or_path: "Qwen/Qwen2.5-VL-7B-Instruct"
+    torch_dtype: !ext torch.bfloat16
+    use_cache: false
+    attn_implementation: "flash_attention_2"
+    # device_map: "auto"
+    # quantization_config:
+    #   (): transformers.BitsAndBytesConfig
+    #   load_in_4bit: true
+    #   bnb_4bit_quant_type: "nf4"
+    #   bnb_4bit_compute_dtype: "bfloat16"
+    #   bnb_4bit_use_double_quant: true
+
+  dataset_loading_func: !ext colpali_engine.utils.dataset_transformation.load_train_set_ir_negs
+  eval_dataset_loader: !import ../data/test_data.yaml
+
+  # max_length: 50
+  run_eval: true
+  loss_func:
+    (): colpali_engine.loss.bi_encoder_losses.BiEncoderLoss
+  tr_args:
+    (): transformers.training_args.TrainingArguments
+    output_dir: null
+    overwrite_output_dir: true
+    num_train_epochs: 1
+    # this is global batch size
+    per_device_train_batch_size: 128
+    gradient_checkpointing: true
+    gradient_checkpointing_kwargs: { "use_reentrant": false }
+    ddp_find_unused_parameters: false
+    resume_from_checkpoint: false
+    # gradient_checkpointing: true
+    # 6 x 8 gpus = 48 batch size
+    # gradient_accumulation_steps: 4
+    per_device_eval_batch_size: 16
+    eval_strategy: "no"
+    dataloader_num_workers: 0
+    log_level: "info"
+    accelerator_config:
+      split_batches: true
+    # bf16: true
+    save_steps: 500
+    logging_steps: 10
+    eval_steps: 100
+    warmup_ratio: 0.01
+    learning_rate: 2e-4
+    save_total_limit: 1
+    # resume_from_checkpoint: true
+    optim: "paged_adamw_8bit"
+    # fsdp: "shard_grad_op"
+    # wandb logging
+    run_name: "biqwen2_5_1ep_colpali_train_5neg_128bs_7b_2e4"
+    report_to: "wandb"
+
+
+  peft_config:
+    (): peft.LoraConfig
+    r: 32
+    lora_alpha: 32
+    lora_dropout: 0.1
+    init_lora_weights: "gaussian"
+    bias: "none"
+    task_type: "FEATURE_EXTRACTION"
+    target_modules: '(.*(model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(custom_text_proj).*$)'
+    # target_modules: '(.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(custom_text_proj).*$)'