aliangdw committed on
Commit
a46af57
·
verified ·
1 Parent(s): 68016e0

Upload RFM model

Browse files
Files changed (1) hide show
  1. config.yaml +109 -0
config.yaml ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data:
2
+ dataloader_num_workers: 0
3
+ dataloader_pin_memory: false
4
+ dataset_preference_ratio: 0.7
5
+ dataset_type: default
6
+ eval_datasets:
7
+ - abraranwar/libero_rfm
8
+ - ykorkmaz/libero_failure_rfm
9
+ - HenryZhang/metaworld_rewind_rfm_eval
10
+ eval_subset_size: 500
11
+ eval_subsets:
12
+ - - libero256_10
13
+ - - libero_10_failure
14
+ - - metaworld_rewind_eval
15
+ force_reprocess: false
16
+ fps: 10
17
+ max_frames: 16
18
+ max_frames_for_preprocessing: 64
19
+ max_trajectories: -1
20
+ model_type: default
21
+ n_wrong_tasks: 5
22
+ num_bins: 10
23
+ num_proc: 1
24
+ preference_ratio: 1.0
25
+ preference_strategy_ratio:
26
+ - 0.4
27
+ - 0.3
28
+ - 0.3
29
+ - 0.0
30
+ progress_ratio: 0.5
31
+ resized_height: 128
32
+ resized_width: 128
33
+ rewind_lengths: null
34
+ samples_per_trajectory: 1
35
+ seed: 42
36
+ shuffle: true
37
+ train_datasets:
38
+ - abraranwar/libero_rfm
39
+ - ykorkmaz/libero_failure_rfm
40
+ - HenryZhang/metaworld_rewind_rfm_train
41
+ train_subsets:
42
+ - - libero256_90
43
+ - - libero_90_failure
44
+ - - metaworld_rewind_train
45
+ video_frame_sampling: uniform
46
+ debug: false
47
+ logging:
48
+ print_trainable_parameters: true
49
+ save_model: true
50
+ save_processor: true
51
+ use_wandb: true
52
+ wandb_entity: clvr
53
+ wandb_project: rfm
54
+ wandb_run_name: rfm
55
+ mode: train
56
+ model:
57
+ base_model_id: Qwen/Qwen2.5-VL-3B-Instruct
58
+ torch_dtype: bfloat16
59
+ train_language_model: false
60
+ train_preference_head: true
61
+ train_progress_head: true
62
+ train_similarity_head: false
63
+ train_value_head: true
64
+ train_vision_encoder: true
65
+ trust_remote_code: true
66
+ peft:
67
+ bias: none
68
+ lora_alpha: 64
69
+ lora_dropout: 0.05
70
+ peft_vision_encoder: true
71
+ r: 32
72
+ target_modules:
73
+ - q_proj
74
+ - k_proj
75
+ - v_proj
76
+ - o_proj
77
+ - gate_proj
78
+ - up_proj
79
+ - down_proj
80
+ use_peft: false
81
+ training:
82
+ beta: 0.1
83
+ bf16: true
84
+ ddp_bucket_cap_mb: 25
85
+ ddp_find_unused_parameters: true
86
+ do_eval: true
87
+ eval_steps: 100
88
+ evaluation_strategy: steps
89
+ fp16: false
90
+ gradient_accumulation_steps: 1
91
+ gradient_checkpointing: true
92
+ learning_rate: 2.0e-05
93
+ logging_steps: 1
94
+ lr_scheduler_type: cosine
95
+ max_grad_norm: 10.0
96
+ max_seq_length: 1024
97
+ max_steps: 5000
98
+ num_gpus: 2
99
+ num_train_epochs: -1
100
+ output_dir: ./logs/rfm_progpref_peft_vision
101
+ per_device_eval_batch_size: 8
102
+ per_device_train_batch_size: 8
103
+ prediction_loss_only: true
104
+ remove_unused_columns: false
105
+ resume_from_checkpoint: null
106
+ save_steps: 200
107
+ save_strategy: steps
108
+ warmup_ratio: 0.1
109
+ warmup_steps: 0