Update README.md
README.md CHANGED

@@ -6,11 +6,12 @@ tags:
  - llama-factory
  - lora
  datasets:
+ - Snit/french-conversation
  - Nekochu/novel17_train_alpaca_format
  - bofenghuang/vigogne
- - jpacifico/French-Alpaca-dataset-Instruct-110K
  - MaziyarPanahi/french_instruct_human_sharegpt
- -
+ - jpacifico/French-Alpaca-dataset-Instruct-110K
+ - jpacifico/french-orca-dpo-pairs-revised

  language:
  - fr

@@ -33,8 +34,91 @@ Stage B: Continued **S**upervised **F**ine-**T**uning, QA
set CUDA_VISIBLE_DEVICES=0 && llamafactory-cli train --stage sft --do_train True --model_name_or_path NousResearch/Meta-Llama-3.1-8B-Instruct --preprocessing_num_workers 16 --finetuning_type lora --template alpaca --rope_scaling linear --flash_attn fa2 --dataset_dir data --dataset Acquiesce_french_vigogne,novel17_train --cutoff_len 8192 --learning_rate 5e-05 --num_train_epochs 3.0 --max_samples 10000000 --per_device_train_batch_size 1 --gradient_accumulation_steps 1 --lr_scheduler_type cosine --max_grad_norm 1.0 --logging_steps 10 --save_steps 1000 --warmup_steps 0 --neftune_noise_alpha 5 --optim adamw_8bit --packing True --report_to none --output_dir saves\LLaMA3.1-8B-Chat\lora\QLoRA_french_sft --bf16 True --plot_loss True --ddp_timeout 180000000 --adapter_name_or_path saves\LLaMA3.1-8B-Chat\lora\QLoRA_french_pt --quantization_bit 4 --quantization_method bitsandbytes --lora_rank 32 --lora_alpha 64 --lora_dropout 0.15 --lora_target all
```

Stage C: Continued **D**irect **P**reference **O**ptimization

<details>
<summary>Config</summary>

`llama3_lora_dpo.yaml`
```yaml
### model
model_name_or_path: NousResearch/Meta-Llama-3.1-8B-Instruct
quantization_bit: 4
use_adam_mini: true
adapter_name_or_path: saves\LLaMA3.1-8B-Chat\lora\QLoRA_french_sft

### method
stage: dpo
do_train: true
finetuning_type: lora
lora_target: all
pref_beta: 0.1
pref_loss: sigmoid # choices: [sigmoid (dpo), orpo, simpo]

### dataset
dataset: french_orca_rlhf-revised
template: llama3
cutoff_len: 4096
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/LLaMA3.1-8B-Chat/lora/QLoRA_french_dpo
logging_steps: 10
save_steps: 1000
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 1
learning_rate: 5.0e-6
num_train_epochs: 1.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000
flash_attn: fa2
optim: paged_adamw_8bit
```
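
Assuming the config above is saved as `llama3_lora_dpo.yaml` (the filename is arbitrary), Stage C can be launched the same way as Stage B:

```
set CUDA_VISIBLE_DEVICES=0 && llamafactory-cli train llama3_lora_dpo.yaml
```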

`dataset_info.json`:
```json
"french_orca_rlhf-revised": {
  "hf_hub_url": "jpacifico/french-orca-dpo-pairs-revised",
  "ranking": true,
  "columns": {
    "prompt": "question",
    "chosen": "chosen",
    "rejected": "rejected",
    "system": "system"
  }
},
"novel17_train": {
  "hf_hub_url": "Nekochu/novel17_train_alpaca_format",
  "formatting": "alpaca"
},
"Acquiesce_french_vigogne": {
  "file_name": "Acquiesce_french_vigogne.json",
  "formatting": "alpaca",
  "columns": {
    "prompt": "instruction",
    "query": "input",
    "response": "output",
    "system": "system",
    "history": "history"
  }
},
"french-raw-pt": {
  "file_name": "french-raw-pt.json",
  "columns": {
    "prompt": "text"
  }
},
```
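
The `french_orca_rlhf-revised` entry maps LLaMA-Factory's preference columns onto the dataset's `question`/`chosen`/`rejected`/`system` fields, so each row is expected to look roughly like this (placeholder values, not an actual row from jpacifico/french-orca-dpo-pairs-revised):

```json
{
  "system": "Tu es un assistant francophone serviable.",
  "question": "Explique la photosynthèse en une phrase.",
  "chosen": "La photosynthèse est le processus par lequel les plantes transforment la lumière du soleil en énergie chimique.",
  "rejected": "Je ne sais pas."
}
```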
</details>

Datasets converted to Alpaca format: [Acquiesce_french_vigogne](https://huggingface.co/datasets/Nekochu/Luminia-mixture/tree/split-v2/General/French), french-raw-pt
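
The converted files follow the Alpaca layout declared in `dataset_info.json` above: `Acquiesce_french_vigogne.json` rows use `instruction`/`input`/`output` with optional `system` and `history` (a list of earlier [instruction, response] turns), while `french-raw-pt.json` rows are plain text under a single `text` key. A sketch of one converted row, with placeholder values rather than real data:

```json
{
  "instruction": "Corrige la phrase suivante.",
  "input": "Je veux allé au marché.",
  "output": "Je veux aller au marché.",
  "system": "Tu es un correcteur de français.",
  "history": [
    ["Bonjour, peux-tu m'aider ?", "Bonjour ! Bien sûr, que puis-je corriger ?"]
  ]
}
```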
</details>