data:
  dataset_name: valid_consolidated
  from_hub: false
  from_langfuse: true
  input_file: input/executed/executed_filtered_valid_consolidated.jsonl
  num_proc: 2
  seed: 3407
  split: train
  test_size: 0.2
logging:
  file: finetune.log
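# Base model and adapter settings. The model is loaded in 4-bit and fine-tuned with
# LoRA adapters on the attention (q/k/v/o) and MLP (gate/up/down) projections;
# with r = 16 and alpha = 16 the LoRA scaling factor alpha/r is 1.
# gradient_checkpointing: unsloth selects Unsloth's memory-efficient checkpointing.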
model:
  adapter: null
  dtype: null
  load_in_4bit: true
  lora:
    alpha: 16
    bias: none
    dropout: 0
    gradient_checkpointing: unsloth
    loftq_config: null
    r: 16
    random_state: 3407
    target_modules:
      - q_proj
      - k_proj
      - v_proj
      - o_proj
      - gate_proj
      - up_proj
      - down_proj
    use_rslora: false
  max_seq_length: 2048
  name: Qwen/Qwen2.5-14B-Instruct
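# Export settings. The trained adapter is written to local_dir and pushed to a
# private Hub repo; q4_k_m, q8_0 and q5_k_m are llama.cpp GGUF quantization presets.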
save:
  gguf_quantization:
    - q4_k_m
    - q8_0
    - q5_k_m
  hub_repo: joaormedeiros/qwen-2.5-recursive-v1
  local_dir: lora_model
  private: true
  push_to_hub: true
  save_mode: all_training_files
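# Post-training smoke test: generate a continuation for the instruction/input below.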
test_inference:
  enabled: true
  input: 1, 1, 2, 3, 5, 8
  instruction: Continue the Fibonacci sequence.
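# Trainer hyperparameters. The effective batch size per optimizer step is
# train batch (4) x gradient_accumulation_steps (4) = 16 examples, assuming a
# single device. With warmup_steps: 1 there is effectively no warmup, so the
# learning rate starts at 2e-4 and decays linearly to 0 over the single epoch;
# adamw_8bit is the 8-bit AdamW optimizer provided by bitsandbytes.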
training:
  batch_size:
    eval: 2
    train: 4
  eval_steps: 100
  gradient_accumulation_steps: 4
  learning_rate: 0.0002
  logging_steps: 1
  lr_scheduler: linear
  num_epochs: 1
  optimizer: adamw_8bit
  output_dir: outputs
  report_to: tensorboard
  seed: 3407
  warmup_steps: 1
  weight_decay: 0.01