# Policy model and PPO optimization hyperparameters
model:
  name: meta-llama/Llama-3.2-1B
  learning_rate: 1.2e-05
  ppo_epochs: 4
  init_kl_coef: 0.3
  target: 3                     # KL target for the adaptive KL controller
  cliprange: 0.2
  cliprange_value: 0.3
  vf_coef: 0.15
  adap_kl_ctrl: true
  use_score_norm: true
  ratio_threshold: 10.0
  batch_size: 64
  mini_batch_size: 8
  forward_batch_size: 2
  gradient_accumulation_steps: 8
  reward_model: s-nlp/roberta_toxicity_classifier  # toxicity classifier used as reward model
  use_raw_logits: true

# Rollout sampling settings
generation:
  min_length: 5
  max_new_tokens: 64
  output_min_length: 15
  output_max_length: 20
  do_sample: true
  top_k: 0.0
  top_p: 0.85

now: 2025-09-22_18-35-27        # run timestamp

# Training schedule
training:
  num_train_epochs: 100
  save_freq: 20
  eval_freq: 20
  seed: 42
  fast_start: true

# Prompt dataset: RealToxicityPrompts, filtered on the profanity score
dataset:
  name: allenai/real-toxicity-prompts
  toxicity_threshold: 0.8
  filter_metric: profanity
  input_min_text_length: 15
  input_max_text_length: 20
  test_size: 0.1
  original_dataset_path: null
  detoxified_dataset_path: null

# Hugging Face Hub publishing
output:
  push_to_hub: true
  push_checkpoints_to_hub: true
  checkpoint_push_freq: 20
  organization: null
  repository_name: llama-3-2-1b-detox_v1f_SCALE8_round3
  private: false

# Weights & Biases logging
wandb:
  project: irl_llms
  entity: null
  name: Llama-3.2-1B-2025-09-22_18-35-27

# IRL reward: round-3 posterior over reward parameters theta
irl:
  posterior_dir: re_irl_min_stratified_plots/round_3
  global_norm_dir: re_irl_min_stratified_plots
  base_model_name: null
  use_round: 3
  sample_theta_each_step: true  # resample theta from the posterior at each step
  n_samples: 100
  feature_max_length: 256
  feature_batch_size: 16
  use_platt: false              # Platt scaling of the IRL reward (disabled here)
  platt_a: 1.0
  platt_b: 0.0
  features_on_cpu: false
  reward_scale: 8               # reward scaling factor
  reward_clip: 4                # reward clipping bound
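
# A minimal sketch of how the model block might map onto a TRL-style PPO setup;
# the loader, the filename, and the field mapping below are illustrative
# assumptions, not confirmed by this file:
#
#   from omegaconf import OmegaConf
#   from trl import PPOConfig
#
#   cfg = OmegaConf.load("config.yaml")   # hypothetical filename
#   ppo_config = PPOConfig(
#       model_name=cfg.model.name,
#       learning_rate=cfg.model.learning_rate,
#       ppo_epochs=cfg.model.ppo_epochs,
#       init_kl_coef=cfg.model.init_kl_coef,
#       target=cfg.model.target,
#       cliprange=cfg.model.cliprange,
#       cliprange_value=cfg.model.cliprange_value,
#       vf_coef=cfg.model.vf_coef,
#       adap_kl_ctrl=cfg.model.adap_kl_ctrl,
#       use_score_norm=cfg.model.use_score_norm,
#       ratio_threshold=cfg.model.ratio_threshold,
#       batch_size=cfg.model.batch_size,
#       mini_batch_size=cfg.model.mini_batch_size,
#       gradient_accumulation_steps=cfg.model.gradient_accumulation_steps,
#       seed=cfg.training.seed,
#   )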