# Policy model and PPO optimization hyperparameters
model:
  name: meta-llama/Llama-3.2-1B
  learning_rate: 1.2e-05
  ppo_epochs: 4
  init_kl_coef: 0.3
  target: 3                     # KL target for the adaptive KL controller
  cliprange: 0.2
  cliprange_value: 0.3
  vf_coef: 0.15
  adap_kl_ctrl: true
  use_score_norm: true
  ratio_threshold: 10.0
  batch_size: 64
  mini_batch_size: 8
  forward_batch_size: 2
  gradient_accumulation_steps: 8
  reward_model: s-nlp/roberta_toxicity_classifier  # toxicity classifier used as reward model
  use_raw_logits: true

# Rollout sampling settings
generation:
  min_length: 5
  max_new_tokens: 64
  output_min_length: 15
  output_max_length: 20
  do_sample: true
  top_k: 0.0
  top_p: 0.85

now: 2025-09-22_18-35-27        # run timestamp

# Training schedule
training:
  num_train_epochs: 100
  save_freq: 20
  eval_freq: 20
  seed: 42
  fast_start: true

# Prompt dataset: RealToxicityPrompts, filtered on the profanity score
dataset:
  name: allenai/real-toxicity-prompts
  toxicity_threshold: 0.8
  filter_metric: profanity
  input_min_text_length: 15
  input_max_text_length: 20
  test_size: 0.1
  original_dataset_path: null
  detoxified_dataset_path: null

# Hugging Face Hub publishing
output:
  push_to_hub: true
  push_checkpoints_to_hub: true
  checkpoint_push_freq: 20
  organization: null
  repository_name: llama-3-2-1b-detox_v1f_SCALE8_round3
  private: false

# Weights & Biases logging
wandb:
  project: irl_llms
  entity: null
  name: Llama-3.2-1B-2025-09-22_18-35-27

# IRL reward: round-3 posterior over reward parameters theta
irl:
  posterior_dir: re_irl_min_stratified_plots/round_3
  global_norm_dir: re_irl_min_stratified_plots
  base_model_name: null
  use_round: 3
  sample_theta_each_step: true  # resample theta from the posterior at each step
  n_samples: 100
  feature_max_length: 256
  feature_batch_size: 16
  use_platt: false              # Platt scaling of the IRL reward (disabled here)
  platt_a: 1.0
  platt_b: 0.0
  features_on_cpu: false
  reward_scale: 8               # reward scaling factor
  reward_clip: 4                # reward clipping bound
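
# A minimal sketch of how the model block might map onto a TRL-style PPO setup;
# the loader, the filename, and the field mapping below are illustrative
# assumptions, not confirmed by this file:
#
#   from omegaconf import OmegaConf
#   from trl import PPOConfig
#
#   cfg = OmegaConf.load("config.yaml")   # hypothetical filename
#   ppo_config = PPOConfig(
#       model_name=cfg.model.name,
#       learning_rate=cfg.model.learning_rate,
#       ppo_epochs=cfg.model.ppo_epochs,
#       init_kl_coef=cfg.model.init_kl_coef,
#       target=cfg.model.target,
#       cliprange=cfg.model.cliprange,
#       cliprange_value=cfg.model.cliprange_value,
#       vf_coef=cfg.model.vf_coef,
#       adap_kl_ctrl=cfg.model.adap_kl_ctrl,
#       use_score_norm=cfg.model.use_score_norm,
#       ratio_threshold=cfg.model.ratio_threshold,
#       batch_size=cfg.model.batch_size,
#       mini_batch_size=cfg.model.mini_batch_size,
#       gradient_accumulation_steps=cfg.model.gradient_accumulation_steps,
#       seed=cfg.training.seed,
#   )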