model:
  name: EleutherAI/gpt-neo-125M
  reward_type: irl
  reward_model: matthieubou-imperial-college-london/bayes_irl_vi/posterior_bradley_terry_rkiq5pd8:v0
  base_model_name: EleutherAI/pythia-410m
  likelihood_type: bradley_terry
  use_raw_irl_score: true
  irl_normalization_strategy: none
  irl_reward_scale: 1.0
  n_posterior_samples: 100
  use_distance_sampling: false
  learning_rate: 2.0e-06
  batch_size: 128
  mini_batch_size: 8
  gradient_accumulation_steps: 16
  max_sequence_length: 512

generation:
  min_length: -1
  top_k: 0
  top_p: 0.8
  temperature: 0.7
  do_sample: true
  output_min_length: 20
  output_max_length: 30

rlhf:
  model:
    ppo_epochs: 3
    init_kl_coef: 0.2
    target: 1.0
    cliprange: 0.1
    cliprange_value: 0.2
    vf_coef: 0.1
    adap_kl_ctrl: true
    use_score_norm: true
    ratio_threshold: 10.0

training:
  num_train_epochs: 80
  save_freq: 2
  eval_freq: 2
  seed: 0

dataset:
  name: allenai/real-toxicity-prompts
  toxicity_metric: profanity
  toxicity_threshold: 0.7
  input_min_text_length: 10
  input_max_text_length: 100
  test_size: 0.1
  num_samples: 20000

output:
  push_to_hub: true
  push_checkpoints_to_hub: true
  push_final_model_to_hub: true
  hub_org: MattBou00
  repo_name_prefix: rlhf-checkpoint
  private: false

wandb:
  project: irl-rlhf-detox
  entity: matthieubou-imperial-college-london
  name: null

now: 2025-08-02_23-53-31

logging:
  use_wandb: true
  project_name: irl-rlhf-detox
  wandb_mode: online
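
The keys under rlhf.model and model match the hyperparameter names of trl's PPO implementation, so the sketch below shows one plausible way this file could be consumed. It assumes an older trl PPOConfig API (pre-0.12), a hypothetical config.yaml path, and the reconstructed nesting above; none of this wiring is stated in the dump itself.

import yaml
from trl import PPOConfig

# Assumption: the config above is saved as config.yaml; the nesting mirrors
# the reconstructed structure and is not confirmed by the original dump.
with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

ppo_cfg = cfg["rlhf"]["model"]
ppo_config = PPOConfig(
    model_name=cfg["model"]["name"],                   # EleutherAI/gpt-neo-125M
    learning_rate=cfg["model"]["learning_rate"],       # 2.0e-06
    batch_size=cfg["model"]["batch_size"],             # 128
    mini_batch_size=cfg["model"]["mini_batch_size"],   # 8
    gradient_accumulation_steps=cfg["model"]["gradient_accumulation_steps"],  # 16
    ppo_epochs=ppo_cfg["ppo_epochs"],                  # 3
    init_kl_coef=ppo_cfg["init_kl_coef"],              # 0.2
    adap_kl_ctrl=ppo_cfg["adap_kl_ctrl"],              # adaptive KL controller on
    target=ppo_cfg["target"],                          # target KL of 1.0
    cliprange=ppo_cfg["cliprange"],                    # 0.1
    cliprange_value=ppo_cfg["cliprange_value"],        # 0.2
    vf_coef=ppo_cfg["vf_coef"],                        # 0.1
    use_score_norm=ppo_cfg["use_score_norm"],          # normalize reward scores
    ratio_threshold=ppo_cfg["ratio_threshold"],        # skip batches above this ratio
    seed=cfg["training"]["seed"],                      # 0
)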