---
# NOTE(review): this file was recovered from a pipe-garbled extraction
# (every line carried a trailing " |" and blank "|" separator lines,
# flattening all nesting). Section membership below is reconstructed
# from key semantics -- confirm against the original config.

# Policy model + PPO optimizer settings (trl PPOConfig-style keys).
model:
  name: meta-llama/Llama-3.2-1B
  learning_rate: 1.2e-05
  ppo_epochs: 4
  init_kl_coef: 0.3
  target: 3
  cliprange: 0.2
  cliprange_value: 0.3
  vf_coef: 0.15
  adap_kl_ctrl: true
  use_score_norm: true
  ratio_threshold: 10.0
  batch_size: 64
  mini_batch_size: 8
  forward_batch_size: 2
  gradient_accumulation_steps: 8
  reward_model: s-nlp/roberta_toxicity_classifier
  use_raw_logits: true

# Sampling / generation settings.
generation:
  min_length: 5
  max_new_tokens: 64
  output_min_length: 15
  output_max_length: 20
  do_sample: true
  top_k: 0.0
  top_p: 0.85

# Run timestamp -- quoted so no YAML parser tries to type it.
now: '2025-09-22_18-35-27'

# Training-loop settings.
training:
  num_train_epochs: 100
  save_freq: 20
  eval_freq: 20
  seed: 42
  fast_start: true

# Prompt dataset settings.
dataset:
  name: allenai/real-toxicity-prompts
  toxicity_threshold: 0.8
  filter_metric: profanity
  input_min_text_length: 15
  input_max_text_length: 20
  test_size: 0.1
  original_dataset_path: null
  detoxified_dataset_path: null

# Hub / artifact publishing settings.
output:
  push_to_hub: true
  push_checkpoints_to_hub: true
  checkpoint_push_freq: 20
  organization: null
  repository_name: llama-3-2-1b-detox_v1f_SCALE8_round3
  private: false

# Weights & Biases logging.
wandb:
  project: irl_llms
  entity: null
  name: Llama-3.2-1B-2025-09-22_18-35-27

# IRL reward settings.
irl:
  posterior_dir: re_irl_min_stratified_plots/round_3
  global_norm_dir: re_irl_min_stratified_plots
  base_model_name: null
  use_round: 3
  sample_theta_each_step: true
  n_samples: 100
  feature_max_length: 256
  feature_batch_size: 16
  use_platt: false
  platt_a: 1.0
  platt_b: 0.0
  features_on_cpu: false
  reward_scale: 8
  reward_clip: 4