|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9921671018276762, |
|
"eval_steps": 500, |
|
"global_step": 95, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 6.531418223090284, |
|
"learning_rate": 1e-07, |
|
"logits/chosen": -2.851747512817383, |
|
"logits/rejected": -2.833996534347534, |
|
"logps/chosen": -165.70089721679688, |
|
"logps/rejected": -198.857666015625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.924500505195678, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -2.7712948322296143, |
|
"logits/rejected": -2.7740774154663086, |
|
"logps/chosen": -171.32467651367188, |
|
"logps/rejected": -172.57489013671875, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.0003833131631836295, |
|
"rewards/margins": -8.183407771866769e-06, |
|
"rewards/rejected": 0.00039149660733528435, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 7.115083584877395, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -2.787468433380127, |
|
"logits/rejected": -2.796309471130371, |
|
"logps/chosen": -188.18690490722656, |
|
"logps/rejected": -193.19281005859375, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.021850628778338432, |
|
"rewards/margins": 0.00575407687574625, |
|
"rewards/rejected": 0.016096554696559906, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 7.576406474777614, |
|
"learning_rate": 9.91486549841951e-07, |
|
"logits/chosen": -2.84570574760437, |
|
"logits/rejected": -2.855926513671875, |
|
"logps/chosen": -193.6645050048828, |
|
"logps/rejected": -185.57162475585938, |
|
"loss": 0.681, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.06298746168613434, |
|
"rewards/margins": 0.029651161283254623, |
|
"rewards/rejected": 0.03333630412817001, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 9.329171862378226, |
|
"learning_rate": 9.66236114702178e-07, |
|
"logits/chosen": -2.831686496734619, |
|
"logits/rejected": -2.8417012691497803, |
|
"logps/chosen": -170.6667938232422, |
|
"logps/rejected": -189.2131805419922, |
|
"loss": 0.6564, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.019239652901887894, |
|
"rewards/margins": 0.0761820375919342, |
|
"rewards/rejected": -0.0954216942191124, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 11.230824794807015, |
|
"learning_rate": 9.251085678648071e-07, |
|
"logits/chosen": -2.7868192195892334, |
|
"logits/rejected": -2.7850959300994873, |
|
"logps/chosen": -136.22763061523438, |
|
"logps/rejected": -169.01756286621094, |
|
"loss": 0.6363, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.04755619913339615, |
|
"rewards/margins": 0.1481182873249054, |
|
"rewards/rejected": -0.19567449390888214, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 10.213782257588267, |
|
"learning_rate": 8.695044586103295e-07, |
|
"logits/chosen": -2.74267840385437, |
|
"logits/rejected": -2.7413182258605957, |
|
"logps/chosen": -163.45603942871094, |
|
"logps/rejected": -190.14236450195312, |
|
"loss": 0.6341, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.039765361696481705, |
|
"rewards/margins": 0.20882920920848846, |
|
"rewards/rejected": -0.24859456717967987, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 9.724737450156574, |
|
"learning_rate": 8.013173181896282e-07, |
|
"logits/chosen": -2.7809696197509766, |
|
"logits/rejected": -2.774864435195923, |
|
"logps/chosen": -203.69174194335938, |
|
"logps/rejected": -228.673828125, |
|
"loss": 0.623, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.12420445680618286, |
|
"rewards/margins": 0.1601138412952423, |
|
"rewards/rejected": -0.28431832790374756, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 12.552857274756276, |
|
"learning_rate": 7.228691778882692e-07, |
|
"logits/chosen": -2.6985127925872803, |
|
"logits/rejected": -2.7104218006134033, |
|
"logps/chosen": -158.4896240234375, |
|
"logps/rejected": -199.87753295898438, |
|
"loss": 0.6049, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.25630590319633484, |
|
"rewards/margins": 0.26447391510009766, |
|
"rewards/rejected": -0.5207797884941101, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 12.147016576418242, |
|
"learning_rate": 6.368314950360415e-07, |
|
"logits/chosen": -2.6584348678588867, |
|
"logits/rejected": -2.6736438274383545, |
|
"logps/chosen": -232.7853240966797, |
|
"logps/rejected": -262.41015625, |
|
"loss": 0.6157, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4413759112358093, |
|
"rewards/margins": 0.3659912645816803, |
|
"rewards/rejected": -0.807367205619812, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 13.949220920878545, |
|
"learning_rate": 5.46134179731651e-07, |
|
"logits/chosen": -2.61830735206604, |
|
"logits/rejected": -2.6202476024627686, |
|
"logps/chosen": -231.3226776123047, |
|
"logps/rejected": -291.72406005859375, |
|
"loss": 0.588, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.5699625015258789, |
|
"rewards/margins": 0.44822096824645996, |
|
"rewards/rejected": -1.0181834697723389, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 15.029123695933512, |
|
"learning_rate": 4.5386582026834904e-07, |
|
"logits/chosen": -2.4635961055755615, |
|
"logits/rejected": -2.4716153144836426, |
|
"logps/chosen": -202.64266967773438, |
|
"logps/rejected": -248.4220733642578, |
|
"loss": 0.5839, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.5452982783317566, |
|
"rewards/margins": 0.4111364483833313, |
|
"rewards/rejected": -0.9564347267150879, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 17.205678814880788, |
|
"learning_rate": 3.6316850496395855e-07, |
|
"logits/chosen": -2.5053164958953857, |
|
"logits/rejected": -2.5017011165618896, |
|
"logps/chosen": -228.46469116210938, |
|
"logps/rejected": -310.7025451660156, |
|
"loss": 0.5648, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5650664567947388, |
|
"rewards/margins": 0.5835850238800049, |
|
"rewards/rejected": -1.148651361465454, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 19.89901816550521, |
|
"learning_rate": 2.771308221117309e-07, |
|
"logits/chosen": -2.4981369972229004, |
|
"logits/rejected": -2.5108680725097656, |
|
"logps/chosen": -236.54653930664062, |
|
"logps/rejected": -310.23162841796875, |
|
"loss": 0.563, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.7446938753128052, |
|
"rewards/margins": 0.6594551801681519, |
|
"rewards/rejected": -1.4041489362716675, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 23.0785172466263, |
|
"learning_rate": 1.9868268181037184e-07, |
|
"logits/chosen": -2.4181766510009766, |
|
"logits/rejected": -2.4266486167907715, |
|
"logps/chosen": -264.23138427734375, |
|
"logps/rejected": -309.7364807128906, |
|
"loss": 0.5533, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8709458112716675, |
|
"rewards/margins": 0.48874396085739136, |
|
"rewards/rejected": -1.3596898317337036, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 20.36959281359639, |
|
"learning_rate": 1.304955413896705e-07, |
|
"logits/chosen": -2.3758950233459473, |
|
"logits/rejected": -2.3715245723724365, |
|
"logps/chosen": -251.191650390625, |
|
"logps/rejected": -353.01251220703125, |
|
"loss": 0.5328, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.7933205366134644, |
|
"rewards/margins": 0.9006916880607605, |
|
"rewards/rejected": -1.6940120458602905, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 32.00854391073857, |
|
"learning_rate": 7.4891432135193e-08, |
|
"logits/chosen": -2.346649646759033, |
|
"logits/rejected": -2.3456640243530273, |
|
"logps/chosen": -242.99374389648438, |
|
"logps/rejected": -310.0765075683594, |
|
"loss": 0.5629, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7500452995300293, |
|
"rewards/margins": 0.5974142551422119, |
|
"rewards/rejected": -1.3474594354629517, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 24.468901281465246, |
|
"learning_rate": 3.376388529782215e-08, |
|
"logits/chosen": -2.396498918533325, |
|
"logits/rejected": -2.398857593536377, |
|
"logps/chosen": -232.2967987060547, |
|
"logps/rejected": -311.627197265625, |
|
"loss": 0.5814, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7211315035820007, |
|
"rewards/margins": 0.70616614818573, |
|
"rewards/rejected": -1.427297830581665, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 21.76700663596853, |
|
"learning_rate": 8.513450158049106e-09, |
|
"logits/chosen": -2.337101459503174, |
|
"logits/rejected": -2.347583770751953, |
|
"logps/chosen": -246.55062866210938, |
|
"logps/rejected": -331.9063415527344, |
|
"loss": 0.5556, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.7761114835739136, |
|
"rewards/margins": 0.7871293425559998, |
|
"rewards/rejected": -1.5632407665252686, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 19.722062441035273, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -2.403937339782715, |
|
"logits/rejected": -2.4172253608703613, |
|
"logps/chosen": -263.65972900390625, |
|
"logps/rejected": -332.18292236328125, |
|
"loss": 0.5584, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7805107831954956, |
|
"rewards/margins": 0.713537335395813, |
|
"rewards/rejected": -1.4940482378005981, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"step": 95, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6041482373287803, |
|
"train_runtime": 2420.5057, |
|
"train_samples_per_second": 5.051, |
|
"train_steps_per_second": 0.039 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 95, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|