flozi00 commited on
Commit
a690550
·
verified ·
1 Parent(s): 194fa9a

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. config.json +1 -1
  2. model.safetensors +1 -1
  3. trainer_state.json +284 -624
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "VAGOsolutions/SauerkrautLM-1.5b",
3
  "architectures": [
4
  "Qwen2ForCausalLM"
5
  ],
 
1
  {
2
+ "_name_or_path": "../educlassifier/checkpoint-400",
3
  "architectures": [
4
  "Qwen2ForCausalLM"
5
  ],
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:308b0909c8b941e293cffef3c22bb8ffbb5a9f307ddc235f9370c9ffaa10de45
3
  size 3087467144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be9bdf8f55eb810bab80529acd87e4b1c9c9d3474842f45b1dee3b1a49014da7
3
  size 3087467144
trainer_state.json CHANGED
@@ -1,696 +1,356 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5712596162965194,
5
  "eval_steps": 5000,
6
- "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.014281490407412986,
13
- "grad_norm": 49.5,
14
- "learning_rate": 4.285714285714285e-05,
15
- "logits/chosen": -2.928096294403076,
16
- "logits/rejected": -2.9280941486358643,
17
- "logps/chosen": -3.81945538520813,
18
- "logps/rejected": -4.2658491134643555,
19
- "loss": 3.8875,
20
- "odds_ratio_loss": 7.880957126617432,
21
- "rewards/accuracies": 0.641796886920929,
22
- "rewards/chosen": -0.38194555044174194,
23
- "rewards/margins": 0.044639457017183304,
24
- "rewards/rejected": -0.42658501863479614,
25
- "sft_loss": 3.099365711212158,
26
  "step": 10
27
  },
28
  {
29
- "epoch": 0.028562980814825972,
30
- "grad_norm": 9.4375,
31
- "learning_rate": 8.57142857142857e-05,
32
- "logits/chosen": -3.5747146606445312,
33
- "logits/rejected": -3.5747463703155518,
34
- "logps/chosen": -1.0031383037567139,
35
- "logps/rejected": -1.795668601989746,
36
- "loss": 1.0575,
37
- "odds_ratio_loss": 3.653076171875,
38
- "rewards/accuracies": 0.7490234375,
39
- "rewards/chosen": -0.10031384229660034,
40
- "rewards/margins": 0.07925303280353546,
41
- "rewards/rejected": -0.1795668601989746,
42
- "sft_loss": 0.6921551823616028,
43
  "step": 20
44
  },
45
  {
46
- "epoch": 0.04284447122223896,
47
- "grad_norm": 6.375,
48
- "learning_rate": 0.00012857142857142855,
49
- "logits/chosen": -4.056135177612305,
50
- "logits/rejected": -4.05615234375,
51
- "logps/chosen": -0.7499507665634155,
52
- "logps/rejected": -1.9470783472061157,
53
- "loss": 0.7945,
54
- "odds_ratio_loss": 2.919811248779297,
55
- "rewards/accuracies": 0.791210949420929,
56
- "rewards/chosen": -0.07499508559703827,
57
- "rewards/margins": 0.11971275508403778,
58
- "rewards/rejected": -0.19470782577991486,
59
- "sft_loss": 0.5025397539138794,
60
  "step": 30
61
  },
62
  {
63
- "epoch": 0.057125961629651945,
64
- "grad_norm": 6.4375,
65
- "learning_rate": 0.0001714285714285714,
66
- "logits/chosen": -3.9972000122070312,
67
- "logits/rejected": -3.997206211090088,
68
- "logps/chosen": -0.6447885632514954,
69
- "logps/rejected": -2.2042269706726074,
70
- "loss": 0.6819,
71
- "odds_ratio_loss": 2.3861823081970215,
72
- "rewards/accuracies": 0.8414062261581421,
73
- "rewards/chosen": -0.06447885185480118,
74
- "rewards/margins": 0.1559438556432724,
75
- "rewards/rejected": -0.22042270004749298,
76
- "sft_loss": 0.4432622492313385,
77
  "step": 40
78
  },
79
  {
80
- "epoch": 0.07140745203706493,
81
- "grad_norm": 2.53125,
82
- "learning_rate": 0.00021428571428571427,
83
- "logits/chosen": -3.6315770149230957,
84
- "logits/rejected": -3.631592273712158,
85
- "logps/chosen": -0.5242542028427124,
86
- "logps/rejected": -2.490429401397705,
87
- "loss": 0.5524,
88
- "odds_ratio_loss": 1.806905746459961,
89
- "rewards/accuracies": 0.887890636920929,
90
- "rewards/chosen": -0.05242542549967766,
91
- "rewards/margins": 0.1966175138950348,
92
- "rewards/rejected": -0.24904294312000275,
93
- "sft_loss": 0.37168318033218384,
94
  "step": 50
95
  },
96
  {
97
- "epoch": 0.08568894244447792,
98
- "grad_norm": 242.0,
99
- "learning_rate": 0.0002571428571428571,
100
- "logits/chosen": -3.232466459274292,
101
- "logits/rejected": -3.2325031757354736,
102
- "logps/chosen": -0.8311947584152222,
103
- "logps/rejected": -2.9961485862731934,
104
- "loss": 0.8575,
105
- "odds_ratio_loss": 3.2764008045196533,
106
- "rewards/accuracies": 0.8935546875,
107
- "rewards/chosen": -0.0831194818019867,
108
- "rewards/margins": 0.21649539470672607,
109
- "rewards/rejected": -0.2996148467063904,
110
- "sft_loss": 0.5298588275909424,
111
  "step": 60
112
  },
113
  {
114
- "epoch": 0.0999704328518909,
115
- "grad_norm": 5.65625,
116
- "learning_rate": 0.0003,
117
- "logits/chosen": -2.657118558883667,
118
- "logits/rejected": -2.6572413444519043,
119
- "logps/chosen": -0.6468337774276733,
120
- "logps/rejected": -2.5454134941101074,
121
- "loss": 0.6815,
122
- "odds_ratio_loss": 2.252271890640259,
123
- "rewards/accuracies": 0.869921863079071,
124
- "rewards/chosen": -0.06468339264392853,
125
- "rewards/margins": 0.18985795974731445,
126
- "rewards/rejected": -0.2545413374900818,
127
- "sft_loss": 0.4562531113624573,
128
  "step": 70
129
  },
130
  {
131
- "epoch": 0.11425192325930389,
132
- "grad_norm": 4.25,
133
- "learning_rate": 0.0002998135381828383,
134
- "logits/chosen": -2.8170955181121826,
135
- "logits/rejected": -2.8171167373657227,
136
- "logps/chosen": -0.4997388422489166,
137
- "logps/rejected": -2.737879514694214,
138
- "loss": 0.5264,
139
- "odds_ratio_loss": 1.6635347604751587,
140
- "rewards/accuracies": 0.8994140625,
141
- "rewards/chosen": -0.04997389018535614,
142
- "rewards/margins": 0.22381405532360077,
143
- "rewards/rejected": -0.2737879753112793,
144
- "sft_loss": 0.3600570261478424,
145
  "step": 80
146
  },
147
  {
148
- "epoch": 0.12853341366671686,
149
- "grad_norm": 3.671875,
150
- "learning_rate": 0.0002992546163048102,
151
- "logits/chosen": -3.062329053878784,
152
- "logits/rejected": -3.0623464584350586,
153
- "logps/chosen": -0.4833168089389801,
154
- "logps/rejected": -2.676713466644287,
155
- "loss": 0.5082,
156
- "odds_ratio_loss": 1.625109314918518,
157
- "rewards/accuracies": 0.9037109613418579,
158
- "rewards/chosen": -0.048331670463085175,
159
- "rewards/margins": 0.21933968365192413,
160
- "rewards/rejected": -0.2676713764667511,
161
- "sft_loss": 0.3456498384475708,
162
  "step": 90
163
  },
164
  {
165
- "epoch": 0.14281490407412986,
166
- "grad_norm": 3.828125,
167
- "learning_rate": 0.0002983246239337692,
168
- "logits/chosen": -2.9990651607513428,
169
- "logits/rejected": -2.9990792274475098,
170
- "logps/chosen": -0.45154619216918945,
171
- "logps/rejected": -2.796322822570801,
172
- "loss": 0.4752,
173
- "odds_ratio_loss": 1.524524450302124,
174
- "rewards/accuracies": 0.907421886920929,
175
- "rewards/chosen": -0.04515461623668671,
176
- "rewards/margins": 0.2344777137041092,
177
- "rewards/rejected": -0.2796323001384735,
178
- "sft_loss": 0.3227214813232422,
179
  "step": 100
180
  },
181
  {
182
- "epoch": 0.15709639448154283,
183
- "grad_norm": 4.625,
184
- "learning_rate": 0.00029702587317728153,
185
- "logits/chosen": -3.0073421001434326,
186
- "logits/rejected": -3.0073623657226562,
187
- "logps/chosen": -0.4845556318759918,
188
- "logps/rejected": -2.689493417739868,
189
- "loss": 0.5115,
190
- "odds_ratio_loss": 1.6119966506958008,
191
- "rewards/accuracies": 0.8880859613418579,
192
- "rewards/chosen": -0.04845556616783142,
193
- "rewards/margins": 0.22049376368522644,
194
- "rewards/rejected": -0.26894932985305786,
195
- "sft_loss": 0.3503072261810303,
196
  "step": 110
197
  },
198
  {
199
- "epoch": 0.17137788488895583,
200
- "grad_norm": 4.25,
201
- "learning_rate": 0.00029536159293436166,
202
- "logits/chosen": -3.0959103107452393,
203
- "logits/rejected": -3.095935583114624,
204
- "logps/chosen": -0.4538491368293762,
205
- "logps/rejected": -2.8936073780059814,
206
- "loss": 0.4767,
207
- "odds_ratio_loss": 1.5220377445220947,
208
- "rewards/accuracies": 0.9091796875,
209
- "rewards/chosen": -0.04538491368293762,
210
- "rewards/margins": 0.24397583305835724,
211
- "rewards/rejected": -0.28936073184013367,
212
- "sft_loss": 0.32447534799575806,
213
  "step": 120
214
  },
215
  {
216
- "epoch": 0.1856593752963688,
217
- "grad_norm": 3.015625,
218
- "learning_rate": 0.00029333592086792107,
219
- "logits/chosen": -3.2102882862091064,
220
- "logits/rejected": -3.210312604904175,
221
- "logps/chosen": -0.4560007154941559,
222
- "logps/rejected": -2.9516916275024414,
223
- "loss": 0.4775,
224
- "odds_ratio_loss": 1.4568021297454834,
225
- "rewards/accuracies": 0.9140625,
226
- "rewards/chosen": -0.04560007154941559,
227
- "rewards/margins": 0.2495690882205963,
228
- "rewards/rejected": -0.2951691746711731,
229
- "sft_loss": 0.33186882734298706,
230
  "step": 130
231
  },
232
  {
233
- "epoch": 0.1999408657037818,
234
- "grad_norm": 4.625,
235
- "learning_rate": 0.0002909538931178862,
236
- "logits/chosen": -3.1817660331726074,
237
- "logits/rejected": -3.181790828704834,
238
- "logps/chosen": -0.4694591164588928,
239
- "logps/rejected": -2.694068193435669,
240
- "loss": 0.4932,
241
- "odds_ratio_loss": 1.5894033908843994,
242
- "rewards/accuracies": 0.903515636920929,
243
- "rewards/chosen": -0.046945907175540924,
244
- "rewards/margins": 0.22246094048023224,
245
- "rewards/rejected": -0.26940685510635376,
246
- "sft_loss": 0.3343026041984558,
247
  "step": 140
248
  },
249
  {
250
- "epoch": 0.21422235611119478,
251
- "grad_norm": 2.109375,
252
- "learning_rate": 0.00028822143178056114,
253
- "logits/chosen": -3.19804310798645,
254
- "logits/rejected": -3.1980957984924316,
255
- "logps/chosen": -0.4429679811000824,
256
- "logps/rejected": -2.8848023414611816,
257
- "loss": 0.4646,
258
- "odds_ratio_loss": 1.4565680027008057,
259
- "rewards/accuracies": 0.9175781011581421,
260
- "rewards/chosen": -0.04429679363965988,
261
- "rewards/margins": 0.24418342113494873,
262
- "rewards/rejected": -0.2884802222251892,
263
- "sft_loss": 0.31895238161087036,
264
  "step": 150
265
  },
266
  {
267
- "epoch": 0.22850384651860778,
268
- "grad_norm": 2.53125,
269
- "learning_rate": 0.0002851453301853628,
270
- "logits/chosen": -3.1286864280700684,
271
- "logits/rejected": -3.1287217140197754,
272
- "logps/chosen": -0.4620634913444519,
273
- "logps/rejected": -2.8395490646362305,
274
- "loss": 0.4861,
275
- "odds_ratio_loss": 1.5178568363189697,
276
- "rewards/accuracies": 0.8980468511581421,
277
- "rewards/chosen": -0.04620635136961937,
278
- "rewards/margins": 0.23774857819080353,
279
- "rewards/rejected": -0.283954918384552,
280
- "sft_loss": 0.33431634306907654,
281
  "step": 160
282
  },
283
  {
284
- "epoch": 0.24278533692602075,
285
- "grad_norm": 3.8125,
286
- "learning_rate": 0.0002817332360055343,
287
- "logits/chosen": -3.0237438678741455,
288
- "logits/rejected": -3.0237746238708496,
289
- "logps/chosen": -0.4375666677951813,
290
- "logps/rejected": -2.892333507537842,
291
- "loss": 0.4602,
292
- "odds_ratio_loss": 1.4504070281982422,
293
- "rewards/accuracies": 0.9097656011581421,
294
- "rewards/chosen": -0.04375666379928589,
295
- "rewards/margins": 0.24547667801380157,
296
- "rewards/rejected": -0.28923335671424866,
297
- "sft_loss": 0.31515270471572876,
298
  "step": 170
299
  },
300
  {
301
- "epoch": 0.2570668273334337,
302
- "grad_norm": 2.625,
303
- "learning_rate": 0.0002779936322448233,
304
- "logits/chosen": -3.0108470916748047,
305
- "logits/rejected": -3.0108840465545654,
306
- "logps/chosen": -0.43512678146362305,
307
- "logps/rejected": -3.0354106426239014,
308
- "loss": 0.4572,
309
- "odds_ratio_loss": 1.3499114513397217,
310
- "rewards/accuracies": 0.9091796875,
311
- "rewards/chosen": -0.043512679636478424,
312
- "rewards/margins": 0.2600283920764923,
313
- "rewards/rejected": -0.30354106426239014,
314
- "sft_loss": 0.32224926352500916,
315
  "step": 180
316
  },
317
  {
318
- "epoch": 0.2713483177408467,
319
- "grad_norm": 2.78125,
320
- "learning_rate": 0.00027393581614739923,
321
- "logits/chosen": -3.0553345680236816,
322
- "logits/rejected": -3.055368423461914,
323
- "logps/chosen": -0.42374086380004883,
324
- "logps/rejected": -2.915168523788452,
325
- "loss": 0.445,
326
- "odds_ratio_loss": 1.4213359355926514,
327
- "rewards/accuracies": 0.9175781011581421,
328
- "rewards/chosen": -0.042374081909656525,
329
- "rewards/margins": 0.24914276599884033,
330
- "rewards/rejected": -0.29151684045791626,
331
- "sft_loss": 0.3028421401977539,
332
  "step": 190
333
  },
334
  {
335
- "epoch": 0.2856298081482597,
336
- "grad_norm": 2.421875,
337
- "learning_rate": 0.0002695698760834384,
338
- "logits/chosen": -2.994476318359375,
339
- "logits/rejected": -2.9945011138916016,
340
- "logps/chosen": -0.4544607102870941,
341
- "logps/rejected": -2.8547749519348145,
342
- "loss": 0.4776,
343
- "odds_ratio_loss": 1.4777902364730835,
344
- "rewards/accuracies": 0.9033203125,
345
- "rewards/chosen": -0.04544607177376747,
346
- "rewards/margins": 0.240031436085701,
347
- "rewards/rejected": -0.28547748923301697,
348
- "sft_loss": 0.32984623312950134,
349
  "step": 200
350
- },
351
- {
352
- "epoch": 0.2999112985556727,
353
- "grad_norm": 3.546875,
354
- "learning_rate": 0.00026490666646784665,
355
- "logits/chosen": -3.063324451446533,
356
- "logits/rejected": -3.063349723815918,
357
- "logps/chosen": -0.43639254570007324,
358
- "logps/rejected": -3.105325698852539,
359
- "loss": 0.4578,
360
- "odds_ratio_loss": 1.3640453815460205,
361
- "rewards/accuracies": 0.9140625,
362
- "rewards/chosen": -0.04363925755023956,
363
- "rewards/margins": 0.26689332723617554,
364
- "rewards/rejected": -0.3105325698852539,
365
- "sft_loss": 0.3214019536972046,
366
- "step": 210
367
- },
368
- {
369
- "epoch": 0.31419278896308567,
370
- "grad_norm": 2.296875,
371
- "learning_rate": 0.0002599577807744739,
372
- "logits/chosen": -3.115455389022827,
373
- "logits/rejected": -3.1154801845550537,
374
- "logps/chosen": -0.4168368875980377,
375
- "logps/rejected": -3.096985340118408,
376
- "loss": 0.4371,
377
- "odds_ratio_loss": 1.3493207693099976,
378
- "rewards/accuracies": 0.9164062738418579,
379
- "rewards/chosen": -0.04168368875980377,
380
- "rewards/margins": 0.2680148482322693,
381
- "rewards/rejected": -0.30969855189323425,
382
- "sft_loss": 0.3021194040775299,
383
- "step": 220
384
- },
385
- {
386
- "epoch": 0.32847427937049867,
387
- "grad_norm": 1.921875,
388
- "learning_rate": 0.0002547355227129109,
389
- "logits/chosen": -3.162436008453369,
390
- "logits/rejected": -3.1624579429626465,
391
- "logps/chosen": -0.4433667063713074,
392
- "logps/rejected": -3.0064072608947754,
393
- "loss": 0.4655,
394
- "odds_ratio_loss": 1.3855293989181519,
395
- "rewards/accuracies": 0.912304699420929,
396
- "rewards/chosen": -0.04433666914701462,
397
- "rewards/margins": 0.2563040852546692,
398
- "rewards/rejected": -0.3006407618522644,
399
- "sft_loss": 0.3269914984703064,
400
- "step": 230
401
- },
402
- {
403
- "epoch": 0.34275576977791167,
404
- "grad_norm": 1.2734375,
405
- "learning_rate": 0.0002492528756395289,
406
- "logits/chosen": -3.1590659618377686,
407
- "logits/rejected": -3.1590869426727295,
408
- "logps/chosen": -0.4178268015384674,
409
- "logps/rejected": -3.0285942554473877,
410
- "loss": 0.4376,
411
- "odds_ratio_loss": 1.3180664777755737,
412
- "rewards/accuracies": 0.917187511920929,
413
- "rewards/chosen": -0.0417826846241951,
414
- "rewards/margins": 0.26107674837112427,
415
- "rewards/rejected": -0.30285942554473877,
416
- "sft_loss": 0.3058391213417053,
417
- "step": 240
418
- },
419
- {
420
- "epoch": 0.35703726018532467,
421
- "grad_norm": 4.125,
422
- "learning_rate": 0.00024352347027881003,
423
- "logits/chosen": -3.2828221321105957,
424
- "logits/rejected": -3.282838821411133,
425
- "logps/chosen": -0.4194249212741852,
426
- "logps/rejected": -3.0415470600128174,
427
- "loss": 0.4403,
428
- "odds_ratio_loss": 1.3526116609573364,
429
- "rewards/accuracies": 0.9166015386581421,
430
- "rewards/chosen": -0.041942495852708817,
431
- "rewards/margins": 0.26221221685409546,
432
- "rewards/rejected": -0.3041546940803528,
433
- "sft_loss": 0.3050472140312195,
434
- "step": 250
435
- },
436
- {
437
- "epoch": 0.3713187505927376,
438
- "grad_norm": 2.640625,
439
- "learning_rate": 0.00023756155083521846,
440
- "logits/chosen": -3.22637677192688,
441
- "logits/rejected": -3.2263927459716797,
442
- "logps/chosen": -0.41995421051979065,
443
- "logps/rejected": -3.0328941345214844,
444
- "loss": 0.4417,
445
- "odds_ratio_loss": 1.3745537996292114,
446
- "rewards/accuracies": 0.9140625,
447
- "rewards/chosen": -0.04199542477726936,
448
- "rewards/margins": 0.26129403710365295,
449
- "rewards/rejected": -0.3032894432544708,
450
- "sft_loss": 0.30421775579452515,
451
- "step": 260
452
- },
453
- {
454
- "epoch": 0.3856002410001506,
455
- "grad_norm": 2.578125,
456
- "learning_rate": 0.0002313819395798639,
457
- "logits/chosen": -3.186093330383301,
458
- "logits/rejected": -3.1861069202423096,
459
- "logps/chosen": -0.4216841161251068,
460
- "logps/rejected": -3.125148057937622,
461
- "loss": 0.4439,
462
- "odds_ratio_loss": 1.3766355514526367,
463
- "rewards/accuracies": 0.913281261920929,
464
- "rewards/chosen": -0.04216841608285904,
465
- "rewards/margins": 0.27034634351730347,
466
- "rewards/rejected": -0.3125148117542267,
467
- "sft_loss": 0.30621883273124695,
468
- "step": 270
469
- },
470
- {
471
- "epoch": 0.3998817314075636,
472
- "grad_norm": 2.03125,
473
- "learning_rate": 0.000225,
474
- "logits/chosen": -3.167736530303955,
475
- "logits/rejected": -3.167752265930176,
476
- "logps/chosen": -0.42215681076049805,
477
- "logps/rejected": -3.0445384979248047,
478
- "loss": 0.4415,
479
- "odds_ratio_loss": 1.3319975137710571,
480
- "rewards/accuracies": 0.9189453125,
481
- "rewards/chosen": -0.04221567511558533,
482
- "rewards/margins": 0.26223814487457275,
483
- "rewards/rejected": -0.30445384979248047,
484
- "sft_loss": 0.3082923889160156,
485
- "step": 280
486
- },
487
- {
488
- "epoch": 0.4141632218149766,
489
- "grad_norm": 1.9140625,
490
- "learning_rate": 0.00021843159860297442,
491
- "logits/chosen": -3.2080886363983154,
492
- "logits/rejected": -3.208108425140381,
493
- "logps/chosen": -0.3955201208591461,
494
- "logps/rejected": -3.1413300037384033,
495
- "loss": 0.4162,
496
- "odds_ratio_loss": 1.298853874206543,
497
- "rewards/accuracies": 0.9166015386581421,
498
- "rewards/chosen": -0.03955201059579849,
499
- "rewards/margins": 0.27458101511001587,
500
- "rewards/rejected": -0.3141329884529114,
501
- "sft_loss": 0.28630274534225464,
502
- "step": 290
503
- },
504
- {
505
- "epoch": 0.42844471222238956,
506
- "grad_norm": 2.296875,
507
- "learning_rate": 0.00021169306546959174,
508
- "logits/chosen": -3.1625964641571045,
509
- "logits/rejected": -3.1626217365264893,
510
- "logps/chosen": -0.4014604091644287,
511
- "logps/rejected": -3.231706142425537,
512
- "loss": 0.4211,
513
- "odds_ratio_loss": 1.2652801275253296,
514
- "rewards/accuracies": 0.91796875,
515
- "rewards/chosen": -0.04014604538679123,
516
- "rewards/margins": 0.2830246090888977,
517
- "rewards/rejected": -0.32317066192626953,
518
- "sft_loss": 0.2946000099182129,
519
- "step": 300
520
- },
521
- {
522
- "epoch": 0.44272620262980256,
523
- "grad_norm": 1.9765625,
524
- "learning_rate": 0.00020480115365495926,
525
- "logits/chosen": -3.1747231483459473,
526
- "logits/rejected": -3.1747519969940186,
527
- "logps/chosen": -0.3960801064968109,
528
- "logps/rejected": -3.082359790802002,
529
- "loss": 0.4173,
530
- "odds_ratio_loss": 1.3159233331680298,
531
- "rewards/accuracies": 0.9173828363418579,
532
- "rewards/chosen": -0.03960801288485527,
533
- "rewards/margins": 0.26862797141075134,
534
- "rewards/rejected": -0.30823594331741333,
535
- "sft_loss": 0.2857065200805664,
536
- "step": 310
537
- },
538
- {
539
- "epoch": 0.45700769303721556,
540
- "grad_norm": 2.90625,
541
- "learning_rate": 0.00019777299753775265,
542
- "logits/chosen": -3.2027382850646973,
543
- "logits/rejected": -3.202775478363037,
544
- "logps/chosen": -0.3917561173439026,
545
- "logps/rejected": -3.128166675567627,
546
- "loss": 0.4113,
547
- "odds_ratio_loss": 1.213888168334961,
548
- "rewards/accuracies": 0.923046886920929,
549
- "rewards/chosen": -0.0391756072640419,
550
- "rewards/margins": 0.27364104986190796,
551
- "rewards/rejected": -0.31281667947769165,
552
- "sft_loss": 0.28995418548583984,
553
- "step": 320
554
- },
555
- {
556
- "epoch": 0.47128918344462856,
557
- "grad_norm": 1.1171875,
558
- "learning_rate": 0.00019062607022145078,
559
- "logits/chosen": -3.223431348800659,
560
- "logits/rejected": -3.2234749794006348,
561
- "logps/chosen": -0.3959726393222809,
562
- "logps/rejected": -3.1301980018615723,
563
- "loss": 0.4159,
564
- "odds_ratio_loss": 1.2752103805541992,
565
- "rewards/accuracies": 0.9134765863418579,
566
- "rewards/chosen": -0.03959726542234421,
567
- "rewards/margins": 0.2734225392341614,
568
- "rewards/rejected": -0.3130198121070862,
569
- "sft_loss": 0.2884255647659302,
570
- "step": 330
571
- },
572
- {
573
- "epoch": 0.4855706738520415,
574
- "grad_norm": 2.421875,
575
- "learning_rate": 0.00018337814009344714,
576
- "logits/chosen": -3.229165554046631,
577
- "logits/rejected": -3.22920298576355,
578
- "logps/chosen": -0.40863022208213806,
579
- "logps/rejected": -3.2035133838653564,
580
- "loss": 0.4293,
581
- "odds_ratio_loss": 1.3022868633270264,
582
- "rewards/accuracies": 0.923046886920929,
583
- "rewards/chosen": -0.040863025933504105,
584
- "rewards/margins": 0.27948835492134094,
585
- "rewards/rejected": -0.32035139203071594,
586
- "sft_loss": 0.29906368255615234,
587
- "step": 340
588
- },
589
- {
590
- "epoch": 0.4998521642594545,
591
- "grad_norm": 1.6953125,
592
- "learning_rate": 0.00017604722665003956,
593
- "logits/chosen": -3.268237590789795,
594
- "logits/rejected": -3.268270969390869,
595
- "logps/chosen": -0.3820918798446655,
596
- "logps/rejected": -3.3142802715301514,
597
- "loss": 0.4012,
598
- "odds_ratio_loss": 1.2229855060577393,
599
- "rewards/accuracies": 0.924023449420929,
600
- "rewards/chosen": -0.03820918872952461,
601
- "rewards/margins": 0.29321882128715515,
602
- "rewards/rejected": -0.33142799139022827,
603
- "sft_loss": 0.278933584690094,
604
- "step": 350
605
- },
606
- {
607
- "epoch": 0.5141336546668674,
608
- "grad_norm": 2.375,
609
- "learning_rate": 0.00016865155569712278,
610
- "logits/chosen": -3.3011035919189453,
611
- "logits/rejected": -3.3011412620544434,
612
- "logps/chosen": -0.38040798902511597,
613
- "logps/rejected": -3.2756595611572266,
614
- "loss": 0.3985,
615
- "odds_ratio_loss": 1.1769336462020874,
616
- "rewards/accuracies": 0.9302734136581421,
617
- "rewards/chosen": -0.03804079815745354,
618
- "rewards/margins": 0.2895251214504242,
619
- "rewards/rejected": -0.3275659680366516,
620
- "sft_loss": 0.28078263998031616,
621
- "step": 360
622
- },
623
- {
624
- "epoch": 0.5284151450742804,
625
- "grad_norm": 1.484375,
626
- "learning_rate": 0.00016120951403796364,
627
- "logits/chosen": -3.336045026779175,
628
- "logits/rejected": -3.3360836505889893,
629
- "logps/chosen": -0.3747532069683075,
630
- "logps/rejected": -3.3229317665100098,
631
- "loss": 0.3938,
632
- "odds_ratio_loss": 1.2170263528823853,
633
- "rewards/accuracies": 0.924023449420929,
634
- "rewards/chosen": -0.03747531771659851,
635
- "rewards/margins": 0.29481783509254456,
636
- "rewards/rejected": -0.33229315280914307,
637
- "sft_loss": 0.2720716595649719,
638
- "step": 370
639
- },
640
- {
641
- "epoch": 0.5426966354816934,
642
- "grad_norm": 1.1953125,
643
- "learning_rate": 0.00015373960376071093,
644
- "logits/chosen": -3.3047919273376465,
645
- "logits/rejected": -3.3048160076141357,
646
- "logps/chosen": -0.37664586305618286,
647
- "logps/rejected": -3.1220898628234863,
648
- "loss": 0.3961,
649
- "odds_ratio_loss": 1.2487261295318604,
650
- "rewards/accuracies": 0.921875,
651
- "rewards/chosen": -0.037664588540792465,
652
- "rewards/margins": 0.2745443880558014,
653
- "rewards/rejected": -0.31220895051956177,
654
- "sft_loss": 0.2712169289588928,
655
- "step": 380
656
- },
657
- {
658
- "epoch": 0.5569781258891064,
659
- "grad_norm": 2.015625,
660
- "learning_rate": 0.00014626039623928907,
661
- "logits/chosen": -3.3392891883850098,
662
- "logits/rejected": -3.339310884475708,
663
- "logps/chosen": -0.3588925302028656,
664
- "logps/rejected": -3.4983272552490234,
665
- "loss": 0.3772,
666
- "odds_ratio_loss": 1.1625574827194214,
667
- "rewards/accuracies": 0.927734375,
668
- "rewards/chosen": -0.03588924929499626,
669
- "rewards/margins": 0.31394344568252563,
670
- "rewards/rejected": -0.3498327136039734,
671
- "sft_loss": 0.2609647512435913,
672
- "step": 390
673
- },
674
- {
675
- "epoch": 0.5712596162965194,
676
- "grad_norm": 1.5078125,
677
- "learning_rate": 0.00013879048596203636,
678
- "logits/chosen": -3.382007598876953,
679
- "logits/rejected": -3.3820137977600098,
680
- "logps/chosen": -0.3696475028991699,
681
- "logps/rejected": -3.3737378120422363,
682
- "loss": 0.3872,
683
- "odds_ratio_loss": 1.1461818218231201,
684
- "rewards/accuracies": 0.929882824420929,
685
- "rewards/chosen": -0.03696475178003311,
686
- "rewards/margins": 0.3004090189933777,
687
- "rewards/rejected": -0.3373737931251526,
688
- "sft_loss": 0.27257078886032104,
689
- "step": 400
690
  }
691
  ],
692
  "logging_steps": 10,
693
- "max_steps": 700,
694
  "num_input_tokens_seen": 0,
695
  "num_train_epochs": 1,
696
  "save_steps": 100,
@@ -706,7 +366,7 @@
706
  "attributes": {}
707
  }
708
  },
709
- "total_flos": 3.1756323933484155e+18,
710
  "train_batch_size": 2,
711
  "trial_name": null,
712
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.392349190779794,
5
  "eval_steps": 5000,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.0196174595389897,
13
+ "grad_norm": 2.734375,
14
+ "learning_rate": 5.88235294117647e-05,
15
+ "logits/chosen": -3.38761830329895,
16
+ "logits/rejected": -3.3876290321350098,
17
+ "logps/chosen": -0.33322039246559143,
18
+ "logps/rejected": -2.6748645305633545,
19
+ "loss": 0.3505,
20
+ "odds_ratio_loss": 1.2291936874389648,
21
+ "rewards/accuracies": 0.9292968511581421,
22
+ "rewards/chosen": -0.0333220399916172,
23
+ "rewards/margins": 0.23416443169116974,
24
+ "rewards/rejected": -0.26748648285865784,
25
+ "sft_loss": 0.22762596607208252,
26
  "step": 10
27
  },
28
  {
29
+ "epoch": 0.0392349190779794,
30
+ "grad_norm": 2.40625,
31
+ "learning_rate": 0.0001176470588235294,
32
+ "logits/chosen": -3.392183303833008,
33
+ "logits/rejected": -3.392207622528076,
34
+ "logps/chosen": -0.2998594641685486,
35
+ "logps/rejected": -3.382711887359619,
36
+ "loss": 0.3159,
37
+ "odds_ratio_loss": 1.1517291069030762,
38
+ "rewards/accuracies": 0.9417968988418579,
39
+ "rewards/chosen": -0.0299859456717968,
40
+ "rewards/margins": 0.3082852363586426,
41
+ "rewards/rejected": -0.33827120065689087,
42
+ "sft_loss": 0.20074963569641113,
43
  "step": 20
44
  },
45
  {
46
+ "epoch": 0.058852378616969105,
47
+ "grad_norm": 1.203125,
48
+ "learning_rate": 0.0001764705882352941,
49
+ "logits/chosen": -3.330181837081909,
50
+ "logits/rejected": -3.3302066326141357,
51
+ "logps/chosen": -0.2974868416786194,
52
+ "logps/rejected": -2.9257004261016846,
53
+ "loss": 0.3131,
54
+ "odds_ratio_loss": 1.1069270372390747,
55
+ "rewards/accuracies": 0.9404296875,
56
+ "rewards/chosen": -0.029748687520623207,
57
+ "rewards/margins": 0.26282134652137756,
58
+ "rewards/rejected": -0.292570024728775,
59
+ "sft_loss": 0.2024519443511963,
60
  "step": 30
61
  },
62
  {
63
+ "epoch": 0.0784698381559588,
64
+ "grad_norm": 2.25,
65
+ "learning_rate": 0.0002352941176470588,
66
+ "logits/chosen": -3.2383294105529785,
67
+ "logits/rejected": -3.238351345062256,
68
+ "logps/chosen": -0.2954941689968109,
69
+ "logps/rejected": -3.020512580871582,
70
+ "loss": 0.311,
71
+ "odds_ratio_loss": 1.1244500875473022,
72
+ "rewards/accuracies": 0.9439452886581421,
73
+ "rewards/chosen": -0.02954941987991333,
74
+ "rewards/margins": 0.2725018560886383,
75
+ "rewards/rejected": -0.30205124616622925,
76
+ "sft_loss": 0.19855467975139618,
77
  "step": 40
78
  },
79
  {
80
+ "epoch": 0.0980872976949485,
81
+ "grad_norm": 1.1953125,
82
+ "learning_rate": 0.0002941176470588235,
83
+ "logits/chosen": -3.1789066791534424,
84
+ "logits/rejected": -3.1789205074310303,
85
+ "logps/chosen": -0.33446550369262695,
86
+ "logps/rejected": -2.795537233352661,
87
+ "loss": 0.3539,
88
+ "odds_ratio_loss": 1.2969881296157837,
89
+ "rewards/accuracies": 0.9242187738418579,
90
+ "rewards/chosen": -0.033446550369262695,
91
+ "rewards/margins": 0.24610717594623566,
92
+ "rewards/rejected": -0.27955371141433716,
93
+ "sft_loss": 0.22420397400856018,
94
  "step": 50
95
  },
96
  {
97
+ "epoch": 0.11770475723393821,
98
+ "grad_norm": 1.53125,
99
+ "learning_rate": 0.0002997142559882001,
100
+ "logits/chosen": -3.172243118286133,
101
+ "logits/rejected": -3.172255754470825,
102
+ "logps/chosen": -0.3375310003757477,
103
+ "logps/rejected": -2.670206308364868,
104
+ "loss": 0.3562,
105
+ "odds_ratio_loss": 1.2899665832519531,
106
+ "rewards/accuracies": 0.9263671636581421,
107
+ "rewards/chosen": -0.03375309333205223,
108
+ "rewards/margins": 0.23326751589775085,
109
+ "rewards/rejected": -0.26702064275741577,
110
+ "sft_loss": 0.2272227704524994,
111
  "step": 60
112
  },
113
  {
114
+ "epoch": 0.1373222167729279,
115
+ "grad_norm": 2.34375,
116
+ "learning_rate": 0.0002987278965067285,
117
+ "logits/chosen": -3.221324920654297,
118
+ "logits/rejected": -3.2213356494903564,
119
+ "logps/chosen": -0.3481382429599762,
120
+ "logps/rejected": -2.922685146331787,
121
+ "loss": 0.3657,
122
+ "odds_ratio_loss": 1.2384110689163208,
123
+ "rewards/accuracies": 0.9341796636581421,
124
+ "rewards/chosen": -0.03481382131576538,
125
+ "rewards/margins": 0.25745469331741333,
126
+ "rewards/rejected": -0.2922685146331787,
127
+ "sft_loss": 0.2418569028377533,
128
  "step": 70
129
  },
130
  {
131
+ "epoch": 0.1569396763119176,
132
+ "grad_norm": 2.578125,
133
+ "learning_rate": 0.00029704203184486135,
134
+ "logits/chosen": -3.1355342864990234,
135
+ "logits/rejected": -3.1355600357055664,
136
+ "logps/chosen": -0.37931394577026367,
137
+ "logps/rejected": -2.745541572570801,
138
+ "loss": 0.398,
139
+ "odds_ratio_loss": 1.3987653255462646,
140
+ "rewards/accuracies": 0.926953136920929,
141
+ "rewards/chosen": -0.037931397557258606,
142
+ "rewards/margins": 0.23662278056144714,
143
+ "rewards/rejected": -0.27455419301986694,
144
+ "sft_loss": 0.2580908536911011,
145
  "step": 80
146
  },
147
  {
148
+ "epoch": 0.17655713585090732,
149
+ "grad_norm": 2.5625,
150
+ "learning_rate": 0.0002946645910535173,
151
+ "logits/chosen": -2.493868112564087,
152
+ "logits/rejected": -2.493875026702881,
153
+ "logps/chosen": -0.3405473828315735,
154
+ "logps/rejected": -2.824284553527832,
155
+ "loss": 0.358,
156
+ "odds_ratio_loss": 1.2909947633743286,
157
+ "rewards/accuracies": 0.929492175579071,
158
+ "rewards/chosen": -0.03405473753809929,
159
+ "rewards/margins": 0.24837371706962585,
160
+ "rewards/rejected": -0.28242844343185425,
161
+ "sft_loss": 0.22891943156719208,
162
  "step": 90
163
  },
164
  {
165
+ "epoch": 0.196174595389897,
166
+ "grad_norm": 3.421875,
167
+ "learning_rate": 0.0002916067558423063,
168
+ "logits/chosen": -2.403263568878174,
169
+ "logits/rejected": -2.403261423110962,
170
+ "logps/chosen": -0.35239094495773315,
171
+ "logps/rejected": -2.8051955699920654,
172
+ "loss": 0.3722,
173
+ "odds_ratio_loss": 1.3476725816726685,
174
+ "rewards/accuracies": 0.9253906011581421,
175
+ "rewards/chosen": -0.03523910045623779,
176
+ "rewards/margins": 0.245280459523201,
177
+ "rewards/rejected": -0.2805195450782776,
178
+ "sft_loss": 0.23746605217456818,
179
  "step": 100
180
  },
181
  {
182
+ "epoch": 0.2157920549288867,
183
+ "grad_norm": 4.59375,
184
+ "learning_rate": 0.000287882907989101,
185
+ "logits/chosen": -2.881706714630127,
186
+ "logits/rejected": -2.8817214965820312,
187
+ "logps/chosen": -0.3732207417488098,
188
+ "logps/rejected": -2.704741954803467,
189
+ "loss": 0.3954,
190
+ "odds_ratio_loss": 1.417145013809204,
191
+ "rewards/accuracies": 0.9146484136581421,
192
+ "rewards/chosen": -0.03732207417488098,
193
+ "rewards/margins": 0.2331521213054657,
194
+ "rewards/rejected": -0.2704741954803467,
195
+ "sft_loss": 0.2537223696708679,
196
  "step": 110
197
  },
198
  {
199
+ "epoch": 0.23540951446787642,
200
+ "grad_norm": 2.15625,
201
+ "learning_rate": 0.0002835105616988724,
202
+ "logits/chosen": -2.9882686138153076,
203
+ "logits/rejected": -2.9882702827453613,
204
+ "logps/chosen": -0.3399866223335266,
205
+ "logps/rejected": -2.6247470378875732,
206
+ "loss": 0.3602,
207
+ "odds_ratio_loss": 1.3240976333618164,
208
+ "rewards/accuracies": 0.9248046875,
209
+ "rewards/chosen": -0.03399866446852684,
210
+ "rewards/margins": 0.22847604751586914,
211
+ "rewards/rejected": -0.2624747157096863,
212
+ "sft_loss": 0.2277795970439911,
213
  "step": 120
214
  },
215
  {
216
+ "epoch": 0.25502697400686614,
217
+ "grad_norm": 1.5078125,
218
+ "learning_rate": 0.00027851028122992275,
219
+ "logits/chosen": -2.9560532569885254,
220
+ "logits/rejected": -2.956058979034424,
221
+ "logps/chosen": -0.3172145485877991,
222
+ "logps/rejected": -2.878936290740967,
223
+ "loss": 0.3352,
224
+ "odds_ratio_loss": 1.214184045791626,
225
+ "rewards/accuracies": 0.9339843988418579,
226
+ "rewards/chosen": -0.03172145411372185,
227
+ "rewards/margins": 0.25617218017578125,
228
+ "rewards/rejected": -0.2878936529159546,
229
+ "sft_loss": 0.21379432082176208,
230
  "step": 130
231
  },
232
  {
233
+ "epoch": 0.2746444335458558,
234
+ "grad_norm": 2.78125,
235
+ "learning_rate": 0.0002729055841749404,
236
+ "logits/chosen": -2.830913543701172,
237
+ "logits/rejected": -2.830937623977661,
238
+ "logps/chosen": -0.31502777338027954,
239
+ "logps/rejected": -2.772010326385498,
240
+ "loss": 0.3319,
241
+ "odds_ratio_loss": 1.2316268682479858,
242
+ "rewards/accuracies": 0.927539050579071,
243
+ "rewards/chosen": -0.03150278329849243,
244
+ "rewards/margins": 0.2456982582807541,
245
+ "rewards/rejected": -0.2772010266780853,
246
+ "sft_loss": 0.2087615430355072,
247
  "step": 140
248
  },
249
  {
250
+ "epoch": 0.2942618930848455,
251
+ "grad_norm": 1.1796875,
252
+ "learning_rate": 0.0002667228308517715,
253
+ "logits/chosen": -2.7827162742614746,
254
+ "logits/rejected": -2.7827162742614746,
255
+ "logps/chosen": -0.31607183814048767,
256
+ "logps/rejected": -2.9119043350219727,
257
+ "loss": 0.3321,
258
+ "odds_ratio_loss": 1.1889328956604004,
259
+ "rewards/accuracies": 0.9408203363418579,
260
+ "rewards/chosen": -0.031607188284397125,
261
+ "rewards/margins": 0.2595832943916321,
262
+ "rewards/rejected": -0.2911904454231262,
263
+ "sft_loss": 0.2131653130054474,
264
  "step": 150
265
  },
266
  {
267
+ "epoch": 0.3138793526238352,
268
+ "grad_norm": 1.46875,
269
+ "learning_rate": 0.0002599911003241308,
270
+ "logits/chosen": -2.828200101852417,
271
+ "logits/rejected": -2.828172206878662,
272
+ "logps/chosen": -0.3029854893684387,
273
+ "logps/rejected": -2.940296173095703,
274
+ "loss": 0.3187,
275
+ "odds_ratio_loss": 1.1783697605133057,
276
+ "rewards/accuracies": 0.9388672113418579,
277
+ "rewards/chosen": -0.030298549681901932,
278
+ "rewards/margins": 0.26373106241226196,
279
+ "rewards/rejected": -0.2940296232700348,
280
+ "sft_loss": 0.20090630650520325,
281
  "step": 160
282
  },
283
  {
284
+ "epoch": 0.3334968121628249,
285
+ "grad_norm": 1.6875,
286
+ "learning_rate": 0.0002527420536353579,
287
+ "logits/chosen": -2.8486075401306152,
288
+ "logits/rejected": -2.8485991954803467,
289
+ "logps/chosen": -0.2984660565853119,
290
+ "logps/rejected": -2.9544076919555664,
291
+ "loss": 0.3145,
292
+ "odds_ratio_loss": 1.185927152633667,
293
+ "rewards/accuracies": 0.9361327886581421,
294
+ "rewards/chosen": -0.02984660305082798,
295
+ "rewards/margins": 0.2655941843986511,
296
+ "rewards/rejected": -0.29544076323509216,
297
+ "sft_loss": 0.19592249393463135,
298
  "step": 170
299
  },
300
  {
301
+ "epoch": 0.35311427170181464,
302
+ "grad_norm": 2.8125,
303
+ "learning_rate": 0.00024500978489846596,
304
+ "logits/chosen": -2.9189867973327637,
305
+ "logits/rejected": -2.918994188308716,
306
+ "logps/chosen": -0.3076332211494446,
307
+ "logps/rejected": -3.066086530685425,
308
+ "loss": 0.3244,
309
+ "odds_ratio_loss": 1.1991689205169678,
310
+ "rewards/accuracies": 0.9306640625,
311
+ "rewards/chosen": -0.030763322487473488,
312
+ "rewards/margins": 0.27584534883499146,
313
+ "rewards/rejected": -0.306608647108078,
314
+ "sft_loss": 0.20451626181602478,
315
  "step": 180
316
  },
317
  {
318
+ "epoch": 0.37273173124080433,
319
+ "grad_norm": 1.3828125,
320
+ "learning_rate": 0.00023683066094284426,
321
+ "logits/chosen": -2.911583423614502,
322
+ "logits/rejected": -2.9115893840789795,
323
+ "logps/chosen": -0.2833148241043091,
324
+ "logps/rejected": -3.1591389179229736,
325
+ "loss": 0.2994,
326
+ "odds_ratio_loss": 1.1305713653564453,
327
+ "rewards/accuracies": 0.9390624761581421,
328
+ "rewards/chosen": -0.02833147905766964,
329
+ "rewards/margins": 0.2875823974609375,
330
+ "rewards/rejected": -0.3159138560295105,
331
+ "sft_loss": 0.186366468667984,
332
  "step": 190
333
  },
334
  {
335
+ "epoch": 0.392349190779794,
336
+ "grad_norm": 1.03125,
337
+ "learning_rate": 0.00022824315027179597,
338
+ "logits/chosen": -2.9097139835357666,
339
+ "logits/rejected": -2.90971040725708,
340
+ "logps/chosen": -0.28494173288345337,
341
+ "logps/rejected": -2.9840199947357178,
342
+ "loss": 0.2999,
343
+ "odds_ratio_loss": 1.0984803438186646,
344
+ "rewards/accuracies": 0.9439452886581421,
345
+ "rewards/chosen": -0.028494173660874367,
346
+ "rewards/margins": 0.2699078619480133,
347
+ "rewards/rejected": -0.29840201139450073,
348
+ "sft_loss": 0.19001488387584686,
349
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
350
  }
351
  ],
352
  "logging_steps": 10,
353
+ "max_steps": 509,
354
  "num_input_tokens_seen": 0,
355
  "num_train_epochs": 1,
356
  "save_steps": 100,
 
366
  "attributes": {}
367
  }
368
  },
369
+ "total_flos": 1.581922179792175e+18,
370
  "train_batch_size": 2,
371
  "trial_name": null,
372
  "trial_params": null