|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0202020202020203, |
|
"eval_steps": 50, |
|
"global_step": 400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005050505050505051, |
|
"grad_norm": 2.061766753199945, |
|
"learning_rate": 1.0344827586206896e-06, |
|
"loss": 1.9091, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005050505050505051, |
|
"eval_loss": 1.7296912670135498, |
|
"eval_runtime": 229.6516, |
|
"eval_samples_per_second": 6.231, |
|
"eval_steps_per_second": 0.779, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.010101010101010102, |
|
"grad_norm": 0.9340006537063892, |
|
"learning_rate": 2.068965517241379e-06, |
|
"loss": 1.9543, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.015151515151515152, |
|
"grad_norm": 0.8730658898573816, |
|
"learning_rate": 3.103448275862069e-06, |
|
"loss": 1.8991, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.020202020202020204, |
|
"grad_norm": 0.9760544939810394, |
|
"learning_rate": 4.137931034482758e-06, |
|
"loss": 1.9469, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.025252525252525252, |
|
"grad_norm": 1.1435345547054372, |
|
"learning_rate": 5.172413793103449e-06, |
|
"loss": 1.875, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.030303030303030304, |
|
"grad_norm": 0.6977064869142138, |
|
"learning_rate": 6.206896551724138e-06, |
|
"loss": 1.9303, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.03535353535353535, |
|
"grad_norm": 0.9852376303852637, |
|
"learning_rate": 7.241379310344828e-06, |
|
"loss": 1.9764, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.04040404040404041, |
|
"grad_norm": 1.5021550676895192, |
|
"learning_rate": 8.275862068965517e-06, |
|
"loss": 1.9526, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.045454545454545456, |
|
"grad_norm": 14.465654024779864, |
|
"learning_rate": 9.310344827586207e-06, |
|
"loss": 1.9238, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.050505050505050504, |
|
"grad_norm": 1.3153716741597974, |
|
"learning_rate": 1.0344827586206898e-05, |
|
"loss": 1.8548, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05555555555555555, |
|
"grad_norm": 0.7251965259785947, |
|
"learning_rate": 1.1379310344827586e-05, |
|
"loss": 1.9568, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.06060606060606061, |
|
"grad_norm": 0.675323305346893, |
|
"learning_rate": 1.2413793103448277e-05, |
|
"loss": 2.0394, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.06565656565656566, |
|
"grad_norm": 0.7284662389076806, |
|
"learning_rate": 1.3448275862068966e-05, |
|
"loss": 1.9143, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0707070707070707, |
|
"grad_norm": 0.9273147668903492, |
|
"learning_rate": 1.4482758620689657e-05, |
|
"loss": 1.8514, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.07575757575757576, |
|
"grad_norm": 0.8045697974814354, |
|
"learning_rate": 1.5517241379310346e-05, |
|
"loss": 1.8691, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.08080808080808081, |
|
"grad_norm": 0.8617824816372583, |
|
"learning_rate": 1.6551724137931033e-05, |
|
"loss": 1.9208, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.08585858585858586, |
|
"grad_norm": 0.8474189127719305, |
|
"learning_rate": 1.7586206896551724e-05, |
|
"loss": 1.971, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.09090909090909091, |
|
"grad_norm": 0.7802161578292038, |
|
"learning_rate": 1.8620689655172415e-05, |
|
"loss": 1.9364, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.09595959595959595, |
|
"grad_norm": 0.7670275224618359, |
|
"learning_rate": 1.9655172413793102e-05, |
|
"loss": 1.8924, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.10101010101010101, |
|
"grad_norm": 0.8871142093374335, |
|
"learning_rate": 2.0689655172413797e-05, |
|
"loss": 1.8666, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.10606060606060606, |
|
"grad_norm": 0.8537219904851733, |
|
"learning_rate": 2.1724137931034484e-05, |
|
"loss": 1.903, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.1111111111111111, |
|
"grad_norm": 0.7103385523680423, |
|
"learning_rate": 2.275862068965517e-05, |
|
"loss": 1.8842, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.11616161616161616, |
|
"grad_norm": 0.7687652688491076, |
|
"learning_rate": 2.3793103448275862e-05, |
|
"loss": 1.9043, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.12121212121212122, |
|
"grad_norm": 0.8618641553034271, |
|
"learning_rate": 2.4827586206896553e-05, |
|
"loss": 1.9657, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.12626262626262627, |
|
"grad_norm": 0.6952586490192444, |
|
"learning_rate": 2.586206896551724e-05, |
|
"loss": 1.8744, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.13131313131313133, |
|
"grad_norm": 0.7879308602246046, |
|
"learning_rate": 2.689655172413793e-05, |
|
"loss": 1.9088, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.13636363636363635, |
|
"grad_norm": 0.6286619656859633, |
|
"learning_rate": 2.793103448275862e-05, |
|
"loss": 1.8769, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.1414141414141414, |
|
"grad_norm": 0.6368137307786529, |
|
"learning_rate": 2.8965517241379313e-05, |
|
"loss": 1.9609, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.14646464646464646, |
|
"grad_norm": 0.6997236575440393, |
|
"learning_rate": 3e-05, |
|
"loss": 1.8216, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.15151515151515152, |
|
"grad_norm": 0.7017124666486956, |
|
"learning_rate": 2.999976812015884e-05, |
|
"loss": 1.8589, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.15656565656565657, |
|
"grad_norm": 0.6473032006817286, |
|
"learning_rate": 2.999907248780446e-05, |
|
"loss": 1.8554, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.16161616161616163, |
|
"grad_norm": 0.5625959900004289, |
|
"learning_rate": 2.9997913124443945e-05, |
|
"loss": 1.8885, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.16666666666666666, |
|
"grad_norm": 0.5416656043411677, |
|
"learning_rate": 2.9996290065921693e-05, |
|
"loss": 1.8011, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.1717171717171717, |
|
"grad_norm": 0.6288840271054257, |
|
"learning_rate": 2.9994203362418313e-05, |
|
"loss": 1.8941, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.17676767676767677, |
|
"grad_norm": 0.46613508572576606, |
|
"learning_rate": 2.9991653078449062e-05, |
|
"loss": 1.8755, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.18181818181818182, |
|
"grad_norm": 0.483380275460621, |
|
"learning_rate": 2.998863929286187e-05, |
|
"loss": 1.7971, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.18686868686868688, |
|
"grad_norm": 0.5435079019515928, |
|
"learning_rate": 2.9985162098834886e-05, |
|
"loss": 1.8561, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.1919191919191919, |
|
"grad_norm": 0.39935840982328513, |
|
"learning_rate": 2.9981221603873608e-05, |
|
"loss": 1.8447, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.19696969696969696, |
|
"grad_norm": 0.4860381910271567, |
|
"learning_rate": 2.9976817929807542e-05, |
|
"loss": 1.8571, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.20202020202020202, |
|
"grad_norm": 0.4704297980261362, |
|
"learning_rate": 2.9971951212786453e-05, |
|
"loss": 1.8953, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.20707070707070707, |
|
"grad_norm": 0.45313219770954005, |
|
"learning_rate": 2.996662160327616e-05, |
|
"loss": 1.8858, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.21212121212121213, |
|
"grad_norm": 0.4849523903159801, |
|
"learning_rate": 2.9960829266053854e-05, |
|
"loss": 1.8558, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.21717171717171718, |
|
"grad_norm": 0.5296271329297302, |
|
"learning_rate": 2.9954574380203036e-05, |
|
"loss": 1.8819, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.2222222222222222, |
|
"grad_norm": 0.42138253356228195, |
|
"learning_rate": 2.9947857139107964e-05, |
|
"loss": 1.8218, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.22727272727272727, |
|
"grad_norm": 0.54778300283403, |
|
"learning_rate": 2.994067775044768e-05, |
|
"loss": 1.8718, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.23232323232323232, |
|
"grad_norm": 0.5171055027316829, |
|
"learning_rate": 2.9933036436189582e-05, |
|
"loss": 1.8202, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.23737373737373738, |
|
"grad_norm": 0.42755509411252407, |
|
"learning_rate": 2.992493343258257e-05, |
|
"loss": 1.7941, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.24242424242424243, |
|
"grad_norm": 0.5864597089331413, |
|
"learning_rate": 2.9916368990149738e-05, |
|
"loss": 1.8177, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.2474747474747475, |
|
"grad_norm": 0.5015977034643889, |
|
"learning_rate": 2.990734337368062e-05, |
|
"loss": 1.8441, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.25252525252525254, |
|
"grad_norm": 0.4223650022421803, |
|
"learning_rate": 2.989785686222302e-05, |
|
"loss": 1.8235, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.25252525252525254, |
|
"eval_loss": 1.5191664695739746, |
|
"eval_runtime": 228.7897, |
|
"eval_samples_per_second": 6.255, |
|
"eval_steps_per_second": 0.782, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.25757575757575757, |
|
"grad_norm": 0.5549092161535342, |
|
"learning_rate": 2.9887909749074373e-05, |
|
"loss": 1.8724, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.26262626262626265, |
|
"grad_norm": 0.5808532765316217, |
|
"learning_rate": 2.9877502341772687e-05, |
|
"loss": 1.8668, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.2676767676767677, |
|
"grad_norm": 0.42037362125882416, |
|
"learning_rate": 2.9866634962087014e-05, |
|
"loss": 1.8149, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.2727272727272727, |
|
"grad_norm": 0.510701064713823, |
|
"learning_rate": 2.9855307946007532e-05, |
|
"loss": 1.8388, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.2777777777777778, |
|
"grad_norm": 0.4740044982938013, |
|
"learning_rate": 2.984352164373513e-05, |
|
"loss": 1.8898, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.2828282828282828, |
|
"grad_norm": 0.42600738162835966, |
|
"learning_rate": 2.9831276419670593e-05, |
|
"loss": 1.7645, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.2878787878787879, |
|
"grad_norm": 0.523957827857094, |
|
"learning_rate": 2.9818572652403336e-05, |
|
"loss": 1.8688, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.29292929292929293, |
|
"grad_norm": 0.4739264160838728, |
|
"learning_rate": 2.9805410734699694e-05, |
|
"loss": 1.8253, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.29797979797979796, |
|
"grad_norm": 0.4030312937735394, |
|
"learning_rate": 2.9791791073490795e-05, |
|
"loss": 1.8799, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.30303030303030304, |
|
"grad_norm": 0.4287715829219747, |
|
"learning_rate": 2.9777714089859946e-05, |
|
"loss": 1.8283, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.30808080808080807, |
|
"grad_norm": 0.41789218942140666, |
|
"learning_rate": 2.976318021902965e-05, |
|
"loss": 1.8135, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.31313131313131315, |
|
"grad_norm": 0.4587324787560187, |
|
"learning_rate": 2.9748189910348122e-05, |
|
"loss": 1.8821, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.3181818181818182, |
|
"grad_norm": 0.4460155322546901, |
|
"learning_rate": 2.9732743627275428e-05, |
|
"loss": 1.8889, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.32323232323232326, |
|
"grad_norm": 0.4157211867080571, |
|
"learning_rate": 2.9716841847369106e-05, |
|
"loss": 1.8345, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.3282828282828283, |
|
"grad_norm": 0.41844440976522496, |
|
"learning_rate": 2.9700485062269465e-05, |
|
"loss": 1.7642, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 0.4520630042741615, |
|
"learning_rate": 2.968367377768432e-05, |
|
"loss": 1.8485, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.3383838383838384, |
|
"grad_norm": 0.4368200901934031, |
|
"learning_rate": 2.966640851337342e-05, |
|
"loss": 1.8617, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.3434343434343434, |
|
"grad_norm": 0.38212187200746967, |
|
"learning_rate": 2.964868980313232e-05, |
|
"loss": 1.7493, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.3484848484848485, |
|
"grad_norm": 0.4173981057054639, |
|
"learning_rate": 2.963051819477592e-05, |
|
"loss": 1.836, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.35353535353535354, |
|
"grad_norm": 0.4067519509663366, |
|
"learning_rate": 2.9611894250121508e-05, |
|
"loss": 1.81, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.35858585858585856, |
|
"grad_norm": 0.40355345502721407, |
|
"learning_rate": 2.9592818544971394e-05, |
|
"loss": 1.8126, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.36363636363636365, |
|
"grad_norm": 0.4205716149104244, |
|
"learning_rate": 2.9573291669095112e-05, |
|
"loss": 1.725, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.3686868686868687, |
|
"grad_norm": 0.42333778099845615, |
|
"learning_rate": 2.955331422621117e-05, |
|
"loss": 1.8448, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.37373737373737376, |
|
"grad_norm": 0.39602223830054895, |
|
"learning_rate": 2.953288683396841e-05, |
|
"loss": 1.8518, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.3787878787878788, |
|
"grad_norm": 0.42272756108809484, |
|
"learning_rate": 2.95120101239269e-05, |
|
"loss": 1.846, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.3838383838383838, |
|
"grad_norm": 0.38458304927883635, |
|
"learning_rate": 2.9490684741538394e-05, |
|
"loss": 1.7287, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.3888888888888889, |
|
"grad_norm": 0.4515217019544044, |
|
"learning_rate": 2.9468911346126395e-05, |
|
"loss": 1.9222, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.3939393939393939, |
|
"grad_norm": 0.3902694265684303, |
|
"learning_rate": 2.9446690610865775e-05, |
|
"loss": 1.8075, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.398989898989899, |
|
"grad_norm": 0.4621030399414333, |
|
"learning_rate": 2.9424023222761938e-05, |
|
"loss": 1.8307, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.40404040404040403, |
|
"grad_norm": 0.4164626377683268, |
|
"learning_rate": 2.9400909882629595e-05, |
|
"loss": 1.826, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.4090909090909091, |
|
"grad_norm": 0.4989467879911725, |
|
"learning_rate": 2.9377351305071097e-05, |
|
"loss": 1.7694, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.41414141414141414, |
|
"grad_norm": 0.38502918266185665, |
|
"learning_rate": 2.935334821845434e-05, |
|
"loss": 1.8558, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.41919191919191917, |
|
"grad_norm": 0.43876163502583143, |
|
"learning_rate": 2.9328901364890253e-05, |
|
"loss": 1.9031, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.42424242424242425, |
|
"grad_norm": 0.44274327469566893, |
|
"learning_rate": 2.930401150020983e-05, |
|
"loss": 1.8486, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.4292929292929293, |
|
"grad_norm": 0.42306669374100425, |
|
"learning_rate": 2.9278679393940794e-05, |
|
"loss": 1.8095, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.43434343434343436, |
|
"grad_norm": 0.43069541482459783, |
|
"learning_rate": 2.9252905829283778e-05, |
|
"loss": 1.8091, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.4393939393939394, |
|
"grad_norm": 0.524368528081938, |
|
"learning_rate": 2.9226691603088124e-05, |
|
"loss": 1.8407, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.4444444444444444, |
|
"grad_norm": 0.4306578292725837, |
|
"learning_rate": 2.9200037525827255e-05, |
|
"loss": 1.8075, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.4494949494949495, |
|
"grad_norm": 0.401793293233711, |
|
"learning_rate": 2.9172944421573587e-05, |
|
"loss": 1.8405, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.45454545454545453, |
|
"grad_norm": 0.48084582939870474, |
|
"learning_rate": 2.9145413127973085e-05, |
|
"loss": 1.8596, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.4595959595959596, |
|
"grad_norm": 0.4259765113516244, |
|
"learning_rate": 2.911744449621935e-05, |
|
"loss": 1.7931, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.46464646464646464, |
|
"grad_norm": 0.41686644545343865, |
|
"learning_rate": 2.90890393910273e-05, |
|
"loss": 1.8693, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.4696969696969697, |
|
"grad_norm": 0.49645836560093654, |
|
"learning_rate": 2.9060198690606438e-05, |
|
"loss": 1.8567, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.47474747474747475, |
|
"grad_norm": 0.4122894943723907, |
|
"learning_rate": 2.9030923286633703e-05, |
|
"loss": 1.7979, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.4797979797979798, |
|
"grad_norm": 0.41291814598709514, |
|
"learning_rate": 2.9001214084225898e-05, |
|
"loss": 1.8409, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.48484848484848486, |
|
"grad_norm": 0.4346649991553492, |
|
"learning_rate": 2.89710720019117e-05, |
|
"loss": 1.8904, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.4898989898989899, |
|
"grad_norm": 0.41818341954882543, |
|
"learning_rate": 2.8940497971603288e-05, |
|
"loss": 1.8695, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.494949494949495, |
|
"grad_norm": 0.4420144131423374, |
|
"learning_rate": 2.890949293856749e-05, |
|
"loss": 1.8021, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.43880248651128445, |
|
"learning_rate": 2.8878057861396606e-05, |
|
"loss": 1.8329, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.5050505050505051, |
|
"grad_norm": 0.40444053098152477, |
|
"learning_rate": 2.8846193711978717e-05, |
|
"loss": 1.8201, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5050505050505051, |
|
"eval_loss": 1.4966636896133423, |
|
"eval_runtime": 229.0804, |
|
"eval_samples_per_second": 6.247, |
|
"eval_steps_per_second": 0.781, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.51010101010101, |
|
"grad_norm": 0.45960433784465665, |
|
"learning_rate": 2.881390147546768e-05, |
|
"loss": 1.8183, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.5151515151515151, |
|
"grad_norm": 0.38763847079359476, |
|
"learning_rate": 2.878118215025265e-05, |
|
"loss": 1.8208, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.5202020202020202, |
|
"grad_norm": 0.41520378471101016, |
|
"learning_rate": 2.874803674792722e-05, |
|
"loss": 1.8714, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.5252525252525253, |
|
"grad_norm": 0.40206269424525176, |
|
"learning_rate": 2.8714466293258142e-05, |
|
"loss": 1.7878, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.5303030303030303, |
|
"grad_norm": 0.4373758839964535, |
|
"learning_rate": 2.868047182415364e-05, |
|
"loss": 1.8093, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.5353535353535354, |
|
"grad_norm": 0.3875079810746361, |
|
"learning_rate": 2.864605439163133e-05, |
|
"loss": 1.8606, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.5404040404040404, |
|
"grad_norm": 0.4303262268327845, |
|
"learning_rate": 2.8611215059785706e-05, |
|
"loss": 1.8233, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.5454545454545454, |
|
"grad_norm": 0.45649878897921986, |
|
"learning_rate": 2.8575954905755278e-05, |
|
"loss": 1.8817, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.5505050505050505, |
|
"grad_norm": 0.44045126098579024, |
|
"learning_rate": 2.8540275019689237e-05, |
|
"loss": 1.7945, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 0.43144466038537, |
|
"learning_rate": 2.8504176504713745e-05, |
|
"loss": 1.7419, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.5606060606060606, |
|
"grad_norm": 0.38271501972768057, |
|
"learning_rate": 2.846766047689787e-05, |
|
"loss": 1.8557, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.5656565656565656, |
|
"grad_norm": 0.4193952465924727, |
|
"learning_rate": 2.8430728065219035e-05, |
|
"loss": 1.8245, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.5707070707070707, |
|
"grad_norm": 0.41096403730883174, |
|
"learning_rate": 2.839338041152814e-05, |
|
"loss": 1.8117, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.5757575757575758, |
|
"grad_norm": 0.41746315731568373, |
|
"learning_rate": 2.835561867051426e-05, |
|
"loss": 1.7859, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.5808080808080808, |
|
"grad_norm": 0.35796806338294784, |
|
"learning_rate": 2.8317444009668916e-05, |
|
"loss": 1.8125, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.5858585858585859, |
|
"grad_norm": 0.3782772183418781, |
|
"learning_rate": 2.8278857609250033e-05, |
|
"loss": 1.8551, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.5909090909090909, |
|
"grad_norm": 0.4563811308324174, |
|
"learning_rate": 2.823986066224538e-05, |
|
"loss": 1.8711, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.5959595959595959, |
|
"grad_norm": 0.42499726957188555, |
|
"learning_rate": 2.820045437433575e-05, |
|
"loss": 1.8827, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.601010101010101, |
|
"grad_norm": 0.35463970926383165, |
|
"learning_rate": 2.816063996385765e-05, |
|
"loss": 1.8511, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.6060606060606061, |
|
"grad_norm": 0.39828700116760574, |
|
"learning_rate": 2.8120418661765624e-05, |
|
"loss": 1.8386, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6111111111111112, |
|
"grad_norm": 0.406542351713585, |
|
"learning_rate": 2.8079791711594236e-05, |
|
"loss": 1.8035, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.6161616161616161, |
|
"grad_norm": 0.36243471155718693, |
|
"learning_rate": 2.8038760369419583e-05, |
|
"loss": 1.8014, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.6212121212121212, |
|
"grad_norm": 0.3845741639511099, |
|
"learning_rate": 2.7997325903820478e-05, |
|
"loss": 1.9082, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.6262626262626263, |
|
"grad_norm": 0.4004906560962447, |
|
"learning_rate": 2.7955489595839228e-05, |
|
"loss": 1.828, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.6313131313131313, |
|
"grad_norm": 0.40745184195706274, |
|
"learning_rate": 2.7913252738942027e-05, |
|
"loss": 1.8285, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.6363636363636364, |
|
"grad_norm": 0.4701862294606233, |
|
"learning_rate": 2.787061663897896e-05, |
|
"loss": 1.7612, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.6414141414141414, |
|
"grad_norm": 0.3845304191870889, |
|
"learning_rate": 2.782758261414365e-05, |
|
"loss": 1.9005, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.6464646464646465, |
|
"grad_norm": 0.45387333682470754, |
|
"learning_rate": 2.7784151994932462e-05, |
|
"loss": 1.8369, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.6515151515151515, |
|
"grad_norm": 0.402744943135056, |
|
"learning_rate": 2.7740326124103416e-05, |
|
"loss": 1.8382, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.6565656565656566, |
|
"grad_norm": 0.39184387630637835, |
|
"learning_rate": 2.7696106356634637e-05, |
|
"loss": 1.8659, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.6616161616161617, |
|
"grad_norm": 0.4626435038267262, |
|
"learning_rate": 2.7651494059682485e-05, |
|
"loss": 1.8667, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.40353571201115057, |
|
"learning_rate": 2.7606490612539262e-05, |
|
"loss": 1.8209, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.6717171717171717, |
|
"grad_norm": 0.4162695579854718, |
|
"learning_rate": 2.7561097406590595e-05, |
|
"loss": 1.8002, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.6767676767676768, |
|
"grad_norm": 0.3909561466126897, |
|
"learning_rate": 2.751531584527241e-05, |
|
"loss": 1.8076, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.6818181818181818, |
|
"grad_norm": 0.41533705192980835, |
|
"learning_rate": 2.746914734402752e-05, |
|
"loss": 1.9099, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.6868686868686869, |
|
"grad_norm": 0.38207500499538627, |
|
"learning_rate": 2.7422593330261888e-05, |
|
"loss": 1.7688, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.6919191919191919, |
|
"grad_norm": 0.35742242270800784, |
|
"learning_rate": 2.7375655243300493e-05, |
|
"loss": 1.8651, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.696969696969697, |
|
"grad_norm": 0.41421312272061606, |
|
"learning_rate": 2.7328334534342827e-05, |
|
"loss": 1.8785, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.702020202020202, |
|
"grad_norm": 0.4171193053836604, |
|
"learning_rate": 2.7280632666418013e-05, |
|
"loss": 1.8781, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.7070707070707071, |
|
"grad_norm": 0.40782157651436396, |
|
"learning_rate": 2.723255111433959e-05, |
|
"loss": 1.8247, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.7121212121212122, |
|
"grad_norm": 0.4281506276367188, |
|
"learning_rate": 2.7184091364659923e-05, |
|
"loss": 1.733, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.7171717171717171, |
|
"grad_norm": 0.39479511690560315, |
|
"learning_rate": 2.7135254915624213e-05, |
|
"loss": 1.8401, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.7222222222222222, |
|
"grad_norm": 0.42100536218468343, |
|
"learning_rate": 2.70860432771242e-05, |
|
"loss": 1.8484, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.7272727272727273, |
|
"grad_norm": 0.3855161042933354, |
|
"learning_rate": 2.703645797065147e-05, |
|
"loss": 1.8197, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.7323232323232324, |
|
"grad_norm": 0.3920745691095182, |
|
"learning_rate": 2.6986500529250427e-05, |
|
"loss": 1.8345, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.7373737373737373, |
|
"grad_norm": 0.42204478492867153, |
|
"learning_rate": 2.6936172497470874e-05, |
|
"loss": 1.8368, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.7424242424242424, |
|
"grad_norm": 0.40405492226523393, |
|
"learning_rate": 2.688547543132029e-05, |
|
"loss": 1.8182, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.7474747474747475, |
|
"grad_norm": 0.36686029807824033, |
|
"learning_rate": 2.6834410898215688e-05, |
|
"loss": 1.8581, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.7525252525252525, |
|
"grad_norm": 0.36859567357522177, |
|
"learning_rate": 2.678298047693518e-05, |
|
"loss": 1.7276, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.7575757575757576, |
|
"grad_norm": 0.37761559772659303, |
|
"learning_rate": 2.6731185757569153e-05, |
|
"loss": 1.8321, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.7575757575757576, |
|
"eval_loss": 1.484434962272644, |
|
"eval_runtime": 229.048, |
|
"eval_samples_per_second": 6.248, |
|
"eval_steps_per_second": 0.781, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.7626262626262627, |
|
"grad_norm": 0.40657370322541503, |
|
"learning_rate": 2.6679028341471114e-05, |
|
"loss": 1.8364, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.7676767676767676, |
|
"grad_norm": 0.3852020973392934, |
|
"learning_rate": 2.6626509841208177e-05, |
|
"loss": 1.8723, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.7727272727272727, |
|
"grad_norm": 0.4471085311517623, |
|
"learning_rate": 2.6573631880511214e-05, |
|
"loss": 1.9017, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.7777777777777778, |
|
"grad_norm": 0.3858532089668969, |
|
"learning_rate": 2.652039609422463e-05, |
|
"loss": 1.7849, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.7828282828282829, |
|
"grad_norm": 0.427216562758674, |
|
"learning_rate": 2.6466804128255865e-05, |
|
"loss": 1.7743, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.7878787878787878, |
|
"grad_norm": 0.38828532121415243, |
|
"learning_rate": 2.6412857639524442e-05, |
|
"loss": 1.8294, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.7929292929292929, |
|
"grad_norm": 0.40962967785771864, |
|
"learning_rate": 2.6358558295910805e-05, |
|
"loss": 1.8314, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.797979797979798, |
|
"grad_norm": 0.7209413951413858, |
|
"learning_rate": 2.6303907776204706e-05, |
|
"loss": 1.8388, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.803030303030303, |
|
"grad_norm": 0.3753158677183495, |
|
"learning_rate": 2.624890777005332e-05, |
|
"loss": 1.9189, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.8080808080808081, |
|
"grad_norm": 0.3683405444328328, |
|
"learning_rate": 2.6193559977909008e-05, |
|
"loss": 1.7511, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.8131313131313131, |
|
"grad_norm": 0.420103866946048, |
|
"learning_rate": 2.6137866110976742e-05, |
|
"loss": 1.8447, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.8181818181818182, |
|
"grad_norm": 0.4025395036734605, |
|
"learning_rate": 2.608182789116118e-05, |
|
"loss": 1.7567, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.8232323232323232, |
|
"grad_norm": 0.41152519566999235, |
|
"learning_rate": 2.6025447051013466e-05, |
|
"loss": 1.8844, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.8282828282828283, |
|
"grad_norm": 0.37456667248254777, |
|
"learning_rate": 2.5968725333677628e-05, |
|
"loss": 1.8316, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 0.3989107911221168, |
|
"learning_rate": 2.5911664492836714e-05, |
|
"loss": 1.8336, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.8383838383838383, |
|
"grad_norm": 0.4192570186399055, |
|
"learning_rate": 2.585426629265854e-05, |
|
"loss": 1.8368, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.8434343434343434, |
|
"grad_norm": 0.4039992281383353, |
|
"learning_rate": 2.579653250774119e-05, |
|
"loss": 1.8474, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.8484848484848485, |
|
"grad_norm": 0.3860934737365767, |
|
"learning_rate": 2.5738464923058118e-05, |
|
"loss": 1.9136, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.8535353535353535, |
|
"grad_norm": 0.3766141715707985, |
|
"learning_rate": 2.568006533390295e-05, |
|
"loss": 1.7905, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.8585858585858586, |
|
"grad_norm": 0.43692452251495456, |
|
"learning_rate": 2.562133554583402e-05, |
|
"loss": 1.8692, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.8636363636363636, |
|
"grad_norm": 0.38506171479168916, |
|
"learning_rate": 2.556227737461852e-05, |
|
"loss": 1.8802, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.8686868686868687, |
|
"grad_norm": 0.4139103090309799, |
|
"learning_rate": 2.5502892646176364e-05, |
|
"loss": 1.9145, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.8737373737373737, |
|
"grad_norm": 0.41202711286858845, |
|
"learning_rate": 2.5443183196523744e-05, |
|
"loss": 1.8799, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.8787878787878788, |
|
"grad_norm": 0.441012649010044, |
|
"learning_rate": 2.5383150871716342e-05, |
|
"loss": 1.7964, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.8838383838383839, |
|
"grad_norm": 0.3924859380226668, |
|
"learning_rate": 2.5322797527792297e-05, |
|
"loss": 1.8196, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 0.484116113199706, |
|
"learning_rate": 2.526212503071477e-05, |
|
"loss": 1.8211, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.8939393939393939, |
|
"grad_norm": 0.4214268259949748, |
|
"learning_rate": 2.52011352563143e-05, |
|
"loss": 1.8756, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.898989898989899, |
|
"grad_norm": 0.3761454305197214, |
|
"learning_rate": 2.5139830090230776e-05, |
|
"loss": 1.7887, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.9040404040404041, |
|
"grad_norm": 0.38472045617953676, |
|
"learning_rate": 2.507821142785516e-05, |
|
"loss": 1.9109, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 0.4437224782187781, |
|
"learning_rate": 2.501628117427087e-05, |
|
"loss": 1.8024, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.9141414141414141, |
|
"grad_norm": 0.3928636586609788, |
|
"learning_rate": 2.4954041244194883e-05, |
|
"loss": 1.8177, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.9191919191919192, |
|
"grad_norm": 0.40978302308222725, |
|
"learning_rate": 2.4891493561918545e-05, |
|
"loss": 1.8238, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.9242424242424242, |
|
"grad_norm": 0.41578094468387805, |
|
"learning_rate": 2.482864006124808e-05, |
|
"loss": 1.7558, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.9292929292929293, |
|
"grad_norm": 0.47459329104171105, |
|
"learning_rate": 2.4765482685444786e-05, |
|
"loss": 1.8982, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.9343434343434344, |
|
"grad_norm": 0.4584785873115325, |
|
"learning_rate": 2.470202338716497e-05, |
|
"loss": 1.8925, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.9393939393939394, |
|
"grad_norm": 0.39624647908908, |
|
"learning_rate": 2.4638264128399555e-05, |
|
"loss": 1.8032, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.9444444444444444, |
|
"grad_norm": 0.38536269925326294, |
|
"learning_rate": 2.457420688041345e-05, |
|
"loss": 1.7927, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.9494949494949495, |
|
"grad_norm": 0.44911468803951493, |
|
"learning_rate": 2.4509853623684598e-05, |
|
"loss": 1.8635, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.9545454545454546, |
|
"grad_norm": 0.38483613716276227, |
|
"learning_rate": 2.4445206347842714e-05, |
|
"loss": 1.828, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.9595959595959596, |
|
"grad_norm": 0.4134516508007333, |
|
"learning_rate": 2.43802670516078e-05, |
|
"loss": 1.7766, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.9646464646464646, |
|
"grad_norm": 0.4205305313105495, |
|
"learning_rate": 2.4315037742728366e-05, |
|
"loss": 1.8162, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.9696969696969697, |
|
"grad_norm": 0.4019754805551098, |
|
"learning_rate": 2.4249520437919307e-05, |
|
"loss": 1.7429, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.9747474747474747, |
|
"grad_norm": 0.38403144394408584, |
|
"learning_rate": 2.4183717162799587e-05, |
|
"loss": 1.731, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.9797979797979798, |
|
"grad_norm": 0.3807338127364377, |
|
"learning_rate": 2.4117629951829602e-05, |
|
"loss": 1.8942, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.9848484848484849, |
|
"grad_norm": 0.44761141331014614, |
|
"learning_rate": 2.4051260848248286e-05, |
|
"loss": 1.8313, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.98989898989899, |
|
"grad_norm": 0.38554593665783987, |
|
"learning_rate": 2.398461190400993e-05, |
|
"loss": 1.8394, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.9949494949494949, |
|
"grad_norm": 0.403968521425789, |
|
"learning_rate": 2.3917685179720752e-05, |
|
"loss": 1.8287, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.36274397859542107, |
|
"learning_rate": 2.3850482744575177e-05, |
|
"loss": 1.7637, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.005050505050505, |
|
"grad_norm": 0.5408632160019302, |
|
"learning_rate": 2.3783006676291866e-05, |
|
"loss": 1.6493, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 1.0101010101010102, |
|
"grad_norm": 0.4273958454403075, |
|
"learning_rate": 2.3715259061049487e-05, |
|
"loss": 1.5513, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0101010101010102, |
|
"eval_loss": 1.4980653524398804, |
|
"eval_runtime": 229.1221, |
|
"eval_samples_per_second": 6.246, |
|
"eval_steps_per_second": 0.781, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0151515151515151, |
|
"grad_norm": 0.9048049507315692, |
|
"learning_rate": 2.3647241993422208e-05, |
|
"loss": 1.6547, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.02020202020202, |
|
"grad_norm": 0.3848882038249155, |
|
"learning_rate": 2.3578957576314944e-05, |
|
"loss": 1.5279, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.0252525252525253, |
|
"grad_norm": 0.40317657177333205, |
|
"learning_rate": 2.3510407920898327e-05, |
|
"loss": 1.6036, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 1.0303030303030303, |
|
"grad_norm": 0.43727100501923616, |
|
"learning_rate": 2.3441595146543458e-05, |
|
"loss": 1.6581, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.0353535353535352, |
|
"grad_norm": 0.3912845389083466, |
|
"learning_rate": 2.337252138075636e-05, |
|
"loss": 1.5399, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.0404040404040404, |
|
"grad_norm": 0.3961929212943775, |
|
"learning_rate": 2.3303188759112213e-05, |
|
"loss": 1.6788, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.0454545454545454, |
|
"grad_norm": 0.4307375964156506, |
|
"learning_rate": 2.3233599425189317e-05, |
|
"loss": 1.6081, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.0505050505050506, |
|
"grad_norm": 0.3912142497081324, |
|
"learning_rate": 2.316375553050284e-05, |
|
"loss": 1.5678, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.0555555555555556, |
|
"grad_norm": 0.3763414411950308, |
|
"learning_rate": 2.3093659234438266e-05, |
|
"loss": 1.5547, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 1.0606060606060606, |
|
"grad_norm": 0.36800317307295144, |
|
"learning_rate": 2.3023312704184676e-05, |
|
"loss": 1.4771, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.0656565656565657, |
|
"grad_norm": 0.3910877207089002, |
|
"learning_rate": 2.295271811466769e-05, |
|
"loss": 1.6495, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.0707070707070707, |
|
"grad_norm": 0.39199841864196705, |
|
"learning_rate": 2.2881877648482274e-05, |
|
"loss": 1.6182, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.0757575757575757, |
|
"grad_norm": 0.4188630635601797, |
|
"learning_rate": 2.281079349582524e-05, |
|
"loss": 1.5539, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.0808080808080809, |
|
"grad_norm": 0.4321942331098238, |
|
"learning_rate": 2.2739467854427512e-05, |
|
"loss": 1.7017, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.0858585858585859, |
|
"grad_norm": 0.34896226538754127, |
|
"learning_rate": 2.266790292948622e-05, |
|
"loss": 1.6232, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.0909090909090908, |
|
"grad_norm": 0.39588469087020545, |
|
"learning_rate": 2.2596100933596498e-05, |
|
"loss": 1.633, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.095959595959596, |
|
"grad_norm": 0.3680904345345009, |
|
"learning_rate": 2.252406408668304e-05, |
|
"loss": 1.5771, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.101010101010101, |
|
"grad_norm": 0.3635291050534616, |
|
"learning_rate": 2.2451794615931542e-05, |
|
"loss": 1.5251, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.106060606060606, |
|
"grad_norm": 0.39495821190617536, |
|
"learning_rate": 2.237929475571979e-05, |
|
"loss": 1.6461, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 0.3741651687847982, |
|
"learning_rate": 2.2306566747548604e-05, |
|
"loss": 1.5491, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.1161616161616161, |
|
"grad_norm": 0.3482093972061835, |
|
"learning_rate": 2.2233612839972497e-05, |
|
"loss": 1.4896, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.121212121212121, |
|
"grad_norm": 0.39618420354024975, |
|
"learning_rate": 2.2160435288530208e-05, |
|
"loss": 1.6567, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.1262626262626263, |
|
"grad_norm": 0.3660259252156906, |
|
"learning_rate": 2.2087036355674947e-05, |
|
"loss": 1.5476, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.1313131313131313, |
|
"grad_norm": 0.39507488097490023, |
|
"learning_rate": 2.2013418310704422e-05, |
|
"loss": 1.5749, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.1363636363636362, |
|
"grad_norm": 0.4312941458010281, |
|
"learning_rate": 2.1939583429690716e-05, |
|
"loss": 1.5716, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.1414141414141414, |
|
"grad_norm": 0.35158839668333186, |
|
"learning_rate": 2.1865533995409887e-05, |
|
"loss": 1.5532, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.1464646464646464, |
|
"grad_norm": 0.3641625201369256, |
|
"learning_rate": 2.1791272297271416e-05, |
|
"loss": 1.5972, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.1515151515151516, |
|
"grad_norm": 0.4250517041163231, |
|
"learning_rate": 2.1716800631247403e-05, |
|
"loss": 1.6134, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.1565656565656566, |
|
"grad_norm": 0.4357327884434021, |
|
"learning_rate": 2.1642121299801594e-05, |
|
"loss": 1.6299, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.1616161616161615, |
|
"grad_norm": 0.36361246043809753, |
|
"learning_rate": 2.1567236611818187e-05, |
|
"loss": 1.6186, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.1666666666666667, |
|
"grad_norm": 0.38940692200146193, |
|
"learning_rate": 2.149214888253046e-05, |
|
"loss": 1.5636, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.1717171717171717, |
|
"grad_norm": 0.4479735326446513, |
|
"learning_rate": 2.1416860433449177e-05, |
|
"loss": 1.6342, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.1767676767676767, |
|
"grad_norm": 0.36500909652831554, |
|
"learning_rate": 2.1341373592290822e-05, |
|
"loss": 1.545, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.1818181818181819, |
|
"grad_norm": 0.41140999886470425, |
|
"learning_rate": 2.126569069290562e-05, |
|
"loss": 1.6267, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.1868686868686869, |
|
"grad_norm": 0.37095591557372914, |
|
"learning_rate": 2.1189814075205406e-05, |
|
"loss": 1.5176, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.1919191919191918, |
|
"grad_norm": 0.3754937123457338, |
|
"learning_rate": 2.1113746085091246e-05, |
|
"loss": 1.6441, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.196969696969697, |
|
"grad_norm": 0.42464228140478444, |
|
"learning_rate": 2.1037489074380934e-05, |
|
"loss": 1.574, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.202020202020202, |
|
"grad_norm": 0.3732085771180313, |
|
"learning_rate": 2.0961045400736286e-05, |
|
"loss": 1.5687, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.2070707070707072, |
|
"grad_norm": 0.4174666906233983, |
|
"learning_rate": 2.0884417427590217e-05, |
|
"loss": 1.5579, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.2121212121212122, |
|
"grad_norm": 0.414246878313963, |
|
"learning_rate": 2.0807607524073684e-05, |
|
"loss": 1.6069, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.2171717171717171, |
|
"grad_norm": 0.35489448104499416, |
|
"learning_rate": 2.073061806494246e-05, |
|
"loss": 1.5943, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.2222222222222223, |
|
"grad_norm": 0.3961867107004295, |
|
"learning_rate": 2.0653451430503686e-05, |
|
"loss": 1.5851, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.2272727272727273, |
|
"grad_norm": 0.4264411067539511, |
|
"learning_rate": 2.0576110006542278e-05, |
|
"loss": 1.5436, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.2323232323232323, |
|
"grad_norm": 0.43088202335465686, |
|
"learning_rate": 2.0498596184247196e-05, |
|
"loss": 1.642, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.2373737373737375, |
|
"grad_norm": 0.36469200409321884, |
|
"learning_rate": 2.0420912360137466e-05, |
|
"loss": 1.6031, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.2424242424242424, |
|
"grad_norm": 0.4153699560933353, |
|
"learning_rate": 2.0343060935988136e-05, |
|
"loss": 1.6385, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.2474747474747474, |
|
"grad_norm": 0.39846520447706185, |
|
"learning_rate": 2.0265044318755988e-05, |
|
"loss": 1.5288, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.2525252525252526, |
|
"grad_norm": 0.35709349697159576, |
|
"learning_rate": 2.018686492050513e-05, |
|
"loss": 1.6124, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.2575757575757576, |
|
"grad_norm": 0.38319549330931446, |
|
"learning_rate": 2.010852515833242e-05, |
|
"loss": 1.535, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.2626262626262625, |
|
"grad_norm": 0.46317037418448487, |
|
"learning_rate": 2.003002745429274e-05, |
|
"loss": 1.6523, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.2626262626262625, |
|
"eval_loss": 1.498921275138855, |
|
"eval_runtime": 229.2608, |
|
"eval_samples_per_second": 6.242, |
|
"eval_steps_per_second": 0.781, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.2676767676767677, |
|
"grad_norm": 0.386123708626963, |
|
"learning_rate": 1.9951374235324105e-05, |
|
"loss": 1.6125, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.2727272727272727, |
|
"grad_norm": 0.3970356471812719, |
|
"learning_rate": 1.9872567933172647e-05, |
|
"loss": 1.586, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.2777777777777777, |
|
"grad_norm": 0.4033780506493091, |
|
"learning_rate": 1.97936109843174e-05, |
|
"loss": 1.5574, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.2828282828282829, |
|
"grad_norm": 0.39482348113655924, |
|
"learning_rate": 1.9714505829895004e-05, |
|
"loss": 1.6136, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.2878787878787878, |
|
"grad_norm": 0.45725538920837333, |
|
"learning_rate": 1.963525491562421e-05, |
|
"loss": 1.6221, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.2929292929292928, |
|
"grad_norm": 0.4259870746825829, |
|
"learning_rate": 1.9555860691730277e-05, |
|
"loss": 1.5543, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.297979797979798, |
|
"grad_norm": 0.38172206665482866, |
|
"learning_rate": 1.9476325612869202e-05, |
|
"loss": 1.4709, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.303030303030303, |
|
"grad_norm": 0.3996792794666037, |
|
"learning_rate": 1.9396652138051844e-05, |
|
"loss": 1.6108, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.308080808080808, |
|
"grad_norm": 0.4433312059533111, |
|
"learning_rate": 1.9316842730567902e-05, |
|
"loss": 1.627, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.3131313131313131, |
|
"grad_norm": 0.36892676364281046, |
|
"learning_rate": 1.923689985790974e-05, |
|
"loss": 1.4977, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.3181818181818181, |
|
"grad_norm": 0.3785785048742356, |
|
"learning_rate": 1.9156825991696096e-05, |
|
"loss": 1.5503, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.3232323232323233, |
|
"grad_norm": 0.4339005608639186, |
|
"learning_rate": 1.9076623607595696e-05, |
|
"loss": 1.5747, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.3282828282828283, |
|
"grad_norm": 0.37171089149936926, |
|
"learning_rate": 1.8996295185250682e-05, |
|
"loss": 1.4574, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 0.383278000804476, |
|
"learning_rate": 1.8915843208199967e-05, |
|
"loss": 1.6036, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.3383838383838385, |
|
"grad_norm": 0.41536599243507505, |
|
"learning_rate": 1.8835270163802433e-05, |
|
"loss": 1.6381, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.3434343434343434, |
|
"grad_norm": 0.38161228496015287, |
|
"learning_rate": 1.8754578543160045e-05, |
|
"loss": 1.5011, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.3484848484848486, |
|
"grad_norm": 0.3744640963542385, |
|
"learning_rate": 1.867377084104083e-05, |
|
"loss": 1.6644, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.3535353535353536, |
|
"grad_norm": 0.3733108852557401, |
|
"learning_rate": 1.8592849555801746e-05, |
|
"loss": 1.6011, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.3585858585858586, |
|
"grad_norm": 0.4411122207695828, |
|
"learning_rate": 1.851181718931141e-05, |
|
"loss": 1.5586, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.3636363636363638, |
|
"grad_norm": 0.38080346450070857, |
|
"learning_rate": 1.84306762468728e-05, |
|
"loss": 1.5985, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.3686868686868687, |
|
"grad_norm": 0.3491871213017396, |
|
"learning_rate": 1.8349429237145776e-05, |
|
"loss": 1.5688, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.3737373737373737, |
|
"grad_norm": 0.37833218710396194, |
|
"learning_rate": 1.8268078672069478e-05, |
|
"loss": 1.5583, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.378787878787879, |
|
"grad_norm": 0.5250217329172516, |
|
"learning_rate": 1.818662706678473e-05, |
|
"loss": 1.5692, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.3838383838383839, |
|
"grad_norm": 0.37119240386882224, |
|
"learning_rate": 1.8105076939556238e-05, |
|
"loss": 1.5941, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.3888888888888888, |
|
"grad_norm": 0.3439611330263627, |
|
"learning_rate": 1.8023430811694746e-05, |
|
"loss": 1.6779, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.393939393939394, |
|
"grad_norm": 0.3327777793630774, |
|
"learning_rate": 1.7941691207479067e-05, |
|
"loss": 1.6037, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.398989898989899, |
|
"grad_norm": 0.34323800459831283, |
|
"learning_rate": 1.7859860654078065e-05, |
|
"loss": 1.6266, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.404040404040404, |
|
"grad_norm": 0.3647890764031767, |
|
"learning_rate": 1.77779416814725e-05, |
|
"loss": 1.5636, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.4090909090909092, |
|
"grad_norm": 0.3686098374668375, |
|
"learning_rate": 1.769593682237682e-05, |
|
"loss": 1.6069, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.4141414141414141, |
|
"grad_norm": 0.33500372009414925, |
|
"learning_rate": 1.7613848612160857e-05, |
|
"loss": 1.5403, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.4191919191919191, |
|
"grad_norm": 0.35011148506577505, |
|
"learning_rate": 1.753167958877143e-05, |
|
"loss": 1.6803, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.4242424242424243, |
|
"grad_norm": 0.3903537969599135, |
|
"learning_rate": 1.7449432292653875e-05, |
|
"loss": 1.5327, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.4292929292929293, |
|
"grad_norm": 0.3511906044729009, |
|
"learning_rate": 1.736710926667352e-05, |
|
"loss": 1.5245, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.4343434343434343, |
|
"grad_norm": 0.36248283366049505, |
|
"learning_rate": 1.7284713056037074e-05, |
|
"loss": 1.6522, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.4393939393939394, |
|
"grad_norm": 0.3698906007038963, |
|
"learning_rate": 1.720224620821389e-05, |
|
"loss": 1.612, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.4444444444444444, |
|
"grad_norm": 0.36917069694620513, |
|
"learning_rate": 1.7119711272857242e-05, |
|
"loss": 1.6716, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.4494949494949494, |
|
"grad_norm": 0.3504632139356775, |
|
"learning_rate": 1.7037110801725498e-05, |
|
"loss": 1.567, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.4545454545454546, |
|
"grad_norm": 0.38102242984213275, |
|
"learning_rate": 1.69544473486032e-05, |
|
"loss": 1.5831, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.4595959595959596, |
|
"grad_norm": 0.3490442364729326, |
|
"learning_rate": 1.687172346922213e-05, |
|
"loss": 1.5999, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.4646464646464645, |
|
"grad_norm": 0.36706394830835837, |
|
"learning_rate": 1.67889417211823e-05, |
|
"loss": 1.6264, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.4696969696969697, |
|
"grad_norm": 0.330869571421232, |
|
"learning_rate": 1.670610466387285e-05, |
|
"loss": 1.5905, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.4747474747474747, |
|
"grad_norm": 0.35481611568449467, |
|
"learning_rate": 1.662321485839294e-05, |
|
"loss": 1.5783, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.4797979797979797, |
|
"grad_norm": 0.420284177177337, |
|
"learning_rate": 1.6540274867472554e-05, |
|
"loss": 1.5323, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.4848484848484849, |
|
"grad_norm": 0.3733856344040281, |
|
"learning_rate": 1.645728725539329e-05, |
|
"loss": 1.5342, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.4898989898989898, |
|
"grad_norm": 0.35109601539946006, |
|
"learning_rate": 1.637425458790905e-05, |
|
"loss": 1.5483, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.494949494949495, |
|
"grad_norm": 0.341135998502388, |
|
"learning_rate": 1.6291179432166737e-05, |
|
"loss": 1.6149, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.35535095307312015, |
|
"learning_rate": 1.620806435662687e-05, |
|
"loss": 1.5373, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.5050505050505052, |
|
"grad_norm": 0.36290854386190635, |
|
"learning_rate": 1.612491193098419e-05, |
|
"loss": 1.6085, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.51010101010101, |
|
"grad_norm": 0.3571923405167369, |
|
"learning_rate": 1.6041724726088187e-05, |
|
"loss": 1.5928, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.5151515151515151, |
|
"grad_norm": 0.36396420706496496, |
|
"learning_rate": 1.5958505313863654e-05, |
|
"loss": 1.5806, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.5151515151515151, |
|
"eval_loss": 1.4940327405929565, |
|
"eval_runtime": 229.2657, |
|
"eval_samples_per_second": 6.242, |
|
"eval_steps_per_second": 0.781, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.5202020202020203, |
|
"grad_norm": 0.3431930051075577, |
|
"learning_rate": 1.587525626723113e-05, |
|
"loss": 1.5698, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.5252525252525253, |
|
"grad_norm": 0.3620485249342895, |
|
"learning_rate": 1.5791980160027376e-05, |
|
"loss": 1.5985, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.5303030303030303, |
|
"grad_norm": 0.34146120936201213, |
|
"learning_rate": 1.570867956692579e-05, |
|
"loss": 1.5825, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.5353535353535355, |
|
"grad_norm": 0.3545235530221104, |
|
"learning_rate": 1.5625357063356825e-05, |
|
"loss": 1.5798, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.5404040404040404, |
|
"grad_norm": 0.34094408848239416, |
|
"learning_rate": 1.5542015225428314e-05, |
|
"loss": 1.6249, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.5454545454545454, |
|
"grad_norm": 0.3574679152501359, |
|
"learning_rate": 1.545865662984589e-05, |
|
"loss": 1.6048, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.5505050505050506, |
|
"grad_norm": 0.3581945699930548, |
|
"learning_rate": 1.5375283853833272e-05, |
|
"loss": 1.7071, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.5555555555555556, |
|
"grad_norm": 0.34332771314631755, |
|
"learning_rate": 1.5291899475052596e-05, |
|
"loss": 1.563, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.5606060606060606, |
|
"grad_norm": 0.3585885575803054, |
|
"learning_rate": 1.5208506071524727e-05, |
|
"loss": 1.5551, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.5656565656565657, |
|
"grad_norm": 0.3478032073365952, |
|
"learning_rate": 1.5125106221549567e-05, |
|
"loss": 1.6227, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.5707070707070707, |
|
"grad_norm": 0.34451168320759784, |
|
"learning_rate": 1.5041702503626296e-05, |
|
"loss": 1.5625, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.5757575757575757, |
|
"grad_norm": 0.3625884247911658, |
|
"learning_rate": 1.4958297496373708e-05, |
|
"loss": 1.622, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.5808080808080809, |
|
"grad_norm": 0.3390738782534803, |
|
"learning_rate": 1.4874893778450436e-05, |
|
"loss": 1.5539, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.5858585858585859, |
|
"grad_norm": 0.36415276028038074, |
|
"learning_rate": 1.4791493928475275e-05, |
|
"loss": 1.5946, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.5909090909090908, |
|
"grad_norm": 0.3447466909227647, |
|
"learning_rate": 1.4708100524947413e-05, |
|
"loss": 1.6482, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.595959595959596, |
|
"grad_norm": 0.3573366580845515, |
|
"learning_rate": 1.4624716146166734e-05, |
|
"loss": 1.6493, |
|
"step": 316 |
|
}, |
|
{ |
      "epoch": 1.601010101010101,
      "grad_norm": 0.35959249143609046,
      "learning_rate": 1.4541343370154115e-05,
      "loss": 1.6135,
      "step": 317
    },
    {
      "epoch": 1.606060606060606,
      "grad_norm": 0.3323927118670595,
      "learning_rate": 1.4457984774571692e-05,
      "loss": 1.6435,
      "step": 318
    },
    {
      "epoch": 1.6111111111111112,
      "grad_norm": 0.34075152367107814,
      "learning_rate": 1.437464293664318e-05,
      "loss": 1.6634,
      "step": 319
    },
    {
      "epoch": 1.6161616161616161,
      "grad_norm": 0.3399955824391449,
      "learning_rate": 1.4291320433074213e-05,
      "loss": 1.6254,
      "step": 320
    },
    {
      "epoch": 1.621212121212121,
      "grad_norm": 0.3427914203743951,
      "learning_rate": 1.4208019839972627e-05,
      "loss": 1.5906,
      "step": 321
    },
    {
      "epoch": 1.6262626262626263,
      "grad_norm": 0.3394828693597387,
      "learning_rate": 1.4124743732768873e-05,
      "loss": 1.6156,
      "step": 322
    },
    {
      "epoch": 1.6313131313131313,
      "grad_norm": 0.3854380971355761,
      "learning_rate": 1.4041494686136348e-05,
      "loss": 1.6758,
      "step": 323
    },
    {
      "epoch": 1.6363636363636362,
      "grad_norm": 0.3391843715071689,
      "learning_rate": 1.3958275273911812e-05,
      "loss": 1.6073,
      "step": 324
    },
    {
      "epoch": 1.6414141414141414,
      "grad_norm": 0.34941530909401375,
      "learning_rate": 1.3875088069015815e-05,
      "loss": 1.7296,
      "step": 325
    },
    {
      "epoch": 1.6464646464646466,
      "grad_norm": 0.3645855591600974,
      "learning_rate": 1.3791935643373133e-05,
      "loss": 1.5878,
      "step": 326
    },
    {
      "epoch": 1.6515151515151514,
      "grad_norm": 0.3818786490819524,
      "learning_rate": 1.3708820567833266e-05,
      "loss": 1.6895,
      "step": 327
    },
    {
      "epoch": 1.6565656565656566,
      "grad_norm": 0.3918329456403931,
      "learning_rate": 1.3625745412090953e-05,
      "loss": 1.5491,
      "step": 328
    },
    {
      "epoch": 1.6616161616161618,
      "grad_norm": 0.3685269378673387,
      "learning_rate": 1.3542712744606712e-05,
      "loss": 1.5422,
      "step": 329
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 0.3784489076280344,
      "learning_rate": 1.3459725132527448e-05,
      "loss": 1.516,
      "step": 330
    },
    {
      "epoch": 1.6717171717171717,
      "grad_norm": 0.35908318066713363,
      "learning_rate": 1.3376785141607067e-05,
      "loss": 1.5082,
      "step": 331
    },
    {
      "epoch": 1.676767676767677,
      "grad_norm": 0.3600676479254767,
      "learning_rate": 1.329389533612715e-05,
      "loss": 1.6618,
      "step": 332
    },
    {
      "epoch": 1.6818181818181817,
      "grad_norm": 0.3408967547242199,
      "learning_rate": 1.3211058278817705e-05,
      "loss": 1.5369,
      "step": 333
    },
    {
      "epoch": 1.6868686868686869,
      "grad_norm": 0.3397113886527206,
      "learning_rate": 1.3128276530777874e-05,
      "loss": 1.5655,
      "step": 334
    },
    {
      "epoch": 1.691919191919192,
      "grad_norm": 0.3552202693938084,
      "learning_rate": 1.3045552651396805e-05,
      "loss": 1.5913,
      "step": 335
    },
    {
      "epoch": 1.696969696969697,
      "grad_norm": 0.3744742946582953,
      "learning_rate": 1.2962889198274506e-05,
      "loss": 1.5468,
      "step": 336
    },
    {
      "epoch": 1.702020202020202,
      "grad_norm": 0.34653332317369845,
      "learning_rate": 1.2880288727142757e-05,
      "loss": 1.6004,
      "step": 337
    },
    {
      "epoch": 1.7070707070707072,
      "grad_norm": 0.31916935496978377,
      "learning_rate": 1.2797753791786112e-05,
      "loss": 1.5545,
      "step": 338
    },
    {
      "epoch": 1.7121212121212122,
      "grad_norm": 0.34268191483789684,
      "learning_rate": 1.2715286943962925e-05,
      "loss": 1.5286,
      "step": 339
    },
    {
      "epoch": 1.7171717171717171,
      "grad_norm": 0.34934463675899574,
      "learning_rate": 1.2632890733326475e-05,
      "loss": 1.6462,
      "step": 340
    },
    {
      "epoch": 1.7222222222222223,
      "grad_norm": 0.35309417659426007,
      "learning_rate": 1.255056770734613e-05,
      "loss": 1.6459,
      "step": 341
    },
    {
      "epoch": 1.7272727272727273,
      "grad_norm": 0.36011757823593665,
      "learning_rate": 1.2468320411228579e-05,
      "loss": 1.5716,
      "step": 342
    },
    {
      "epoch": 1.7323232323232323,
      "grad_norm": 0.31144567675642815,
      "learning_rate": 1.2386151387839145e-05,
      "loss": 1.4455,
      "step": 343
    },
    {
      "epoch": 1.7373737373737375,
      "grad_norm": 0.3723102104349624,
      "learning_rate": 1.2304063177623182e-05,
      "loss": 1.6399,
      "step": 344
    },
    {
      "epoch": 1.7424242424242424,
      "grad_norm": 0.38916725203253794,
      "learning_rate": 1.2222058318527502e-05,
      "loss": 1.553,
      "step": 345
    },
    {
      "epoch": 1.7474747474747474,
      "grad_norm": 0.3322536259081415,
      "learning_rate": 1.214013934592194e-05,
      "loss": 1.492,
      "step": 346
    },
    {
      "epoch": 1.7525252525252526,
      "grad_norm": 0.37090482924070856,
      "learning_rate": 1.2058308792520937e-05,
      "loss": 1.5703,
      "step": 347
    },
    {
      "epoch": 1.7575757575757576,
      "grad_norm": 0.38587618815415825,
      "learning_rate": 1.1976569188305255e-05,
      "loss": 1.6146,
      "step": 348
    },
    {
      "epoch": 1.7626262626262625,
      "grad_norm": 0.37194117916485847,
      "learning_rate": 1.1894923060443763e-05,
      "loss": 1.5854,
      "step": 349
    },
    {
      "epoch": 1.7676767676767677,
      "grad_norm": 0.39535022296888556,
      "learning_rate": 1.1813372933215274e-05,
      "loss": 1.5918,
      "step": 350
    },
    {
      "epoch": 1.7676767676767677,
      "eval_loss": 1.4877514839172363,
      "eval_runtime": 229.0648,
      "eval_samples_per_second": 6.247,
      "eval_steps_per_second": 0.781,
      "step": 350
    },
    {
      "epoch": 1.7727272727272727,
      "grad_norm": 0.35051004852162915,
      "learning_rate": 1.1731921327930523e-05,
      "loss": 1.6095,
      "step": 351
    },
    {
      "epoch": 1.7777777777777777,
      "grad_norm": 0.34675700639977675,
      "learning_rate": 1.165057076285423e-05,
      "loss": 1.5565,
      "step": 352
    },
    {
      "epoch": 1.7828282828282829,
      "grad_norm": 0.325905557218622,
      "learning_rate": 1.1569323753127196e-05,
      "loss": 1.5533,
      "step": 353
    },
    {
      "epoch": 1.7878787878787878,
      "grad_norm": 0.3713917306740112,
      "learning_rate": 1.1488182810688593e-05,
      "loss": 1.5455,
      "step": 354
    },
    {
      "epoch": 1.7929292929292928,
      "grad_norm": 0.405573404381939,
      "learning_rate": 1.1407150444198262e-05,
      "loss": 1.6257,
      "step": 355
    },
    {
      "epoch": 1.797979797979798,
      "grad_norm": 0.34148070848500733,
      "learning_rate": 1.132622915895917e-05,
      "loss": 1.6975,
      "step": 356
    },
    {
      "epoch": 1.803030303030303,
      "grad_norm": 0.34138980212543457,
      "learning_rate": 1.1245421456839954e-05,
      "loss": 1.5417,
      "step": 357
    },
    {
      "epoch": 1.808080808080808,
      "grad_norm": 0.3458043789729876,
      "learning_rate": 1.116472983619757e-05,
      "loss": 1.6212,
      "step": 358
    },
    {
      "epoch": 1.8131313131313131,
      "grad_norm": 0.34821285941193825,
      "learning_rate": 1.1084156791800036e-05,
      "loss": 1.6329,
      "step": 359
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 0.35583032341592974,
      "learning_rate": 1.100370481474932e-05,
      "loss": 1.6058,
      "step": 360
    },
    {
      "epoch": 1.823232323232323,
      "grad_norm": 0.3347559695941822,
      "learning_rate": 1.0923376392404302e-05,
      "loss": 1.5184,
      "step": 361
    },
    {
      "epoch": 1.8282828282828283,
      "grad_norm": 0.3914463684785098,
      "learning_rate": 1.0843174008303908e-05,
      "loss": 1.6334,
      "step": 362
    },
    {
      "epoch": 1.8333333333333335,
      "grad_norm": 0.3665474032055003,
      "learning_rate": 1.0763100142090267e-05,
      "loss": 1.6412,
      "step": 363
    },
    {
      "epoch": 1.8383838383838382,
      "grad_norm": 0.3342017672784236,
      "learning_rate": 1.0683157269432097e-05,
      "loss": 1.6167,
      "step": 364
    },
    {
      "epoch": 1.8434343434343434,
      "grad_norm": 0.3300768200411088,
      "learning_rate": 1.0603347861948155e-05,
      "loss": 1.6253,
      "step": 365
    },
    {
      "epoch": 1.8484848484848486,
      "grad_norm": 0.3573589534814461,
      "learning_rate": 1.0523674387130806e-05,
      "loss": 1.5936,
      "step": 366
    },
    {
      "epoch": 1.8535353535353534,
      "grad_norm": 0.3476694128643829,
      "learning_rate": 1.0444139308269725e-05,
      "loss": 1.6644,
      "step": 367
    },
    {
      "epoch": 1.8585858585858586,
      "grad_norm": 0.38035554483291945,
      "learning_rate": 1.036474508437579e-05,
      "loss": 1.6168,
      "step": 368
    },
    {
      "epoch": 1.8636363636363638,
      "grad_norm": 0.3334234919513792,
      "learning_rate": 1.0285494170104996e-05,
      "loss": 1.6633,
      "step": 369
    },
    {
      "epoch": 1.8686868686868687,
      "grad_norm": 0.3265228255184419,
      "learning_rate": 1.0206389015682601e-05,
      "loss": 1.5896,
      "step": 370
    },
    {
      "epoch": 1.8737373737373737,
      "grad_norm": 0.3691401309714038,
      "learning_rate": 1.0127432066827357e-05,
      "loss": 1.5916,
      "step": 371
    },
    {
      "epoch": 1.878787878787879,
      "grad_norm": 0.33985591597069215,
      "learning_rate": 1.0048625764675896e-05,
      "loss": 1.6083,
      "step": 372
    },
    {
      "epoch": 1.8838383838383839,
      "grad_norm": 0.3654336922923449,
      "learning_rate": 9.969972545707266e-06,
      "loss": 1.6688,
      "step": 373
    },
    {
      "epoch": 1.8888888888888888,
      "grad_norm": 0.3474815931456802,
      "learning_rate": 9.891474841667585e-06,
      "loss": 1.6077,
      "step": 374
    },
    {
      "epoch": 1.893939393939394,
      "grad_norm": 0.35105409408446747,
      "learning_rate": 9.813135079494872e-06,
      "loss": 1.6133,
      "step": 375
    },
    {
      "epoch": 1.898989898989899,
      "grad_norm": 0.3412933734315999,
      "learning_rate": 9.734955681244016e-06,
      "loss": 1.6069,
      "step": 376
    },
    {
      "epoch": 1.904040404040404,
      "grad_norm": 0.32783066972147346,
      "learning_rate": 9.656939064011861e-06,
      "loss": 1.4873,
      "step": 377
    },
    {
      "epoch": 1.9090909090909092,
      "grad_norm": 0.3478856883264074,
      "learning_rate": 9.579087639862538e-06,
      "loss": 1.5366,
      "step": 378
    },
    {
      "epoch": 1.9141414141414141,
      "grad_norm": 0.3539395477762644,
      "learning_rate": 9.501403815752813e-06,
      "loss": 1.5213,
      "step": 379
    },
    {
      "epoch": 1.9191919191919191,
      "grad_norm": 0.3581622590288963,
      "learning_rate": 9.423889993457721e-06,
      "loss": 1.6273,
      "step": 380
    },
    {
      "epoch": 1.9242424242424243,
      "grad_norm": 0.3335926440610502,
      "learning_rate": 9.346548569496318e-06,
      "loss": 1.6772,
      "step": 381
    },
    {
      "epoch": 1.9292929292929293,
      "grad_norm": 0.3413862529782368,
      "learning_rate": 9.269381935057546e-06,
      "loss": 1.5744,
      "step": 382
    },
    {
      "epoch": 1.9343434343434343,
      "grad_norm": 0.378369060255768,
      "learning_rate": 9.192392475926315e-06,
      "loss": 1.6305,
      "step": 383
    },
    {
      "epoch": 1.9393939393939394,
      "grad_norm": 0.3508043937268879,
      "learning_rate": 9.115582572409789e-06,
      "loss": 1.6425,
      "step": 384
    },
    {
      "epoch": 1.9444444444444444,
      "grad_norm": 0.3566255219446304,
      "learning_rate": 9.038954599263713e-06,
      "loss": 1.5535,
      "step": 385
    },
    {
      "epoch": 1.9494949494949494,
      "grad_norm": 0.536928554074425,
      "learning_rate": 8.962510925619065e-06,
      "loss": 1.5138,
      "step": 386
    },
    {
      "epoch": 1.9545454545454546,
      "grad_norm": 0.37533231697996555,
      "learning_rate": 8.88625391490876e-06,
      "loss": 1.5625,
      "step": 387
    },
    {
      "epoch": 1.9595959595959596,
      "grad_norm": 0.3431698476036675,
      "learning_rate": 8.8101859247946e-06,
      "loss": 1.5796,
      "step": 388
    },
    {
      "epoch": 1.9646464646464645,
      "grad_norm": 0.38914902758530917,
      "learning_rate": 8.734309307094382e-06,
      "loss": 1.6072,
      "step": 389
    },
    {
      "epoch": 1.9696969696969697,
      "grad_norm": 0.3184503763537262,
      "learning_rate": 8.658626407709182e-06,
      "loss": 1.5288,
      "step": 390
    },
    {
      "epoch": 1.9747474747474747,
      "grad_norm": 0.32890220573653406,
      "learning_rate": 8.583139566550827e-06,
      "loss": 1.592,
      "step": 391
    },
    {
      "epoch": 1.9797979797979797,
      "grad_norm": 0.3395800990661379,
      "learning_rate": 8.507851117469546e-06,
      "loss": 1.5736,
      "step": 392
    },
    {
      "epoch": 1.9848484848484849,
      "grad_norm": 0.34733690681577234,
      "learning_rate": 8.432763388181812e-06,
      "loss": 1.5791,
      "step": 393
    },
    {
      "epoch": 1.98989898989899,
      "grad_norm": 0.37942192437254635,
      "learning_rate": 8.357878700198407e-06,
      "loss": 1.5927,
      "step": 394
    },
    {
      "epoch": 1.9949494949494948,
      "grad_norm": 0.3285509965028684,
      "learning_rate": 8.283199368752598e-06,
      "loss": 1.5848,
      "step": 395
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.3237504993576643,
      "learning_rate": 8.208727702728586e-06,
      "loss": 1.5468,
      "step": 396
    },
    {
      "epoch": 2.005050505050505,
      "grad_norm": 0.6489959146891373,
      "learning_rate": 8.134466004590116e-06,
      "loss": 1.3495,
      "step": 397
    },
    {
      "epoch": 2.01010101010101,
      "grad_norm": 0.5603926475365507,
      "learning_rate": 8.060416570309291e-06,
      "loss": 1.3804,
      "step": 398
    },
    {
      "epoch": 2.015151515151515,
      "grad_norm": 0.49584097590048715,
      "learning_rate": 7.986581689295577e-06,
      "loss": 1.3215,
      "step": 399
    },
    {
      "epoch": 2.0202020202020203,
      "grad_norm": 0.4392542856654279,
      "learning_rate": 7.912963644325057e-06,
      "loss": 1.3675,
      "step": 400
    },
    {
      "epoch": 2.0202020202020203,
      "eval_loss": 1.5459927320480347,
      "eval_runtime": 229.9347,
      "eval_samples_per_second": 6.224,
      "eval_steps_per_second": 0.778,
      "step": 400
    }
  ],
  "logging_steps": 1,
  "max_steps": 594,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0732600384594903e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}