{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9943377411662693, "eval_steps": 500, "global_step": 28750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006936917407327119, "grad_norm": 12224.0, "learning_rate": 4.998434782608696e-06, "loss": 10.7132, "step": 10 }, { "epoch": 0.0013873834814654238, "grad_norm": 19712.0, "learning_rate": 4.9966956521739135e-06, "loss": 1.4256, "step": 20 }, { "epoch": 0.0020810752221981357, "grad_norm": 21504.0, "learning_rate": 4.994956521739131e-06, "loss": 0.8714, "step": 30 }, { "epoch": 0.0027747669629308476, "grad_norm": 2624.0, "learning_rate": 4.993217391304348e-06, "loss": 0.7117, "step": 40 }, { "epoch": 0.0034684587036635595, "grad_norm": 23.875, "learning_rate": 4.991478260869566e-06, "loss": 0.3634, "step": 50 }, { "epoch": 0.004162150444396271, "grad_norm": 31.375, "learning_rate": 4.989739130434783e-06, "loss": 0.7818, "step": 60 }, { "epoch": 0.004855842185128983, "grad_norm": 8096.0, "learning_rate": 4.988e-06, "loss": 0.5493, "step": 70 }, { "epoch": 0.005549533925861695, "grad_norm": 27.875, "learning_rate": 4.986260869565218e-06, "loss": 0.457, "step": 80 }, { "epoch": 0.006243225666594407, "grad_norm": 37632.0, "learning_rate": 4.984521739130435e-06, "loss": 0.4123, "step": 90 }, { "epoch": 0.006936917407327119, "grad_norm": 61.5, "learning_rate": 4.9827826086956525e-06, "loss": 0.3742, "step": 100 }, { "epoch": 0.007630609148059831, "grad_norm": 34.25, "learning_rate": 4.98104347826087e-06, "loss": 0.2794, "step": 110 }, { "epoch": 0.008324300888792543, "grad_norm": 61.5, "learning_rate": 4.979304347826087e-06, "loss": 0.2913, "step": 120 }, { "epoch": 0.009017992629525256, "grad_norm": 8.5, "learning_rate": 4.977565217391305e-06, "loss": 0.3476, "step": 130 }, { "epoch": 0.009711684370257967, "grad_norm": 76800.0, "learning_rate": 4.975826086956522e-06, "loss": 1.5003, "step": 140 }, { "epoch": 0.01040537611099068, "grad_norm": 120.5, "learning_rate": 4.97408695652174e-06, "loss": 0.3051, "step": 150 }, { "epoch": 0.01109906785172339, "grad_norm": 2.28125, "learning_rate": 4.972347826086957e-06, "loss": 0.2632, "step": 160 }, { "epoch": 0.011792759592456103, "grad_norm": 12.6875, "learning_rate": 4.970608695652174e-06, "loss": 0.2847, "step": 170 }, { "epoch": 0.012486451333188814, "grad_norm": 20.75, "learning_rate": 4.9688695652173914e-06, "loss": 0.3436, "step": 180 }, { "epoch": 0.013180143073921527, "grad_norm": 234.0, "learning_rate": 4.96713043478261e-06, "loss": 0.2752, "step": 190 }, { "epoch": 0.013873834814654238, "grad_norm": 10.25, "learning_rate": 4.965391304347826e-06, "loss": 0.3504, "step": 200 }, { "epoch": 0.01456752655538695, "grad_norm": 8.0625, "learning_rate": 4.9636521739130436e-06, "loss": 0.2447, "step": 210 }, { "epoch": 0.015261218296119662, "grad_norm": 3.640625, "learning_rate": 4.961913043478262e-06, "loss": 0.2864, "step": 220 }, { "epoch": 0.015954910036852375, "grad_norm": 7.6875, "learning_rate": 4.960173913043478e-06, "loss": 0.2455, "step": 230 }, { "epoch": 0.016648601777585086, "grad_norm": 10.25, "learning_rate": 4.958434782608696e-06, "loss": 0.2847, "step": 240 }, { "epoch": 0.017342293518317797, "grad_norm": 6.75, "learning_rate": 4.956695652173914e-06, "loss": 0.2797, "step": 250 }, { "epoch": 0.01803598525905051, "grad_norm": 9.4375, "learning_rate": 4.954956521739131e-06, "loss": 0.5388, "step": 260 }, { "epoch": 0.018729676999783222, "grad_norm": 125.0, "learning_rate": 4.953217391304348e-06, "loss": 0.2508, "step": 270 }, { "epoch": 0.019423368740515933, "grad_norm": 2.515625, "learning_rate": 4.951478260869565e-06, "loss": 0.263, "step": 280 }, { "epoch": 0.020117060481248644, "grad_norm": 1.578125, "learning_rate": 4.949739130434783e-06, "loss": 0.2583, "step": 290 }, { "epoch": 0.02081075222198136, "grad_norm": 4.53125, "learning_rate": 4.948000000000001e-06, "loss": 0.2684, "step": 300 }, { "epoch": 0.02150444396271407, "grad_norm": 19.625, "learning_rate": 4.946260869565217e-06, "loss": 0.2318, "step": 310 }, { "epoch": 0.02219813570344678, "grad_norm": 15.8125, "learning_rate": 4.9445217391304355e-06, "loss": 0.2509, "step": 320 }, { "epoch": 0.022891827444179492, "grad_norm": 4.5625, "learning_rate": 4.942782608695653e-06, "loss": 0.2428, "step": 330 }, { "epoch": 0.023585519184912206, "grad_norm": 20.75, "learning_rate": 4.94104347826087e-06, "loss": 0.2789, "step": 340 }, { "epoch": 0.024279210925644917, "grad_norm": 1.59375, "learning_rate": 4.939304347826087e-06, "loss": 0.2624, "step": 350 }, { "epoch": 0.02497290266637763, "grad_norm": 1.6640625, "learning_rate": 4.937565217391305e-06, "loss": 0.2712, "step": 360 }, { "epoch": 0.02566659440711034, "grad_norm": 2.0625, "learning_rate": 4.935826086956522e-06, "loss": 0.2973, "step": 370 }, { "epoch": 0.026360286147843054, "grad_norm": 2.4375, "learning_rate": 4.93408695652174e-06, "loss": 0.2769, "step": 380 }, { "epoch": 0.027053977888575765, "grad_norm": 1.3203125, "learning_rate": 4.932347826086957e-06, "loss": 0.2675, "step": 390 }, { "epoch": 0.027747669629308476, "grad_norm": 4.21875, "learning_rate": 4.9306086956521744e-06, "loss": 0.2663, "step": 400 }, { "epoch": 0.028441361370041187, "grad_norm": 17.75, "learning_rate": 4.928869565217392e-06, "loss": 0.3639, "step": 410 }, { "epoch": 0.0291350531107739, "grad_norm": 5.03125, "learning_rate": 4.927130434782609e-06, "loss": 0.3279, "step": 420 }, { "epoch": 0.029828744851506613, "grad_norm": 2.21875, "learning_rate": 4.9253913043478266e-06, "loss": 0.2773, "step": 430 }, { "epoch": 0.030522436592239324, "grad_norm": 6.625, "learning_rate": 4.923652173913044e-06, "loss": 0.2445, "step": 440 }, { "epoch": 0.031216128332972035, "grad_norm": 3.84375, "learning_rate": 4.921913043478261e-06, "loss": 0.2256, "step": 450 }, { "epoch": 0.03190982007370475, "grad_norm": 3.15625, "learning_rate": 4.920173913043479e-06, "loss": 0.2288, "step": 460 }, { "epoch": 0.03260351181443746, "grad_norm": 52.0, "learning_rate": 4.918434782608696e-06, "loss": 0.2662, "step": 470 }, { "epoch": 0.03329720355517017, "grad_norm": 13.4375, "learning_rate": 4.916695652173913e-06, "loss": 0.2476, "step": 480 }, { "epoch": 0.033990895295902886, "grad_norm": 49.0, "learning_rate": 4.914956521739131e-06, "loss": 0.2328, "step": 490 }, { "epoch": 0.03468458703663559, "grad_norm": 1.5546875, "learning_rate": 4.913217391304348e-06, "loss": 0.2463, "step": 500 }, { "epoch": 0.03537827877736831, "grad_norm": 10.5625, "learning_rate": 4.9114782608695655e-06, "loss": 0.2657, "step": 510 }, { "epoch": 0.03607197051810102, "grad_norm": 2.1875, "learning_rate": 4.909739130434783e-06, "loss": 0.268, "step": 520 }, { "epoch": 0.03676566225883373, "grad_norm": 1.9921875, "learning_rate": 4.908e-06, "loss": 0.3324, "step": 530 }, { "epoch": 0.037459353999566444, "grad_norm": 1.5703125, "learning_rate": 4.906260869565218e-06, "loss": 0.2086, "step": 540 }, { "epoch": 0.03815304574029915, "grad_norm": 2.453125, "learning_rate": 4.904521739130435e-06, "loss": 0.2581, "step": 550 }, { "epoch": 0.038846737481031866, "grad_norm": 1.4609375, "learning_rate": 4.902782608695652e-06, "loss": 0.2908, "step": 560 }, { "epoch": 0.03954042922176458, "grad_norm": 4.28125, "learning_rate": 4.90104347826087e-06, "loss": 0.2192, "step": 570 }, { "epoch": 0.04023412096249729, "grad_norm": 3.859375, "learning_rate": 4.899304347826087e-06, "loss": 0.2916, "step": 580 }, { "epoch": 0.04092781270323, "grad_norm": 1.390625, "learning_rate": 4.8975652173913045e-06, "loss": 0.2388, "step": 590 }, { "epoch": 0.04162150444396272, "grad_norm": 1.5859375, "learning_rate": 4.895826086956522e-06, "loss": 0.2509, "step": 600 }, { "epoch": 0.042315196184695425, "grad_norm": 1.3515625, "learning_rate": 4.89408695652174e-06, "loss": 0.2736, "step": 610 }, { "epoch": 0.04300888792542814, "grad_norm": 5.09375, "learning_rate": 4.892347826086957e-06, "loss": 0.329, "step": 620 }, { "epoch": 0.04370257966616085, "grad_norm": 1.15625, "learning_rate": 4.890608695652174e-06, "loss": 0.2188, "step": 630 }, { "epoch": 0.04439627140689356, "grad_norm": 1.640625, "learning_rate": 4.888869565217391e-06, "loss": 0.2942, "step": 640 }, { "epoch": 0.045089963147626276, "grad_norm": 1.046875, "learning_rate": 4.8871304347826096e-06, "loss": 0.2324, "step": 650 }, { "epoch": 0.045783654888358984, "grad_norm": 6.40625, "learning_rate": 4.885391304347826e-06, "loss": 0.2601, "step": 660 }, { "epoch": 0.0464773466290917, "grad_norm": 1.859375, "learning_rate": 4.8836521739130435e-06, "loss": 0.2292, "step": 670 }, { "epoch": 0.04717103836982441, "grad_norm": 1.84375, "learning_rate": 4.881913043478262e-06, "loss": 0.2719, "step": 680 }, { "epoch": 0.04786473011055712, "grad_norm": 2.125, "learning_rate": 4.880173913043479e-06, "loss": 0.2875, "step": 690 }, { "epoch": 0.048558421851289835, "grad_norm": 2.265625, "learning_rate": 4.878434782608696e-06, "loss": 0.2648, "step": 700 }, { "epoch": 0.04925211359202254, "grad_norm": 1.625, "learning_rate": 4.876695652173914e-06, "loss": 0.2901, "step": 710 }, { "epoch": 0.04994580533275526, "grad_norm": 1.2421875, "learning_rate": 4.874956521739131e-06, "loss": 0.2532, "step": 720 }, { "epoch": 0.05063949707348797, "grad_norm": 1.953125, "learning_rate": 4.8732173913043485e-06, "loss": 0.2736, "step": 730 }, { "epoch": 0.05133318881422068, "grad_norm": 2.390625, "learning_rate": 4.871478260869565e-06, "loss": 0.2531, "step": 740 }, { "epoch": 0.05202688055495339, "grad_norm": 1.875, "learning_rate": 4.869739130434783e-06, "loss": 0.262, "step": 750 }, { "epoch": 0.05272057229568611, "grad_norm": 2.171875, "learning_rate": 4.868000000000001e-06, "loss": 0.2242, "step": 760 }, { "epoch": 0.053414264036418815, "grad_norm": 2.546875, "learning_rate": 4.866260869565218e-06, "loss": 0.2873, "step": 770 }, { "epoch": 0.05410795577715153, "grad_norm": 1.4765625, "learning_rate": 4.864521739130435e-06, "loss": 0.2867, "step": 780 }, { "epoch": 0.05480164751788424, "grad_norm": 1.40625, "learning_rate": 4.862782608695653e-06, "loss": 0.2815, "step": 790 }, { "epoch": 0.05549533925861695, "grad_norm": 1.7265625, "learning_rate": 4.86104347826087e-06, "loss": 0.2943, "step": 800 }, { "epoch": 0.056189030999349666, "grad_norm": 1.4375, "learning_rate": 4.8593043478260875e-06, "loss": 0.2723, "step": 810 }, { "epoch": 0.056882722740082374, "grad_norm": 1.25, "learning_rate": 4.857565217391305e-06, "loss": 0.2441, "step": 820 }, { "epoch": 0.05757641448081509, "grad_norm": 1.234375, "learning_rate": 4.855826086956522e-06, "loss": 0.239, "step": 830 }, { "epoch": 0.0582701062215478, "grad_norm": 1.2421875, "learning_rate": 4.85408695652174e-06, "loss": 0.3108, "step": 840 }, { "epoch": 0.05896379796228051, "grad_norm": 1.3515625, "learning_rate": 4.852347826086957e-06, "loss": 0.2425, "step": 850 }, { "epoch": 0.059657489703013225, "grad_norm": 1.390625, "learning_rate": 4.850608695652174e-06, "loss": 0.3201, "step": 860 }, { "epoch": 0.06035118144374593, "grad_norm": 1.859375, "learning_rate": 4.848869565217392e-06, "loss": 0.2886, "step": 870 }, { "epoch": 0.06104487318447865, "grad_norm": 1.15625, "learning_rate": 4.847130434782609e-06, "loss": 0.2269, "step": 880 }, { "epoch": 0.06173856492521136, "grad_norm": 1.21875, "learning_rate": 4.8453913043478265e-06, "loss": 0.247, "step": 890 }, { "epoch": 0.06243225666594407, "grad_norm": 1.484375, "learning_rate": 4.843652173913044e-06, "loss": 0.2773, "step": 900 }, { "epoch": 0.06312594840667678, "grad_norm": 1.1171875, "learning_rate": 4.841913043478261e-06, "loss": 0.2464, "step": 910 }, { "epoch": 0.0638196401474095, "grad_norm": 1.703125, "learning_rate": 4.840173913043479e-06, "loss": 0.2407, "step": 920 }, { "epoch": 0.0645133318881422, "grad_norm": 1.125, "learning_rate": 4.838434782608696e-06, "loss": 0.2772, "step": 930 }, { "epoch": 0.06520702362887491, "grad_norm": 1.015625, "learning_rate": 4.836695652173913e-06, "loss": 0.252, "step": 940 }, { "epoch": 0.06590071536960763, "grad_norm": 1.5703125, "learning_rate": 4.834956521739131e-06, "loss": 0.2426, "step": 950 }, { "epoch": 0.06659440711034034, "grad_norm": 1.421875, "learning_rate": 4.833217391304348e-06, "loss": 0.2722, "step": 960 }, { "epoch": 0.06728809885107305, "grad_norm": 1.7890625, "learning_rate": 4.8314782608695655e-06, "loss": 0.2661, "step": 970 }, { "epoch": 0.06798179059180577, "grad_norm": 1.6015625, "learning_rate": 4.829739130434783e-06, "loss": 0.2329, "step": 980 }, { "epoch": 0.06867548233253848, "grad_norm": 1.578125, "learning_rate": 4.828e-06, "loss": 0.2414, "step": 990 }, { "epoch": 0.06936917407327119, "grad_norm": 1.9296875, "learning_rate": 4.826260869565218e-06, "loss": 0.264, "step": 1000 }, { "epoch": 0.07006286581400391, "grad_norm": 1.5546875, "learning_rate": 4.824521739130435e-06, "loss": 0.2493, "step": 1010 }, { "epoch": 0.07075655755473662, "grad_norm": 1.3984375, "learning_rate": 4.822782608695652e-06, "loss": 0.3118, "step": 1020 }, { "epoch": 0.07145024929546932, "grad_norm": 1.8828125, "learning_rate": 4.82104347826087e-06, "loss": 0.2739, "step": 1030 }, { "epoch": 0.07214394103620204, "grad_norm": 1.0234375, "learning_rate": 4.819304347826088e-06, "loss": 0.2743, "step": 1040 }, { "epoch": 0.07283763277693475, "grad_norm": 1.1796875, "learning_rate": 4.817565217391304e-06, "loss": 0.2299, "step": 1050 }, { "epoch": 0.07353132451766746, "grad_norm": 1.15625, "learning_rate": 4.815826086956522e-06, "loss": 0.2203, "step": 1060 }, { "epoch": 0.07422501625840017, "grad_norm": 0.74609375, "learning_rate": 4.81408695652174e-06, "loss": 0.2155, "step": 1070 }, { "epoch": 0.07491870799913289, "grad_norm": 1.421875, "learning_rate": 4.812347826086957e-06, "loss": 0.248, "step": 1080 }, { "epoch": 0.0756123997398656, "grad_norm": 1.1640625, "learning_rate": 4.810608695652174e-06, "loss": 0.2366, "step": 1090 }, { "epoch": 0.0763060914805983, "grad_norm": 1.0234375, "learning_rate": 4.808869565217391e-06, "loss": 0.2251, "step": 1100 }, { "epoch": 0.07699978322133103, "grad_norm": 1.5859375, "learning_rate": 4.8071304347826095e-06, "loss": 0.2265, "step": 1110 }, { "epoch": 0.07769347496206373, "grad_norm": 1.5546875, "learning_rate": 4.805391304347827e-06, "loss": 0.2876, "step": 1120 }, { "epoch": 0.07838716670279644, "grad_norm": 1.3359375, "learning_rate": 4.803652173913043e-06, "loss": 0.2942, "step": 1130 }, { "epoch": 0.07908085844352916, "grad_norm": 0.8984375, "learning_rate": 4.801913043478262e-06, "loss": 0.2531, "step": 1140 }, { "epoch": 0.07977455018426187, "grad_norm": 2.265625, "learning_rate": 4.800173913043479e-06, "loss": 0.2473, "step": 1150 }, { "epoch": 0.08046824192499458, "grad_norm": 1.2265625, "learning_rate": 4.7984347826086955e-06, "loss": 0.2106, "step": 1160 }, { "epoch": 0.0811619336657273, "grad_norm": 1.0, "learning_rate": 4.796695652173914e-06, "loss": 0.2111, "step": 1170 }, { "epoch": 0.08185562540646, "grad_norm": 1.734375, "learning_rate": 4.794956521739131e-06, "loss": 0.262, "step": 1180 }, { "epoch": 0.08254931714719271, "grad_norm": 1.125, "learning_rate": 4.7932173913043485e-06, "loss": 0.2402, "step": 1190 }, { "epoch": 0.08324300888792543, "grad_norm": 1.2421875, "learning_rate": 4.791478260869565e-06, "loss": 0.2325, "step": 1200 }, { "epoch": 0.08393670062865814, "grad_norm": 1.296875, "learning_rate": 4.789739130434783e-06, "loss": 0.2355, "step": 1210 }, { "epoch": 0.08463039236939085, "grad_norm": 1.234375, "learning_rate": 4.7880000000000006e-06, "loss": 0.2561, "step": 1220 }, { "epoch": 0.08532408411012356, "grad_norm": 1.7890625, "learning_rate": 4.786260869565218e-06, "loss": 0.2351, "step": 1230 }, { "epoch": 0.08601777585085628, "grad_norm": 1.2421875, "learning_rate": 4.784521739130435e-06, "loss": 0.2571, "step": 1240 }, { "epoch": 0.08671146759158899, "grad_norm": 2.640625, "learning_rate": 4.782782608695653e-06, "loss": 0.2187, "step": 1250 }, { "epoch": 0.0874051593323217, "grad_norm": 1.1640625, "learning_rate": 4.78104347826087e-06, "loss": 0.313, "step": 1260 }, { "epoch": 0.08809885107305442, "grad_norm": 1.9140625, "learning_rate": 4.7793043478260874e-06, "loss": 0.3083, "step": 1270 }, { "epoch": 0.08879254281378712, "grad_norm": 1.5859375, "learning_rate": 4.777565217391305e-06, "loss": 0.2748, "step": 1280 }, { "epoch": 0.08948623455451983, "grad_norm": 1.640625, "learning_rate": 4.775826086956522e-06, "loss": 0.2393, "step": 1290 }, { "epoch": 0.09017992629525255, "grad_norm": 1.3359375, "learning_rate": 4.7740869565217395e-06, "loss": 0.2355, "step": 1300 }, { "epoch": 0.09087361803598526, "grad_norm": 1.1328125, "learning_rate": 4.772347826086957e-06, "loss": 0.2335, "step": 1310 }, { "epoch": 0.09156730977671797, "grad_norm": 1.5859375, "learning_rate": 4.770608695652174e-06, "loss": 0.2321, "step": 1320 }, { "epoch": 0.09226100151745069, "grad_norm": 1.4453125, "learning_rate": 4.768869565217392e-06, "loss": 0.3314, "step": 1330 }, { "epoch": 0.0929546932581834, "grad_norm": 0.99609375, "learning_rate": 4.767130434782609e-06, "loss": 0.2046, "step": 1340 }, { "epoch": 0.0936483849989161, "grad_norm": 1.328125, "learning_rate": 4.765391304347826e-06, "loss": 0.2461, "step": 1350 }, { "epoch": 0.09434207673964883, "grad_norm": 1.140625, "learning_rate": 4.763652173913044e-06, "loss": 0.2129, "step": 1360 }, { "epoch": 0.09503576848038153, "grad_norm": 1.2578125, "learning_rate": 4.761913043478261e-06, "loss": 0.2483, "step": 1370 }, { "epoch": 0.09572946022111424, "grad_norm": 1.859375, "learning_rate": 4.7601739130434785e-06, "loss": 0.2285, "step": 1380 }, { "epoch": 0.09642315196184695, "grad_norm": 1.0390625, "learning_rate": 4.758434782608696e-06, "loss": 0.2333, "step": 1390 }, { "epoch": 0.09711684370257967, "grad_norm": 1.4375, "learning_rate": 4.756695652173913e-06, "loss": 0.2183, "step": 1400 }, { "epoch": 0.09781053544331238, "grad_norm": 1.3828125, "learning_rate": 4.754956521739131e-06, "loss": 0.2621, "step": 1410 }, { "epoch": 0.09850422718404508, "grad_norm": 1.5859375, "learning_rate": 4.753217391304348e-06, "loss": 0.2572, "step": 1420 }, { "epoch": 0.0991979189247778, "grad_norm": 1.265625, "learning_rate": 4.751478260869566e-06, "loss": 0.2177, "step": 1430 }, { "epoch": 0.09989161066551051, "grad_norm": 1.5703125, "learning_rate": 4.749739130434783e-06, "loss": 0.2542, "step": 1440 }, { "epoch": 0.10058530240624322, "grad_norm": 1.4609375, "learning_rate": 4.748e-06, "loss": 0.2459, "step": 1450 }, { "epoch": 0.10127899414697594, "grad_norm": 1.640625, "learning_rate": 4.746260869565218e-06, "loss": 0.2786, "step": 1460 }, { "epoch": 0.10197268588770865, "grad_norm": 1.8359375, "learning_rate": 4.744521739130435e-06, "loss": 0.2875, "step": 1470 }, { "epoch": 0.10266637762844136, "grad_norm": 1.9765625, "learning_rate": 4.742782608695652e-06, "loss": 0.2957, "step": 1480 }, { "epoch": 0.10336006936917408, "grad_norm": 1.59375, "learning_rate": 4.74104347826087e-06, "loss": 0.2543, "step": 1490 }, { "epoch": 0.10405376110990679, "grad_norm": 1.828125, "learning_rate": 4.739304347826088e-06, "loss": 0.2631, "step": 1500 }, { "epoch": 0.1047474528506395, "grad_norm": 1.5703125, "learning_rate": 4.737565217391304e-06, "loss": 0.2462, "step": 1510 }, { "epoch": 0.10544114459137222, "grad_norm": 1.203125, "learning_rate": 4.735826086956522e-06, "loss": 0.2237, "step": 1520 }, { "epoch": 0.10613483633210492, "grad_norm": 1.1875, "learning_rate": 4.73408695652174e-06, "loss": 0.2206, "step": 1530 }, { "epoch": 0.10682852807283763, "grad_norm": 1.59375, "learning_rate": 4.732347826086957e-06, "loss": 0.2533, "step": 1540 }, { "epoch": 0.10752221981357034, "grad_norm": 1.203125, "learning_rate": 4.730608695652174e-06, "loss": 0.2136, "step": 1550 }, { "epoch": 0.10821591155430306, "grad_norm": 2.015625, "learning_rate": 4.728869565217391e-06, "loss": 0.3101, "step": 1560 }, { "epoch": 0.10890960329503577, "grad_norm": 2.4375, "learning_rate": 4.727130434782609e-06, "loss": 0.2574, "step": 1570 }, { "epoch": 0.10960329503576847, "grad_norm": 1.25, "learning_rate": 4.725391304347827e-06, "loss": 0.2142, "step": 1580 }, { "epoch": 0.1102969867765012, "grad_norm": 2.21875, "learning_rate": 4.723652173913043e-06, "loss": 0.1908, "step": 1590 }, { "epoch": 0.1109906785172339, "grad_norm": 1.703125, "learning_rate": 4.7219130434782615e-06, "loss": 0.2437, "step": 1600 }, { "epoch": 0.11168437025796661, "grad_norm": 1.25, "learning_rate": 4.720173913043479e-06, "loss": 0.2262, "step": 1610 }, { "epoch": 0.11237806199869933, "grad_norm": 0.9921875, "learning_rate": 4.718434782608696e-06, "loss": 0.258, "step": 1620 }, { "epoch": 0.11307175373943204, "grad_norm": 1.609375, "learning_rate": 4.716695652173914e-06, "loss": 0.2992, "step": 1630 }, { "epoch": 0.11376544548016475, "grad_norm": 1.5703125, "learning_rate": 4.714956521739131e-06, "loss": 0.247, "step": 1640 }, { "epoch": 0.11445913722089747, "grad_norm": 1.1484375, "learning_rate": 4.713217391304348e-06, "loss": 0.262, "step": 1650 }, { "epoch": 0.11515282896163018, "grad_norm": 1.2890625, "learning_rate": 4.711478260869566e-06, "loss": 0.2604, "step": 1660 }, { "epoch": 0.11584652070236288, "grad_norm": 1.0390625, "learning_rate": 4.709739130434783e-06, "loss": 0.2101, "step": 1670 }, { "epoch": 0.1165402124430956, "grad_norm": 1.8671875, "learning_rate": 4.7080000000000005e-06, "loss": 0.2641, "step": 1680 }, { "epoch": 0.11723390418382831, "grad_norm": 1.3984375, "learning_rate": 4.706260869565218e-06, "loss": 0.2134, "step": 1690 }, { "epoch": 0.11792759592456102, "grad_norm": 1.0546875, "learning_rate": 4.704521739130435e-06, "loss": 0.2305, "step": 1700 }, { "epoch": 0.11862128766529373, "grad_norm": 1.2265625, "learning_rate": 4.702782608695653e-06, "loss": 0.2542, "step": 1710 }, { "epoch": 0.11931497940602645, "grad_norm": 1.328125, "learning_rate": 4.70104347826087e-06, "loss": 0.2722, "step": 1720 }, { "epoch": 0.12000867114675916, "grad_norm": 1.015625, "learning_rate": 4.699304347826087e-06, "loss": 0.2172, "step": 1730 }, { "epoch": 0.12070236288749187, "grad_norm": 1.1953125, "learning_rate": 4.697565217391305e-06, "loss": 0.3591, "step": 1740 }, { "epoch": 0.12139605462822459, "grad_norm": 1.3359375, "learning_rate": 4.695826086956522e-06, "loss": 0.2487, "step": 1750 }, { "epoch": 0.1220897463689573, "grad_norm": 1.234375, "learning_rate": 4.6940869565217395e-06, "loss": 0.2619, "step": 1760 }, { "epoch": 0.12278343810969, "grad_norm": 1.0078125, "learning_rate": 4.692347826086957e-06, "loss": 0.2614, "step": 1770 }, { "epoch": 0.12347712985042272, "grad_norm": 1.0625, "learning_rate": 4.690608695652174e-06, "loss": 0.2354, "step": 1780 }, { "epoch": 0.12417082159115543, "grad_norm": 1.109375, "learning_rate": 4.688869565217392e-06, "loss": 0.2302, "step": 1790 }, { "epoch": 0.12486451333188814, "grad_norm": 1.203125, "learning_rate": 4.687130434782609e-06, "loss": 0.2436, "step": 1800 }, { "epoch": 0.12555820507262086, "grad_norm": 1.9140625, "learning_rate": 4.685391304347826e-06, "loss": 0.2219, "step": 1810 }, { "epoch": 0.12625189681335355, "grad_norm": 1.3125, "learning_rate": 4.683652173913044e-06, "loss": 0.237, "step": 1820 }, { "epoch": 0.12694558855408627, "grad_norm": 1.1640625, "learning_rate": 4.681913043478261e-06, "loss": 0.2562, "step": 1830 }, { "epoch": 0.127639280294819, "grad_norm": 1.2734375, "learning_rate": 4.6801739130434784e-06, "loss": 0.2683, "step": 1840 }, { "epoch": 0.1283329720355517, "grad_norm": 1.421875, "learning_rate": 4.678434782608696e-06, "loss": 0.2653, "step": 1850 }, { "epoch": 0.1290266637762844, "grad_norm": 1.2421875, "learning_rate": 4.676695652173913e-06, "loss": 0.2423, "step": 1860 }, { "epoch": 0.12972035551701713, "grad_norm": 0.98828125, "learning_rate": 4.6749565217391305e-06, "loss": 0.2275, "step": 1870 }, { "epoch": 0.13041404725774983, "grad_norm": 1.046875, "learning_rate": 4.673217391304348e-06, "loss": 0.229, "step": 1880 }, { "epoch": 0.13110773899848255, "grad_norm": 1.453125, "learning_rate": 4.671478260869566e-06, "loss": 0.2491, "step": 1890 }, { "epoch": 0.13180143073921527, "grad_norm": 1.3359375, "learning_rate": 4.669739130434783e-06, "loss": 0.2106, "step": 1900 }, { "epoch": 0.13249512247994796, "grad_norm": 1.7265625, "learning_rate": 4.668e-06, "loss": 0.3183, "step": 1910 }, { "epoch": 0.13318881422068068, "grad_norm": 1.5625, "learning_rate": 4.666260869565218e-06, "loss": 0.2252, "step": 1920 }, { "epoch": 0.1338825059614134, "grad_norm": 1.078125, "learning_rate": 4.664521739130436e-06, "loss": 0.2986, "step": 1930 }, { "epoch": 0.1345761977021461, "grad_norm": 1.6328125, "learning_rate": 4.662782608695652e-06, "loss": 0.2311, "step": 1940 }, { "epoch": 0.13526988944287882, "grad_norm": 2.3125, "learning_rate": 4.6610434782608695e-06, "loss": 0.2383, "step": 1950 }, { "epoch": 0.13596358118361154, "grad_norm": 1.4609375, "learning_rate": 4.659304347826088e-06, "loss": 0.2228, "step": 1960 }, { "epoch": 0.13665727292434424, "grad_norm": 1.6484375, "learning_rate": 4.657565217391305e-06, "loss": 0.2704, "step": 1970 }, { "epoch": 0.13735096466507696, "grad_norm": 1.265625, "learning_rate": 4.655826086956522e-06, "loss": 0.2858, "step": 1980 }, { "epoch": 0.13804465640580968, "grad_norm": 1.046875, "learning_rate": 4.65408695652174e-06, "loss": 0.2813, "step": 1990 }, { "epoch": 0.13873834814654237, "grad_norm": 1.25, "learning_rate": 4.652347826086957e-06, "loss": 0.2458, "step": 2000 }, { "epoch": 0.1394320398872751, "grad_norm": 1.1171875, "learning_rate": 4.650608695652175e-06, "loss": 0.2186, "step": 2010 }, { "epoch": 0.14012573162800782, "grad_norm": 2.546875, "learning_rate": 4.648869565217391e-06, "loss": 0.2789, "step": 2020 }, { "epoch": 0.1408194233687405, "grad_norm": 1.59375, "learning_rate": 4.647130434782609e-06, "loss": 0.2564, "step": 2030 }, { "epoch": 0.14151311510947323, "grad_norm": 1.28125, "learning_rate": 4.645391304347827e-06, "loss": 0.2318, "step": 2040 }, { "epoch": 0.14220680685020595, "grad_norm": 1.703125, "learning_rate": 4.643652173913044e-06, "loss": 0.2513, "step": 2050 }, { "epoch": 0.14290049859093865, "grad_norm": 0.89453125, "learning_rate": 4.6419130434782614e-06, "loss": 0.2211, "step": 2060 }, { "epoch": 0.14359419033167137, "grad_norm": 1.1328125, "learning_rate": 4.640173913043479e-06, "loss": 0.2939, "step": 2070 }, { "epoch": 0.1442878820724041, "grad_norm": 1.296875, "learning_rate": 4.638434782608696e-06, "loss": 0.2321, "step": 2080 }, { "epoch": 0.14498157381313678, "grad_norm": 1.484375, "learning_rate": 4.6366956521739136e-06, "loss": 0.226, "step": 2090 }, { "epoch": 0.1456752655538695, "grad_norm": 1.0625, "learning_rate": 4.634956521739131e-06, "loss": 0.2262, "step": 2100 }, { "epoch": 0.1463689572946022, "grad_norm": 1.328125, "learning_rate": 4.633217391304348e-06, "loss": 0.2198, "step": 2110 }, { "epoch": 0.14706264903533492, "grad_norm": 1.421875, "learning_rate": 4.631478260869566e-06, "loss": 0.2517, "step": 2120 }, { "epoch": 0.14775634077606764, "grad_norm": 1.125, "learning_rate": 4.629739130434783e-06, "loss": 0.2736, "step": 2130 }, { "epoch": 0.14845003251680033, "grad_norm": 1.4609375, "learning_rate": 4.628e-06, "loss": 0.2483, "step": 2140 }, { "epoch": 0.14914372425753306, "grad_norm": 1.1328125, "learning_rate": 4.626260869565218e-06, "loss": 0.2138, "step": 2150 }, { "epoch": 0.14983741599826578, "grad_norm": 1.578125, "learning_rate": 4.624521739130435e-06, "loss": 0.2202, "step": 2160 }, { "epoch": 0.15053110773899847, "grad_norm": 1.359375, "learning_rate": 4.6227826086956525e-06, "loss": 0.2468, "step": 2170 }, { "epoch": 0.1512247994797312, "grad_norm": 1.171875, "learning_rate": 4.62104347826087e-06, "loss": 0.2218, "step": 2180 }, { "epoch": 0.1519184912204639, "grad_norm": 1.6328125, "learning_rate": 4.619304347826087e-06, "loss": 0.2741, "step": 2190 }, { "epoch": 0.1526121829611966, "grad_norm": 1.078125, "learning_rate": 4.617565217391305e-06, "loss": 0.2553, "step": 2200 }, { "epoch": 0.15330587470192933, "grad_norm": 1.2265625, "learning_rate": 4.615826086956522e-06, "loss": 0.234, "step": 2210 }, { "epoch": 0.15399956644266205, "grad_norm": 1.4453125, "learning_rate": 4.614086956521739e-06, "loss": 0.2321, "step": 2220 }, { "epoch": 0.15469325818339474, "grad_norm": 1.2265625, "learning_rate": 4.612347826086957e-06, "loss": 0.2222, "step": 2230 }, { "epoch": 0.15538694992412747, "grad_norm": 1.40625, "learning_rate": 4.610608695652174e-06, "loss": 0.2726, "step": 2240 }, { "epoch": 0.1560806416648602, "grad_norm": 1.0078125, "learning_rate": 4.6088695652173915e-06, "loss": 0.2418, "step": 2250 }, { "epoch": 0.15677433340559288, "grad_norm": 1.3828125, "learning_rate": 4.607130434782609e-06, "loss": 0.2499, "step": 2260 }, { "epoch": 0.1574680251463256, "grad_norm": 0.984375, "learning_rate": 4.605391304347826e-06, "loss": 0.2741, "step": 2270 }, { "epoch": 0.15816171688705832, "grad_norm": 1.3515625, "learning_rate": 4.6036521739130445e-06, "loss": 0.2061, "step": 2280 }, { "epoch": 0.15885540862779102, "grad_norm": 1.3046875, "learning_rate": 4.601913043478261e-06, "loss": 0.2696, "step": 2290 }, { "epoch": 0.15954910036852374, "grad_norm": 1.359375, "learning_rate": 4.600173913043478e-06, "loss": 0.2767, "step": 2300 }, { "epoch": 0.16024279210925646, "grad_norm": 1.3515625, "learning_rate": 4.598434782608696e-06, "loss": 0.2485, "step": 2310 }, { "epoch": 0.16093648384998915, "grad_norm": 1.2578125, "learning_rate": 4.596695652173914e-06, "loss": 0.2485, "step": 2320 }, { "epoch": 0.16163017559072188, "grad_norm": 1.8046875, "learning_rate": 4.5949565217391305e-06, "loss": 0.2586, "step": 2330 }, { "epoch": 0.1623238673314546, "grad_norm": 1.0390625, "learning_rate": 4.593217391304348e-06, "loss": 0.1902, "step": 2340 }, { "epoch": 0.1630175590721873, "grad_norm": 1.7734375, "learning_rate": 4.591478260869566e-06, "loss": 0.3126, "step": 2350 }, { "epoch": 0.16371125081292, "grad_norm": 1.8828125, "learning_rate": 4.5897391304347834e-06, "loss": 0.285, "step": 2360 }, { "epoch": 0.16440494255365273, "grad_norm": 1.1484375, "learning_rate": 4.588e-06, "loss": 0.2054, "step": 2370 }, { "epoch": 0.16509863429438543, "grad_norm": 2.921875, "learning_rate": 4.586260869565218e-06, "loss": 0.2095, "step": 2380 }, { "epoch": 0.16579232603511815, "grad_norm": 1.046875, "learning_rate": 4.5845217391304355e-06, "loss": 0.234, "step": 2390 }, { "epoch": 0.16648601777585087, "grad_norm": 1.4765625, "learning_rate": 4.582782608695652e-06, "loss": 0.2567, "step": 2400 }, { "epoch": 0.16717970951658356, "grad_norm": 0.94140625, "learning_rate": 4.5810434782608694e-06, "loss": 0.2976, "step": 2410 }, { "epoch": 0.16787340125731628, "grad_norm": 1.3203125, "learning_rate": 4.579304347826088e-06, "loss": 0.2411, "step": 2420 }, { "epoch": 0.16856709299804898, "grad_norm": 1.125, "learning_rate": 4.577565217391305e-06, "loss": 0.2256, "step": 2430 }, { "epoch": 0.1692607847387817, "grad_norm": 1.1796875, "learning_rate": 4.5758260869565215e-06, "loss": 0.2514, "step": 2440 }, { "epoch": 0.16995447647951442, "grad_norm": 1.0625, "learning_rate": 4.57408695652174e-06, "loss": 0.2754, "step": 2450 }, { "epoch": 0.17064816822024712, "grad_norm": 1.2890625, "learning_rate": 4.572347826086957e-06, "loss": 0.2486, "step": 2460 }, { "epoch": 0.17134185996097984, "grad_norm": 1.1015625, "learning_rate": 4.5706086956521745e-06, "loss": 0.2643, "step": 2470 }, { "epoch": 0.17203555170171256, "grad_norm": 1.4453125, "learning_rate": 4.568869565217391e-06, "loss": 0.2401, "step": 2480 }, { "epoch": 0.17272924344244525, "grad_norm": 1.3671875, "learning_rate": 4.567130434782609e-06, "loss": 0.2471, "step": 2490 }, { "epoch": 0.17342293518317797, "grad_norm": 1.6015625, "learning_rate": 4.565391304347827e-06, "loss": 0.2813, "step": 2500 }, { "epoch": 0.1741166269239107, "grad_norm": 2.109375, "learning_rate": 4.563652173913044e-06, "loss": 0.2199, "step": 2510 }, { "epoch": 0.1748103186646434, "grad_norm": 1.0859375, "learning_rate": 4.561913043478261e-06, "loss": 0.2346, "step": 2520 }, { "epoch": 0.1755040104053761, "grad_norm": 0.90234375, "learning_rate": 4.560173913043479e-06, "loss": 0.3302, "step": 2530 }, { "epoch": 0.17619770214610883, "grad_norm": 1.234375, "learning_rate": 4.558434782608696e-06, "loss": 0.2683, "step": 2540 }, { "epoch": 0.17689139388684152, "grad_norm": 2.34375, "learning_rate": 4.5566956521739135e-06, "loss": 0.2297, "step": 2550 }, { "epoch": 0.17758508562757425, "grad_norm": 1.3203125, "learning_rate": 4.554956521739131e-06, "loss": 0.2285, "step": 2560 }, { "epoch": 0.17827877736830697, "grad_norm": 1.1796875, "learning_rate": 4.553217391304348e-06, "loss": 0.2914, "step": 2570 }, { "epoch": 0.17897246910903966, "grad_norm": 1.3359375, "learning_rate": 4.551478260869566e-06, "loss": 0.2983, "step": 2580 }, { "epoch": 0.17966616084977238, "grad_norm": 1.1484375, "learning_rate": 4.549739130434783e-06, "loss": 0.2334, "step": 2590 }, { "epoch": 0.1803598525905051, "grad_norm": 1.4609375, "learning_rate": 4.548e-06, "loss": 0.2318, "step": 2600 }, { "epoch": 0.1810535443312378, "grad_norm": 1.4609375, "learning_rate": 4.546260869565218e-06, "loss": 0.2391, "step": 2610 }, { "epoch": 0.18174723607197052, "grad_norm": 0.80859375, "learning_rate": 4.544521739130435e-06, "loss": 0.2697, "step": 2620 }, { "epoch": 0.18244092781270324, "grad_norm": 1.0703125, "learning_rate": 4.5427826086956524e-06, "loss": 0.263, "step": 2630 }, { "epoch": 0.18313461955343593, "grad_norm": 1.6015625, "learning_rate": 4.54104347826087e-06, "loss": 0.2569, "step": 2640 }, { "epoch": 0.18382831129416866, "grad_norm": 1.8203125, "learning_rate": 4.539304347826087e-06, "loss": 0.2997, "step": 2650 }, { "epoch": 0.18452200303490138, "grad_norm": 1.4765625, "learning_rate": 4.5375652173913046e-06, "loss": 0.2352, "step": 2660 }, { "epoch": 0.18521569477563407, "grad_norm": 2.0625, "learning_rate": 4.535826086956523e-06, "loss": 0.2896, "step": 2670 }, { "epoch": 0.1859093865163668, "grad_norm": 1.921875, "learning_rate": 4.534086956521739e-06, "loss": 0.2807, "step": 2680 }, { "epoch": 0.18660307825709951, "grad_norm": 1.2578125, "learning_rate": 4.532347826086957e-06, "loss": 0.2258, "step": 2690 }, { "epoch": 0.1872967699978322, "grad_norm": 0.91015625, "learning_rate": 4.530608695652174e-06, "loss": 0.2467, "step": 2700 }, { "epoch": 0.18799046173856493, "grad_norm": 1.1796875, "learning_rate": 4.528869565217391e-06, "loss": 0.2375, "step": 2710 }, { "epoch": 0.18868415347929765, "grad_norm": 1.0859375, "learning_rate": 4.527130434782609e-06, "loss": 0.2648, "step": 2720 }, { "epoch": 0.18937784522003034, "grad_norm": 1.1640625, "learning_rate": 4.525391304347826e-06, "loss": 0.241, "step": 2730 }, { "epoch": 0.19007153696076307, "grad_norm": 1.59375, "learning_rate": 4.523652173913044e-06, "loss": 0.2694, "step": 2740 }, { "epoch": 0.19076522870149576, "grad_norm": 1.0390625, "learning_rate": 4.521913043478261e-06, "loss": 0.213, "step": 2750 }, { "epoch": 0.19145892044222848, "grad_norm": 1.0546875, "learning_rate": 4.520173913043478e-06, "loss": 0.2127, "step": 2760 }, { "epoch": 0.1921526121829612, "grad_norm": 1.4765625, "learning_rate": 4.518434782608696e-06, "loss": 0.26, "step": 2770 }, { "epoch": 0.1928463039236939, "grad_norm": 1.28125, "learning_rate": 4.516695652173914e-06, "loss": 0.2297, "step": 2780 }, { "epoch": 0.19353999566442662, "grad_norm": 1.40625, "learning_rate": 4.51495652173913e-06, "loss": 0.2087, "step": 2790 }, { "epoch": 0.19423368740515934, "grad_norm": 1.1328125, "learning_rate": 4.513217391304348e-06, "loss": 0.2179, "step": 2800 }, { "epoch": 0.19492737914589203, "grad_norm": 1.5625, "learning_rate": 4.511478260869566e-06, "loss": 0.2698, "step": 2810 }, { "epoch": 0.19562107088662475, "grad_norm": 1.0625, "learning_rate": 4.509739130434783e-06, "loss": 0.2297, "step": 2820 }, { "epoch": 0.19631476262735748, "grad_norm": 1.578125, "learning_rate": 4.508e-06, "loss": 0.2154, "step": 2830 }, { "epoch": 0.19700845436809017, "grad_norm": 1.1171875, "learning_rate": 4.506260869565218e-06, "loss": 0.236, "step": 2840 }, { "epoch": 0.1977021461088229, "grad_norm": 1.109375, "learning_rate": 4.5045217391304355e-06, "loss": 0.281, "step": 2850 }, { "epoch": 0.1983958378495556, "grad_norm": 1.1171875, "learning_rate": 4.502782608695653e-06, "loss": 0.2373, "step": 2860 }, { "epoch": 0.1990895295902883, "grad_norm": 1.375, "learning_rate": 4.501043478260869e-06, "loss": 0.2899, "step": 2870 }, { "epoch": 0.19978322133102103, "grad_norm": 1.421875, "learning_rate": 4.4993043478260876e-06, "loss": 0.2459, "step": 2880 }, { "epoch": 0.20047691307175375, "grad_norm": 1.1015625, "learning_rate": 4.497565217391305e-06, "loss": 0.2292, "step": 2890 }, { "epoch": 0.20117060481248644, "grad_norm": 1.0546875, "learning_rate": 4.495826086956522e-06, "loss": 0.2465, "step": 2900 }, { "epoch": 0.20186429655321916, "grad_norm": 1.3671875, "learning_rate": 4.49408695652174e-06, "loss": 0.2323, "step": 2910 }, { "epoch": 0.20255798829395188, "grad_norm": 2.234375, "learning_rate": 4.492347826086957e-06, "loss": 0.3579, "step": 2920 }, { "epoch": 0.20325168003468458, "grad_norm": 1.28125, "learning_rate": 4.4906086956521744e-06, "loss": 0.3302, "step": 2930 }, { "epoch": 0.2039453717754173, "grad_norm": 1.4375, "learning_rate": 4.488869565217392e-06, "loss": 0.2271, "step": 2940 }, { "epoch": 0.20463906351615002, "grad_norm": 1.1796875, "learning_rate": 4.487130434782609e-06, "loss": 0.2119, "step": 2950 }, { "epoch": 0.20533275525688272, "grad_norm": 1.140625, "learning_rate": 4.4853913043478265e-06, "loss": 0.2446, "step": 2960 }, { "epoch": 0.20602644699761544, "grad_norm": 0.9375, "learning_rate": 4.483652173913044e-06, "loss": 0.2289, "step": 2970 }, { "epoch": 0.20672013873834816, "grad_norm": 1.1484375, "learning_rate": 4.481913043478261e-06, "loss": 0.2335, "step": 2980 }, { "epoch": 0.20741383047908085, "grad_norm": 0.9609375, "learning_rate": 4.480173913043479e-06, "loss": 0.2208, "step": 2990 }, { "epoch": 0.20810752221981357, "grad_norm": 1.5859375, "learning_rate": 4.478434782608696e-06, "loss": 0.2224, "step": 3000 }, { "epoch": 0.2088012139605463, "grad_norm": 1.234375, "learning_rate": 4.476695652173913e-06, "loss": 0.2543, "step": 3010 }, { "epoch": 0.209494905701279, "grad_norm": 1.171875, "learning_rate": 4.474956521739131e-06, "loss": 0.2274, "step": 3020 }, { "epoch": 0.2101885974420117, "grad_norm": 1.3046875, "learning_rate": 4.473217391304348e-06, "loss": 0.2999, "step": 3030 }, { "epoch": 0.21088228918274443, "grad_norm": 1.1640625, "learning_rate": 4.4714782608695655e-06, "loss": 0.2384, "step": 3040 }, { "epoch": 0.21157598092347712, "grad_norm": 1.3984375, "learning_rate": 4.469739130434783e-06, "loss": 0.2618, "step": 3050 }, { "epoch": 0.21226967266420985, "grad_norm": 1.453125, "learning_rate": 4.468e-06, "loss": 0.2545, "step": 3060 }, { "epoch": 0.21296336440494254, "grad_norm": 1.1171875, "learning_rate": 4.466260869565218e-06, "loss": 0.2794, "step": 3070 }, { "epoch": 0.21365705614567526, "grad_norm": 1.0234375, "learning_rate": 4.464521739130435e-06, "loss": 0.2537, "step": 3080 }, { "epoch": 0.21435074788640798, "grad_norm": 1.4921875, "learning_rate": 4.462782608695652e-06, "loss": 0.2936, "step": 3090 }, { "epoch": 0.21504443962714068, "grad_norm": 1.2890625, "learning_rate": 4.46104347826087e-06, "loss": 0.2446, "step": 3100 }, { "epoch": 0.2157381313678734, "grad_norm": 1.171875, "learning_rate": 4.459304347826087e-06, "loss": 0.2443, "step": 3110 }, { "epoch": 0.21643182310860612, "grad_norm": 1.2578125, "learning_rate": 4.4575652173913045e-06, "loss": 0.2338, "step": 3120 }, { "epoch": 0.2171255148493388, "grad_norm": 1.0, "learning_rate": 4.455826086956523e-06, "loss": 0.2197, "step": 3130 }, { "epoch": 0.21781920659007153, "grad_norm": 0.96875, "learning_rate": 4.454086956521739e-06, "loss": 0.2222, "step": 3140 }, { "epoch": 0.21851289833080426, "grad_norm": 1.15625, "learning_rate": 4.452347826086957e-06, "loss": 0.2494, "step": 3150 }, { "epoch": 0.21920659007153695, "grad_norm": 1.2890625, "learning_rate": 4.450608695652174e-06, "loss": 0.2529, "step": 3160 }, { "epoch": 0.21990028181226967, "grad_norm": 0.9921875, "learning_rate": 4.448869565217392e-06, "loss": 0.185, "step": 3170 }, { "epoch": 0.2205939735530024, "grad_norm": 1.6015625, "learning_rate": 4.447130434782609e-06, "loss": 0.2576, "step": 3180 }, { "epoch": 0.2212876652937351, "grad_norm": 1.84375, "learning_rate": 4.445391304347826e-06, "loss": 0.2218, "step": 3190 }, { "epoch": 0.2219813570344678, "grad_norm": 1.546875, "learning_rate": 4.443652173913044e-06, "loss": 0.2442, "step": 3200 }, { "epoch": 0.22267504877520053, "grad_norm": 1.0859375, "learning_rate": 4.441913043478262e-06, "loss": 0.269, "step": 3210 }, { "epoch": 0.22336874051593322, "grad_norm": 1.2578125, "learning_rate": 4.440173913043478e-06, "loss": 0.239, "step": 3220 }, { "epoch": 0.22406243225666594, "grad_norm": 0.94140625, "learning_rate": 4.4384347826086956e-06, "loss": 0.2284, "step": 3230 }, { "epoch": 0.22475612399739867, "grad_norm": 1.2265625, "learning_rate": 4.436695652173914e-06, "loss": 0.2405, "step": 3240 }, { "epoch": 0.22544981573813136, "grad_norm": 1.5703125, "learning_rate": 4.434956521739131e-06, "loss": 0.2391, "step": 3250 }, { "epoch": 0.22614350747886408, "grad_norm": 1.171875, "learning_rate": 4.433217391304348e-06, "loss": 0.2244, "step": 3260 }, { "epoch": 0.2268371992195968, "grad_norm": 1.5, "learning_rate": 4.431478260869566e-06, "loss": 0.2313, "step": 3270 }, { "epoch": 0.2275308909603295, "grad_norm": 1.71875, "learning_rate": 4.429739130434783e-06, "loss": 0.2855, "step": 3280 }, { "epoch": 0.22822458270106222, "grad_norm": 1.4921875, "learning_rate": 4.428000000000001e-06, "loss": 0.2532, "step": 3290 }, { "epoch": 0.22891827444179494, "grad_norm": 1.2890625, "learning_rate": 4.426260869565218e-06, "loss": 0.2226, "step": 3300 }, { "epoch": 0.22961196618252763, "grad_norm": 1.3203125, "learning_rate": 4.424521739130435e-06, "loss": 0.2389, "step": 3310 }, { "epoch": 0.23030565792326035, "grad_norm": 1.1484375, "learning_rate": 4.422782608695653e-06, "loss": 0.2067, "step": 3320 }, { "epoch": 0.23099934966399308, "grad_norm": 1.2890625, "learning_rate": 4.421043478260869e-06, "loss": 0.1951, "step": 3330 }, { "epoch": 0.23169304140472577, "grad_norm": 1.2265625, "learning_rate": 4.4193043478260875e-06, "loss": 0.2054, "step": 3340 }, { "epoch": 0.2323867331454585, "grad_norm": 1.125, "learning_rate": 4.417565217391305e-06, "loss": 0.2557, "step": 3350 }, { "epoch": 0.2330804248861912, "grad_norm": 1.203125, "learning_rate": 4.415826086956522e-06, "loss": 0.229, "step": 3360 }, { "epoch": 0.2337741166269239, "grad_norm": 1.4921875, "learning_rate": 4.41408695652174e-06, "loss": 0.2895, "step": 3370 }, { "epoch": 0.23446780836765663, "grad_norm": 1.0859375, "learning_rate": 4.412347826086957e-06, "loss": 0.299, "step": 3380 }, { "epoch": 0.23516150010838932, "grad_norm": 1.1015625, "learning_rate": 4.410608695652174e-06, "loss": 0.2132, "step": 3390 }, { "epoch": 0.23585519184912204, "grad_norm": 1.078125, "learning_rate": 4.408869565217392e-06, "loss": 0.2573, "step": 3400 }, { "epoch": 0.23654888358985476, "grad_norm": 1.109375, "learning_rate": 4.407130434782609e-06, "loss": 0.2225, "step": 3410 }, { "epoch": 0.23724257533058746, "grad_norm": 1.2109375, "learning_rate": 4.4053913043478265e-06, "loss": 0.2154, "step": 3420 }, { "epoch": 0.23793626707132018, "grad_norm": 0.91015625, "learning_rate": 4.403652173913044e-06, "loss": 0.3088, "step": 3430 }, { "epoch": 0.2386299588120529, "grad_norm": 1.9375, "learning_rate": 4.401913043478261e-06, "loss": 0.2877, "step": 3440 }, { "epoch": 0.2393236505527856, "grad_norm": 1.984375, "learning_rate": 4.4001739130434786e-06, "loss": 0.2145, "step": 3450 }, { "epoch": 0.24001734229351832, "grad_norm": 0.8515625, "learning_rate": 4.398434782608696e-06, "loss": 0.2338, "step": 3460 }, { "epoch": 0.24071103403425104, "grad_norm": 1.328125, "learning_rate": 4.396695652173913e-06, "loss": 0.2488, "step": 3470 }, { "epoch": 0.24140472577498373, "grad_norm": 1.0234375, "learning_rate": 4.394956521739131e-06, "loss": 0.2444, "step": 3480 }, { "epoch": 0.24209841751571645, "grad_norm": 1.234375, "learning_rate": 4.393217391304348e-06, "loss": 0.2326, "step": 3490 }, { "epoch": 0.24279210925644917, "grad_norm": 0.99609375, "learning_rate": 4.3914782608695654e-06, "loss": 0.2431, "step": 3500 }, { "epoch": 0.24348580099718187, "grad_norm": 1.4375, "learning_rate": 4.389739130434783e-06, "loss": 0.2479, "step": 3510 }, { "epoch": 0.2441794927379146, "grad_norm": 1.46875, "learning_rate": 4.388e-06, "loss": 0.2517, "step": 3520 }, { "epoch": 0.2448731844786473, "grad_norm": 1.1640625, "learning_rate": 4.3862608695652175e-06, "loss": 0.2808, "step": 3530 }, { "epoch": 0.24556687621938, "grad_norm": 1.1796875, "learning_rate": 4.384521739130435e-06, "loss": 0.2571, "step": 3540 }, { "epoch": 0.24626056796011273, "grad_norm": 1.9921875, "learning_rate": 4.382782608695652e-06, "loss": 0.302, "step": 3550 }, { "epoch": 0.24695425970084545, "grad_norm": 1.28125, "learning_rate": 4.3810434782608705e-06, "loss": 0.2128, "step": 3560 }, { "epoch": 0.24764795144157814, "grad_norm": 0.96875, "learning_rate": 4.379304347826087e-06, "loss": 0.2285, "step": 3570 }, { "epoch": 0.24834164318231086, "grad_norm": 1.3671875, "learning_rate": 4.377565217391304e-06, "loss": 0.2401, "step": 3580 }, { "epoch": 0.24903533492304358, "grad_norm": 1.109375, "learning_rate": 4.375826086956523e-06, "loss": 0.2543, "step": 3590 }, { "epoch": 0.24972902666377628, "grad_norm": 1.2109375, "learning_rate": 4.37408695652174e-06, "loss": 0.2598, "step": 3600 }, { "epoch": 0.25042271840450897, "grad_norm": 1.34375, "learning_rate": 4.3723478260869565e-06, "loss": 0.3157, "step": 3610 }, { "epoch": 0.2511164101452417, "grad_norm": 1.3671875, "learning_rate": 4.370608695652174e-06, "loss": 0.1943, "step": 3620 }, { "epoch": 0.2518101018859744, "grad_norm": 1.1953125, "learning_rate": 4.368869565217392e-06, "loss": 0.2737, "step": 3630 }, { "epoch": 0.2525037936267071, "grad_norm": 0.9375, "learning_rate": 4.367130434782609e-06, "loss": 0.2737, "step": 3640 }, { "epoch": 0.25319748536743986, "grad_norm": 1.078125, "learning_rate": 4.365391304347826e-06, "loss": 0.2254, "step": 3650 }, { "epoch": 0.25389117710817255, "grad_norm": 1.0859375, "learning_rate": 4.363652173913044e-06, "loss": 0.2407, "step": 3660 }, { "epoch": 0.25458486884890524, "grad_norm": 1.453125, "learning_rate": 4.361913043478262e-06, "loss": 0.2414, "step": 3670 }, { "epoch": 0.255278560589638, "grad_norm": 0.97265625, "learning_rate": 4.360173913043478e-06, "loss": 0.2899, "step": 3680 }, { "epoch": 0.2559722523303707, "grad_norm": 1.125, "learning_rate": 4.3584347826086955e-06, "loss": 0.2452, "step": 3690 }, { "epoch": 0.2566659440711034, "grad_norm": 1.2109375, "learning_rate": 4.356695652173914e-06, "loss": 0.2251, "step": 3700 }, { "epoch": 0.25735963581183613, "grad_norm": 1.421875, "learning_rate": 4.354956521739131e-06, "loss": 0.2506, "step": 3710 }, { "epoch": 0.2580533275525688, "grad_norm": 1.3125, "learning_rate": 4.353217391304348e-06, "loss": 0.2592, "step": 3720 }, { "epoch": 0.2587470192933015, "grad_norm": 1.3984375, "learning_rate": 4.351478260869566e-06, "loss": 0.2523, "step": 3730 }, { "epoch": 0.25944071103403427, "grad_norm": 1.3515625, "learning_rate": 4.349739130434783e-06, "loss": 0.2409, "step": 3740 }, { "epoch": 0.26013440277476696, "grad_norm": 1.3359375, "learning_rate": 4.3480000000000006e-06, "loss": 0.253, "step": 3750 }, { "epoch": 0.26082809451549965, "grad_norm": 1.8515625, "learning_rate": 4.346260869565218e-06, "loss": 0.2862, "step": 3760 }, { "epoch": 0.2615217862562324, "grad_norm": 1.359375, "learning_rate": 4.344521739130435e-06, "loss": 0.2504, "step": 3770 }, { "epoch": 0.2622154779969651, "grad_norm": 0.984375, "learning_rate": 4.342782608695653e-06, "loss": 0.2537, "step": 3780 }, { "epoch": 0.2629091697376978, "grad_norm": 1.0078125, "learning_rate": 4.34104347826087e-06, "loss": 0.2099, "step": 3790 }, { "epoch": 0.26360286147843054, "grad_norm": 1.0625, "learning_rate": 4.339304347826087e-06, "loss": 0.2753, "step": 3800 }, { "epoch": 0.26429655321916323, "grad_norm": 1.125, "learning_rate": 4.337565217391305e-06, "loss": 0.2202, "step": 3810 }, { "epoch": 0.2649902449598959, "grad_norm": 1.1484375, "learning_rate": 4.335826086956522e-06, "loss": 0.2382, "step": 3820 }, { "epoch": 0.2656839367006287, "grad_norm": 1.09375, "learning_rate": 4.3340869565217395e-06, "loss": 0.2092, "step": 3830 }, { "epoch": 0.26637762844136137, "grad_norm": 1.1328125, "learning_rate": 4.332347826086957e-06, "loss": 0.3343, "step": 3840 }, { "epoch": 0.26707132018209406, "grad_norm": 1.265625, "learning_rate": 4.330608695652174e-06, "loss": 0.2324, "step": 3850 }, { "epoch": 0.2677650119228268, "grad_norm": 1.1875, "learning_rate": 4.328869565217392e-06, "loss": 0.2864, "step": 3860 }, { "epoch": 0.2684587036635595, "grad_norm": 1.46875, "learning_rate": 4.327130434782609e-06, "loss": 0.2883, "step": 3870 }, { "epoch": 0.2691523954042922, "grad_norm": 1.046875, "learning_rate": 4.325391304347826e-06, "loss": 0.2376, "step": 3880 }, { "epoch": 0.26984608714502495, "grad_norm": 1.5, "learning_rate": 4.323652173913044e-06, "loss": 0.2556, "step": 3890 }, { "epoch": 0.27053977888575764, "grad_norm": 1.3359375, "learning_rate": 4.321913043478261e-06, "loss": 0.3276, "step": 3900 }, { "epoch": 0.27123347062649034, "grad_norm": 1.375, "learning_rate": 4.3201739130434785e-06, "loss": 0.2472, "step": 3910 }, { "epoch": 0.2719271623672231, "grad_norm": 1.1640625, "learning_rate": 4.318434782608696e-06, "loss": 0.2264, "step": 3920 }, { "epoch": 0.2726208541079558, "grad_norm": 1.3515625, "learning_rate": 4.316695652173913e-06, "loss": 0.2494, "step": 3930 }, { "epoch": 0.2733145458486885, "grad_norm": 1.59375, "learning_rate": 4.314956521739131e-06, "loss": 0.2339, "step": 3940 }, { "epoch": 0.2740082375894212, "grad_norm": 0.89453125, "learning_rate": 4.313217391304348e-06, "loss": 0.2365, "step": 3950 }, { "epoch": 0.2747019293301539, "grad_norm": 1.359375, "learning_rate": 4.311478260869565e-06, "loss": 0.2584, "step": 3960 }, { "epoch": 0.2753956210708866, "grad_norm": 1.28125, "learning_rate": 4.309739130434783e-06, "loss": 0.2279, "step": 3970 }, { "epoch": 0.27608931281161936, "grad_norm": 1.3984375, "learning_rate": 4.308000000000001e-06, "loss": 0.2542, "step": 3980 }, { "epoch": 0.27678300455235205, "grad_norm": 1.15625, "learning_rate": 4.3062608695652175e-06, "loss": 0.2315, "step": 3990 }, { "epoch": 0.27747669629308475, "grad_norm": 1.5, "learning_rate": 4.304521739130435e-06, "loss": 0.2557, "step": 4000 }, { "epoch": 0.2781703880338175, "grad_norm": 0.90625, "learning_rate": 4.302782608695652e-06, "loss": 0.2569, "step": 4010 }, { "epoch": 0.2788640797745502, "grad_norm": 1.1875, "learning_rate": 4.30104347826087e-06, "loss": 0.2823, "step": 4020 }, { "epoch": 0.2795577715152829, "grad_norm": 0.9921875, "learning_rate": 4.299304347826087e-06, "loss": 0.2826, "step": 4030 }, { "epoch": 0.28025146325601563, "grad_norm": 1.234375, "learning_rate": 4.297565217391304e-06, "loss": 0.2421, "step": 4040 }, { "epoch": 0.2809451549967483, "grad_norm": 1.375, "learning_rate": 4.2958260869565225e-06, "loss": 0.236, "step": 4050 }, { "epoch": 0.281638846737481, "grad_norm": 1.0625, "learning_rate": 4.29408695652174e-06, "loss": 0.2304, "step": 4060 }, { "epoch": 0.28233253847821377, "grad_norm": 1.2578125, "learning_rate": 4.2923478260869564e-06, "loss": 0.2403, "step": 4070 }, { "epoch": 0.28302623021894646, "grad_norm": 2.03125, "learning_rate": 4.290608695652174e-06, "loss": 0.3514, "step": 4080 }, { "epoch": 0.28371992195967916, "grad_norm": 1.328125, "learning_rate": 4.288869565217392e-06, "loss": 0.2904, "step": 4090 }, { "epoch": 0.2844136137004119, "grad_norm": 1.140625, "learning_rate": 4.287130434782609e-06, "loss": 0.2514, "step": 4100 }, { "epoch": 0.2851073054411446, "grad_norm": 1.1953125, "learning_rate": 4.285391304347826e-06, "loss": 0.2677, "step": 4110 }, { "epoch": 0.2858009971818773, "grad_norm": 1.46875, "learning_rate": 4.283652173913044e-06, "loss": 0.2288, "step": 4120 }, { "epoch": 0.28649468892261004, "grad_norm": 1.328125, "learning_rate": 4.2819130434782615e-06, "loss": 0.2551, "step": 4130 }, { "epoch": 0.28718838066334273, "grad_norm": 0.98828125, "learning_rate": 4.280173913043479e-06, "loss": 0.2538, "step": 4140 }, { "epoch": 0.28788207240407543, "grad_norm": 1.3671875, "learning_rate": 4.278434782608696e-06, "loss": 0.2679, "step": 4150 }, { "epoch": 0.2885757641448082, "grad_norm": 1.296875, "learning_rate": 4.276695652173914e-06, "loss": 0.2682, "step": 4160 }, { "epoch": 0.28926945588554087, "grad_norm": 1.8046875, "learning_rate": 4.274956521739131e-06, "loss": 0.2981, "step": 4170 }, { "epoch": 0.28996314762627357, "grad_norm": 2.015625, "learning_rate": 4.273217391304348e-06, "loss": 0.2792, "step": 4180 }, { "epoch": 0.29065683936700626, "grad_norm": 1.234375, "learning_rate": 4.271478260869566e-06, "loss": 0.243, "step": 4190 }, { "epoch": 0.291350531107739, "grad_norm": 1.078125, "learning_rate": 4.269739130434783e-06, "loss": 0.217, "step": 4200 }, { "epoch": 0.2920442228484717, "grad_norm": 1.28125, "learning_rate": 4.2680000000000005e-06, "loss": 0.2287, "step": 4210 }, { "epoch": 0.2927379145892044, "grad_norm": 1.265625, "learning_rate": 4.266260869565218e-06, "loss": 0.2609, "step": 4220 }, { "epoch": 0.29343160632993714, "grad_norm": 1.4453125, "learning_rate": 4.264521739130435e-06, "loss": 0.2283, "step": 4230 }, { "epoch": 0.29412529807066984, "grad_norm": 1.0078125, "learning_rate": 4.262782608695653e-06, "loss": 0.2076, "step": 4240 }, { "epoch": 0.29481898981140253, "grad_norm": 1.6953125, "learning_rate": 4.26104347826087e-06, "loss": 0.2701, "step": 4250 }, { "epoch": 0.2955126815521353, "grad_norm": 1.34375, "learning_rate": 4.259304347826087e-06, "loss": 0.2239, "step": 4260 }, { "epoch": 0.296206373292868, "grad_norm": 1.015625, "learning_rate": 4.257565217391305e-06, "loss": 0.2263, "step": 4270 }, { "epoch": 0.29690006503360067, "grad_norm": 1.0234375, "learning_rate": 4.255826086956522e-06, "loss": 0.2316, "step": 4280 }, { "epoch": 0.2975937567743334, "grad_norm": 1.4375, "learning_rate": 4.2540869565217394e-06, "loss": 0.2467, "step": 4290 }, { "epoch": 0.2982874485150661, "grad_norm": 1.359375, "learning_rate": 4.252347826086957e-06, "loss": 0.2514, "step": 4300 }, { "epoch": 0.2989811402557988, "grad_norm": 1.1796875, "learning_rate": 4.250608695652174e-06, "loss": 0.257, "step": 4310 }, { "epoch": 0.29967483199653155, "grad_norm": 1.453125, "learning_rate": 4.2488695652173916e-06, "loss": 0.2062, "step": 4320 }, { "epoch": 0.30036852373726425, "grad_norm": 1.140625, "learning_rate": 4.247130434782609e-06, "loss": 0.197, "step": 4330 }, { "epoch": 0.30106221547799694, "grad_norm": 0.9609375, "learning_rate": 4.245391304347826e-06, "loss": 0.2319, "step": 4340 }, { "epoch": 0.3017559072187297, "grad_norm": 1.171875, "learning_rate": 4.243652173913044e-06, "loss": 0.2404, "step": 4350 }, { "epoch": 0.3024495989594624, "grad_norm": 1.09375, "learning_rate": 4.241913043478261e-06, "loss": 0.2156, "step": 4360 }, { "epoch": 0.3031432907001951, "grad_norm": 1.3046875, "learning_rate": 4.240173913043478e-06, "loss": 0.2299, "step": 4370 }, { "epoch": 0.3038369824409278, "grad_norm": 1.375, "learning_rate": 4.238434782608696e-06, "loss": 0.2404, "step": 4380 }, { "epoch": 0.3045306741816605, "grad_norm": 1.4140625, "learning_rate": 4.236695652173913e-06, "loss": 0.1988, "step": 4390 }, { "epoch": 0.3052243659223932, "grad_norm": 0.87890625, "learning_rate": 4.2349565217391305e-06, "loss": 0.2215, "step": 4400 }, { "epoch": 0.30591805766312596, "grad_norm": 1.0390625, "learning_rate": 4.233217391304349e-06, "loss": 0.1986, "step": 4410 }, { "epoch": 0.30661174940385866, "grad_norm": 1.296875, "learning_rate": 4.231478260869565e-06, "loss": 0.2359, "step": 4420 }, { "epoch": 0.30730544114459135, "grad_norm": 1.2109375, "learning_rate": 4.229739130434783e-06, "loss": 0.2575, "step": 4430 }, { "epoch": 0.3079991328853241, "grad_norm": 1.2734375, "learning_rate": 4.228000000000001e-06, "loss": 0.2429, "step": 4440 }, { "epoch": 0.3086928246260568, "grad_norm": 1.28125, "learning_rate": 4.226260869565218e-06, "loss": 0.2587, "step": 4450 }, { "epoch": 0.3093865163667895, "grad_norm": 1.6171875, "learning_rate": 4.224521739130435e-06, "loss": 0.348, "step": 4460 }, { "epoch": 0.31008020810752224, "grad_norm": 1.2265625, "learning_rate": 4.222782608695652e-06, "loss": 0.2771, "step": 4470 }, { "epoch": 0.31077389984825493, "grad_norm": 1.1015625, "learning_rate": 4.22104347826087e-06, "loss": 0.2308, "step": 4480 }, { "epoch": 0.3114675915889876, "grad_norm": 1.015625, "learning_rate": 4.219304347826088e-06, "loss": 0.2672, "step": 4490 }, { "epoch": 0.3121612833297204, "grad_norm": 1.359375, "learning_rate": 4.217565217391304e-06, "loss": 0.2603, "step": 4500 }, { "epoch": 0.31285497507045307, "grad_norm": 0.8671875, "learning_rate": 4.2158260869565225e-06, "loss": 0.2212, "step": 4510 }, { "epoch": 0.31354866681118576, "grad_norm": 1.1875, "learning_rate": 4.21408695652174e-06, "loss": 0.2073, "step": 4520 }, { "epoch": 0.3142423585519185, "grad_norm": 1.546875, "learning_rate": 4.212347826086957e-06, "loss": 0.2602, "step": 4530 }, { "epoch": 0.3149360502926512, "grad_norm": 1.140625, "learning_rate": 4.210608695652174e-06, "loss": 0.2279, "step": 4540 }, { "epoch": 0.3156297420333839, "grad_norm": 1.40625, "learning_rate": 4.208869565217392e-06, "loss": 0.2433, "step": 4550 }, { "epoch": 0.31632343377411665, "grad_norm": 1.171875, "learning_rate": 4.207130434782609e-06, "loss": 0.3271, "step": 4560 }, { "epoch": 0.31701712551484934, "grad_norm": 1.046875, "learning_rate": 4.205391304347826e-06, "loss": 0.3058, "step": 4570 }, { "epoch": 0.31771081725558203, "grad_norm": 1.921875, "learning_rate": 4.203652173913044e-06, "loss": 0.2724, "step": 4580 }, { "epoch": 0.3184045089963148, "grad_norm": 1.09375, "learning_rate": 4.201913043478261e-06, "loss": 0.2538, "step": 4590 }, { "epoch": 0.3190982007370475, "grad_norm": 1.328125, "learning_rate": 4.200173913043479e-06, "loss": 0.2394, "step": 4600 }, { "epoch": 0.31979189247778017, "grad_norm": 1.265625, "learning_rate": 4.198434782608696e-06, "loss": 0.2433, "step": 4610 }, { "epoch": 0.3204855842185129, "grad_norm": 1.2890625, "learning_rate": 4.1966956521739135e-06, "loss": 0.2457, "step": 4620 }, { "epoch": 0.3211792759592456, "grad_norm": 1.0390625, "learning_rate": 4.194956521739131e-06, "loss": 0.2291, "step": 4630 }, { "epoch": 0.3218729676999783, "grad_norm": 1.40625, "learning_rate": 4.193217391304348e-06, "loss": 0.3308, "step": 4640 }, { "epoch": 0.32256665944071106, "grad_norm": 1.234375, "learning_rate": 4.191478260869566e-06, "loss": 0.2558, "step": 4650 }, { "epoch": 0.32326035118144375, "grad_norm": 1.2265625, "learning_rate": 4.189739130434783e-06, "loss": 0.308, "step": 4660 }, { "epoch": 0.32395404292217644, "grad_norm": 1.578125, "learning_rate": 4.188e-06, "loss": 0.2524, "step": 4670 }, { "epoch": 0.3246477346629092, "grad_norm": 1.125, "learning_rate": 4.186260869565218e-06, "loss": 0.2373, "step": 4680 }, { "epoch": 0.3253414264036419, "grad_norm": 1.4609375, "learning_rate": 4.184521739130435e-06, "loss": 0.2355, "step": 4690 }, { "epoch": 0.3260351181443746, "grad_norm": 1.0234375, "learning_rate": 4.1827826086956525e-06, "loss": 0.2094, "step": 4700 }, { "epoch": 0.32672880988510733, "grad_norm": 1.1328125, "learning_rate": 4.18104347826087e-06, "loss": 0.2346, "step": 4710 }, { "epoch": 0.32742250162584, "grad_norm": 1.109375, "learning_rate": 4.179304347826087e-06, "loss": 0.2531, "step": 4720 }, { "epoch": 0.3281161933665727, "grad_norm": 1.6015625, "learning_rate": 4.177565217391305e-06, "loss": 0.2837, "step": 4730 }, { "epoch": 0.32880988510730547, "grad_norm": 1.1484375, "learning_rate": 4.175826086956522e-06, "loss": 0.2844, "step": 4740 }, { "epoch": 0.32950357684803816, "grad_norm": 1.25, "learning_rate": 4.174086956521739e-06, "loss": 0.2293, "step": 4750 }, { "epoch": 0.33019726858877085, "grad_norm": 1.234375, "learning_rate": 4.172347826086957e-06, "loss": 0.2447, "step": 4760 }, { "epoch": 0.3308909603295036, "grad_norm": 1.4453125, "learning_rate": 4.170608695652174e-06, "loss": 0.2422, "step": 4770 }, { "epoch": 0.3315846520702363, "grad_norm": 1.2578125, "learning_rate": 4.1688695652173915e-06, "loss": 0.2318, "step": 4780 }, { "epoch": 0.332278343810969, "grad_norm": 1.7734375, "learning_rate": 4.167130434782609e-06, "loss": 0.2632, "step": 4790 }, { "epoch": 0.33297203555170174, "grad_norm": 0.91796875, "learning_rate": 4.165391304347827e-06, "loss": 0.2217, "step": 4800 }, { "epoch": 0.33366572729243443, "grad_norm": 1.34375, "learning_rate": 4.163652173913044e-06, "loss": 0.2919, "step": 4810 }, { "epoch": 0.3343594190331671, "grad_norm": 1.109375, "learning_rate": 4.161913043478261e-06, "loss": 0.2504, "step": 4820 }, { "epoch": 0.3350531107738999, "grad_norm": 0.890625, "learning_rate": 4.160173913043478e-06, "loss": 0.2633, "step": 4830 }, { "epoch": 0.33574680251463257, "grad_norm": 1.03125, "learning_rate": 4.1584347826086965e-06, "loss": 0.2394, "step": 4840 }, { "epoch": 0.33644049425536526, "grad_norm": 1.1484375, "learning_rate": 4.156695652173913e-06, "loss": 0.2634, "step": 4850 }, { "epoch": 0.33713418599609796, "grad_norm": 1.265625, "learning_rate": 4.1549565217391304e-06, "loss": 0.2994, "step": 4860 }, { "epoch": 0.3378278777368307, "grad_norm": 1.0, "learning_rate": 4.153217391304349e-06, "loss": 0.2641, "step": 4870 }, { "epoch": 0.3385215694775634, "grad_norm": 1.1484375, "learning_rate": 4.151478260869565e-06, "loss": 0.233, "step": 4880 }, { "epoch": 0.3392152612182961, "grad_norm": 1.0703125, "learning_rate": 4.1497391304347826e-06, "loss": 0.2218, "step": 4890 }, { "epoch": 0.33990895295902884, "grad_norm": 1.5546875, "learning_rate": 4.148000000000001e-06, "loss": 0.2219, "step": 4900 }, { "epoch": 0.34060264469976154, "grad_norm": 1.1015625, "learning_rate": 4.146260869565218e-06, "loss": 0.2372, "step": 4910 }, { "epoch": 0.34129633644049423, "grad_norm": 1.3828125, "learning_rate": 4.144521739130435e-06, "loss": 0.2405, "step": 4920 }, { "epoch": 0.341990028181227, "grad_norm": 1.1640625, "learning_rate": 4.142782608695652e-06, "loss": 0.2585, "step": 4930 }, { "epoch": 0.3426837199219597, "grad_norm": 1.09375, "learning_rate": 4.14104347826087e-06, "loss": 0.2515, "step": 4940 }, { "epoch": 0.34337741166269237, "grad_norm": 1.09375, "learning_rate": 4.139304347826088e-06, "loss": 0.3092, "step": 4950 }, { "epoch": 0.3440711034034251, "grad_norm": 1.28125, "learning_rate": 4.137565217391304e-06, "loss": 0.237, "step": 4960 }, { "epoch": 0.3447647951441578, "grad_norm": 1.359375, "learning_rate": 4.135826086956522e-06, "loss": 0.2279, "step": 4970 }, { "epoch": 0.3454584868848905, "grad_norm": 1.3203125, "learning_rate": 4.13408695652174e-06, "loss": 0.2481, "step": 4980 }, { "epoch": 0.34615217862562325, "grad_norm": 1.046875, "learning_rate": 4.132347826086957e-06, "loss": 0.2375, "step": 4990 }, { "epoch": 0.34684587036635595, "grad_norm": 1.3125, "learning_rate": 4.130608695652174e-06, "loss": 0.2408, "step": 5000 }, { "epoch": 0.34753956210708864, "grad_norm": 1.203125, "learning_rate": 4.128869565217392e-06, "loss": 0.2326, "step": 5010 }, { "epoch": 0.3482332538478214, "grad_norm": 1.0234375, "learning_rate": 4.127130434782609e-06, "loss": 0.2321, "step": 5020 }, { "epoch": 0.3489269455885541, "grad_norm": 1.078125, "learning_rate": 4.125391304347827e-06, "loss": 0.2579, "step": 5030 }, { "epoch": 0.3496206373292868, "grad_norm": 1.0390625, "learning_rate": 4.123652173913044e-06, "loss": 0.2407, "step": 5040 }, { "epoch": 0.3503143290700195, "grad_norm": 1.53125, "learning_rate": 4.121913043478261e-06, "loss": 0.2519, "step": 5050 }, { "epoch": 0.3510080208107522, "grad_norm": 1.1953125, "learning_rate": 4.120173913043479e-06, "loss": 0.2598, "step": 5060 }, { "epoch": 0.3517017125514849, "grad_norm": 1.203125, "learning_rate": 4.118434782608696e-06, "loss": 0.2603, "step": 5070 }, { "epoch": 0.35239540429221766, "grad_norm": 1.296875, "learning_rate": 4.1166956521739135e-06, "loss": 0.2179, "step": 5080 }, { "epoch": 0.35308909603295036, "grad_norm": 0.98828125, "learning_rate": 4.114956521739131e-06, "loss": 0.2368, "step": 5090 }, { "epoch": 0.35378278777368305, "grad_norm": 1.1015625, "learning_rate": 4.113217391304348e-06, "loss": 0.2639, "step": 5100 }, { "epoch": 0.3544764795144158, "grad_norm": 1.03125, "learning_rate": 4.1114782608695656e-06, "loss": 0.224, "step": 5110 }, { "epoch": 0.3551701712551485, "grad_norm": 1.2578125, "learning_rate": 4.109739130434783e-06, "loss": 0.2546, "step": 5120 }, { "epoch": 0.3558638629958812, "grad_norm": 1.3671875, "learning_rate": 4.108e-06, "loss": 0.2738, "step": 5130 }, { "epoch": 0.35655755473661394, "grad_norm": 1.125, "learning_rate": 4.106260869565218e-06, "loss": 0.2176, "step": 5140 }, { "epoch": 0.35725124647734663, "grad_norm": 1.125, "learning_rate": 4.104521739130435e-06, "loss": 0.2345, "step": 5150 }, { "epoch": 0.3579449382180793, "grad_norm": 1.25, "learning_rate": 4.102782608695652e-06, "loss": 0.2391, "step": 5160 }, { "epoch": 0.35863862995881207, "grad_norm": 1.328125, "learning_rate": 4.10104347826087e-06, "loss": 0.2476, "step": 5170 }, { "epoch": 0.35933232169954477, "grad_norm": 1.390625, "learning_rate": 4.099304347826087e-06, "loss": 0.2305, "step": 5180 }, { "epoch": 0.36002601344027746, "grad_norm": 1.3984375, "learning_rate": 4.0975652173913045e-06, "loss": 0.2072, "step": 5190 }, { "epoch": 0.3607197051810102, "grad_norm": 1.3828125, "learning_rate": 4.095826086956522e-06, "loss": 0.2452, "step": 5200 }, { "epoch": 0.3614133969217429, "grad_norm": 1.453125, "learning_rate": 4.094086956521739e-06, "loss": 0.2279, "step": 5210 }, { "epoch": 0.3621070886624756, "grad_norm": 0.8984375, "learning_rate": 4.092347826086957e-06, "loss": 0.2563, "step": 5220 }, { "epoch": 0.36280078040320834, "grad_norm": 1.1953125, "learning_rate": 4.090608695652174e-06, "loss": 0.2403, "step": 5230 }, { "epoch": 0.36349447214394104, "grad_norm": 1.4375, "learning_rate": 4.088869565217391e-06, "loss": 0.2423, "step": 5240 }, { "epoch": 0.36418816388467373, "grad_norm": 1.0234375, "learning_rate": 4.087130434782609e-06, "loss": 0.2361, "step": 5250 }, { "epoch": 0.3648818556254065, "grad_norm": 1.125, "learning_rate": 4.085391304347827e-06, "loss": 0.2578, "step": 5260 }, { "epoch": 0.3655755473661392, "grad_norm": 1.5703125, "learning_rate": 4.0836521739130435e-06, "loss": 0.2747, "step": 5270 }, { "epoch": 0.36626923910687187, "grad_norm": 0.91015625, "learning_rate": 4.081913043478261e-06, "loss": 0.2621, "step": 5280 }, { "epoch": 0.3669629308476046, "grad_norm": 1.2265625, "learning_rate": 4.080173913043478e-06, "loss": 0.2146, "step": 5290 }, { "epoch": 0.3676566225883373, "grad_norm": 1.2734375, "learning_rate": 4.0784347826086965e-06, "loss": 0.2239, "step": 5300 }, { "epoch": 0.36835031432907, "grad_norm": 1.0078125, "learning_rate": 4.076695652173913e-06, "loss": 0.2409, "step": 5310 }, { "epoch": 0.36904400606980275, "grad_norm": 1.5078125, "learning_rate": 4.07495652173913e-06, "loss": 0.2346, "step": 5320 }, { "epoch": 0.36973769781053545, "grad_norm": 1.1796875, "learning_rate": 4.073217391304349e-06, "loss": 0.2461, "step": 5330 }, { "epoch": 0.37043138955126814, "grad_norm": 1.328125, "learning_rate": 4.071478260869566e-06, "loss": 0.3231, "step": 5340 }, { "epoch": 0.3711250812920009, "grad_norm": 1.0546875, "learning_rate": 4.0697391304347825e-06, "loss": 0.2363, "step": 5350 }, { "epoch": 0.3718187730327336, "grad_norm": 1.21875, "learning_rate": 4.068000000000001e-06, "loss": 0.2485, "step": 5360 }, { "epoch": 0.3725124647734663, "grad_norm": 1.3046875, "learning_rate": 4.066260869565218e-06, "loss": 0.2416, "step": 5370 }, { "epoch": 0.37320615651419903, "grad_norm": 1.34375, "learning_rate": 4.0645217391304354e-06, "loss": 0.2117, "step": 5380 }, { "epoch": 0.3738998482549317, "grad_norm": 1.1640625, "learning_rate": 4.062782608695652e-06, "loss": 0.2054, "step": 5390 }, { "epoch": 0.3745935399956644, "grad_norm": 1.078125, "learning_rate": 4.06104347826087e-06, "loss": 0.216, "step": 5400 }, { "epoch": 0.37528723173639716, "grad_norm": 1.0390625, "learning_rate": 4.0593043478260875e-06, "loss": 0.251, "step": 5410 }, { "epoch": 0.37598092347712986, "grad_norm": 0.92578125, "learning_rate": 4.057565217391305e-06, "loss": 0.2377, "step": 5420 }, { "epoch": 0.37667461521786255, "grad_norm": 1.2890625, "learning_rate": 4.055826086956522e-06, "loss": 0.2643, "step": 5430 }, { "epoch": 0.3773683069585953, "grad_norm": 2.03125, "learning_rate": 4.05408695652174e-06, "loss": 0.3051, "step": 5440 }, { "epoch": 0.378061998699328, "grad_norm": 0.96875, "learning_rate": 4.052347826086957e-06, "loss": 0.2471, "step": 5450 }, { "epoch": 0.3787556904400607, "grad_norm": 1.015625, "learning_rate": 4.050608695652174e-06, "loss": 0.2804, "step": 5460 }, { "epoch": 0.37944938218079344, "grad_norm": 1.078125, "learning_rate": 4.048869565217392e-06, "loss": 0.2892, "step": 5470 }, { "epoch": 0.38014307392152613, "grad_norm": 1.015625, "learning_rate": 4.047130434782609e-06, "loss": 0.2323, "step": 5480 }, { "epoch": 0.3808367656622588, "grad_norm": 1.0859375, "learning_rate": 4.0453913043478265e-06, "loss": 0.2362, "step": 5490 }, { "epoch": 0.3815304574029915, "grad_norm": 0.96875, "learning_rate": 4.043652173913044e-06, "loss": 0.2262, "step": 5500 }, { "epoch": 0.38222414914372427, "grad_norm": 1.1015625, "learning_rate": 4.041913043478261e-06, "loss": 0.2667, "step": 5510 }, { "epoch": 0.38291784088445696, "grad_norm": 1.03125, "learning_rate": 4.040173913043479e-06, "loss": 0.2642, "step": 5520 }, { "epoch": 0.38361153262518966, "grad_norm": 1.4140625, "learning_rate": 4.038434782608696e-06, "loss": 0.2579, "step": 5530 }, { "epoch": 0.3843052243659224, "grad_norm": 0.921875, "learning_rate": 4.036695652173913e-06, "loss": 0.2491, "step": 5540 }, { "epoch": 0.3849989161066551, "grad_norm": 1.7890625, "learning_rate": 4.034956521739131e-06, "loss": 0.2521, "step": 5550 }, { "epoch": 0.3856926078473878, "grad_norm": 1.9453125, "learning_rate": 4.033217391304348e-06, "loss": 0.3052, "step": 5560 }, { "epoch": 0.38638629958812054, "grad_norm": 1.1171875, "learning_rate": 4.0314782608695655e-06, "loss": 0.2403, "step": 5570 }, { "epoch": 0.38707999132885323, "grad_norm": 1.1328125, "learning_rate": 4.029739130434783e-06, "loss": 0.2278, "step": 5580 }, { "epoch": 0.38777368306958593, "grad_norm": 1.15625, "learning_rate": 4.028e-06, "loss": 0.2863, "step": 5590 }, { "epoch": 0.3884673748103187, "grad_norm": 1.46875, "learning_rate": 4.026260869565218e-06, "loss": 0.2414, "step": 5600 }, { "epoch": 0.38916106655105137, "grad_norm": 1.4375, "learning_rate": 4.024521739130435e-06, "loss": 0.2932, "step": 5610 }, { "epoch": 0.38985475829178406, "grad_norm": 1.7421875, "learning_rate": 4.022782608695652e-06, "loss": 0.2212, "step": 5620 }, { "epoch": 0.3905484500325168, "grad_norm": 1.28125, "learning_rate": 4.02104347826087e-06, "loss": 0.2225, "step": 5630 }, { "epoch": 0.3912421417732495, "grad_norm": 0.9296875, "learning_rate": 4.019304347826087e-06, "loss": 0.2289, "step": 5640 }, { "epoch": 0.3919358335139822, "grad_norm": 1.25, "learning_rate": 4.017565217391305e-06, "loss": 0.3125, "step": 5650 }, { "epoch": 0.39262952525471495, "grad_norm": 1.7578125, "learning_rate": 4.015826086956522e-06, "loss": 0.278, "step": 5660 }, { "epoch": 0.39332321699544764, "grad_norm": 1.1875, "learning_rate": 4.014086956521739e-06, "loss": 0.2555, "step": 5670 }, { "epoch": 0.39401690873618034, "grad_norm": 1.1171875, "learning_rate": 4.0123478260869566e-06, "loss": 0.2313, "step": 5680 }, { "epoch": 0.3947106004769131, "grad_norm": 1.1953125, "learning_rate": 4.010608695652175e-06, "loss": 0.2275, "step": 5690 }, { "epoch": 0.3954042922176458, "grad_norm": 1.0390625, "learning_rate": 4.008869565217391e-06, "loss": 0.219, "step": 5700 }, { "epoch": 0.3960979839583785, "grad_norm": 1.21875, "learning_rate": 4.007130434782609e-06, "loss": 0.2823, "step": 5710 }, { "epoch": 0.3967916756991112, "grad_norm": 1.4296875, "learning_rate": 4.005391304347827e-06, "loss": 0.2273, "step": 5720 }, { "epoch": 0.3974853674398439, "grad_norm": 1.0234375, "learning_rate": 4.003652173913044e-06, "loss": 0.2356, "step": 5730 }, { "epoch": 0.3981790591805766, "grad_norm": 1.0859375, "learning_rate": 4.001913043478261e-06, "loss": 0.2267, "step": 5740 }, { "epoch": 0.39887275092130936, "grad_norm": 1.203125, "learning_rate": 4.000173913043478e-06, "loss": 0.2469, "step": 5750 }, { "epoch": 0.39956644266204205, "grad_norm": 1.3515625, "learning_rate": 3.998434782608696e-06, "loss": 0.2549, "step": 5760 }, { "epoch": 0.40026013440277475, "grad_norm": 0.7578125, "learning_rate": 3.996695652173914e-06, "loss": 0.23, "step": 5770 }, { "epoch": 0.4009538261435075, "grad_norm": 1.2734375, "learning_rate": 3.99495652173913e-06, "loss": 0.2393, "step": 5780 }, { "epoch": 0.4016475178842402, "grad_norm": 1.328125, "learning_rate": 3.9932173913043485e-06, "loss": 0.3191, "step": 5790 }, { "epoch": 0.4023412096249729, "grad_norm": 1.09375, "learning_rate": 3.991478260869566e-06, "loss": 0.2143, "step": 5800 }, { "epoch": 0.40303490136570563, "grad_norm": 1.4296875, "learning_rate": 3.989739130434782e-06, "loss": 0.2801, "step": 5810 }, { "epoch": 0.4037285931064383, "grad_norm": 1.4140625, "learning_rate": 3.988000000000001e-06, "loss": 0.2921, "step": 5820 }, { "epoch": 0.404422284847171, "grad_norm": 1.2265625, "learning_rate": 3.986260869565218e-06, "loss": 0.3027, "step": 5830 }, { "epoch": 0.40511597658790377, "grad_norm": 1.53125, "learning_rate": 3.984521739130435e-06, "loss": 0.2357, "step": 5840 }, { "epoch": 0.40580966832863646, "grad_norm": 0.99609375, "learning_rate": 3.982782608695652e-06, "loss": 0.2426, "step": 5850 }, { "epoch": 0.40650336006936916, "grad_norm": 1.3515625, "learning_rate": 3.98104347826087e-06, "loss": 0.2232, "step": 5860 }, { "epoch": 0.4071970518101019, "grad_norm": 1.296875, "learning_rate": 3.9793043478260875e-06, "loss": 0.2597, "step": 5870 }, { "epoch": 0.4078907435508346, "grad_norm": 1.03125, "learning_rate": 3.977565217391305e-06, "loss": 0.2848, "step": 5880 }, { "epoch": 0.4085844352915673, "grad_norm": 1.3203125, "learning_rate": 3.975826086956522e-06, "loss": 0.2033, "step": 5890 }, { "epoch": 0.40927812703230004, "grad_norm": 1.2578125, "learning_rate": 3.97408695652174e-06, "loss": 0.2851, "step": 5900 }, { "epoch": 0.40997181877303274, "grad_norm": 1.078125, "learning_rate": 3.972347826086957e-06, "loss": 0.2773, "step": 5910 }, { "epoch": 0.41066551051376543, "grad_norm": 1.375, "learning_rate": 3.970608695652174e-06, "loss": 0.3003, "step": 5920 }, { "epoch": 0.4113592022544982, "grad_norm": 1.328125, "learning_rate": 3.968869565217392e-06, "loss": 0.2621, "step": 5930 }, { "epoch": 0.4120528939952309, "grad_norm": 1.2890625, "learning_rate": 3.967130434782609e-06, "loss": 0.2287, "step": 5940 }, { "epoch": 0.41274658573596357, "grad_norm": 1.1640625, "learning_rate": 3.9653913043478264e-06, "loss": 0.2912, "step": 5950 }, { "epoch": 0.4134402774766963, "grad_norm": 1.1171875, "learning_rate": 3.963652173913044e-06, "loss": 0.1962, "step": 5960 }, { "epoch": 0.414133969217429, "grad_norm": 0.88671875, "learning_rate": 3.961913043478261e-06, "loss": 0.2444, "step": 5970 }, { "epoch": 0.4148276609581617, "grad_norm": 1.1328125, "learning_rate": 3.9601739130434785e-06, "loss": 0.2362, "step": 5980 }, { "epoch": 0.41552135269889445, "grad_norm": 1.15625, "learning_rate": 3.958434782608696e-06, "loss": 0.2421, "step": 5990 }, { "epoch": 0.41621504443962715, "grad_norm": 1.0390625, "learning_rate": 3.956695652173913e-06, "loss": 0.2565, "step": 6000 }, { "epoch": 0.41690873618035984, "grad_norm": 1.53125, "learning_rate": 3.954956521739131e-06, "loss": 0.2551, "step": 6010 }, { "epoch": 0.4176024279210926, "grad_norm": 1.234375, "learning_rate": 3.953217391304348e-06, "loss": 0.2542, "step": 6020 }, { "epoch": 0.4182961196618253, "grad_norm": 1.2421875, "learning_rate": 3.951478260869565e-06, "loss": 0.2514, "step": 6030 }, { "epoch": 0.418989811402558, "grad_norm": 1.6875, "learning_rate": 3.949739130434783e-06, "loss": 0.2156, "step": 6040 }, { "epoch": 0.4196835031432907, "grad_norm": 0.96484375, "learning_rate": 3.948e-06, "loss": 0.2041, "step": 6050 }, { "epoch": 0.4203771948840234, "grad_norm": 1.5859375, "learning_rate": 3.9462608695652175e-06, "loss": 0.245, "step": 6060 }, { "epoch": 0.4210708866247561, "grad_norm": 1.125, "learning_rate": 3.944521739130435e-06, "loss": 0.2536, "step": 6070 }, { "epoch": 0.42176457836548886, "grad_norm": 1.28125, "learning_rate": 3.942782608695653e-06, "loss": 0.2117, "step": 6080 }, { "epoch": 0.42245827010622156, "grad_norm": 1.359375, "learning_rate": 3.94104347826087e-06, "loss": 0.2369, "step": 6090 }, { "epoch": 0.42315196184695425, "grad_norm": 1.484375, "learning_rate": 3.939304347826087e-06, "loss": 0.2449, "step": 6100 }, { "epoch": 0.423845653587687, "grad_norm": 1.4921875, "learning_rate": 3.937565217391305e-06, "loss": 0.3172, "step": 6110 }, { "epoch": 0.4245393453284197, "grad_norm": 0.984375, "learning_rate": 3.935826086956522e-06, "loss": 0.2349, "step": 6120 }, { "epoch": 0.4252330370691524, "grad_norm": 1.09375, "learning_rate": 3.934086956521739e-06, "loss": 0.2547, "step": 6130 }, { "epoch": 0.4259267288098851, "grad_norm": 1.125, "learning_rate": 3.9323478260869565e-06, "loss": 0.2688, "step": 6140 }, { "epoch": 0.42662042055061783, "grad_norm": 1.46875, "learning_rate": 3.930608695652175e-06, "loss": 0.3595, "step": 6150 }, { "epoch": 0.4273141122913505, "grad_norm": 1.125, "learning_rate": 3.928869565217391e-06, "loss": 0.2317, "step": 6160 }, { "epoch": 0.4280078040320832, "grad_norm": 1.6328125, "learning_rate": 3.927130434782609e-06, "loss": 0.2359, "step": 6170 }, { "epoch": 0.42870149577281597, "grad_norm": 1.125, "learning_rate": 3.925391304347827e-06, "loss": 0.2333, "step": 6180 }, { "epoch": 0.42939518751354866, "grad_norm": 1.1328125, "learning_rate": 3.923652173913044e-06, "loss": 0.2054, "step": 6190 }, { "epoch": 0.43008887925428135, "grad_norm": 1.40625, "learning_rate": 3.921913043478261e-06, "loss": 0.2491, "step": 6200 }, { "epoch": 0.4307825709950141, "grad_norm": 1.328125, "learning_rate": 3.920173913043478e-06, "loss": 0.2249, "step": 6210 }, { "epoch": 0.4314762627357468, "grad_norm": 1.078125, "learning_rate": 3.918434782608696e-06, "loss": 0.2962, "step": 6220 }, { "epoch": 0.4321699544764795, "grad_norm": 1.59375, "learning_rate": 3.916695652173914e-06, "loss": 0.3596, "step": 6230 }, { "epoch": 0.43286364621721224, "grad_norm": 1.28125, "learning_rate": 3.91495652173913e-06, "loss": 0.2359, "step": 6240 }, { "epoch": 0.43355733795794493, "grad_norm": 1.4140625, "learning_rate": 3.913217391304348e-06, "loss": 0.3381, "step": 6250 }, { "epoch": 0.4342510296986776, "grad_norm": 1.171875, "learning_rate": 3.911478260869566e-06, "loss": 0.24, "step": 6260 }, { "epoch": 0.4349447214394104, "grad_norm": 1.21875, "learning_rate": 3.909739130434783e-06, "loss": 0.2566, "step": 6270 }, { "epoch": 0.43563841318014307, "grad_norm": 1.359375, "learning_rate": 3.9080000000000005e-06, "loss": 0.2599, "step": 6280 }, { "epoch": 0.43633210492087576, "grad_norm": 1.234375, "learning_rate": 3.906260869565218e-06, "loss": 0.2257, "step": 6290 }, { "epoch": 0.4370257966616085, "grad_norm": 1.078125, "learning_rate": 3.904521739130435e-06, "loss": 0.229, "step": 6300 }, { "epoch": 0.4377194884023412, "grad_norm": 1.2109375, "learning_rate": 3.902782608695653e-06, "loss": 0.2464, "step": 6310 }, { "epoch": 0.4384131801430739, "grad_norm": 1.34375, "learning_rate": 3.90104347826087e-06, "loss": 0.2392, "step": 6320 }, { "epoch": 0.43910687188380665, "grad_norm": 1.6015625, "learning_rate": 3.899304347826087e-06, "loss": 0.2661, "step": 6330 }, { "epoch": 0.43980056362453934, "grad_norm": 0.94140625, "learning_rate": 3.897565217391305e-06, "loss": 0.2971, "step": 6340 }, { "epoch": 0.44049425536527204, "grad_norm": 1.2421875, "learning_rate": 3.895826086956522e-06, "loss": 0.2578, "step": 6350 }, { "epoch": 0.4411879471060048, "grad_norm": 1.125, "learning_rate": 3.8940869565217395e-06, "loss": 0.2107, "step": 6360 }, { "epoch": 0.4418816388467375, "grad_norm": 1.1640625, "learning_rate": 3.892347826086957e-06, "loss": 0.213, "step": 6370 }, { "epoch": 0.4425753305874702, "grad_norm": 1.4375, "learning_rate": 3.890608695652174e-06, "loss": 0.3, "step": 6380 }, { "epoch": 0.4432690223282029, "grad_norm": 1.21875, "learning_rate": 3.888869565217392e-06, "loss": 0.2388, "step": 6390 }, { "epoch": 0.4439627140689356, "grad_norm": 1.0859375, "learning_rate": 3.887130434782609e-06, "loss": 0.2557, "step": 6400 }, { "epoch": 0.4446564058096683, "grad_norm": 0.71484375, "learning_rate": 3.885391304347826e-06, "loss": 0.2638, "step": 6410 }, { "epoch": 0.44535009755040106, "grad_norm": 1.5234375, "learning_rate": 3.883652173913044e-06, "loss": 0.2884, "step": 6420 }, { "epoch": 0.44604378929113375, "grad_norm": 1.2265625, "learning_rate": 3.881913043478261e-06, "loss": 0.2356, "step": 6430 }, { "epoch": 0.44673748103186645, "grad_norm": 1.65625, "learning_rate": 3.8801739130434785e-06, "loss": 0.2654, "step": 6440 }, { "epoch": 0.4474311727725992, "grad_norm": 1.0390625, "learning_rate": 3.878434782608696e-06, "loss": 0.2445, "step": 6450 }, { "epoch": 0.4481248645133319, "grad_norm": 1.1796875, "learning_rate": 3.876695652173913e-06, "loss": 0.2363, "step": 6460 }, { "epoch": 0.4488185562540646, "grad_norm": 1.234375, "learning_rate": 3.874956521739131e-06, "loss": 0.2402, "step": 6470 }, { "epoch": 0.44951224799479733, "grad_norm": 1.0546875, "learning_rate": 3.873217391304348e-06, "loss": 0.2362, "step": 6480 }, { "epoch": 0.45020593973553, "grad_norm": 1.2890625, "learning_rate": 3.871478260869565e-06, "loss": 0.3399, "step": 6490 }, { "epoch": 0.4508996314762627, "grad_norm": 1.2421875, "learning_rate": 3.869739130434783e-06, "loss": 0.2556, "step": 6500 }, { "epoch": 0.45159332321699547, "grad_norm": 1.0390625, "learning_rate": 3.868e-06, "loss": 0.2391, "step": 6510 }, { "epoch": 0.45228701495772816, "grad_norm": 1.1875, "learning_rate": 3.8662608695652174e-06, "loss": 0.315, "step": 6520 }, { "epoch": 0.45298070669846086, "grad_norm": 1.5546875, "learning_rate": 3.864521739130435e-06, "loss": 0.2522, "step": 6530 }, { "epoch": 0.4536743984391936, "grad_norm": 1.2109375, "learning_rate": 3.862782608695653e-06, "loss": 0.2416, "step": 6540 }, { "epoch": 0.4543680901799263, "grad_norm": 1.328125, "learning_rate": 3.8610434782608696e-06, "loss": 0.2644, "step": 6550 }, { "epoch": 0.455061781920659, "grad_norm": 1.140625, "learning_rate": 3.859304347826087e-06, "loss": 0.2374, "step": 6560 }, { "epoch": 0.45575547366139174, "grad_norm": 1.375, "learning_rate": 3.857565217391305e-06, "loss": 0.2737, "step": 6570 }, { "epoch": 0.45644916540212443, "grad_norm": 1.015625, "learning_rate": 3.8558260869565225e-06, "loss": 0.2772, "step": 6580 }, { "epoch": 0.45714285714285713, "grad_norm": 1.2109375, "learning_rate": 3.854086956521739e-06, "loss": 0.2583, "step": 6590 }, { "epoch": 0.4578365488835899, "grad_norm": 1.546875, "learning_rate": 3.852347826086956e-06, "loss": 0.2847, "step": 6600 }, { "epoch": 0.45853024062432257, "grad_norm": 1.1875, "learning_rate": 3.850608695652175e-06, "loss": 0.213, "step": 6610 }, { "epoch": 0.45922393236505527, "grad_norm": 0.98046875, "learning_rate": 3.848869565217392e-06, "loss": 0.2157, "step": 6620 }, { "epoch": 0.459917624105788, "grad_norm": 1.375, "learning_rate": 3.8471304347826085e-06, "loss": 0.2696, "step": 6630 }, { "epoch": 0.4606113158465207, "grad_norm": 1.046875, "learning_rate": 3.845391304347827e-06, "loss": 0.2729, "step": 6640 }, { "epoch": 0.4613050075872534, "grad_norm": 1.1796875, "learning_rate": 3.843652173913044e-06, "loss": 0.2258, "step": 6650 }, { "epoch": 0.46199869932798615, "grad_norm": 1.125, "learning_rate": 3.8419130434782615e-06, "loss": 0.2469, "step": 6660 }, { "epoch": 0.46269239106871884, "grad_norm": 0.94140625, "learning_rate": 3.840173913043478e-06, "loss": 0.2038, "step": 6670 }, { "epoch": 0.46338608280945154, "grad_norm": 1.1171875, "learning_rate": 3.838434782608696e-06, "loss": 0.315, "step": 6680 }, { "epoch": 0.4640797745501843, "grad_norm": 1.453125, "learning_rate": 3.836695652173914e-06, "loss": 0.2534, "step": 6690 }, { "epoch": 0.464773466290917, "grad_norm": 1.265625, "learning_rate": 3.834956521739131e-06, "loss": 0.2372, "step": 6700 }, { "epoch": 0.4654671580316497, "grad_norm": 1.234375, "learning_rate": 3.833217391304348e-06, "loss": 0.246, "step": 6710 }, { "epoch": 0.4661608497723824, "grad_norm": 1.1171875, "learning_rate": 3.831478260869566e-06, "loss": 0.2586, "step": 6720 }, { "epoch": 0.4668545415131151, "grad_norm": 1.25, "learning_rate": 3.829739130434783e-06, "loss": 0.2605, "step": 6730 }, { "epoch": 0.4675482332538478, "grad_norm": 1.5625, "learning_rate": 3.8280000000000004e-06, "loss": 0.2853, "step": 6740 }, { "epoch": 0.46824192499458056, "grad_norm": 1.375, "learning_rate": 3.826260869565218e-06, "loss": 0.2116, "step": 6750 }, { "epoch": 0.46893561673531325, "grad_norm": 1.09375, "learning_rate": 3.824521739130435e-06, "loss": 0.3067, "step": 6760 }, { "epoch": 0.46962930847604595, "grad_norm": 1.1953125, "learning_rate": 3.8227826086956526e-06, "loss": 0.2128, "step": 6770 }, { "epoch": 0.47032300021677864, "grad_norm": 0.85546875, "learning_rate": 3.82104347826087e-06, "loss": 0.2067, "step": 6780 }, { "epoch": 0.4710166919575114, "grad_norm": 1.296875, "learning_rate": 3.819304347826087e-06, "loss": 0.2094, "step": 6790 }, { "epoch": 0.4717103836982441, "grad_norm": 1.4140625, "learning_rate": 3.817565217391305e-06, "loss": 0.2455, "step": 6800 }, { "epoch": 0.4724040754389768, "grad_norm": 1.1796875, "learning_rate": 3.815826086956522e-06, "loss": 0.2471, "step": 6810 }, { "epoch": 0.4730977671797095, "grad_norm": 1.5546875, "learning_rate": 3.8140869565217394e-06, "loss": 0.2839, "step": 6820 }, { "epoch": 0.4737914589204422, "grad_norm": 1.5703125, "learning_rate": 3.812347826086957e-06, "loss": 0.2218, "step": 6830 }, { "epoch": 0.4744851506611749, "grad_norm": 1.1875, "learning_rate": 3.810608695652174e-06, "loss": 0.2793, "step": 6840 }, { "epoch": 0.47517884240190766, "grad_norm": 1.9453125, "learning_rate": 3.808869565217392e-06, "loss": 0.3007, "step": 6850 }, { "epoch": 0.47587253414264036, "grad_norm": 0.99609375, "learning_rate": 3.807130434782609e-06, "loss": 0.1971, "step": 6860 }, { "epoch": 0.47656622588337305, "grad_norm": 0.89453125, "learning_rate": 3.8053913043478263e-06, "loss": 0.2195, "step": 6870 }, { "epoch": 0.4772599176241058, "grad_norm": 1.140625, "learning_rate": 3.803652173913044e-06, "loss": 0.2397, "step": 6880 }, { "epoch": 0.4779536093648385, "grad_norm": 1.5625, "learning_rate": 3.8019130434782614e-06, "loss": 0.2547, "step": 6890 }, { "epoch": 0.4786473011055712, "grad_norm": 1.1015625, "learning_rate": 3.8001739130434784e-06, "loss": 0.2687, "step": 6900 }, { "epoch": 0.47934099284630394, "grad_norm": 1.46875, "learning_rate": 3.7984347826086958e-06, "loss": 0.2853, "step": 6910 }, { "epoch": 0.48003468458703663, "grad_norm": 1.140625, "learning_rate": 3.7966956521739136e-06, "loss": 0.2279, "step": 6920 }, { "epoch": 0.4807283763277693, "grad_norm": 1.5078125, "learning_rate": 3.794956521739131e-06, "loss": 0.2167, "step": 6930 }, { "epoch": 0.4814220680685021, "grad_norm": 1.1875, "learning_rate": 3.793217391304348e-06, "loss": 0.3339, "step": 6940 }, { "epoch": 0.48211575980923477, "grad_norm": 1.46875, "learning_rate": 3.7914782608695657e-06, "loss": 0.2604, "step": 6950 }, { "epoch": 0.48280945154996746, "grad_norm": 1.0078125, "learning_rate": 3.789739130434783e-06, "loss": 0.2045, "step": 6960 }, { "epoch": 0.4835031432907002, "grad_norm": 1.40625, "learning_rate": 3.7880000000000004e-06, "loss": 0.2629, "step": 6970 }, { "epoch": 0.4841968350314329, "grad_norm": 1.1484375, "learning_rate": 3.7862608695652174e-06, "loss": 0.2602, "step": 6980 }, { "epoch": 0.4848905267721656, "grad_norm": 1.6171875, "learning_rate": 3.784521739130435e-06, "loss": 0.3032, "step": 6990 }, { "epoch": 0.48558421851289835, "grad_norm": 0.9453125, "learning_rate": 3.7827826086956525e-06, "loss": 0.238, "step": 7000 }, { "epoch": 0.48627791025363104, "grad_norm": 1.3203125, "learning_rate": 3.7810434782608703e-06, "loss": 0.2422, "step": 7010 }, { "epoch": 0.48697160199436373, "grad_norm": 1.0234375, "learning_rate": 3.7793043478260873e-06, "loss": 0.2349, "step": 7020 }, { "epoch": 0.4876652937350965, "grad_norm": 1.3125, "learning_rate": 3.7775652173913046e-06, "loss": 0.23, "step": 7030 }, { "epoch": 0.4883589854758292, "grad_norm": 1.03125, "learning_rate": 3.775826086956522e-06, "loss": 0.2634, "step": 7040 }, { "epoch": 0.48905267721656187, "grad_norm": 1.453125, "learning_rate": 3.7740869565217394e-06, "loss": 0.2912, "step": 7050 }, { "epoch": 0.4897463689572946, "grad_norm": 1.0234375, "learning_rate": 3.7723478260869567e-06, "loss": 0.232, "step": 7060 }, { "epoch": 0.4904400606980273, "grad_norm": 1.0390625, "learning_rate": 3.770608695652174e-06, "loss": 0.2252, "step": 7070 }, { "epoch": 0.49113375243876, "grad_norm": 0.8125, "learning_rate": 3.768869565217392e-06, "loss": 0.2311, "step": 7080 }, { "epoch": 0.49182744417949276, "grad_norm": 1.234375, "learning_rate": 3.767130434782609e-06, "loss": 0.2493, "step": 7090 }, { "epoch": 0.49252113592022545, "grad_norm": 1.3671875, "learning_rate": 3.7653913043478262e-06, "loss": 0.2665, "step": 7100 }, { "epoch": 0.49321482766095814, "grad_norm": 1.328125, "learning_rate": 3.763652173913044e-06, "loss": 0.2158, "step": 7110 }, { "epoch": 0.4939085194016909, "grad_norm": 1.1328125, "learning_rate": 3.7619130434782614e-06, "loss": 0.2555, "step": 7120 }, { "epoch": 0.4946022111424236, "grad_norm": 0.9296875, "learning_rate": 3.7601739130434783e-06, "loss": 0.2437, "step": 7130 }, { "epoch": 0.4952959028831563, "grad_norm": 1.15625, "learning_rate": 3.7584347826086957e-06, "loss": 0.2703, "step": 7140 }, { "epoch": 0.49598959462388903, "grad_norm": 1.203125, "learning_rate": 3.7566956521739135e-06, "loss": 0.2314, "step": 7150 }, { "epoch": 0.4966832863646217, "grad_norm": 1.265625, "learning_rate": 3.754956521739131e-06, "loss": 0.2534, "step": 7160 }, { "epoch": 0.4973769781053544, "grad_norm": 1.2578125, "learning_rate": 3.753217391304348e-06, "loss": 0.2763, "step": 7170 }, { "epoch": 0.49807066984608717, "grad_norm": 1.578125, "learning_rate": 3.7514782608695656e-06, "loss": 0.2734, "step": 7180 }, { "epoch": 0.49876436158681986, "grad_norm": 1.1640625, "learning_rate": 3.749739130434783e-06, "loss": 0.2158, "step": 7190 }, { "epoch": 0.49945805332755255, "grad_norm": 1.4765625, "learning_rate": 3.7480000000000004e-06, "loss": 0.2429, "step": 7200 }, { "epoch": 0.5001517450682853, "grad_norm": 1.2421875, "learning_rate": 3.7462608695652173e-06, "loss": 0.2961, "step": 7210 }, { "epoch": 0.5008454368090179, "grad_norm": 1.4375, "learning_rate": 3.744521739130435e-06, "loss": 0.2368, "step": 7220 }, { "epoch": 0.5015391285497507, "grad_norm": 1.5625, "learning_rate": 3.7427826086956525e-06, "loss": 0.2605, "step": 7230 }, { "epoch": 0.5022328202904834, "grad_norm": 0.83984375, "learning_rate": 3.7410434782608703e-06, "loss": 0.2194, "step": 7240 }, { "epoch": 0.5029265120312161, "grad_norm": 1.1953125, "learning_rate": 3.7393043478260872e-06, "loss": 0.2153, "step": 7250 }, { "epoch": 0.5036202037719488, "grad_norm": 1.671875, "learning_rate": 3.7375652173913046e-06, "loss": 0.2329, "step": 7260 }, { "epoch": 0.5043138955126816, "grad_norm": 1.8203125, "learning_rate": 3.735826086956522e-06, "loss": 0.2821, "step": 7270 }, { "epoch": 0.5050075872534142, "grad_norm": 1.3984375, "learning_rate": 3.7340869565217398e-06, "loss": 0.263, "step": 7280 }, { "epoch": 0.505701278994147, "grad_norm": 1.3046875, "learning_rate": 3.7323478260869567e-06, "loss": 0.2428, "step": 7290 }, { "epoch": 0.5063949707348797, "grad_norm": 1.015625, "learning_rate": 3.730608695652174e-06, "loss": 0.2353, "step": 7300 }, { "epoch": 0.5070886624756124, "grad_norm": 1.0234375, "learning_rate": 3.728869565217392e-06, "loss": 0.2616, "step": 7310 }, { "epoch": 0.5077823542163451, "grad_norm": 1.3203125, "learning_rate": 3.7271304347826092e-06, "loss": 0.2314, "step": 7320 }, { "epoch": 0.5084760459570778, "grad_norm": 1.171875, "learning_rate": 3.725391304347826e-06, "loss": 0.2484, "step": 7330 }, { "epoch": 0.5091697376978105, "grad_norm": 1.1796875, "learning_rate": 3.723652173913044e-06, "loss": 0.2182, "step": 7340 }, { "epoch": 0.5098634294385432, "grad_norm": 1.296875, "learning_rate": 3.7219130434782614e-06, "loss": 0.1906, "step": 7350 }, { "epoch": 0.510557121179276, "grad_norm": 1.2421875, "learning_rate": 3.7201739130434783e-06, "loss": 0.2971, "step": 7360 }, { "epoch": 0.5112508129200086, "grad_norm": 1.359375, "learning_rate": 3.7184347826086957e-06, "loss": 0.2281, "step": 7370 }, { "epoch": 0.5119445046607414, "grad_norm": 1.4375, "learning_rate": 3.7166956521739135e-06, "loss": 0.2597, "step": 7380 }, { "epoch": 0.5126381964014741, "grad_norm": 1.3125, "learning_rate": 3.714956521739131e-06, "loss": 0.2685, "step": 7390 }, { "epoch": 0.5133318881422068, "grad_norm": 0.98046875, "learning_rate": 3.713217391304348e-06, "loss": 0.2763, "step": 7400 }, { "epoch": 0.5140255798829395, "grad_norm": 1.3203125, "learning_rate": 3.7114782608695656e-06, "loss": 0.2088, "step": 7410 }, { "epoch": 0.5147192716236723, "grad_norm": 4.9375, "learning_rate": 3.709739130434783e-06, "loss": 0.2388, "step": 7420 }, { "epoch": 0.5154129633644049, "grad_norm": 1.3046875, "learning_rate": 3.7080000000000003e-06, "loss": 0.2594, "step": 7430 }, { "epoch": 0.5161066551051376, "grad_norm": 1.7265625, "learning_rate": 3.7062608695652173e-06, "loss": 0.2595, "step": 7440 }, { "epoch": 0.5168003468458704, "grad_norm": 1.2578125, "learning_rate": 3.704521739130435e-06, "loss": 0.2404, "step": 7450 }, { "epoch": 0.517494038586603, "grad_norm": 1.25, "learning_rate": 3.7027826086956524e-06, "loss": 0.2635, "step": 7460 }, { "epoch": 0.5181877303273358, "grad_norm": 0.875, "learning_rate": 3.7010434782608702e-06, "loss": 0.245, "step": 7470 }, { "epoch": 0.5188814220680685, "grad_norm": 1.171875, "learning_rate": 3.699304347826087e-06, "loss": 0.2676, "step": 7480 }, { "epoch": 0.5195751138088012, "grad_norm": 1.21875, "learning_rate": 3.6975652173913046e-06, "loss": 0.2699, "step": 7490 }, { "epoch": 0.5202688055495339, "grad_norm": 1.234375, "learning_rate": 3.695826086956522e-06, "loss": 0.244, "step": 7500 }, { "epoch": 0.5209624972902667, "grad_norm": 1.390625, "learning_rate": 3.6940869565217397e-06, "loss": 0.2438, "step": 7510 }, { "epoch": 0.5216561890309993, "grad_norm": 1.2890625, "learning_rate": 3.6923478260869567e-06, "loss": 0.2516, "step": 7520 }, { "epoch": 0.5223498807717321, "grad_norm": 1.0625, "learning_rate": 3.690608695652174e-06, "loss": 0.2458, "step": 7530 }, { "epoch": 0.5230435725124648, "grad_norm": 1.375, "learning_rate": 3.688869565217392e-06, "loss": 0.2696, "step": 7540 }, { "epoch": 0.5237372642531974, "grad_norm": 1.453125, "learning_rate": 3.687130434782609e-06, "loss": 0.2365, "step": 7550 }, { "epoch": 0.5244309559939302, "grad_norm": 1.609375, "learning_rate": 3.685391304347826e-06, "loss": 0.2247, "step": 7560 }, { "epoch": 0.5251246477346629, "grad_norm": 1.390625, "learning_rate": 3.683652173913044e-06, "loss": 0.2388, "step": 7570 }, { "epoch": 0.5258183394753956, "grad_norm": 1.171875, "learning_rate": 3.6819130434782613e-06, "loss": 0.2709, "step": 7580 }, { "epoch": 0.5265120312161283, "grad_norm": 1.2578125, "learning_rate": 3.6801739130434787e-06, "loss": 0.3069, "step": 7590 }, { "epoch": 0.5272057229568611, "grad_norm": 1.046875, "learning_rate": 3.6784347826086956e-06, "loss": 0.2254, "step": 7600 }, { "epoch": 0.5278994146975937, "grad_norm": 1.0390625, "learning_rate": 3.6766956521739134e-06, "loss": 0.2332, "step": 7610 }, { "epoch": 0.5285931064383265, "grad_norm": 1.3359375, "learning_rate": 3.674956521739131e-06, "loss": 0.2917, "step": 7620 }, { "epoch": 0.5292867981790592, "grad_norm": 1.203125, "learning_rate": 3.6732173913043486e-06, "loss": 0.2575, "step": 7630 }, { "epoch": 0.5299804899197919, "grad_norm": 0.96875, "learning_rate": 3.6714782608695655e-06, "loss": 0.2636, "step": 7640 }, { "epoch": 0.5306741816605246, "grad_norm": 1.0390625, "learning_rate": 3.669739130434783e-06, "loss": 0.2782, "step": 7650 }, { "epoch": 0.5313678734012574, "grad_norm": 1.4296875, "learning_rate": 3.6680000000000003e-06, "loss": 0.2155, "step": 7660 }, { "epoch": 0.53206156514199, "grad_norm": 1.96875, "learning_rate": 3.6662608695652172e-06, "loss": 0.2726, "step": 7670 }, { "epoch": 0.5327552568827227, "grad_norm": 1.453125, "learning_rate": 3.664521739130435e-06, "loss": 0.2456, "step": 7680 }, { "epoch": 0.5334489486234555, "grad_norm": 1.328125, "learning_rate": 3.6627826086956524e-06, "loss": 0.2691, "step": 7690 }, { "epoch": 0.5341426403641881, "grad_norm": 1.421875, "learning_rate": 3.66104347826087e-06, "loss": 0.2618, "step": 7700 }, { "epoch": 0.5348363321049209, "grad_norm": 1.3359375, "learning_rate": 3.659304347826087e-06, "loss": 0.2684, "step": 7710 }, { "epoch": 0.5355300238456536, "grad_norm": 0.8828125, "learning_rate": 3.6575652173913045e-06, "loss": 0.2165, "step": 7720 }, { "epoch": 0.5362237155863863, "grad_norm": 0.79296875, "learning_rate": 3.655826086956522e-06, "loss": 0.2917, "step": 7730 }, { "epoch": 0.536917407327119, "grad_norm": 1.203125, "learning_rate": 3.6540869565217397e-06, "loss": 0.2629, "step": 7740 }, { "epoch": 0.5376110990678518, "grad_norm": 1.046875, "learning_rate": 3.6523478260869566e-06, "loss": 0.2612, "step": 7750 }, { "epoch": 0.5383047908085844, "grad_norm": 1.1953125, "learning_rate": 3.650608695652174e-06, "loss": 0.3481, "step": 7760 }, { "epoch": 0.5389984825493171, "grad_norm": 1.5859375, "learning_rate": 3.648869565217392e-06, "loss": 0.2939, "step": 7770 }, { "epoch": 0.5396921742900499, "grad_norm": 1.1015625, "learning_rate": 3.647130434782609e-06, "loss": 0.232, "step": 7780 }, { "epoch": 0.5403858660307825, "grad_norm": 1.3359375, "learning_rate": 3.645391304347826e-06, "loss": 0.2843, "step": 7790 }, { "epoch": 0.5410795577715153, "grad_norm": 1.0, "learning_rate": 3.643652173913044e-06, "loss": 0.2311, "step": 7800 }, { "epoch": 0.541773249512248, "grad_norm": 1.078125, "learning_rate": 3.6419130434782613e-06, "loss": 0.2587, "step": 7810 }, { "epoch": 0.5424669412529807, "grad_norm": 0.95703125, "learning_rate": 3.6401739130434786e-06, "loss": 0.2894, "step": 7820 }, { "epoch": 0.5431606329937134, "grad_norm": 1.0078125, "learning_rate": 3.6384347826086956e-06, "loss": 0.2337, "step": 7830 }, { "epoch": 0.5438543247344462, "grad_norm": 0.9921875, "learning_rate": 3.6366956521739134e-06, "loss": 0.2619, "step": 7840 }, { "epoch": 0.5445480164751788, "grad_norm": 1.15625, "learning_rate": 3.6349565217391308e-06, "loss": 0.3071, "step": 7850 }, { "epoch": 0.5452417082159116, "grad_norm": 0.87109375, "learning_rate": 3.6332173913043486e-06, "loss": 0.2436, "step": 7860 }, { "epoch": 0.5459353999566443, "grad_norm": 0.98046875, "learning_rate": 3.6314782608695655e-06, "loss": 0.316, "step": 7870 }, { "epoch": 0.546629091697377, "grad_norm": 1.6328125, "learning_rate": 3.629739130434783e-06, "loss": 0.2135, "step": 7880 }, { "epoch": 0.5473227834381097, "grad_norm": 1.3671875, "learning_rate": 3.6280000000000002e-06, "loss": 0.2578, "step": 7890 }, { "epoch": 0.5480164751788424, "grad_norm": 1.2578125, "learning_rate": 3.626260869565218e-06, "loss": 0.2587, "step": 7900 }, { "epoch": 0.5487101669195751, "grad_norm": 1.3828125, "learning_rate": 3.624521739130435e-06, "loss": 0.2425, "step": 7910 }, { "epoch": 0.5494038586603078, "grad_norm": 1.421875, "learning_rate": 3.6227826086956524e-06, "loss": 0.2497, "step": 7920 }, { "epoch": 0.5500975504010406, "grad_norm": 1.0546875, "learning_rate": 3.62104347826087e-06, "loss": 0.1969, "step": 7930 }, { "epoch": 0.5507912421417732, "grad_norm": 1.25, "learning_rate": 3.6193043478260875e-06, "loss": 0.2794, "step": 7940 }, { "epoch": 0.551484933882506, "grad_norm": 1.4921875, "learning_rate": 3.6175652173913045e-06, "loss": 0.2452, "step": 7950 }, { "epoch": 0.5521786256232387, "grad_norm": 1.1640625, "learning_rate": 3.615826086956522e-06, "loss": 0.2181, "step": 7960 }, { "epoch": 0.5528723173639714, "grad_norm": 1.2265625, "learning_rate": 3.6140869565217396e-06, "loss": 0.2407, "step": 7970 }, { "epoch": 0.5535660091047041, "grad_norm": 1.0703125, "learning_rate": 3.6123478260869566e-06, "loss": 0.2556, "step": 7980 }, { "epoch": 0.5542597008454369, "grad_norm": 1.234375, "learning_rate": 3.610608695652174e-06, "loss": 0.2783, "step": 7990 }, { "epoch": 0.5549533925861695, "grad_norm": 1.59375, "learning_rate": 3.6088695652173918e-06, "loss": 0.2702, "step": 8000 }, { "epoch": 0.5556470843269022, "grad_norm": 1.359375, "learning_rate": 3.607130434782609e-06, "loss": 0.2606, "step": 8010 }, { "epoch": 0.556340776067635, "grad_norm": 1.21875, "learning_rate": 3.605391304347826e-06, "loss": 0.2103, "step": 8020 }, { "epoch": 0.5570344678083676, "grad_norm": 1.34375, "learning_rate": 3.603652173913044e-06, "loss": 0.2451, "step": 8030 }, { "epoch": 0.5577281595491004, "grad_norm": 1.2734375, "learning_rate": 3.6019130434782612e-06, "loss": 0.2489, "step": 8040 }, { "epoch": 0.5584218512898331, "grad_norm": 1.2578125, "learning_rate": 3.6001739130434786e-06, "loss": 0.2254, "step": 8050 }, { "epoch": 0.5591155430305658, "grad_norm": 1.5625, "learning_rate": 3.5984347826086956e-06, "loss": 0.2404, "step": 8060 }, { "epoch": 0.5598092347712985, "grad_norm": 1.3125, "learning_rate": 3.5966956521739134e-06, "loss": 0.3151, "step": 8070 }, { "epoch": 0.5605029265120313, "grad_norm": 1.4296875, "learning_rate": 3.5949565217391307e-06, "loss": 0.2554, "step": 8080 }, { "epoch": 0.5611966182527639, "grad_norm": 1.234375, "learning_rate": 3.5932173913043485e-06, "loss": 0.25, "step": 8090 }, { "epoch": 0.5618903099934967, "grad_norm": 1.390625, "learning_rate": 3.5914782608695655e-06, "loss": 0.2968, "step": 8100 }, { "epoch": 0.5625840017342294, "grad_norm": 1.2109375, "learning_rate": 3.589739130434783e-06, "loss": 0.2523, "step": 8110 }, { "epoch": 0.563277693474962, "grad_norm": 1.2421875, "learning_rate": 3.588e-06, "loss": 0.2254, "step": 8120 }, { "epoch": 0.5639713852156948, "grad_norm": 1.578125, "learning_rate": 3.586260869565218e-06, "loss": 0.272, "step": 8130 }, { "epoch": 0.5646650769564275, "grad_norm": 1.6953125, "learning_rate": 3.584521739130435e-06, "loss": 0.3524, "step": 8140 }, { "epoch": 0.5653587686971602, "grad_norm": 1.1640625, "learning_rate": 3.5827826086956523e-06, "loss": 0.2291, "step": 8150 }, { "epoch": 0.5660524604378929, "grad_norm": 1.34375, "learning_rate": 3.58104347826087e-06, "loss": 0.27, "step": 8160 }, { "epoch": 0.5667461521786257, "grad_norm": 1.1640625, "learning_rate": 3.5793043478260875e-06, "loss": 0.324, "step": 8170 }, { "epoch": 0.5674398439193583, "grad_norm": 1.015625, "learning_rate": 3.5775652173913044e-06, "loss": 0.3121, "step": 8180 }, { "epoch": 0.5681335356600911, "grad_norm": 1.0078125, "learning_rate": 3.575826086956522e-06, "loss": 0.2035, "step": 8190 }, { "epoch": 0.5688272274008238, "grad_norm": 1.1015625, "learning_rate": 3.5740869565217396e-06, "loss": 0.2779, "step": 8200 }, { "epoch": 0.5695209191415564, "grad_norm": 1.46875, "learning_rate": 3.572347826086957e-06, "loss": 0.2545, "step": 8210 }, { "epoch": 0.5702146108822892, "grad_norm": 1.2734375, "learning_rate": 3.570608695652174e-06, "loss": 0.3188, "step": 8220 }, { "epoch": 0.570908302623022, "grad_norm": 1.1875, "learning_rate": 3.5688695652173917e-06, "loss": 0.2356, "step": 8230 }, { "epoch": 0.5716019943637546, "grad_norm": 1.1484375, "learning_rate": 3.567130434782609e-06, "loss": 0.2316, "step": 8240 }, { "epoch": 0.5722956861044873, "grad_norm": 1.5390625, "learning_rate": 3.5653913043478265e-06, "loss": 0.2724, "step": 8250 }, { "epoch": 0.5729893778452201, "grad_norm": 1.34375, "learning_rate": 3.563652173913044e-06, "loss": 0.2535, "step": 8260 }, { "epoch": 0.5736830695859527, "grad_norm": 1.125, "learning_rate": 3.561913043478261e-06, "loss": 0.2413, "step": 8270 }, { "epoch": 0.5743767613266855, "grad_norm": 0.98046875, "learning_rate": 3.5601739130434786e-06, "loss": 0.2488, "step": 8280 }, { "epoch": 0.5750704530674182, "grad_norm": 1.140625, "learning_rate": 3.5584347826086955e-06, "loss": 0.2248, "step": 8290 }, { "epoch": 0.5757641448081509, "grad_norm": 1.1640625, "learning_rate": 3.5566956521739133e-06, "loss": 0.2339, "step": 8300 }, { "epoch": 0.5764578365488836, "grad_norm": 2.25, "learning_rate": 3.5549565217391307e-06, "loss": 0.3776, "step": 8310 }, { "epoch": 0.5771515282896164, "grad_norm": 1.2109375, "learning_rate": 3.5532173913043485e-06, "loss": 0.2102, "step": 8320 }, { "epoch": 0.577845220030349, "grad_norm": 0.9921875, "learning_rate": 3.5514782608695654e-06, "loss": 0.2692, "step": 8330 }, { "epoch": 0.5785389117710817, "grad_norm": 1.6796875, "learning_rate": 3.549739130434783e-06, "loss": 0.3611, "step": 8340 }, { "epoch": 0.5792326035118145, "grad_norm": 1.15625, "learning_rate": 3.548e-06, "loss": 0.227, "step": 8350 }, { "epoch": 0.5799262952525471, "grad_norm": 1.015625, "learning_rate": 3.546260869565218e-06, "loss": 0.242, "step": 8360 }, { "epoch": 0.5806199869932799, "grad_norm": 1.09375, "learning_rate": 3.544521739130435e-06, "loss": 0.3021, "step": 8370 }, { "epoch": 0.5813136787340125, "grad_norm": 1.2265625, "learning_rate": 3.5427826086956523e-06, "loss": 0.2542, "step": 8380 }, { "epoch": 0.5820073704747453, "grad_norm": 1.2734375, "learning_rate": 3.54104347826087e-06, "loss": 0.24, "step": 8390 }, { "epoch": 0.582701062215478, "grad_norm": 1.046875, "learning_rate": 3.5393043478260874e-06, "loss": 0.2421, "step": 8400 }, { "epoch": 0.5833947539562107, "grad_norm": 0.984375, "learning_rate": 3.5375652173913044e-06, "loss": 0.212, "step": 8410 }, { "epoch": 0.5840884456969434, "grad_norm": 1.0390625, "learning_rate": 3.5358260869565218e-06, "loss": 0.2281, "step": 8420 }, { "epoch": 0.5847821374376762, "grad_norm": 1.40625, "learning_rate": 3.5340869565217396e-06, "loss": 0.2296, "step": 8430 }, { "epoch": 0.5854758291784088, "grad_norm": 1.640625, "learning_rate": 3.532347826086957e-06, "loss": 0.2546, "step": 8440 }, { "epoch": 0.5861695209191415, "grad_norm": 1.7109375, "learning_rate": 3.530608695652174e-06, "loss": 0.2691, "step": 8450 }, { "epoch": 0.5868632126598743, "grad_norm": 1.5078125, "learning_rate": 3.5288695652173917e-06, "loss": 0.2716, "step": 8460 }, { "epoch": 0.5875569044006069, "grad_norm": 1.3046875, "learning_rate": 3.527130434782609e-06, "loss": 0.3, "step": 8470 }, { "epoch": 0.5882505961413397, "grad_norm": 0.98046875, "learning_rate": 3.5253913043478264e-06, "loss": 0.2811, "step": 8480 }, { "epoch": 0.5889442878820724, "grad_norm": 1.265625, "learning_rate": 3.5236521739130438e-06, "loss": 0.2327, "step": 8490 }, { "epoch": 0.5896379796228051, "grad_norm": 1.03125, "learning_rate": 3.521913043478261e-06, "loss": 0.2393, "step": 8500 }, { "epoch": 0.5903316713635378, "grad_norm": 1.03125, "learning_rate": 3.5201739130434785e-06, "loss": 0.2633, "step": 8510 }, { "epoch": 0.5910253631042706, "grad_norm": 1.1015625, "learning_rate": 3.5184347826086963e-06, "loss": 0.2459, "step": 8520 }, { "epoch": 0.5917190548450032, "grad_norm": 1.1328125, "learning_rate": 3.5166956521739133e-06, "loss": 0.2063, "step": 8530 }, { "epoch": 0.592412746585736, "grad_norm": 0.88671875, "learning_rate": 3.5149565217391306e-06, "loss": 0.2193, "step": 8540 }, { "epoch": 0.5931064383264687, "grad_norm": 1.3828125, "learning_rate": 3.5132173913043484e-06, "loss": 0.3143, "step": 8550 }, { "epoch": 0.5938001300672013, "grad_norm": 1.2421875, "learning_rate": 3.511478260869566e-06, "loss": 0.2205, "step": 8560 }, { "epoch": 0.5944938218079341, "grad_norm": 1.2734375, "learning_rate": 3.5097391304347828e-06, "loss": 0.2392, "step": 8570 }, { "epoch": 0.5951875135486668, "grad_norm": 1.421875, "learning_rate": 3.508e-06, "loss": 0.252, "step": 8580 }, { "epoch": 0.5958812052893995, "grad_norm": 1.4296875, "learning_rate": 3.506260869565218e-06, "loss": 0.318, "step": 8590 }, { "epoch": 0.5965748970301322, "grad_norm": 1.078125, "learning_rate": 3.504521739130435e-06, "loss": 0.2799, "step": 8600 }, { "epoch": 0.597268588770865, "grad_norm": 1.25, "learning_rate": 3.5027826086956522e-06, "loss": 0.2301, "step": 8610 }, { "epoch": 0.5979622805115976, "grad_norm": 1.28125, "learning_rate": 3.50104347826087e-06, "loss": 0.2516, "step": 8620 }, { "epoch": 0.5986559722523304, "grad_norm": 1.375, "learning_rate": 3.4993043478260874e-06, "loss": 0.2338, "step": 8630 }, { "epoch": 0.5993496639930631, "grad_norm": 1.09375, "learning_rate": 3.4975652173913044e-06, "loss": 0.2511, "step": 8640 }, { "epoch": 0.6000433557337957, "grad_norm": 1.15625, "learning_rate": 3.4958260869565217e-06, "loss": 0.2401, "step": 8650 }, { "epoch": 0.6007370474745285, "grad_norm": 1.09375, "learning_rate": 3.4940869565217395e-06, "loss": 0.2961, "step": 8660 }, { "epoch": 0.6014307392152612, "grad_norm": 1.1953125, "learning_rate": 3.492347826086957e-06, "loss": 0.2639, "step": 8670 }, { "epoch": 0.6021244309559939, "grad_norm": 1.2578125, "learning_rate": 3.490608695652174e-06, "loss": 0.2283, "step": 8680 }, { "epoch": 0.6028181226967266, "grad_norm": 1.0703125, "learning_rate": 3.4888695652173916e-06, "loss": 0.2404, "step": 8690 }, { "epoch": 0.6035118144374594, "grad_norm": 1.375, "learning_rate": 3.487130434782609e-06, "loss": 0.2559, "step": 8700 }, { "epoch": 0.604205506178192, "grad_norm": 1.0390625, "learning_rate": 3.4853913043478264e-06, "loss": 0.24, "step": 8710 }, { "epoch": 0.6048991979189248, "grad_norm": 1.078125, "learning_rate": 3.4836521739130437e-06, "loss": 0.2297, "step": 8720 }, { "epoch": 0.6055928896596575, "grad_norm": 1.296875, "learning_rate": 3.481913043478261e-06, "loss": 0.2517, "step": 8730 }, { "epoch": 0.6062865814003902, "grad_norm": 1.1875, "learning_rate": 3.4801739130434785e-06, "loss": 0.2096, "step": 8740 }, { "epoch": 0.6069802731411229, "grad_norm": 1.4609375, "learning_rate": 3.4784347826086963e-06, "loss": 0.2116, "step": 8750 }, { "epoch": 0.6076739648818557, "grad_norm": 0.79296875, "learning_rate": 3.4766956521739132e-06, "loss": 0.1944, "step": 8760 }, { "epoch": 0.6083676566225883, "grad_norm": 1.3125, "learning_rate": 3.4749565217391306e-06, "loss": 0.2687, "step": 8770 }, { "epoch": 0.609061348363321, "grad_norm": 1.2421875, "learning_rate": 3.4732173913043484e-06, "loss": 0.283, "step": 8780 }, { "epoch": 0.6097550401040538, "grad_norm": 1.3359375, "learning_rate": 3.4714782608695658e-06, "loss": 0.2059, "step": 8790 }, { "epoch": 0.6104487318447864, "grad_norm": 0.9765625, "learning_rate": 3.4697391304347827e-06, "loss": 0.3626, "step": 8800 }, { "epoch": 0.6111424235855192, "grad_norm": 1.2578125, "learning_rate": 3.468e-06, "loss": 0.2557, "step": 8810 }, { "epoch": 0.6118361153262519, "grad_norm": 1.3203125, "learning_rate": 3.466260869565218e-06, "loss": 0.2391, "step": 8820 }, { "epoch": 0.6125298070669846, "grad_norm": 1.265625, "learning_rate": 3.4645217391304353e-06, "loss": 0.296, "step": 8830 }, { "epoch": 0.6132234988077173, "grad_norm": 1.046875, "learning_rate": 3.462782608695652e-06, "loss": 0.2482, "step": 8840 }, { "epoch": 0.6139171905484501, "grad_norm": 1.359375, "learning_rate": 3.46104347826087e-06, "loss": 0.2761, "step": 8850 }, { "epoch": 0.6146108822891827, "grad_norm": 1.2890625, "learning_rate": 3.4593043478260874e-06, "loss": 0.2493, "step": 8860 }, { "epoch": 0.6153045740299155, "grad_norm": 1.1328125, "learning_rate": 3.4575652173913047e-06, "loss": 0.2301, "step": 8870 }, { "epoch": 0.6159982657706482, "grad_norm": 0.99609375, "learning_rate": 3.4558260869565217e-06, "loss": 0.2608, "step": 8880 }, { "epoch": 0.6166919575113808, "grad_norm": 0.92578125, "learning_rate": 3.4540869565217395e-06, "loss": 0.2481, "step": 8890 }, { "epoch": 0.6173856492521136, "grad_norm": 1.4140625, "learning_rate": 3.452347826086957e-06, "loss": 0.2292, "step": 8900 }, { "epoch": 0.6180793409928463, "grad_norm": 1.1015625, "learning_rate": 3.450608695652174e-06, "loss": 0.2091, "step": 8910 }, { "epoch": 0.618773032733579, "grad_norm": 1.2421875, "learning_rate": 3.4488695652173916e-06, "loss": 0.2997, "step": 8920 }, { "epoch": 0.6194667244743117, "grad_norm": 1.453125, "learning_rate": 3.447130434782609e-06, "loss": 0.3502, "step": 8930 }, { "epoch": 0.6201604162150445, "grad_norm": 1.7578125, "learning_rate": 3.4453913043478263e-06, "loss": 0.313, "step": 8940 }, { "epoch": 0.6208541079557771, "grad_norm": 1.2109375, "learning_rate": 3.4436521739130437e-06, "loss": 0.2215, "step": 8950 }, { "epoch": 0.6215477996965099, "grad_norm": 1.3203125, "learning_rate": 3.441913043478261e-06, "loss": 0.2398, "step": 8960 }, { "epoch": 0.6222414914372426, "grad_norm": 1.046875, "learning_rate": 3.4401739130434784e-06, "loss": 0.2002, "step": 8970 }, { "epoch": 0.6229351831779752, "grad_norm": 1.484375, "learning_rate": 3.4384347826086962e-06, "loss": 0.2549, "step": 8980 }, { "epoch": 0.623628874918708, "grad_norm": 1.1328125, "learning_rate": 3.436695652173913e-06, "loss": 0.2009, "step": 8990 }, { "epoch": 0.6243225666594407, "grad_norm": 0.9375, "learning_rate": 3.4349565217391306e-06, "loss": 0.22, "step": 9000 }, { "epoch": 0.6250162584001734, "grad_norm": 1.1875, "learning_rate": 3.4332173913043484e-06, "loss": 0.2436, "step": 9010 }, { "epoch": 0.6257099501409061, "grad_norm": 0.96484375, "learning_rate": 3.4314782608695657e-06, "loss": 0.2411, "step": 9020 }, { "epoch": 0.6264036418816389, "grad_norm": 1.15625, "learning_rate": 3.4297391304347827e-06, "loss": 0.2155, "step": 9030 }, { "epoch": 0.6270973336223715, "grad_norm": 1.765625, "learning_rate": 3.428e-06, "loss": 0.26, "step": 9040 }, { "epoch": 0.6277910253631043, "grad_norm": 1.296875, "learning_rate": 3.426260869565218e-06, "loss": 0.2602, "step": 9050 }, { "epoch": 0.628484717103837, "grad_norm": 1.0859375, "learning_rate": 3.424521739130435e-06, "loss": 0.2459, "step": 9060 }, { "epoch": 0.6291784088445697, "grad_norm": 1.515625, "learning_rate": 3.422782608695652e-06, "loss": 0.3279, "step": 9070 }, { "epoch": 0.6298721005853024, "grad_norm": 1.09375, "learning_rate": 3.42104347826087e-06, "loss": 0.2208, "step": 9080 }, { "epoch": 0.6305657923260352, "grad_norm": 1.046875, "learning_rate": 3.4193043478260873e-06, "loss": 0.2717, "step": 9090 }, { "epoch": 0.6312594840667678, "grad_norm": 1.0546875, "learning_rate": 3.4175652173913047e-06, "loss": 0.2052, "step": 9100 }, { "epoch": 0.6319531758075005, "grad_norm": 1.1171875, "learning_rate": 3.4158260869565216e-06, "loss": 0.2791, "step": 9110 }, { "epoch": 0.6326468675482333, "grad_norm": 1.109375, "learning_rate": 3.4140869565217394e-06, "loss": 0.2904, "step": 9120 }, { "epoch": 0.6333405592889659, "grad_norm": 1.046875, "learning_rate": 3.412347826086957e-06, "loss": 0.2522, "step": 9130 }, { "epoch": 0.6340342510296987, "grad_norm": 1.4140625, "learning_rate": 3.4106086956521746e-06, "loss": 0.2051, "step": 9140 }, { "epoch": 0.6347279427704314, "grad_norm": 1.515625, "learning_rate": 3.4088695652173915e-06, "loss": 0.2321, "step": 9150 }, { "epoch": 0.6354216345111641, "grad_norm": 1.203125, "learning_rate": 3.407130434782609e-06, "loss": 0.2734, "step": 9160 }, { "epoch": 0.6361153262518968, "grad_norm": 1.1796875, "learning_rate": 3.4053913043478263e-06, "loss": 0.2302, "step": 9170 }, { "epoch": 0.6368090179926296, "grad_norm": 1.40625, "learning_rate": 3.403652173913044e-06, "loss": 0.2541, "step": 9180 }, { "epoch": 0.6375027097333622, "grad_norm": 1.5, "learning_rate": 3.401913043478261e-06, "loss": 0.2151, "step": 9190 }, { "epoch": 0.638196401474095, "grad_norm": 1.2421875, "learning_rate": 3.4001739130434784e-06, "loss": 0.2795, "step": 9200 }, { "epoch": 0.6388900932148277, "grad_norm": 1.2734375, "learning_rate": 3.398434782608696e-06, "loss": 0.2498, "step": 9210 }, { "epoch": 0.6395837849555603, "grad_norm": 2.0625, "learning_rate": 3.396695652173913e-06, "loss": 0.2836, "step": 9220 }, { "epoch": 0.6402774766962931, "grad_norm": 1.125, "learning_rate": 3.3949565217391305e-06, "loss": 0.2346, "step": 9230 }, { "epoch": 0.6409711684370258, "grad_norm": 1.03125, "learning_rate": 3.3932173913043483e-06, "loss": 0.2447, "step": 9240 }, { "epoch": 0.6416648601777585, "grad_norm": 1.2109375, "learning_rate": 3.3914782608695657e-06, "loss": 0.2775, "step": 9250 }, { "epoch": 0.6423585519184912, "grad_norm": 1.4296875, "learning_rate": 3.3897391304347826e-06, "loss": 0.2225, "step": 9260 }, { "epoch": 0.643052243659224, "grad_norm": 1.4609375, "learning_rate": 3.388e-06, "loss": 0.2327, "step": 9270 }, { "epoch": 0.6437459353999566, "grad_norm": 1.078125, "learning_rate": 3.386260869565218e-06, "loss": 0.2091, "step": 9280 }, { "epoch": 0.6444396271406894, "grad_norm": 1.484375, "learning_rate": 3.384521739130435e-06, "loss": 0.244, "step": 9290 }, { "epoch": 0.6451333188814221, "grad_norm": 1.25, "learning_rate": 3.382782608695652e-06, "loss": 0.2169, "step": 9300 }, { "epoch": 0.6458270106221548, "grad_norm": 1.4296875, "learning_rate": 3.38104347826087e-06, "loss": 0.3279, "step": 9310 }, { "epoch": 0.6465207023628875, "grad_norm": 1.046875, "learning_rate": 3.3793043478260873e-06, "loss": 0.2176, "step": 9320 }, { "epoch": 0.6472143941036202, "grad_norm": 1.96875, "learning_rate": 3.3775652173913047e-06, "loss": 0.315, "step": 9330 }, { "epoch": 0.6479080858443529, "grad_norm": 1.2421875, "learning_rate": 3.3758260869565216e-06, "loss": 0.2778, "step": 9340 }, { "epoch": 0.6486017775850856, "grad_norm": 0.92578125, "learning_rate": 3.3740869565217394e-06, "loss": 0.2681, "step": 9350 }, { "epoch": 0.6492954693258184, "grad_norm": 1.078125, "learning_rate": 3.3723478260869568e-06, "loss": 0.2078, "step": 9360 }, { "epoch": 0.649989161066551, "grad_norm": 1.8984375, "learning_rate": 3.3706086956521746e-06, "loss": 0.3201, "step": 9370 }, { "epoch": 0.6506828528072838, "grad_norm": 1.078125, "learning_rate": 3.3688695652173915e-06, "loss": 0.2226, "step": 9380 }, { "epoch": 0.6513765445480165, "grad_norm": 1.171875, "learning_rate": 3.367130434782609e-06, "loss": 0.2847, "step": 9390 }, { "epoch": 0.6520702362887492, "grad_norm": 0.99609375, "learning_rate": 3.3653913043478263e-06, "loss": 0.2418, "step": 9400 }, { "epoch": 0.6527639280294819, "grad_norm": 2.078125, "learning_rate": 3.363652173913044e-06, "loss": 0.2898, "step": 9410 }, { "epoch": 0.6534576197702147, "grad_norm": 1.109375, "learning_rate": 3.361913043478261e-06, "loss": 0.2522, "step": 9420 }, { "epoch": 0.6541513115109473, "grad_norm": 1.203125, "learning_rate": 3.3601739130434784e-06, "loss": 0.2297, "step": 9430 }, { "epoch": 0.65484500325168, "grad_norm": 1.1484375, "learning_rate": 3.358434782608696e-06, "loss": 0.2395, "step": 9440 }, { "epoch": 0.6555386949924128, "grad_norm": 1.4453125, "learning_rate": 3.3566956521739135e-06, "loss": 0.2372, "step": 9450 }, { "epoch": 0.6562323867331454, "grad_norm": 1.265625, "learning_rate": 3.3549565217391305e-06, "loss": 0.2412, "step": 9460 }, { "epoch": 0.6569260784738782, "grad_norm": 1.28125, "learning_rate": 3.3532173913043483e-06, "loss": 0.2586, "step": 9470 }, { "epoch": 0.6576197702146109, "grad_norm": 1.5703125, "learning_rate": 3.3514782608695656e-06, "loss": 0.269, "step": 9480 }, { "epoch": 0.6583134619553436, "grad_norm": 1.1796875, "learning_rate": 3.349739130434783e-06, "loss": 0.2298, "step": 9490 }, { "epoch": 0.6590071536960763, "grad_norm": 0.9609375, "learning_rate": 3.348e-06, "loss": 0.2737, "step": 9500 }, { "epoch": 0.6597008454368091, "grad_norm": 1.4453125, "learning_rate": 3.3462608695652178e-06, "loss": 0.2729, "step": 9510 }, { "epoch": 0.6603945371775417, "grad_norm": 1.0625, "learning_rate": 3.344521739130435e-06, "loss": 0.2285, "step": 9520 }, { "epoch": 0.6610882289182745, "grad_norm": 1.0390625, "learning_rate": 3.342782608695652e-06, "loss": 0.2581, "step": 9530 }, { "epoch": 0.6617819206590072, "grad_norm": 1.21875, "learning_rate": 3.34104347826087e-06, "loss": 0.3278, "step": 9540 }, { "epoch": 0.6624756123997398, "grad_norm": 0.9765625, "learning_rate": 3.3393043478260872e-06, "loss": 0.2526, "step": 9550 }, { "epoch": 0.6631693041404726, "grad_norm": 1.265625, "learning_rate": 3.3375652173913046e-06, "loss": 0.2401, "step": 9560 }, { "epoch": 0.6638629958812053, "grad_norm": 0.9765625, "learning_rate": 3.3358260869565216e-06, "loss": 0.2359, "step": 9570 }, { "epoch": 0.664556687621938, "grad_norm": 1.2265625, "learning_rate": 3.3340869565217394e-06, "loss": 0.2697, "step": 9580 }, { "epoch": 0.6652503793626707, "grad_norm": 1.3203125, "learning_rate": 3.3323478260869567e-06, "loss": 0.2191, "step": 9590 }, { "epoch": 0.6659440711034035, "grad_norm": 1.0859375, "learning_rate": 3.3306086956521745e-06, "loss": 0.2142, "step": 9600 }, { "epoch": 0.6666377628441361, "grad_norm": 1.2578125, "learning_rate": 3.3288695652173915e-06, "loss": 0.2708, "step": 9610 }, { "epoch": 0.6673314545848689, "grad_norm": 1.265625, "learning_rate": 3.327130434782609e-06, "loss": 0.2285, "step": 9620 }, { "epoch": 0.6680251463256016, "grad_norm": 1.4609375, "learning_rate": 3.325391304347826e-06, "loss": 0.2216, "step": 9630 }, { "epoch": 0.6687188380663343, "grad_norm": 1.1796875, "learning_rate": 3.323652173913044e-06, "loss": 0.2208, "step": 9640 }, { "epoch": 0.669412529807067, "grad_norm": 0.86328125, "learning_rate": 3.321913043478261e-06, "loss": 0.1785, "step": 9650 }, { "epoch": 0.6701062215477998, "grad_norm": 1.0390625, "learning_rate": 3.3201739130434783e-06, "loss": 0.2453, "step": 9660 }, { "epoch": 0.6707999132885324, "grad_norm": 1.09375, "learning_rate": 3.318434782608696e-06, "loss": 0.2761, "step": 9670 }, { "epoch": 0.6714936050292651, "grad_norm": 1.1015625, "learning_rate": 3.3166956521739135e-06, "loss": 0.2701, "step": 9680 }, { "epoch": 0.6721872967699978, "grad_norm": 1.5546875, "learning_rate": 3.3149565217391304e-06, "loss": 0.258, "step": 9690 }, { "epoch": 0.6728809885107305, "grad_norm": 1.21875, "learning_rate": 3.3132173913043482e-06, "loss": 0.2648, "step": 9700 }, { "epoch": 0.6735746802514633, "grad_norm": 1.3125, "learning_rate": 3.3114782608695656e-06, "loss": 0.2885, "step": 9710 }, { "epoch": 0.6742683719921959, "grad_norm": 1.234375, "learning_rate": 3.309739130434783e-06, "loss": 0.264, "step": 9720 }, { "epoch": 0.6749620637329287, "grad_norm": 1.09375, "learning_rate": 3.308e-06, "loss": 0.2387, "step": 9730 }, { "epoch": 0.6756557554736614, "grad_norm": 1.375, "learning_rate": 3.3062608695652177e-06, "loss": 0.3259, "step": 9740 }, { "epoch": 0.676349447214394, "grad_norm": 1.421875, "learning_rate": 3.304521739130435e-06, "loss": 0.251, "step": 9750 }, { "epoch": 0.6770431389551268, "grad_norm": 1.0859375, "learning_rate": 3.302782608695653e-06, "loss": 0.2134, "step": 9760 }, { "epoch": 0.6777368306958595, "grad_norm": 1.1171875, "learning_rate": 3.30104347826087e-06, "loss": 0.2788, "step": 9770 }, { "epoch": 0.6784305224365922, "grad_norm": 1.265625, "learning_rate": 3.299304347826087e-06, "loss": 0.2509, "step": 9780 }, { "epoch": 0.6791242141773249, "grad_norm": 1.3359375, "learning_rate": 3.2975652173913046e-06, "loss": 0.2613, "step": 9790 }, { "epoch": 0.6798179059180577, "grad_norm": 1.1328125, "learning_rate": 3.2958260869565224e-06, "loss": 0.2278, "step": 9800 }, { "epoch": 0.6805115976587903, "grad_norm": 1.0859375, "learning_rate": 3.2940869565217393e-06, "loss": 0.2223, "step": 9810 }, { "epoch": 0.6812052893995231, "grad_norm": 1.484375, "learning_rate": 3.2923478260869567e-06, "loss": 0.2233, "step": 9820 }, { "epoch": 0.6818989811402558, "grad_norm": 1.234375, "learning_rate": 3.2906086956521745e-06, "loss": 0.2087, "step": 9830 }, { "epoch": 0.6825926728809885, "grad_norm": 1.125, "learning_rate": 3.2888695652173914e-06, "loss": 0.2244, "step": 9840 }, { "epoch": 0.6832863646217212, "grad_norm": 1.03125, "learning_rate": 3.287130434782609e-06, "loss": 0.2528, "step": 9850 }, { "epoch": 0.683980056362454, "grad_norm": 1.3203125, "learning_rate": 3.2853913043478266e-06, "loss": 0.2369, "step": 9860 }, { "epoch": 0.6846737481031866, "grad_norm": 1.1875, "learning_rate": 3.283652173913044e-06, "loss": 0.2412, "step": 9870 }, { "epoch": 0.6853674398439193, "grad_norm": 1.9296875, "learning_rate": 3.281913043478261e-06, "loss": 0.2932, "step": 9880 }, { "epoch": 0.6860611315846521, "grad_norm": 1.4296875, "learning_rate": 3.2801739130434783e-06, "loss": 0.2391, "step": 9890 }, { "epoch": 0.6867548233253847, "grad_norm": 1.015625, "learning_rate": 3.278434782608696e-06, "loss": 0.2181, "step": 9900 }, { "epoch": 0.6874485150661175, "grad_norm": 0.890625, "learning_rate": 3.2766956521739134e-06, "loss": 0.2253, "step": 9910 }, { "epoch": 0.6881422068068502, "grad_norm": 1.46875, "learning_rate": 3.2749565217391304e-06, "loss": 0.2034, "step": 9920 }, { "epoch": 0.6888358985475829, "grad_norm": 1.265625, "learning_rate": 3.273217391304348e-06, "loss": 0.2552, "step": 9930 }, { "epoch": 0.6895295902883156, "grad_norm": 1.6171875, "learning_rate": 3.2714782608695656e-06, "loss": 0.3157, "step": 9940 }, { "epoch": 0.6902232820290484, "grad_norm": 0.96875, "learning_rate": 3.269739130434783e-06, "loss": 0.218, "step": 9950 }, { "epoch": 0.690916973769781, "grad_norm": 1.2265625, "learning_rate": 3.268e-06, "loss": 0.2232, "step": 9960 }, { "epoch": 0.6916106655105138, "grad_norm": 1.296875, "learning_rate": 3.2662608695652177e-06, "loss": 0.2971, "step": 9970 }, { "epoch": 0.6923043572512465, "grad_norm": 1.1953125, "learning_rate": 3.264521739130435e-06, "loss": 0.2534, "step": 9980 }, { "epoch": 0.6929980489919791, "grad_norm": 1.1328125, "learning_rate": 3.262782608695653e-06, "loss": 0.2167, "step": 9990 }, { "epoch": 0.6936917407327119, "grad_norm": 0.98828125, "learning_rate": 3.26104347826087e-06, "loss": 0.2506, "step": 10000 }, { "epoch": 0.6943854324734446, "grad_norm": 1.1484375, "learning_rate": 3.259304347826087e-06, "loss": 0.2381, "step": 10010 }, { "epoch": 0.6950791242141773, "grad_norm": 1.3515625, "learning_rate": 3.2575652173913045e-06, "loss": 0.2228, "step": 10020 }, { "epoch": 0.69577281595491, "grad_norm": 1.0078125, "learning_rate": 3.2558260869565223e-06, "loss": 0.299, "step": 10030 }, { "epoch": 0.6964665076956428, "grad_norm": 1.125, "learning_rate": 3.2540869565217393e-06, "loss": 0.2691, "step": 10040 }, { "epoch": 0.6971601994363754, "grad_norm": 1.0625, "learning_rate": 3.2523478260869566e-06, "loss": 0.2412, "step": 10050 }, { "epoch": 0.6978538911771082, "grad_norm": 1.453125, "learning_rate": 3.2506086956521744e-06, "loss": 0.2672, "step": 10060 }, { "epoch": 0.6985475829178409, "grad_norm": 1.2734375, "learning_rate": 3.248869565217392e-06, "loss": 0.2148, "step": 10070 }, { "epoch": 0.6992412746585736, "grad_norm": 1.28125, "learning_rate": 3.2471304347826088e-06, "loss": 0.246, "step": 10080 }, { "epoch": 0.6999349663993063, "grad_norm": 0.8984375, "learning_rate": 3.2453913043478266e-06, "loss": 0.2934, "step": 10090 }, { "epoch": 0.700628658140039, "grad_norm": 1.1875, "learning_rate": 3.243652173913044e-06, "loss": 0.2409, "step": 10100 }, { "epoch": 0.7013223498807717, "grad_norm": 1.046875, "learning_rate": 3.241913043478261e-06, "loss": 0.2713, "step": 10110 }, { "epoch": 0.7020160416215044, "grad_norm": 1.15625, "learning_rate": 3.2401739130434782e-06, "loss": 0.2528, "step": 10120 }, { "epoch": 0.7027097333622372, "grad_norm": 1.21875, "learning_rate": 3.238434782608696e-06, "loss": 0.2619, "step": 10130 }, { "epoch": 0.7034034251029698, "grad_norm": 1.1875, "learning_rate": 3.2366956521739134e-06, "loss": 0.2569, "step": 10140 }, { "epoch": 0.7040971168437026, "grad_norm": 1.2265625, "learning_rate": 3.2349565217391304e-06, "loss": 0.2033, "step": 10150 }, { "epoch": 0.7047908085844353, "grad_norm": 1.4765625, "learning_rate": 3.233217391304348e-06, "loss": 0.2447, "step": 10160 }, { "epoch": 0.705484500325168, "grad_norm": 0.83203125, "learning_rate": 3.2314782608695655e-06, "loss": 0.2063, "step": 10170 }, { "epoch": 0.7061781920659007, "grad_norm": 1.234375, "learning_rate": 3.229739130434783e-06, "loss": 0.235, "step": 10180 }, { "epoch": 0.7068718838066335, "grad_norm": 1.375, "learning_rate": 3.228e-06, "loss": 0.2407, "step": 10190 }, { "epoch": 0.7075655755473661, "grad_norm": 1.2421875, "learning_rate": 3.2262608695652176e-06, "loss": 0.2973, "step": 10200 }, { "epoch": 0.7082592672880988, "grad_norm": 1.34375, "learning_rate": 3.224521739130435e-06, "loss": 0.2291, "step": 10210 }, { "epoch": 0.7089529590288316, "grad_norm": 1.1328125, "learning_rate": 3.222782608695653e-06, "loss": 0.2552, "step": 10220 }, { "epoch": 0.7096466507695642, "grad_norm": 1.1953125, "learning_rate": 3.2210434782608697e-06, "loss": 0.2343, "step": 10230 }, { "epoch": 0.710340342510297, "grad_norm": 1.265625, "learning_rate": 3.219304347826087e-06, "loss": 0.2158, "step": 10240 }, { "epoch": 0.7110340342510297, "grad_norm": 1.0703125, "learning_rate": 3.2175652173913045e-06, "loss": 0.2512, "step": 10250 }, { "epoch": 0.7117277259917624, "grad_norm": 0.890625, "learning_rate": 3.2158260869565223e-06, "loss": 0.2346, "step": 10260 }, { "epoch": 0.7124214177324951, "grad_norm": 1.0078125, "learning_rate": 3.2140869565217392e-06, "loss": 0.2133, "step": 10270 }, { "epoch": 0.7131151094732279, "grad_norm": 1.15625, "learning_rate": 3.2123478260869566e-06, "loss": 0.2386, "step": 10280 }, { "epoch": 0.7138088012139605, "grad_norm": 1.109375, "learning_rate": 3.2106086956521744e-06, "loss": 0.1876, "step": 10290 }, { "epoch": 0.7145024929546933, "grad_norm": 0.953125, "learning_rate": 3.2088695652173918e-06, "loss": 0.2198, "step": 10300 }, { "epoch": 0.715196184695426, "grad_norm": 0.9453125, "learning_rate": 3.2071304347826087e-06, "loss": 0.218, "step": 10310 }, { "epoch": 0.7158898764361586, "grad_norm": 0.97265625, "learning_rate": 3.2053913043478265e-06, "loss": 0.2749, "step": 10320 }, { "epoch": 0.7165835681768914, "grad_norm": 0.99609375, "learning_rate": 3.203652173913044e-06, "loss": 0.2199, "step": 10330 }, { "epoch": 0.7172772599176241, "grad_norm": 0.9921875, "learning_rate": 3.2019130434782613e-06, "loss": 0.2523, "step": 10340 }, { "epoch": 0.7179709516583568, "grad_norm": 0.98828125, "learning_rate": 3.200173913043478e-06, "loss": 0.2313, "step": 10350 }, { "epoch": 0.7186646433990895, "grad_norm": 1.25, "learning_rate": 3.198434782608696e-06, "loss": 0.3035, "step": 10360 }, { "epoch": 0.7193583351398223, "grad_norm": 1.6015625, "learning_rate": 3.1966956521739134e-06, "loss": 0.3116, "step": 10370 }, { "epoch": 0.7200520268805549, "grad_norm": 1.21875, "learning_rate": 3.194956521739131e-06, "loss": 0.2449, "step": 10380 }, { "epoch": 0.7207457186212877, "grad_norm": 1.796875, "learning_rate": 3.193217391304348e-06, "loss": 0.3198, "step": 10390 }, { "epoch": 0.7214394103620204, "grad_norm": 1.25, "learning_rate": 3.1914782608695655e-06, "loss": 0.2539, "step": 10400 }, { "epoch": 0.722133102102753, "grad_norm": 1.125, "learning_rate": 3.189739130434783e-06, "loss": 0.3254, "step": 10410 }, { "epoch": 0.7228267938434858, "grad_norm": 1.375, "learning_rate": 3.188e-06, "loss": 0.2163, "step": 10420 }, { "epoch": 0.7235204855842186, "grad_norm": 1.2109375, "learning_rate": 3.1862608695652176e-06, "loss": 0.253, "step": 10430 }, { "epoch": 0.7242141773249512, "grad_norm": 1.0234375, "learning_rate": 3.184521739130435e-06, "loss": 0.2237, "step": 10440 }, { "epoch": 0.7249078690656839, "grad_norm": 1.234375, "learning_rate": 3.1827826086956528e-06, "loss": 0.2336, "step": 10450 }, { "epoch": 0.7256015608064167, "grad_norm": 1.046875, "learning_rate": 3.1810434782608697e-06, "loss": 0.2501, "step": 10460 }, { "epoch": 0.7262952525471493, "grad_norm": 1.3515625, "learning_rate": 3.179304347826087e-06, "loss": 0.2205, "step": 10470 }, { "epoch": 0.7269889442878821, "grad_norm": 1.515625, "learning_rate": 3.1775652173913045e-06, "loss": 0.2509, "step": 10480 }, { "epoch": 0.7276826360286148, "grad_norm": 1.109375, "learning_rate": 3.1758260869565222e-06, "loss": 0.2539, "step": 10490 }, { "epoch": 0.7283763277693475, "grad_norm": 1.0703125, "learning_rate": 3.174086956521739e-06, "loss": 0.2217, "step": 10500 }, { "epoch": 0.7290700195100802, "grad_norm": 1.4375, "learning_rate": 3.1723478260869566e-06, "loss": 0.26, "step": 10510 }, { "epoch": 0.729763711250813, "grad_norm": 1.0703125, "learning_rate": 3.1706086956521744e-06, "loss": 0.2377, "step": 10520 }, { "epoch": 0.7304574029915456, "grad_norm": 1.265625, "learning_rate": 3.1688695652173917e-06, "loss": 0.2349, "step": 10530 }, { "epoch": 0.7311510947322784, "grad_norm": 0.953125, "learning_rate": 3.1671304347826087e-06, "loss": 0.2118, "step": 10540 }, { "epoch": 0.7318447864730111, "grad_norm": 1.328125, "learning_rate": 3.1653913043478265e-06, "loss": 0.2557, "step": 10550 }, { "epoch": 0.7325384782137437, "grad_norm": 1.125, "learning_rate": 3.163652173913044e-06, "loss": 0.272, "step": 10560 }, { "epoch": 0.7332321699544765, "grad_norm": 1.078125, "learning_rate": 3.1619130434782612e-06, "loss": 0.2569, "step": 10570 }, { "epoch": 0.7339258616952092, "grad_norm": 0.77734375, "learning_rate": 3.160173913043478e-06, "loss": 0.2273, "step": 10580 }, { "epoch": 0.7346195534359419, "grad_norm": 1.109375, "learning_rate": 3.158434782608696e-06, "loss": 0.2184, "step": 10590 }, { "epoch": 0.7353132451766746, "grad_norm": 1.15625, "learning_rate": 3.1566956521739133e-06, "loss": 0.2226, "step": 10600 }, { "epoch": 0.7360069369174074, "grad_norm": 0.8984375, "learning_rate": 3.154956521739131e-06, "loss": 0.2256, "step": 10610 }, { "epoch": 0.73670062865814, "grad_norm": 1.578125, "learning_rate": 3.153217391304348e-06, "loss": 0.2162, "step": 10620 }, { "epoch": 0.7373943203988728, "grad_norm": 1.15625, "learning_rate": 3.1514782608695654e-06, "loss": 0.2247, "step": 10630 }, { "epoch": 0.7380880121396055, "grad_norm": 1.6015625, "learning_rate": 3.149739130434783e-06, "loss": 0.2441, "step": 10640 }, { "epoch": 0.7387817038803381, "grad_norm": 1.265625, "learning_rate": 3.1480000000000006e-06, "loss": 0.2476, "step": 10650 }, { "epoch": 0.7394753956210709, "grad_norm": 1.1953125, "learning_rate": 3.1462608695652176e-06, "loss": 0.301, "step": 10660 }, { "epoch": 0.7401690873618036, "grad_norm": 1.296875, "learning_rate": 3.144521739130435e-06, "loss": 0.267, "step": 10670 }, { "epoch": 0.7408627791025363, "grad_norm": 1.1328125, "learning_rate": 3.1427826086956527e-06, "loss": 0.2188, "step": 10680 }, { "epoch": 0.741556470843269, "grad_norm": 1.1875, "learning_rate": 3.14104347826087e-06, "loss": 0.2561, "step": 10690 }, { "epoch": 0.7422501625840018, "grad_norm": 1.171875, "learning_rate": 3.139304347826087e-06, "loss": 0.2115, "step": 10700 }, { "epoch": 0.7429438543247344, "grad_norm": 1.40625, "learning_rate": 3.1375652173913044e-06, "loss": 0.2606, "step": 10710 }, { "epoch": 0.7436375460654672, "grad_norm": 1.0625, "learning_rate": 3.135826086956522e-06, "loss": 0.3279, "step": 10720 }, { "epoch": 0.7443312378061999, "grad_norm": 1.59375, "learning_rate": 3.134086956521739e-06, "loss": 0.2229, "step": 10730 }, { "epoch": 0.7450249295469326, "grad_norm": 1.546875, "learning_rate": 3.1323478260869565e-06, "loss": 0.3244, "step": 10740 }, { "epoch": 0.7457186212876653, "grad_norm": 1.28125, "learning_rate": 3.1306086956521743e-06, "loss": 0.2381, "step": 10750 }, { "epoch": 0.7464123130283981, "grad_norm": 0.9765625, "learning_rate": 3.1288695652173917e-06, "loss": 0.1984, "step": 10760 }, { "epoch": 0.7471060047691307, "grad_norm": 1.140625, "learning_rate": 3.1271304347826086e-06, "loss": 0.2426, "step": 10770 }, { "epoch": 0.7477996965098634, "grad_norm": 1.2734375, "learning_rate": 3.1253913043478264e-06, "loss": 0.2656, "step": 10780 }, { "epoch": 0.7484933882505962, "grad_norm": 1.359375, "learning_rate": 3.123652173913044e-06, "loss": 0.2148, "step": 10790 }, { "epoch": 0.7491870799913288, "grad_norm": 1.28125, "learning_rate": 3.121913043478261e-06, "loss": 0.2359, "step": 10800 }, { "epoch": 0.7498807717320616, "grad_norm": 1.25, "learning_rate": 3.120173913043478e-06, "loss": 0.2798, "step": 10810 }, { "epoch": 0.7505744634727943, "grad_norm": 1.203125, "learning_rate": 3.118434782608696e-06, "loss": 0.2446, "step": 10820 }, { "epoch": 0.751268155213527, "grad_norm": 1.140625, "learning_rate": 3.1166956521739133e-06, "loss": 0.2492, "step": 10830 }, { "epoch": 0.7519618469542597, "grad_norm": 1.0234375, "learning_rate": 3.114956521739131e-06, "loss": 0.2174, "step": 10840 }, { "epoch": 0.7526555386949925, "grad_norm": 1.0234375, "learning_rate": 3.113217391304348e-06, "loss": 0.2919, "step": 10850 }, { "epoch": 0.7533492304357251, "grad_norm": 0.97265625, "learning_rate": 3.1114782608695654e-06, "loss": 0.2294, "step": 10860 }, { "epoch": 0.7540429221764579, "grad_norm": 1.171875, "learning_rate": 3.1097391304347828e-06, "loss": 0.2915, "step": 10870 }, { "epoch": 0.7547366139171906, "grad_norm": 1.5234375, "learning_rate": 3.1080000000000006e-06, "loss": 0.2996, "step": 10880 }, { "epoch": 0.7554303056579232, "grad_norm": 1.140625, "learning_rate": 3.1062608695652175e-06, "loss": 0.3066, "step": 10890 }, { "epoch": 0.756123997398656, "grad_norm": 1.3671875, "learning_rate": 3.104521739130435e-06, "loss": 0.2634, "step": 10900 }, { "epoch": 0.7568176891393887, "grad_norm": 1.265625, "learning_rate": 3.1027826086956527e-06, "loss": 0.2178, "step": 10910 }, { "epoch": 0.7575113808801214, "grad_norm": 0.91015625, "learning_rate": 3.10104347826087e-06, "loss": 0.2464, "step": 10920 }, { "epoch": 0.7582050726208541, "grad_norm": 1.375, "learning_rate": 3.099304347826087e-06, "loss": 0.2308, "step": 10930 }, { "epoch": 0.7588987643615869, "grad_norm": 1.1015625, "learning_rate": 3.0975652173913044e-06, "loss": 0.2179, "step": 10940 }, { "epoch": 0.7595924561023195, "grad_norm": 1.203125, "learning_rate": 3.095826086956522e-06, "loss": 0.2317, "step": 10950 }, { "epoch": 0.7602861478430523, "grad_norm": 0.86328125, "learning_rate": 3.0940869565217395e-06, "loss": 0.2443, "step": 10960 }, { "epoch": 0.7609798395837849, "grad_norm": 1.3515625, "learning_rate": 3.0923478260869565e-06, "loss": 0.3217, "step": 10970 }, { "epoch": 0.7616735313245176, "grad_norm": 1.3359375, "learning_rate": 3.0906086956521743e-06, "loss": 0.2126, "step": 10980 }, { "epoch": 0.7623672230652504, "grad_norm": 1.1953125, "learning_rate": 3.0888695652173916e-06, "loss": 0.2326, "step": 10990 }, { "epoch": 0.763060914805983, "grad_norm": 1.296875, "learning_rate": 3.087130434782609e-06, "loss": 0.2266, "step": 11000 }, { "epoch": 0.7637546065467158, "grad_norm": 1.078125, "learning_rate": 3.0853913043478264e-06, "loss": 0.2807, "step": 11010 }, { "epoch": 0.7644482982874485, "grad_norm": 1.2890625, "learning_rate": 3.0836521739130438e-06, "loss": 0.3183, "step": 11020 }, { "epoch": 0.7651419900281812, "grad_norm": 1.2890625, "learning_rate": 3.081913043478261e-06, "loss": 0.2456, "step": 11030 }, { "epoch": 0.7658356817689139, "grad_norm": 1.203125, "learning_rate": 3.080173913043478e-06, "loss": 0.2157, "step": 11040 }, { "epoch": 0.7665293735096467, "grad_norm": 1.171875, "learning_rate": 3.078434782608696e-06, "loss": 0.2612, "step": 11050 }, { "epoch": 0.7672230652503793, "grad_norm": 1.1953125, "learning_rate": 3.0766956521739132e-06, "loss": 0.253, "step": 11060 }, { "epoch": 0.7679167569911121, "grad_norm": 0.90625, "learning_rate": 3.074956521739131e-06, "loss": 0.225, "step": 11070 }, { "epoch": 0.7686104487318448, "grad_norm": 1.703125, "learning_rate": 3.073217391304348e-06, "loss": 0.2247, "step": 11080 }, { "epoch": 0.7693041404725774, "grad_norm": 1.2109375, "learning_rate": 3.0714782608695654e-06, "loss": 0.2493, "step": 11090 }, { "epoch": 0.7699978322133102, "grad_norm": 1.390625, "learning_rate": 3.0697391304347827e-06, "loss": 0.3239, "step": 11100 }, { "epoch": 0.770691523954043, "grad_norm": 1.1015625, "learning_rate": 3.0680000000000005e-06, "loss": 0.2053, "step": 11110 }, { "epoch": 0.7713852156947756, "grad_norm": 1.2578125, "learning_rate": 3.0662608695652175e-06, "loss": 0.2254, "step": 11120 }, { "epoch": 0.7720789074355083, "grad_norm": 1.140625, "learning_rate": 3.064521739130435e-06, "loss": 0.2715, "step": 11130 }, { "epoch": 0.7727725991762411, "grad_norm": 1.234375, "learning_rate": 3.0627826086956526e-06, "loss": 0.2294, "step": 11140 }, { "epoch": 0.7734662909169737, "grad_norm": 1.390625, "learning_rate": 3.06104347826087e-06, "loss": 0.2423, "step": 11150 }, { "epoch": 0.7741599826577065, "grad_norm": 1.453125, "learning_rate": 3.059304347826087e-06, "loss": 0.2477, "step": 11160 }, { "epoch": 0.7748536743984392, "grad_norm": 1.2578125, "learning_rate": 3.0575652173913043e-06, "loss": 0.2103, "step": 11170 }, { "epoch": 0.7755473661391719, "grad_norm": 1.484375, "learning_rate": 3.055826086956522e-06, "loss": 0.2501, "step": 11180 }, { "epoch": 0.7762410578799046, "grad_norm": 1.0859375, "learning_rate": 3.0540869565217395e-06, "loss": 0.2741, "step": 11190 }, { "epoch": 0.7769347496206374, "grad_norm": 1.4453125, "learning_rate": 3.0523478260869564e-06, "loss": 0.2483, "step": 11200 }, { "epoch": 0.77762844136137, "grad_norm": 1.53125, "learning_rate": 3.0506086956521742e-06, "loss": 0.2672, "step": 11210 }, { "epoch": 0.7783221331021027, "grad_norm": 1.21875, "learning_rate": 3.0488695652173916e-06, "loss": 0.225, "step": 11220 }, { "epoch": 0.7790158248428355, "grad_norm": 1.21875, "learning_rate": 3.047130434782609e-06, "loss": 0.2435, "step": 11230 }, { "epoch": 0.7797095165835681, "grad_norm": 1.1484375, "learning_rate": 3.0453913043478264e-06, "loss": 0.251, "step": 11240 }, { "epoch": 0.7804032083243009, "grad_norm": 1.1015625, "learning_rate": 3.0436521739130437e-06, "loss": 0.2056, "step": 11250 }, { "epoch": 0.7810969000650336, "grad_norm": 1.4921875, "learning_rate": 3.041913043478261e-06, "loss": 0.2781, "step": 11260 }, { "epoch": 0.7817905918057663, "grad_norm": 1.6171875, "learning_rate": 3.040173913043479e-06, "loss": 0.2197, "step": 11270 }, { "epoch": 0.782484283546499, "grad_norm": 1.171875, "learning_rate": 3.038434782608696e-06, "loss": 0.2486, "step": 11280 }, { "epoch": 0.7831779752872318, "grad_norm": 1.0, "learning_rate": 3.036695652173913e-06, "loss": 0.2142, "step": 11290 }, { "epoch": 0.7838716670279644, "grad_norm": 1.5, "learning_rate": 3.034956521739131e-06, "loss": 0.2634, "step": 11300 }, { "epoch": 0.7845653587686972, "grad_norm": 1.3984375, "learning_rate": 3.0332173913043484e-06, "loss": 0.2851, "step": 11310 }, { "epoch": 0.7852590505094299, "grad_norm": 1.0859375, "learning_rate": 3.0314782608695653e-06, "loss": 0.2177, "step": 11320 }, { "epoch": 0.7859527422501625, "grad_norm": 1.15625, "learning_rate": 3.0297391304347827e-06, "loss": 0.2772, "step": 11330 }, { "epoch": 0.7866464339908953, "grad_norm": 1.40625, "learning_rate": 3.0280000000000005e-06, "loss": 0.2819, "step": 11340 }, { "epoch": 0.787340125731628, "grad_norm": 0.87890625, "learning_rate": 3.0262608695652174e-06, "loss": 0.2356, "step": 11350 }, { "epoch": 0.7880338174723607, "grad_norm": 0.859375, "learning_rate": 3.024521739130435e-06, "loss": 0.2149, "step": 11360 }, { "epoch": 0.7887275092130934, "grad_norm": 1.265625, "learning_rate": 3.0227826086956526e-06, "loss": 0.2252, "step": 11370 }, { "epoch": 0.7894212009538262, "grad_norm": 1.8515625, "learning_rate": 3.02104347826087e-06, "loss": 0.2917, "step": 11380 }, { "epoch": 0.7901148926945588, "grad_norm": 1.4296875, "learning_rate": 3.019304347826087e-06, "loss": 0.257, "step": 11390 }, { "epoch": 0.7908085844352916, "grad_norm": 1.296875, "learning_rate": 3.0175652173913043e-06, "loss": 0.2266, "step": 11400 }, { "epoch": 0.7915022761760243, "grad_norm": 1.0546875, "learning_rate": 3.015826086956522e-06, "loss": 0.2351, "step": 11410 }, { "epoch": 0.792195967916757, "grad_norm": 1.46875, "learning_rate": 3.0140869565217395e-06, "loss": 0.2555, "step": 11420 }, { "epoch": 0.7928896596574897, "grad_norm": 1.078125, "learning_rate": 3.0123478260869564e-06, "loss": 0.2709, "step": 11430 }, { "epoch": 0.7935833513982224, "grad_norm": 1.3359375, "learning_rate": 3.010608695652174e-06, "loss": 0.2159, "step": 11440 }, { "epoch": 0.7942770431389551, "grad_norm": 1.4375, "learning_rate": 3.0088695652173916e-06, "loss": 0.3059, "step": 11450 }, { "epoch": 0.7949707348796878, "grad_norm": 1.125, "learning_rate": 3.007130434782609e-06, "loss": 0.2167, "step": 11460 }, { "epoch": 0.7956644266204206, "grad_norm": 0.84765625, "learning_rate": 3.0053913043478263e-06, "loss": 0.2494, "step": 11470 }, { "epoch": 0.7963581183611532, "grad_norm": 1.2421875, "learning_rate": 3.0036521739130437e-06, "loss": 0.2852, "step": 11480 }, { "epoch": 0.797051810101886, "grad_norm": 1.171875, "learning_rate": 3.001913043478261e-06, "loss": 0.2671, "step": 11490 }, { "epoch": 0.7977455018426187, "grad_norm": 1.265625, "learning_rate": 3.000173913043479e-06, "loss": 0.2252, "step": 11500 }, { "epoch": 0.7984391935833514, "grad_norm": 1.171875, "learning_rate": 2.998434782608696e-06, "loss": 0.2481, "step": 11510 }, { "epoch": 0.7991328853240841, "grad_norm": 1.2578125, "learning_rate": 2.996695652173913e-06, "loss": 0.2194, "step": 11520 }, { "epoch": 0.7998265770648169, "grad_norm": 1.125, "learning_rate": 2.994956521739131e-06, "loss": 0.2264, "step": 11530 }, { "epoch": 0.8005202688055495, "grad_norm": 1.3828125, "learning_rate": 2.9932173913043483e-06, "loss": 0.2368, "step": 11540 }, { "epoch": 0.8012139605462822, "grad_norm": 1.109375, "learning_rate": 2.9914782608695653e-06, "loss": 0.2505, "step": 11550 }, { "epoch": 0.801907652287015, "grad_norm": 1.0546875, "learning_rate": 2.9897391304347827e-06, "loss": 0.2175, "step": 11560 }, { "epoch": 0.8026013440277476, "grad_norm": 1.0703125, "learning_rate": 2.9880000000000004e-06, "loss": 0.2256, "step": 11570 }, { "epoch": 0.8032950357684804, "grad_norm": 1.140625, "learning_rate": 2.986260869565218e-06, "loss": 0.2488, "step": 11580 }, { "epoch": 0.8039887275092131, "grad_norm": 1.3828125, "learning_rate": 2.9845217391304348e-06, "loss": 0.2574, "step": 11590 }, { "epoch": 0.8046824192499458, "grad_norm": 1.0234375, "learning_rate": 2.9827826086956526e-06, "loss": 0.2263, "step": 11600 }, { "epoch": 0.8053761109906785, "grad_norm": 1.3203125, "learning_rate": 2.98104347826087e-06, "loss": 0.2127, "step": 11610 }, { "epoch": 0.8060698027314113, "grad_norm": 1.2109375, "learning_rate": 2.9793043478260873e-06, "loss": 0.2328, "step": 11620 }, { "epoch": 0.8067634944721439, "grad_norm": 1.25, "learning_rate": 2.9775652173913042e-06, "loss": 0.279, "step": 11630 }, { "epoch": 0.8074571862128767, "grad_norm": 1.265625, "learning_rate": 2.975826086956522e-06, "loss": 0.1984, "step": 11640 }, { "epoch": 0.8081508779536094, "grad_norm": 1.203125, "learning_rate": 2.9740869565217394e-06, "loss": 0.2232, "step": 11650 }, { "epoch": 0.808844569694342, "grad_norm": 1.3359375, "learning_rate": 2.9723478260869564e-06, "loss": 0.2565, "step": 11660 }, { "epoch": 0.8095382614350748, "grad_norm": 1.25, "learning_rate": 2.970608695652174e-06, "loss": 0.2786, "step": 11670 }, { "epoch": 0.8102319531758075, "grad_norm": 1.3828125, "learning_rate": 2.9688695652173915e-06, "loss": 0.326, "step": 11680 }, { "epoch": 0.8109256449165402, "grad_norm": 1.171875, "learning_rate": 2.967130434782609e-06, "loss": 0.3024, "step": 11690 }, { "epoch": 0.8116193366572729, "grad_norm": 1.1328125, "learning_rate": 2.9653913043478263e-06, "loss": 0.2609, "step": 11700 }, { "epoch": 0.8123130283980057, "grad_norm": 1.2109375, "learning_rate": 2.9636521739130436e-06, "loss": 0.2344, "step": 11710 }, { "epoch": 0.8130067201387383, "grad_norm": 1.4921875, "learning_rate": 2.961913043478261e-06, "loss": 0.2705, "step": 11720 }, { "epoch": 0.8137004118794711, "grad_norm": 1.390625, "learning_rate": 2.960173913043479e-06, "loss": 0.2354, "step": 11730 }, { "epoch": 0.8143941036202038, "grad_norm": 0.94140625, "learning_rate": 2.9584347826086958e-06, "loss": 0.2062, "step": 11740 }, { "epoch": 0.8150877953609365, "grad_norm": 1.3515625, "learning_rate": 2.956695652173913e-06, "loss": 0.2292, "step": 11750 }, { "epoch": 0.8157814871016692, "grad_norm": 1.296875, "learning_rate": 2.954956521739131e-06, "loss": 0.2944, "step": 11760 }, { "epoch": 0.816475178842402, "grad_norm": 1.2421875, "learning_rate": 2.9532173913043483e-06, "loss": 0.2349, "step": 11770 }, { "epoch": 0.8171688705831346, "grad_norm": 1.0859375, "learning_rate": 2.9514782608695652e-06, "loss": 0.2075, "step": 11780 }, { "epoch": 0.8178625623238673, "grad_norm": 1.171875, "learning_rate": 2.9497391304347826e-06, "loss": 0.2725, "step": 11790 }, { "epoch": 0.8185562540646001, "grad_norm": 0.92578125, "learning_rate": 2.9480000000000004e-06, "loss": 0.2386, "step": 11800 }, { "epoch": 0.8192499458053327, "grad_norm": 1.3515625, "learning_rate": 2.9462608695652178e-06, "loss": 0.2515, "step": 11810 }, { "epoch": 0.8199436375460655, "grad_norm": 1.296875, "learning_rate": 2.9445217391304347e-06, "loss": 0.2432, "step": 11820 }, { "epoch": 0.8206373292867982, "grad_norm": 1.5703125, "learning_rate": 2.9427826086956525e-06, "loss": 0.2537, "step": 11830 }, { "epoch": 0.8213310210275309, "grad_norm": 1.234375, "learning_rate": 2.94104347826087e-06, "loss": 0.2072, "step": 11840 }, { "epoch": 0.8220247127682636, "grad_norm": 1.0703125, "learning_rate": 2.9393043478260873e-06, "loss": 0.215, "step": 11850 }, { "epoch": 0.8227184045089964, "grad_norm": 1.171875, "learning_rate": 2.937565217391304e-06, "loss": 0.2244, "step": 11860 }, { "epoch": 0.823412096249729, "grad_norm": 1.1328125, "learning_rate": 2.935826086956522e-06, "loss": 0.2119, "step": 11870 }, { "epoch": 0.8241057879904617, "grad_norm": 1.625, "learning_rate": 2.9340869565217394e-06, "loss": 0.2654, "step": 11880 }, { "epoch": 0.8247994797311945, "grad_norm": 2.21875, "learning_rate": 2.932347826086957e-06, "loss": 0.2935, "step": 11890 }, { "epoch": 0.8254931714719271, "grad_norm": 1.171875, "learning_rate": 2.930608695652174e-06, "loss": 0.3218, "step": 11900 }, { "epoch": 0.8261868632126599, "grad_norm": 1.3046875, "learning_rate": 2.9288695652173915e-06, "loss": 0.2326, "step": 11910 }, { "epoch": 0.8268805549533926, "grad_norm": 1.3203125, "learning_rate": 2.927130434782609e-06, "loss": 0.265, "step": 11920 }, { "epoch": 0.8275742466941253, "grad_norm": 1.3515625, "learning_rate": 2.9253913043478267e-06, "loss": 0.2347, "step": 11930 }, { "epoch": 0.828267938434858, "grad_norm": 1.0625, "learning_rate": 2.9236521739130436e-06, "loss": 0.2534, "step": 11940 }, { "epoch": 0.8289616301755908, "grad_norm": 1.09375, "learning_rate": 2.921913043478261e-06, "loss": 0.2471, "step": 11950 }, { "epoch": 0.8296553219163234, "grad_norm": 1.46875, "learning_rate": 2.9201739130434788e-06, "loss": 0.2258, "step": 11960 }, { "epoch": 0.8303490136570562, "grad_norm": 1.0078125, "learning_rate": 2.9184347826086957e-06, "loss": 0.2336, "step": 11970 }, { "epoch": 0.8310427053977889, "grad_norm": 0.9296875, "learning_rate": 2.916695652173913e-06, "loss": 0.1999, "step": 11980 }, { "epoch": 0.8317363971385215, "grad_norm": 1.3828125, "learning_rate": 2.914956521739131e-06, "loss": 0.2423, "step": 11990 }, { "epoch": 0.8324300888792543, "grad_norm": 0.98046875, "learning_rate": 2.9132173913043483e-06, "loss": 0.3134, "step": 12000 }, { "epoch": 0.833123780619987, "grad_norm": 1.2109375, "learning_rate": 2.911478260869565e-06, "loss": 0.2421, "step": 12010 }, { "epoch": 0.8338174723607197, "grad_norm": 1.2578125, "learning_rate": 2.9097391304347826e-06, "loss": 0.2408, "step": 12020 }, { "epoch": 0.8345111641014524, "grad_norm": 1.09375, "learning_rate": 2.9080000000000004e-06, "loss": 0.3132, "step": 12030 }, { "epoch": 0.8352048558421852, "grad_norm": 1.4921875, "learning_rate": 2.9062608695652177e-06, "loss": 0.3039, "step": 12040 }, { "epoch": 0.8358985475829178, "grad_norm": 1.3046875, "learning_rate": 2.9045217391304347e-06, "loss": 0.2368, "step": 12050 }, { "epoch": 0.8365922393236506, "grad_norm": 1.1015625, "learning_rate": 2.9027826086956525e-06, "loss": 0.2429, "step": 12060 }, { "epoch": 0.8372859310643833, "grad_norm": 1.34375, "learning_rate": 2.90104347826087e-06, "loss": 0.2199, "step": 12070 }, { "epoch": 0.837979622805116, "grad_norm": 0.9609375, "learning_rate": 2.8993043478260872e-06, "loss": 0.2216, "step": 12080 }, { "epoch": 0.8386733145458487, "grad_norm": 1.1171875, "learning_rate": 2.897565217391304e-06, "loss": 0.2114, "step": 12090 }, { "epoch": 0.8393670062865815, "grad_norm": 1.0859375, "learning_rate": 2.895826086956522e-06, "loss": 0.2314, "step": 12100 }, { "epoch": 0.8400606980273141, "grad_norm": 1.2421875, "learning_rate": 2.8940869565217393e-06, "loss": 0.2591, "step": 12110 }, { "epoch": 0.8407543897680468, "grad_norm": 1.1953125, "learning_rate": 2.892347826086957e-06, "loss": 0.2981, "step": 12120 }, { "epoch": 0.8414480815087796, "grad_norm": 1.2734375, "learning_rate": 2.890608695652174e-06, "loss": 0.2615, "step": 12130 }, { "epoch": 0.8421417732495122, "grad_norm": 1.1328125, "learning_rate": 2.8888695652173914e-06, "loss": 0.2214, "step": 12140 }, { "epoch": 0.842835464990245, "grad_norm": 1.46875, "learning_rate": 2.887130434782609e-06, "loss": 0.2462, "step": 12150 }, { "epoch": 0.8435291567309777, "grad_norm": 1.2109375, "learning_rate": 2.8853913043478266e-06, "loss": 0.2383, "step": 12160 }, { "epoch": 0.8442228484717104, "grad_norm": 1.1484375, "learning_rate": 2.8836521739130436e-06, "loss": 0.2856, "step": 12170 }, { "epoch": 0.8449165402124431, "grad_norm": 1.4375, "learning_rate": 2.881913043478261e-06, "loss": 0.2847, "step": 12180 }, { "epoch": 0.8456102319531759, "grad_norm": 1.15625, "learning_rate": 2.8801739130434787e-06, "loss": 0.2508, "step": 12190 }, { "epoch": 0.8463039236939085, "grad_norm": 1.34375, "learning_rate": 2.878434782608696e-06, "loss": 0.2317, "step": 12200 }, { "epoch": 0.8469976154346412, "grad_norm": 1.3359375, "learning_rate": 2.876695652173913e-06, "loss": 0.3621, "step": 12210 }, { "epoch": 0.847691307175374, "grad_norm": 1.109375, "learning_rate": 2.874956521739131e-06, "loss": 0.2664, "step": 12220 }, { "epoch": 0.8483849989161066, "grad_norm": 0.86328125, "learning_rate": 2.873217391304348e-06, "loss": 0.2206, "step": 12230 }, { "epoch": 0.8490786906568394, "grad_norm": 1.6484375, "learning_rate": 2.8714782608695656e-06, "loss": 0.3451, "step": 12240 }, { "epoch": 0.849772382397572, "grad_norm": 1.5390625, "learning_rate": 2.8697391304347825e-06, "loss": 0.2597, "step": 12250 }, { "epoch": 0.8504660741383048, "grad_norm": 1.1328125, "learning_rate": 2.8680000000000003e-06, "loss": 0.2391, "step": 12260 }, { "epoch": 0.8511597658790375, "grad_norm": 0.92578125, "learning_rate": 2.8662608695652177e-06, "loss": 0.2241, "step": 12270 }, { "epoch": 0.8518534576197702, "grad_norm": 1.1796875, "learning_rate": 2.8645217391304346e-06, "loss": 0.2928, "step": 12280 }, { "epoch": 0.8525471493605029, "grad_norm": 1.1640625, "learning_rate": 2.8627826086956524e-06, "loss": 0.2468, "step": 12290 }, { "epoch": 0.8532408411012357, "grad_norm": 1.0546875, "learning_rate": 2.86104347826087e-06, "loss": 0.2234, "step": 12300 }, { "epoch": 0.8539345328419683, "grad_norm": 1.2421875, "learning_rate": 2.859304347826087e-06, "loss": 0.2329, "step": 12310 }, { "epoch": 0.854628224582701, "grad_norm": 1.390625, "learning_rate": 2.857565217391304e-06, "loss": 0.2267, "step": 12320 }, { "epoch": 0.8553219163234338, "grad_norm": 1.3203125, "learning_rate": 2.855826086956522e-06, "loss": 0.2083, "step": 12330 }, { "epoch": 0.8560156080641664, "grad_norm": 1.0546875, "learning_rate": 2.8540869565217393e-06, "loss": 0.2738, "step": 12340 }, { "epoch": 0.8567092998048992, "grad_norm": 1.421875, "learning_rate": 2.852347826086957e-06, "loss": 0.2128, "step": 12350 }, { "epoch": 0.8574029915456319, "grad_norm": 1.515625, "learning_rate": 2.850608695652174e-06, "loss": 0.2226, "step": 12360 }, { "epoch": 0.8580966832863646, "grad_norm": 1.40625, "learning_rate": 2.8488695652173914e-06, "loss": 0.2691, "step": 12370 }, { "epoch": 0.8587903750270973, "grad_norm": 1.3359375, "learning_rate": 2.8471304347826088e-06, "loss": 0.2323, "step": 12380 }, { "epoch": 0.8594840667678301, "grad_norm": 0.9921875, "learning_rate": 2.8453913043478266e-06, "loss": 0.228, "step": 12390 }, { "epoch": 0.8601777585085627, "grad_norm": 0.96484375, "learning_rate": 2.8436521739130435e-06, "loss": 0.2741, "step": 12400 }, { "epoch": 0.8608714502492955, "grad_norm": 1.3046875, "learning_rate": 2.841913043478261e-06, "loss": 0.2483, "step": 12410 }, { "epoch": 0.8615651419900282, "grad_norm": 1.3515625, "learning_rate": 2.8401739130434787e-06, "loss": 0.2552, "step": 12420 }, { "epoch": 0.8622588337307608, "grad_norm": 1.3828125, "learning_rate": 2.838434782608696e-06, "loss": 0.2283, "step": 12430 }, { "epoch": 0.8629525254714936, "grad_norm": 1.2109375, "learning_rate": 2.836695652173913e-06, "loss": 0.226, "step": 12440 }, { "epoch": 0.8636462172122263, "grad_norm": 1.3828125, "learning_rate": 2.834956521739131e-06, "loss": 0.2357, "step": 12450 }, { "epoch": 0.864339908952959, "grad_norm": 1.1484375, "learning_rate": 2.833217391304348e-06, "loss": 0.2832, "step": 12460 }, { "epoch": 0.8650336006936917, "grad_norm": 0.9375, "learning_rate": 2.8314782608695655e-06, "loss": 0.2512, "step": 12470 }, { "epoch": 0.8657272924344245, "grad_norm": 1.1015625, "learning_rate": 2.8297391304347825e-06, "loss": 0.1749, "step": 12480 }, { "epoch": 0.8664209841751571, "grad_norm": 1.578125, "learning_rate": 2.8280000000000003e-06, "loss": 0.3021, "step": 12490 }, { "epoch": 0.8671146759158899, "grad_norm": 1.015625, "learning_rate": 2.8262608695652177e-06, "loss": 0.257, "step": 12500 }, { "epoch": 0.8678083676566226, "grad_norm": 1.1328125, "learning_rate": 2.8245217391304354e-06, "loss": 0.229, "step": 12510 }, { "epoch": 0.8685020593973553, "grad_norm": 1.0234375, "learning_rate": 2.8227826086956524e-06, "loss": 0.2163, "step": 12520 }, { "epoch": 0.869195751138088, "grad_norm": 1.4609375, "learning_rate": 2.8210434782608698e-06, "loss": 0.264, "step": 12530 }, { "epoch": 0.8698894428788208, "grad_norm": 1.1171875, "learning_rate": 2.819304347826087e-06, "loss": 0.2647, "step": 12540 }, { "epoch": 0.8705831346195534, "grad_norm": 1.25, "learning_rate": 2.817565217391305e-06, "loss": 0.2262, "step": 12550 }, { "epoch": 0.8712768263602861, "grad_norm": 1.2578125, "learning_rate": 2.815826086956522e-06, "loss": 0.2163, "step": 12560 }, { "epoch": 0.8719705181010189, "grad_norm": 1.3515625, "learning_rate": 2.8140869565217393e-06, "loss": 0.2299, "step": 12570 }, { "epoch": 0.8726642098417515, "grad_norm": 1.0703125, "learning_rate": 2.812347826086957e-06, "loss": 0.2756, "step": 12580 }, { "epoch": 0.8733579015824843, "grad_norm": 1.3125, "learning_rate": 2.810608695652174e-06, "loss": 0.2429, "step": 12590 }, { "epoch": 0.874051593323217, "grad_norm": 1.328125, "learning_rate": 2.8088695652173914e-06, "loss": 0.2404, "step": 12600 }, { "epoch": 0.8747452850639497, "grad_norm": 1.1640625, "learning_rate": 2.8071304347826087e-06, "loss": 0.2412, "step": 12610 }, { "epoch": 0.8754389768046824, "grad_norm": 1.1875, "learning_rate": 2.8053913043478265e-06, "loss": 0.2682, "step": 12620 }, { "epoch": 0.8761326685454152, "grad_norm": 1.203125, "learning_rate": 2.8036521739130435e-06, "loss": 0.2255, "step": 12630 }, { "epoch": 0.8768263602861478, "grad_norm": 1.1875, "learning_rate": 2.801913043478261e-06, "loss": 0.2766, "step": 12640 }, { "epoch": 0.8775200520268805, "grad_norm": 1.1796875, "learning_rate": 2.8001739130434786e-06, "loss": 0.2353, "step": 12650 }, { "epoch": 0.8782137437676133, "grad_norm": 0.8671875, "learning_rate": 2.798434782608696e-06, "loss": 0.1858, "step": 12660 }, { "epoch": 0.8789074355083459, "grad_norm": 1.2890625, "learning_rate": 2.796695652173913e-06, "loss": 0.214, "step": 12670 }, { "epoch": 0.8796011272490787, "grad_norm": 1.40625, "learning_rate": 2.7949565217391308e-06, "loss": 0.2371, "step": 12680 }, { "epoch": 0.8802948189898114, "grad_norm": 1.2734375, "learning_rate": 2.793217391304348e-06, "loss": 0.23, "step": 12690 }, { "epoch": 0.8809885107305441, "grad_norm": 1.28125, "learning_rate": 2.7914782608695655e-06, "loss": 0.2453, "step": 12700 }, { "epoch": 0.8816822024712768, "grad_norm": 1.265625, "learning_rate": 2.7897391304347824e-06, "loss": 0.2131, "step": 12710 }, { "epoch": 0.8823758942120096, "grad_norm": 1.390625, "learning_rate": 2.7880000000000002e-06, "loss": 0.242, "step": 12720 }, { "epoch": 0.8830695859527422, "grad_norm": 0.953125, "learning_rate": 2.7862608695652176e-06, "loss": 0.3201, "step": 12730 }, { "epoch": 0.883763277693475, "grad_norm": 0.94140625, "learning_rate": 2.7845217391304354e-06, "loss": 0.2541, "step": 12740 }, { "epoch": 0.8844569694342077, "grad_norm": 1.0859375, "learning_rate": 2.7827826086956524e-06, "loss": 0.2767, "step": 12750 }, { "epoch": 0.8851506611749403, "grad_norm": 1.28125, "learning_rate": 2.7810434782608697e-06, "loss": 0.2321, "step": 12760 }, { "epoch": 0.8858443529156731, "grad_norm": 1.203125, "learning_rate": 2.779304347826087e-06, "loss": 0.2789, "step": 12770 }, { "epoch": 0.8865380446564058, "grad_norm": 1.5546875, "learning_rate": 2.777565217391305e-06, "loss": 0.2188, "step": 12780 }, { "epoch": 0.8872317363971385, "grad_norm": 1.21875, "learning_rate": 2.775826086956522e-06, "loss": 0.2593, "step": 12790 }, { "epoch": 0.8879254281378712, "grad_norm": 1.015625, "learning_rate": 2.774086956521739e-06, "loss": 0.215, "step": 12800 }, { "epoch": 0.888619119878604, "grad_norm": 1.3203125, "learning_rate": 2.772347826086957e-06, "loss": 0.2497, "step": 12810 }, { "epoch": 0.8893128116193366, "grad_norm": 0.984375, "learning_rate": 2.7706086956521744e-06, "loss": 0.2003, "step": 12820 }, { "epoch": 0.8900065033600694, "grad_norm": 1.171875, "learning_rate": 2.7688695652173913e-06, "loss": 0.2473, "step": 12830 }, { "epoch": 0.8907001951008021, "grad_norm": 0.90234375, "learning_rate": 2.7671304347826087e-06, "loss": 0.2628, "step": 12840 }, { "epoch": 0.8913938868415348, "grad_norm": 1.0546875, "learning_rate": 2.7653913043478265e-06, "loss": 0.2674, "step": 12850 }, { "epoch": 0.8920875785822675, "grad_norm": 1.3828125, "learning_rate": 2.763652173913044e-06, "loss": 0.2468, "step": 12860 }, { "epoch": 0.8927812703230003, "grad_norm": 1.109375, "learning_rate": 2.761913043478261e-06, "loss": 0.2161, "step": 12870 }, { "epoch": 0.8934749620637329, "grad_norm": 1.1484375, "learning_rate": 2.7601739130434786e-06, "loss": 0.2284, "step": 12880 }, { "epoch": 0.8941686538044656, "grad_norm": 1.1640625, "learning_rate": 2.758434782608696e-06, "loss": 0.2432, "step": 12890 }, { "epoch": 0.8948623455451984, "grad_norm": 1.3359375, "learning_rate": 2.756695652173913e-06, "loss": 0.2964, "step": 12900 }, { "epoch": 0.895556037285931, "grad_norm": 1.375, "learning_rate": 2.7549565217391307e-06, "loss": 0.2177, "step": 12910 }, { "epoch": 0.8962497290266638, "grad_norm": 1.484375, "learning_rate": 2.753217391304348e-06, "loss": 0.2864, "step": 12920 }, { "epoch": 0.8969434207673965, "grad_norm": 1.078125, "learning_rate": 2.7514782608695655e-06, "loss": 0.265, "step": 12930 }, { "epoch": 0.8976371125081292, "grad_norm": 1.53125, "learning_rate": 2.7497391304347824e-06, "loss": 0.3763, "step": 12940 }, { "epoch": 0.8983308042488619, "grad_norm": 1.1640625, "learning_rate": 2.748e-06, "loss": 0.2525, "step": 12950 }, { "epoch": 0.8990244959895947, "grad_norm": 1.3984375, "learning_rate": 2.7462608695652176e-06, "loss": 0.2358, "step": 12960 }, { "epoch": 0.8997181877303273, "grad_norm": 1.40625, "learning_rate": 2.7445217391304354e-06, "loss": 0.2315, "step": 12970 }, { "epoch": 0.90041187947106, "grad_norm": 1.328125, "learning_rate": 2.7427826086956523e-06, "loss": 0.2688, "step": 12980 }, { "epoch": 0.9011055712117928, "grad_norm": 1.4296875, "learning_rate": 2.7410434782608697e-06, "loss": 0.2491, "step": 12990 }, { "epoch": 0.9017992629525254, "grad_norm": 1.3203125, "learning_rate": 2.739304347826087e-06, "loss": 0.2402, "step": 13000 }, { "epoch": 0.9024929546932582, "grad_norm": 1.15625, "learning_rate": 2.737565217391305e-06, "loss": 0.258, "step": 13010 }, { "epoch": 0.9031866464339909, "grad_norm": 1.109375, "learning_rate": 2.735826086956522e-06, "loss": 0.263, "step": 13020 }, { "epoch": 0.9038803381747236, "grad_norm": 1.203125, "learning_rate": 2.734086956521739e-06, "loss": 0.2312, "step": 13030 }, { "epoch": 0.9045740299154563, "grad_norm": 1.1875, "learning_rate": 2.732347826086957e-06, "loss": 0.2835, "step": 13040 }, { "epoch": 0.9052677216561891, "grad_norm": 1.1796875, "learning_rate": 2.7306086956521743e-06, "loss": 0.2328, "step": 13050 }, { "epoch": 0.9059614133969217, "grad_norm": 1.2734375, "learning_rate": 2.7288695652173913e-06, "loss": 0.2367, "step": 13060 }, { "epoch": 0.9066551051376545, "grad_norm": 1.6796875, "learning_rate": 2.7271304347826087e-06, "loss": 0.2629, "step": 13070 }, { "epoch": 0.9073487968783872, "grad_norm": 1.1171875, "learning_rate": 2.7253913043478264e-06, "loss": 0.2833, "step": 13080 }, { "epoch": 0.9080424886191198, "grad_norm": 1.078125, "learning_rate": 2.723652173913044e-06, "loss": 0.2077, "step": 13090 }, { "epoch": 0.9087361803598526, "grad_norm": 1.2734375, "learning_rate": 2.7219130434782608e-06, "loss": 0.213, "step": 13100 }, { "epoch": 0.9094298721005853, "grad_norm": 1.3515625, "learning_rate": 2.7201739130434786e-06, "loss": 0.2784, "step": 13110 }, { "epoch": 0.910123563841318, "grad_norm": 1.3203125, "learning_rate": 2.718434782608696e-06, "loss": 0.2505, "step": 13120 }, { "epoch": 0.9108172555820507, "grad_norm": 1.171875, "learning_rate": 2.7166956521739133e-06, "loss": 0.2348, "step": 13130 }, { "epoch": 0.9115109473227835, "grad_norm": 1.828125, "learning_rate": 2.7149565217391307e-06, "loss": 0.2753, "step": 13140 }, { "epoch": 0.9122046390635161, "grad_norm": 1.5625, "learning_rate": 2.713217391304348e-06, "loss": 0.263, "step": 13150 }, { "epoch": 0.9128983308042489, "grad_norm": 1.1875, "learning_rate": 2.7114782608695654e-06, "loss": 0.2445, "step": 13160 }, { "epoch": 0.9135920225449816, "grad_norm": 1.5078125, "learning_rate": 2.7097391304347832e-06, "loss": 0.2877, "step": 13170 }, { "epoch": 0.9142857142857143, "grad_norm": 1.0859375, "learning_rate": 2.708e-06, "loss": 0.2414, "step": 13180 }, { "epoch": 0.914979406026447, "grad_norm": 1.171875, "learning_rate": 2.7062608695652175e-06, "loss": 0.2208, "step": 13190 }, { "epoch": 0.9156730977671798, "grad_norm": 1.21875, "learning_rate": 2.7045217391304353e-06, "loss": 0.2381, "step": 13200 }, { "epoch": 0.9163667895079124, "grad_norm": 1.5078125, "learning_rate": 2.7027826086956523e-06, "loss": 0.2036, "step": 13210 }, { "epoch": 0.9170604812486451, "grad_norm": 1.2109375, "learning_rate": 2.7010434782608696e-06, "loss": 0.2259, "step": 13220 }, { "epoch": 0.9177541729893779, "grad_norm": 1.0078125, "learning_rate": 2.699304347826087e-06, "loss": 0.2317, "step": 13230 }, { "epoch": 0.9184478647301105, "grad_norm": 1.359375, "learning_rate": 2.697565217391305e-06, "loss": 0.2817, "step": 13240 }, { "epoch": 0.9191415564708433, "grad_norm": 1.46875, "learning_rate": 2.6958260869565218e-06, "loss": 0.2527, "step": 13250 }, { "epoch": 0.919835248211576, "grad_norm": 1.15625, "learning_rate": 2.694086956521739e-06, "loss": 0.2398, "step": 13260 }, { "epoch": 0.9205289399523087, "grad_norm": 1.203125, "learning_rate": 2.692347826086957e-06, "loss": 0.2632, "step": 13270 }, { "epoch": 0.9212226316930414, "grad_norm": 1.2109375, "learning_rate": 2.6906086956521743e-06, "loss": 0.218, "step": 13280 }, { "epoch": 0.9219163234337742, "grad_norm": 1.296875, "learning_rate": 2.6888695652173912e-06, "loss": 0.2906, "step": 13290 }, { "epoch": 0.9226100151745068, "grad_norm": 1.21875, "learning_rate": 2.6871304347826086e-06, "loss": 0.233, "step": 13300 }, { "epoch": 0.9233037069152396, "grad_norm": 1.1875, "learning_rate": 2.6853913043478264e-06, "loss": 0.2417, "step": 13310 }, { "epoch": 0.9239973986559723, "grad_norm": 1.3671875, "learning_rate": 2.6836521739130438e-06, "loss": 0.3028, "step": 13320 }, { "epoch": 0.9246910903967049, "grad_norm": 1.2578125, "learning_rate": 2.6819130434782607e-06, "loss": 0.2826, "step": 13330 }, { "epoch": 0.9253847821374377, "grad_norm": 1.3515625, "learning_rate": 2.6801739130434785e-06, "loss": 0.238, "step": 13340 }, { "epoch": 0.9260784738781704, "grad_norm": 1.0234375, "learning_rate": 2.678434782608696e-06, "loss": 0.2322, "step": 13350 }, { "epoch": 0.9267721656189031, "grad_norm": 1.3515625, "learning_rate": 2.6766956521739133e-06, "loss": 0.2983, "step": 13360 }, { "epoch": 0.9274658573596358, "grad_norm": 0.9609375, "learning_rate": 2.6749565217391306e-06, "loss": 0.2692, "step": 13370 }, { "epoch": 0.9281595491003686, "grad_norm": 1.28125, "learning_rate": 2.673217391304348e-06, "loss": 0.2064, "step": 13380 }, { "epoch": 0.9288532408411012, "grad_norm": 1.265625, "learning_rate": 2.6714782608695654e-06, "loss": 0.2225, "step": 13390 }, { "epoch": 0.929546932581834, "grad_norm": 1.375, "learning_rate": 2.669739130434783e-06, "loss": 0.2397, "step": 13400 }, { "epoch": 0.9302406243225667, "grad_norm": 1.28125, "learning_rate": 2.668e-06, "loss": 0.238, "step": 13410 }, { "epoch": 0.9309343160632993, "grad_norm": 1.8828125, "learning_rate": 2.6662608695652175e-06, "loss": 0.2753, "step": 13420 }, { "epoch": 0.9316280078040321, "grad_norm": 0.8671875, "learning_rate": 2.6645217391304353e-06, "loss": 0.225, "step": 13430 }, { "epoch": 0.9323216995447648, "grad_norm": 1.2421875, "learning_rate": 2.6627826086956527e-06, "loss": 0.2449, "step": 13440 }, { "epoch": 0.9330153912854975, "grad_norm": 0.98046875, "learning_rate": 2.6610434782608696e-06, "loss": 0.2416, "step": 13450 }, { "epoch": 0.9337090830262302, "grad_norm": 1.0703125, "learning_rate": 2.659304347826087e-06, "loss": 0.217, "step": 13460 }, { "epoch": 0.934402774766963, "grad_norm": 1.5078125, "learning_rate": 2.6575652173913048e-06, "loss": 0.2213, "step": 13470 }, { "epoch": 0.9350964665076956, "grad_norm": 1.0390625, "learning_rate": 2.655826086956522e-06, "loss": 0.3161, "step": 13480 }, { "epoch": 0.9357901582484284, "grad_norm": 1.5234375, "learning_rate": 2.654086956521739e-06, "loss": 0.2487, "step": 13490 }, { "epoch": 0.9364838499891611, "grad_norm": 1.6796875, "learning_rate": 2.652347826086957e-06, "loss": 0.3213, "step": 13500 }, { "epoch": 0.9371775417298938, "grad_norm": 1.1875, "learning_rate": 2.6506086956521743e-06, "loss": 0.2486, "step": 13510 }, { "epoch": 0.9378712334706265, "grad_norm": 1.5078125, "learning_rate": 2.648869565217391e-06, "loss": 0.2348, "step": 13520 }, { "epoch": 0.9385649252113591, "grad_norm": 1.0859375, "learning_rate": 2.6471304347826086e-06, "loss": 0.2137, "step": 13530 }, { "epoch": 0.9392586169520919, "grad_norm": 1.1640625, "learning_rate": 2.6453913043478264e-06, "loss": 0.2431, "step": 13540 }, { "epoch": 0.9399523086928246, "grad_norm": 0.82421875, "learning_rate": 2.6436521739130437e-06, "loss": 0.2795, "step": 13550 }, { "epoch": 0.9406460004335573, "grad_norm": 1.109375, "learning_rate": 2.6419130434782607e-06, "loss": 0.2674, "step": 13560 }, { "epoch": 0.94133969217429, "grad_norm": 1.0859375, "learning_rate": 2.6401739130434785e-06, "loss": 0.2346, "step": 13570 }, { "epoch": 0.9420333839150228, "grad_norm": 1.3125, "learning_rate": 2.638434782608696e-06, "loss": 0.2416, "step": 13580 }, { "epoch": 0.9427270756557554, "grad_norm": 1.703125, "learning_rate": 2.6366956521739132e-06, "loss": 0.2999, "step": 13590 }, { "epoch": 0.9434207673964882, "grad_norm": 1.1015625, "learning_rate": 2.6349565217391306e-06, "loss": 0.1903, "step": 13600 }, { "epoch": 0.9441144591372209, "grad_norm": 0.96875, "learning_rate": 2.633217391304348e-06, "loss": 0.2281, "step": 13610 }, { "epoch": 0.9448081508779536, "grad_norm": 1.203125, "learning_rate": 2.6314782608695653e-06, "loss": 0.2115, "step": 13620 }, { "epoch": 0.9455018426186863, "grad_norm": 1.2578125, "learning_rate": 2.629739130434783e-06, "loss": 0.2522, "step": 13630 }, { "epoch": 0.946195534359419, "grad_norm": 1.1796875, "learning_rate": 2.628e-06, "loss": 0.2353, "step": 13640 }, { "epoch": 0.9468892261001517, "grad_norm": 1.3671875, "learning_rate": 2.6262608695652175e-06, "loss": 0.2585, "step": 13650 }, { "epoch": 0.9475829178408844, "grad_norm": 1.0546875, "learning_rate": 2.6245217391304352e-06, "loss": 0.2067, "step": 13660 }, { "epoch": 0.9482766095816172, "grad_norm": 1.4609375, "learning_rate": 2.6227826086956526e-06, "loss": 0.2414, "step": 13670 }, { "epoch": 0.9489703013223498, "grad_norm": 0.9921875, "learning_rate": 2.6210434782608696e-06, "loss": 0.2992, "step": 13680 }, { "epoch": 0.9496639930630826, "grad_norm": 1.578125, "learning_rate": 2.619304347826087e-06, "loss": 0.2543, "step": 13690 }, { "epoch": 0.9503576848038153, "grad_norm": 1.3046875, "learning_rate": 2.6175652173913047e-06, "loss": 0.2452, "step": 13700 }, { "epoch": 0.951051376544548, "grad_norm": 1.2421875, "learning_rate": 2.615826086956522e-06, "loss": 0.2564, "step": 13710 }, { "epoch": 0.9517450682852807, "grad_norm": 1.2109375, "learning_rate": 2.614086956521739e-06, "loss": 0.2446, "step": 13720 }, { "epoch": 0.9524387600260135, "grad_norm": 1.3515625, "learning_rate": 2.612347826086957e-06, "loss": 0.2343, "step": 13730 }, { "epoch": 0.9531324517667461, "grad_norm": 1.0546875, "learning_rate": 2.6106086956521742e-06, "loss": 0.2255, "step": 13740 }, { "epoch": 0.9538261435074789, "grad_norm": 1.6328125, "learning_rate": 2.6088695652173916e-06, "loss": 0.2345, "step": 13750 }, { "epoch": 0.9545198352482116, "grad_norm": 1.4921875, "learning_rate": 2.6071304347826085e-06, "loss": 0.2453, "step": 13760 }, { "epoch": 0.9552135269889442, "grad_norm": 1.2421875, "learning_rate": 2.6053913043478263e-06, "loss": 0.2377, "step": 13770 }, { "epoch": 0.955907218729677, "grad_norm": 0.88671875, "learning_rate": 2.6036521739130437e-06, "loss": 0.2412, "step": 13780 }, { "epoch": 0.9566009104704097, "grad_norm": 1.265625, "learning_rate": 2.6019130434782615e-06, "loss": 0.3032, "step": 13790 }, { "epoch": 0.9572946022111424, "grad_norm": 1.5703125, "learning_rate": 2.6001739130434784e-06, "loss": 0.2317, "step": 13800 }, { "epoch": 0.9579882939518751, "grad_norm": 1.125, "learning_rate": 2.598434782608696e-06, "loss": 0.2275, "step": 13810 }, { "epoch": 0.9586819856926079, "grad_norm": 1.25, "learning_rate": 2.596695652173913e-06, "loss": 0.2155, "step": 13820 }, { "epoch": 0.9593756774333405, "grad_norm": 1.2734375, "learning_rate": 2.5949565217391306e-06, "loss": 0.2261, "step": 13830 }, { "epoch": 0.9600693691740733, "grad_norm": 1.3203125, "learning_rate": 2.593217391304348e-06, "loss": 0.2183, "step": 13840 }, { "epoch": 0.960763060914806, "grad_norm": 1.328125, "learning_rate": 2.5914782608695653e-06, "loss": 0.2203, "step": 13850 }, { "epoch": 0.9614567526555386, "grad_norm": 1.2421875, "learning_rate": 2.589739130434783e-06, "loss": 0.2219, "step": 13860 }, { "epoch": 0.9621504443962714, "grad_norm": 1.0546875, "learning_rate": 2.588e-06, "loss": 0.25, "step": 13870 }, { "epoch": 0.9628441361370041, "grad_norm": 0.921875, "learning_rate": 2.5862608695652174e-06, "loss": 0.2036, "step": 13880 }, { "epoch": 0.9635378278777368, "grad_norm": 1.015625, "learning_rate": 2.584521739130435e-06, "loss": 0.2385, "step": 13890 }, { "epoch": 0.9642315196184695, "grad_norm": 1.2421875, "learning_rate": 2.5827826086956526e-06, "loss": 0.2036, "step": 13900 }, { "epoch": 0.9649252113592023, "grad_norm": 1.3203125, "learning_rate": 2.5810434782608695e-06, "loss": 0.2155, "step": 13910 }, { "epoch": 0.9656189030999349, "grad_norm": 0.93359375, "learning_rate": 2.579304347826087e-06, "loss": 0.238, "step": 13920 }, { "epoch": 0.9663125948406677, "grad_norm": 1.1640625, "learning_rate": 2.5775652173913047e-06, "loss": 0.2178, "step": 13930 }, { "epoch": 0.9670062865814004, "grad_norm": 1.15625, "learning_rate": 2.575826086956522e-06, "loss": 0.2891, "step": 13940 }, { "epoch": 0.9676999783221331, "grad_norm": 1.140625, "learning_rate": 2.574086956521739e-06, "loss": 0.2354, "step": 13950 }, { "epoch": 0.9683936700628658, "grad_norm": 1.40625, "learning_rate": 2.572347826086957e-06, "loss": 0.2305, "step": 13960 }, { "epoch": 0.9690873618035986, "grad_norm": 1.0625, "learning_rate": 2.570608695652174e-06, "loss": 0.2175, "step": 13970 }, { "epoch": 0.9697810535443312, "grad_norm": 1.1484375, "learning_rate": 2.5688695652173915e-06, "loss": 0.232, "step": 13980 }, { "epoch": 0.9704747452850639, "grad_norm": 1.03125, "learning_rate": 2.5671304347826085e-06, "loss": 0.2387, "step": 13990 }, { "epoch": 0.9711684370257967, "grad_norm": 0.8671875, "learning_rate": 2.5653913043478263e-06, "loss": 0.3122, "step": 14000 }, { "epoch": 0.9718621287665293, "grad_norm": 1.1171875, "learning_rate": 2.5636521739130437e-06, "loss": 0.2158, "step": 14010 }, { "epoch": 0.9725558205072621, "grad_norm": 0.75390625, "learning_rate": 2.5619130434782615e-06, "loss": 0.222, "step": 14020 }, { "epoch": 0.9732495122479948, "grad_norm": 1.125, "learning_rate": 2.5601739130434784e-06, "loss": 0.2692, "step": 14030 }, { "epoch": 0.9739432039887275, "grad_norm": 1.4453125, "learning_rate": 2.5584347826086958e-06, "loss": 0.2721, "step": 14040 }, { "epoch": 0.9746368957294602, "grad_norm": 1.5234375, "learning_rate": 2.5566956521739136e-06, "loss": 0.2335, "step": 14050 }, { "epoch": 0.975330587470193, "grad_norm": 1.0703125, "learning_rate": 2.554956521739131e-06, "loss": 0.2132, "step": 14060 }, { "epoch": 0.9760242792109256, "grad_norm": 1.25, "learning_rate": 2.553217391304348e-06, "loss": 0.2424, "step": 14070 }, { "epoch": 0.9767179709516584, "grad_norm": 1.140625, "learning_rate": 2.5514782608695653e-06, "loss": 0.2583, "step": 14080 }, { "epoch": 0.9774116626923911, "grad_norm": 1.21875, "learning_rate": 2.549739130434783e-06, "loss": 0.2165, "step": 14090 }, { "epoch": 0.9781053544331237, "grad_norm": 1.2734375, "learning_rate": 2.5480000000000004e-06, "loss": 0.2442, "step": 14100 }, { "epoch": 0.9787990461738565, "grad_norm": 1.21875, "learning_rate": 2.5462608695652174e-06, "loss": 0.3018, "step": 14110 }, { "epoch": 0.9794927379145892, "grad_norm": 1.1640625, "learning_rate": 2.544521739130435e-06, "loss": 0.2094, "step": 14120 }, { "epoch": 0.9801864296553219, "grad_norm": 1.0859375, "learning_rate": 2.5427826086956525e-06, "loss": 0.2546, "step": 14130 }, { "epoch": 0.9808801213960546, "grad_norm": 1.3515625, "learning_rate": 2.5410434782608695e-06, "loss": 0.2204, "step": 14140 }, { "epoch": 0.9815738131367874, "grad_norm": 1.3515625, "learning_rate": 2.539304347826087e-06, "loss": 0.176, "step": 14150 }, { "epoch": 0.98226750487752, "grad_norm": 1.015625, "learning_rate": 2.5375652173913046e-06, "loss": 0.2255, "step": 14160 }, { "epoch": 0.9829611966182528, "grad_norm": 0.9765625, "learning_rate": 2.535826086956522e-06, "loss": 0.228, "step": 14170 }, { "epoch": 0.9836548883589855, "grad_norm": 1.421875, "learning_rate": 2.534086956521739e-06, "loss": 0.2245, "step": 14180 }, { "epoch": 0.9843485800997182, "grad_norm": 1.0703125, "learning_rate": 2.5323478260869568e-06, "loss": 0.2602, "step": 14190 }, { "epoch": 0.9850422718404509, "grad_norm": 1.453125, "learning_rate": 2.530608695652174e-06, "loss": 0.3022, "step": 14200 }, { "epoch": 0.9857359635811836, "grad_norm": 1.2421875, "learning_rate": 2.5288695652173915e-06, "loss": 0.2025, "step": 14210 }, { "epoch": 0.9864296553219163, "grad_norm": 1.140625, "learning_rate": 2.527130434782609e-06, "loss": 0.2147, "step": 14220 }, { "epoch": 0.987123347062649, "grad_norm": 1.28125, "learning_rate": 2.5253913043478262e-06, "loss": 0.2519, "step": 14230 }, { "epoch": 0.9878170388033818, "grad_norm": 1.15625, "learning_rate": 2.5236521739130436e-06, "loss": 0.2693, "step": 14240 }, { "epoch": 0.9885107305441144, "grad_norm": 0.90625, "learning_rate": 2.5219130434782614e-06, "loss": 0.246, "step": 14250 }, { "epoch": 0.9892044222848472, "grad_norm": 1.125, "learning_rate": 2.5201739130434784e-06, "loss": 0.235, "step": 14260 }, { "epoch": 0.9898981140255799, "grad_norm": 1.1640625, "learning_rate": 2.5184347826086957e-06, "loss": 0.2056, "step": 14270 }, { "epoch": 0.9905918057663126, "grad_norm": 1.2578125, "learning_rate": 2.5166956521739135e-06, "loss": 0.2063, "step": 14280 }, { "epoch": 0.9912854975070453, "grad_norm": 1.484375, "learning_rate": 2.514956521739131e-06, "loss": 0.2341, "step": 14290 }, { "epoch": 0.9919791892477781, "grad_norm": 1.3671875, "learning_rate": 2.513217391304348e-06, "loss": 0.21, "step": 14300 }, { "epoch": 0.9926728809885107, "grad_norm": 1.4140625, "learning_rate": 2.5114782608695652e-06, "loss": 0.2236, "step": 14310 }, { "epoch": 0.9933665727292434, "grad_norm": 1.2109375, "learning_rate": 2.509739130434783e-06, "loss": 0.2257, "step": 14320 }, { "epoch": 0.9940602644699762, "grad_norm": 1.4453125, "learning_rate": 2.5080000000000004e-06, "loss": 0.2403, "step": 14330 }, { "epoch": 0.9947539562107088, "grad_norm": 1.0390625, "learning_rate": 2.5062608695652173e-06, "loss": 0.2051, "step": 14340 }, { "epoch": 0.9954476479514416, "grad_norm": 0.94921875, "learning_rate": 2.504521739130435e-06, "loss": 0.2264, "step": 14350 }, { "epoch": 0.9961413396921743, "grad_norm": 1.1953125, "learning_rate": 2.5027826086956525e-06, "loss": 0.2702, "step": 14360 }, { "epoch": 0.996835031432907, "grad_norm": 0.92578125, "learning_rate": 2.50104347826087e-06, "loss": 0.2241, "step": 14370 }, { "epoch": 0.9975287231736397, "grad_norm": 1.0625, "learning_rate": 2.4993043478260872e-06, "loss": 0.2191, "step": 14380 }, { "epoch": 0.9982224149143725, "grad_norm": 1.5546875, "learning_rate": 2.4975652173913046e-06, "loss": 0.3005, "step": 14390 }, { "epoch": 0.9989161066551051, "grad_norm": 1.2578125, "learning_rate": 2.495826086956522e-06, "loss": 0.3011, "step": 14400 }, { "epoch": 0.9996097983958379, "grad_norm": 1.7109375, "learning_rate": 2.4940869565217394e-06, "loss": 0.2558, "step": 14410 }, { "epoch": 1.0002774766962932, "grad_norm": 1.484375, "learning_rate": 2.4923478260869567e-06, "loss": 0.2414, "step": 14420 }, { "epoch": 1.0009711684370257, "grad_norm": 1.03125, "learning_rate": 2.490608695652174e-06, "loss": 0.2269, "step": 14430 }, { "epoch": 1.0016648601777585, "grad_norm": 1.203125, "learning_rate": 2.4888695652173915e-06, "loss": 0.2052, "step": 14440 }, { "epoch": 1.0023585519184912, "grad_norm": 1.09375, "learning_rate": 2.487130434782609e-06, "loss": 0.2343, "step": 14450 }, { "epoch": 1.003052243659224, "grad_norm": 1.046875, "learning_rate": 2.485391304347826e-06, "loss": 0.2279, "step": 14460 }, { "epoch": 1.0037459353999567, "grad_norm": 0.94921875, "learning_rate": 2.4836521739130436e-06, "loss": 0.2713, "step": 14470 }, { "epoch": 1.0044396271406895, "grad_norm": 1.2109375, "learning_rate": 2.481913043478261e-06, "loss": 0.2522, "step": 14480 }, { "epoch": 1.005133318881422, "grad_norm": 1.3046875, "learning_rate": 2.4801739130434783e-06, "loss": 0.2336, "step": 14490 }, { "epoch": 1.0058270106221547, "grad_norm": 1.140625, "learning_rate": 2.4784347826086957e-06, "loss": 0.228, "step": 14500 }, { "epoch": 1.0065207023628875, "grad_norm": 1.1640625, "learning_rate": 2.4766956521739135e-06, "loss": 0.2082, "step": 14510 }, { "epoch": 1.0072143941036202, "grad_norm": 1.25, "learning_rate": 2.4749565217391304e-06, "loss": 0.2847, "step": 14520 }, { "epoch": 1.007908085844353, "grad_norm": 0.9453125, "learning_rate": 2.4732173913043482e-06, "loss": 0.2132, "step": 14530 }, { "epoch": 1.0086017775850857, "grad_norm": 1.3671875, "learning_rate": 2.471478260869565e-06, "loss": 0.2656, "step": 14540 }, { "epoch": 1.0092954693258183, "grad_norm": 1.3046875, "learning_rate": 2.469739130434783e-06, "loss": 0.2238, "step": 14550 }, { "epoch": 1.009989161066551, "grad_norm": 1.6640625, "learning_rate": 2.468e-06, "loss": 0.3057, "step": 14560 }, { "epoch": 1.0106828528072838, "grad_norm": 1.1484375, "learning_rate": 2.4662608695652177e-06, "loss": 0.2365, "step": 14570 }, { "epoch": 1.0113765445480165, "grad_norm": 0.98046875, "learning_rate": 2.464521739130435e-06, "loss": 0.2215, "step": 14580 }, { "epoch": 1.0120702362887493, "grad_norm": 1.4921875, "learning_rate": 2.4627826086956525e-06, "loss": 0.2266, "step": 14590 }, { "epoch": 1.012763928029482, "grad_norm": 2.265625, "learning_rate": 2.46104347826087e-06, "loss": 0.2978, "step": 14600 }, { "epoch": 1.0134576197702145, "grad_norm": 1.328125, "learning_rate": 2.459304347826087e-06, "loss": 0.2397, "step": 14610 }, { "epoch": 1.0141513115109473, "grad_norm": 1.2109375, "learning_rate": 2.4575652173913046e-06, "loss": 0.3155, "step": 14620 }, { "epoch": 1.01484500325168, "grad_norm": 1.3828125, "learning_rate": 2.455826086956522e-06, "loss": 0.2873, "step": 14630 }, { "epoch": 1.0155386949924128, "grad_norm": 1.7734375, "learning_rate": 2.4540869565217393e-06, "loss": 0.2612, "step": 14640 }, { "epoch": 1.0162323867331455, "grad_norm": 1.1171875, "learning_rate": 2.4523478260869567e-06, "loss": 0.2231, "step": 14650 }, { "epoch": 1.0169260784738783, "grad_norm": 1.5234375, "learning_rate": 2.450608695652174e-06, "loss": 0.3073, "step": 14660 }, { "epoch": 1.0176197702146108, "grad_norm": 1.1875, "learning_rate": 2.4488695652173914e-06, "loss": 0.3001, "step": 14670 }, { "epoch": 1.0183134619553436, "grad_norm": 1.2109375, "learning_rate": 2.447130434782609e-06, "loss": 0.2108, "step": 14680 }, { "epoch": 1.0190071536960763, "grad_norm": 1.1953125, "learning_rate": 2.4453913043478266e-06, "loss": 0.2234, "step": 14690 }, { "epoch": 1.019700845436809, "grad_norm": 1.25, "learning_rate": 2.4436521739130435e-06, "loss": 0.235, "step": 14700 }, { "epoch": 1.0203945371775418, "grad_norm": 1.1171875, "learning_rate": 2.4419130434782613e-06, "loss": 0.2167, "step": 14710 }, { "epoch": 1.0210882289182743, "grad_norm": 0.97265625, "learning_rate": 2.4401739130434783e-06, "loss": 0.2383, "step": 14720 }, { "epoch": 1.021781920659007, "grad_norm": 1.375, "learning_rate": 2.438434782608696e-06, "loss": 0.2186, "step": 14730 }, { "epoch": 1.0224756123997398, "grad_norm": 1.1015625, "learning_rate": 2.4366956521739134e-06, "loss": 0.2027, "step": 14740 }, { "epoch": 1.0231693041404726, "grad_norm": 1.078125, "learning_rate": 2.4349565217391304e-06, "loss": 0.2397, "step": 14750 }, { "epoch": 1.0238629958812053, "grad_norm": 0.9921875, "learning_rate": 2.433217391304348e-06, "loss": 0.2239, "step": 14760 }, { "epoch": 1.024556687621938, "grad_norm": 1.21875, "learning_rate": 2.431478260869565e-06, "loss": 0.2252, "step": 14770 }, { "epoch": 1.0252503793626706, "grad_norm": 1.296875, "learning_rate": 2.429739130434783e-06, "loss": 0.2448, "step": 14780 }, { "epoch": 1.0259440711034034, "grad_norm": 1.375, "learning_rate": 2.428e-06, "loss": 0.2628, "step": 14790 }, { "epoch": 1.026637762844136, "grad_norm": 1.5703125, "learning_rate": 2.4262608695652177e-06, "loss": 0.2583, "step": 14800 }, { "epoch": 1.0273314545848689, "grad_norm": 1.171875, "learning_rate": 2.424521739130435e-06, "loss": 0.2329, "step": 14810 }, { "epoch": 1.0280251463256016, "grad_norm": 1.171875, "learning_rate": 2.4227826086956524e-06, "loss": 0.2318, "step": 14820 }, { "epoch": 1.0287188380663344, "grad_norm": 1.46875, "learning_rate": 2.4210434782608698e-06, "loss": 0.2351, "step": 14830 }, { "epoch": 1.0294125298070669, "grad_norm": 1.078125, "learning_rate": 2.419304347826087e-06, "loss": 0.2533, "step": 14840 }, { "epoch": 1.0301062215477996, "grad_norm": 1.6328125, "learning_rate": 2.4175652173913045e-06, "loss": 0.2254, "step": 14850 }, { "epoch": 1.0307999132885324, "grad_norm": 0.90234375, "learning_rate": 2.415826086956522e-06, "loss": 0.2149, "step": 14860 }, { "epoch": 1.0314936050292651, "grad_norm": 1.4453125, "learning_rate": 2.4140869565217393e-06, "loss": 0.2698, "step": 14870 }, { "epoch": 1.0321872967699979, "grad_norm": 1.40625, "learning_rate": 2.4123478260869566e-06, "loss": 0.1998, "step": 14880 }, { "epoch": 1.0328809885107306, "grad_norm": 1.1484375, "learning_rate": 2.410608695652174e-06, "loss": 0.241, "step": 14890 }, { "epoch": 1.0335746802514632, "grad_norm": 1.0859375, "learning_rate": 2.4088695652173914e-06, "loss": 0.2182, "step": 14900 }, { "epoch": 1.034268371992196, "grad_norm": 1.3359375, "learning_rate": 2.4071304347826088e-06, "loss": 0.2561, "step": 14910 }, { "epoch": 1.0349620637329286, "grad_norm": 1.0859375, "learning_rate": 2.4053913043478265e-06, "loss": 0.2321, "step": 14920 }, { "epoch": 1.0356557554736614, "grad_norm": 1.3828125, "learning_rate": 2.4036521739130435e-06, "loss": 0.2393, "step": 14930 }, { "epoch": 1.0363494472143941, "grad_norm": 0.8984375, "learning_rate": 2.4019130434782613e-06, "loss": 0.23, "step": 14940 }, { "epoch": 1.037043138955127, "grad_norm": 1.125, "learning_rate": 2.4001739130434782e-06, "loss": 0.2397, "step": 14950 }, { "epoch": 1.0377368306958594, "grad_norm": 1.078125, "learning_rate": 2.398434782608696e-06, "loss": 0.3271, "step": 14960 }, { "epoch": 1.0384305224365922, "grad_norm": 1.1953125, "learning_rate": 2.3966956521739134e-06, "loss": 0.2324, "step": 14970 }, { "epoch": 1.039124214177325, "grad_norm": 1.265625, "learning_rate": 2.3949565217391308e-06, "loss": 0.2342, "step": 14980 }, { "epoch": 1.0398179059180577, "grad_norm": 1.0625, "learning_rate": 2.393217391304348e-06, "loss": 0.2481, "step": 14990 }, { "epoch": 1.0405115976587904, "grad_norm": 1.1875, "learning_rate": 2.3914782608695655e-06, "loss": 0.2157, "step": 15000 }, { "epoch": 1.0412052893995232, "grad_norm": 1.515625, "learning_rate": 2.389739130434783e-06, "loss": 0.2339, "step": 15010 }, { "epoch": 1.0418989811402557, "grad_norm": 1.1484375, "learning_rate": 2.3880000000000003e-06, "loss": 0.2366, "step": 15020 }, { "epoch": 1.0425926728809884, "grad_norm": 1.0859375, "learning_rate": 2.3862608695652176e-06, "loss": 0.227, "step": 15030 }, { "epoch": 1.0432863646217212, "grad_norm": 1.1796875, "learning_rate": 2.384521739130435e-06, "loss": 0.2355, "step": 15040 }, { "epoch": 1.043980056362454, "grad_norm": 1.25, "learning_rate": 2.3827826086956524e-06, "loss": 0.2874, "step": 15050 }, { "epoch": 1.0446737481031867, "grad_norm": 1.0703125, "learning_rate": 2.3810434782608697e-06, "loss": 0.235, "step": 15060 }, { "epoch": 1.0453674398439194, "grad_norm": 1.0546875, "learning_rate": 2.379304347826087e-06, "loss": 0.2298, "step": 15070 }, { "epoch": 1.046061131584652, "grad_norm": 1.328125, "learning_rate": 2.3775652173913045e-06, "loss": 0.2549, "step": 15080 }, { "epoch": 1.0467548233253847, "grad_norm": 1.0859375, "learning_rate": 2.375826086956522e-06, "loss": 0.1974, "step": 15090 }, { "epoch": 1.0474485150661175, "grad_norm": 1.359375, "learning_rate": 2.3740869565217392e-06, "loss": 0.2338, "step": 15100 }, { "epoch": 1.0481422068068502, "grad_norm": 1.640625, "learning_rate": 2.3723478260869566e-06, "loss": 0.2567, "step": 15110 }, { "epoch": 1.048835898547583, "grad_norm": 1.453125, "learning_rate": 2.370608695652174e-06, "loss": 0.2979, "step": 15120 }, { "epoch": 1.0495295902883157, "grad_norm": 1.328125, "learning_rate": 2.3688695652173913e-06, "loss": 0.2831, "step": 15130 }, { "epoch": 1.0502232820290482, "grad_norm": 1.15625, "learning_rate": 2.3671304347826087e-06, "loss": 0.2648, "step": 15140 }, { "epoch": 1.050916973769781, "grad_norm": 1.1015625, "learning_rate": 2.3653913043478265e-06, "loss": 0.2652, "step": 15150 }, { "epoch": 1.0516106655105137, "grad_norm": 0.89453125, "learning_rate": 2.3636521739130435e-06, "loss": 0.1974, "step": 15160 }, { "epoch": 1.0523043572512465, "grad_norm": 1.0, "learning_rate": 2.3619130434782613e-06, "loss": 0.2288, "step": 15170 }, { "epoch": 1.0529980489919792, "grad_norm": 1.0546875, "learning_rate": 2.360173913043478e-06, "loss": 0.2257, "step": 15180 }, { "epoch": 1.053691740732712, "grad_norm": 1.0078125, "learning_rate": 2.358434782608696e-06, "loss": 0.2343, "step": 15190 }, { "epoch": 1.0543854324734445, "grad_norm": 1.78125, "learning_rate": 2.3566956521739134e-06, "loss": 0.2327, "step": 15200 }, { "epoch": 1.0550791242141773, "grad_norm": 1.1875, "learning_rate": 2.3549565217391307e-06, "loss": 0.2529, "step": 15210 }, { "epoch": 1.05577281595491, "grad_norm": 1.09375, "learning_rate": 2.353217391304348e-06, "loss": 0.2498, "step": 15220 }, { "epoch": 1.0564665076956428, "grad_norm": 1.046875, "learning_rate": 2.3514782608695655e-06, "loss": 0.2349, "step": 15230 }, { "epoch": 1.0571601994363755, "grad_norm": 0.9765625, "learning_rate": 2.349739130434783e-06, "loss": 0.2656, "step": 15240 }, { "epoch": 1.0578538911771083, "grad_norm": 1.25, "learning_rate": 2.3480000000000002e-06, "loss": 0.2531, "step": 15250 }, { "epoch": 1.0585475829178408, "grad_norm": 1.6171875, "learning_rate": 2.3462608695652176e-06, "loss": 0.2521, "step": 15260 }, { "epoch": 1.0592412746585735, "grad_norm": 1.375, "learning_rate": 2.344521739130435e-06, "loss": 0.266, "step": 15270 }, { "epoch": 1.0599349663993063, "grad_norm": 1.171875, "learning_rate": 2.3427826086956523e-06, "loss": 0.2564, "step": 15280 }, { "epoch": 1.060628658140039, "grad_norm": 1.1171875, "learning_rate": 2.3410434782608697e-06, "loss": 0.2466, "step": 15290 }, { "epoch": 1.0613223498807718, "grad_norm": 1.125, "learning_rate": 2.339304347826087e-06, "loss": 0.2282, "step": 15300 }, { "epoch": 1.0620160416215045, "grad_norm": 1.1796875, "learning_rate": 2.3375652173913044e-06, "loss": 0.3059, "step": 15310 }, { "epoch": 1.062709733362237, "grad_norm": 1.265625, "learning_rate": 2.335826086956522e-06, "loss": 0.3653, "step": 15320 }, { "epoch": 1.0634034251029698, "grad_norm": 1.265625, "learning_rate": 2.3340869565217396e-06, "loss": 0.228, "step": 15330 }, { "epoch": 1.0640971168437026, "grad_norm": 1.0390625, "learning_rate": 2.3323478260869566e-06, "loss": 0.2498, "step": 15340 }, { "epoch": 1.0647908085844353, "grad_norm": 1.0078125, "learning_rate": 2.3306086956521744e-06, "loss": 0.2772, "step": 15350 }, { "epoch": 1.065484500325168, "grad_norm": 1.1015625, "learning_rate": 2.3288695652173913e-06, "loss": 0.2754, "step": 15360 }, { "epoch": 1.0661781920659008, "grad_norm": 1.140625, "learning_rate": 2.3271304347826087e-06, "loss": 0.2358, "step": 15370 }, { "epoch": 1.0668718838066333, "grad_norm": 1.265625, "learning_rate": 2.3253913043478265e-06, "loss": 0.3299, "step": 15380 }, { "epoch": 1.067565575547366, "grad_norm": 1.1171875, "learning_rate": 2.3236521739130434e-06, "loss": 0.2327, "step": 15390 }, { "epoch": 1.0682592672880988, "grad_norm": 1.1328125, "learning_rate": 2.321913043478261e-06, "loss": 0.2344, "step": 15400 }, { "epoch": 1.0689529590288316, "grad_norm": 1.3203125, "learning_rate": 2.320173913043478e-06, "loss": 0.2527, "step": 15410 }, { "epoch": 1.0696466507695643, "grad_norm": 1.625, "learning_rate": 2.318434782608696e-06, "loss": 0.2592, "step": 15420 }, { "epoch": 1.070340342510297, "grad_norm": 1.3203125, "learning_rate": 2.3166956521739133e-06, "loss": 0.271, "step": 15430 }, { "epoch": 1.0710340342510296, "grad_norm": 1.2109375, "learning_rate": 2.3149565217391307e-06, "loss": 0.2486, "step": 15440 }, { "epoch": 1.0717277259917624, "grad_norm": 1.09375, "learning_rate": 2.313217391304348e-06, "loss": 0.2046, "step": 15450 }, { "epoch": 1.072421417732495, "grad_norm": 1.3984375, "learning_rate": 2.3114782608695654e-06, "loss": 0.2947, "step": 15460 }, { "epoch": 1.0731151094732279, "grad_norm": 1.078125, "learning_rate": 2.309739130434783e-06, "loss": 0.2328, "step": 15470 }, { "epoch": 1.0738088012139606, "grad_norm": 1.2734375, "learning_rate": 2.308e-06, "loss": 0.2694, "step": 15480 }, { "epoch": 1.0745024929546934, "grad_norm": 1.171875, "learning_rate": 2.3062608695652176e-06, "loss": 0.2282, "step": 15490 }, { "epoch": 1.0751961846954259, "grad_norm": 1.15625, "learning_rate": 2.304521739130435e-06, "loss": 0.2505, "step": 15500 }, { "epoch": 1.0758898764361586, "grad_norm": 1.34375, "learning_rate": 2.3027826086956523e-06, "loss": 0.219, "step": 15510 }, { "epoch": 1.0765835681768914, "grad_norm": 1.2578125, "learning_rate": 2.3010434782608697e-06, "loss": 0.2595, "step": 15520 }, { "epoch": 1.0772772599176241, "grad_norm": 0.9609375, "learning_rate": 2.299304347826087e-06, "loss": 0.2773, "step": 15530 }, { "epoch": 1.0779709516583569, "grad_norm": 1.0546875, "learning_rate": 2.2975652173913044e-06, "loss": 0.2845, "step": 15540 }, { "epoch": 1.0786646433990896, "grad_norm": 1.3203125, "learning_rate": 2.2958260869565218e-06, "loss": 0.2097, "step": 15550 }, { "epoch": 1.0793583351398222, "grad_norm": 1.390625, "learning_rate": 2.2940869565217396e-06, "loss": 0.2575, "step": 15560 }, { "epoch": 1.080052026880555, "grad_norm": 1.421875, "learning_rate": 2.2923478260869565e-06, "loss": 0.2349, "step": 15570 }, { "epoch": 1.0807457186212877, "grad_norm": 1.203125, "learning_rate": 2.2906086956521743e-06, "loss": 0.2144, "step": 15580 }, { "epoch": 1.0814394103620204, "grad_norm": 1.5234375, "learning_rate": 2.2888695652173913e-06, "loss": 0.2695, "step": 15590 }, { "epoch": 1.0821331021027532, "grad_norm": 1.171875, "learning_rate": 2.287130434782609e-06, "loss": 0.2336, "step": 15600 }, { "epoch": 1.082826793843486, "grad_norm": 1.046875, "learning_rate": 2.2853913043478264e-06, "loss": 0.2254, "step": 15610 }, { "epoch": 1.0835204855842184, "grad_norm": 1.0546875, "learning_rate": 2.283652173913044e-06, "loss": 0.1932, "step": 15620 }, { "epoch": 1.0842141773249512, "grad_norm": 1.40625, "learning_rate": 2.281913043478261e-06, "loss": 0.2222, "step": 15630 }, { "epoch": 1.084907869065684, "grad_norm": 1.1640625, "learning_rate": 2.2801739130434785e-06, "loss": 0.197, "step": 15640 }, { "epoch": 1.0856015608064167, "grad_norm": 1.203125, "learning_rate": 2.278434782608696e-06, "loss": 0.2951, "step": 15650 }, { "epoch": 1.0862952525471494, "grad_norm": 1.1953125, "learning_rate": 2.2766956521739133e-06, "loss": 0.2323, "step": 15660 }, { "epoch": 1.0869889442878822, "grad_norm": 1.2109375, "learning_rate": 2.2749565217391307e-06, "loss": 0.2331, "step": 15670 }, { "epoch": 1.0876826360286147, "grad_norm": 1.0234375, "learning_rate": 2.273217391304348e-06, "loss": 0.2251, "step": 15680 }, { "epoch": 1.0883763277693475, "grad_norm": 1.2265625, "learning_rate": 2.2714782608695654e-06, "loss": 0.2431, "step": 15690 }, { "epoch": 1.0890700195100802, "grad_norm": 1.2109375, "learning_rate": 2.2697391304347828e-06, "loss": 0.2459, "step": 15700 }, { "epoch": 1.089763711250813, "grad_norm": 1.046875, "learning_rate": 2.268e-06, "loss": 0.1969, "step": 15710 }, { "epoch": 1.0904574029915457, "grad_norm": 1.3125, "learning_rate": 2.2662608695652175e-06, "loss": 0.2647, "step": 15720 }, { "epoch": 1.0911510947322784, "grad_norm": 1.0546875, "learning_rate": 2.264521739130435e-06, "loss": 0.2552, "step": 15730 }, { "epoch": 1.091844786473011, "grad_norm": 1.0625, "learning_rate": 2.2627826086956523e-06, "loss": 0.2258, "step": 15740 }, { "epoch": 1.0925384782137437, "grad_norm": 1.203125, "learning_rate": 2.2610434782608696e-06, "loss": 0.268, "step": 15750 }, { "epoch": 1.0932321699544765, "grad_norm": 1.09375, "learning_rate": 2.259304347826087e-06, "loss": 0.2232, "step": 15760 }, { "epoch": 1.0939258616952092, "grad_norm": 1.625, "learning_rate": 2.2575652173913044e-06, "loss": 0.2888, "step": 15770 }, { "epoch": 1.094619553435942, "grad_norm": 1.03125, "learning_rate": 2.2558260869565217e-06, "loss": 0.2236, "step": 15780 }, { "epoch": 1.0953132451766745, "grad_norm": 1.3515625, "learning_rate": 2.2540869565217395e-06, "loss": 0.2405, "step": 15790 }, { "epoch": 1.0960069369174072, "grad_norm": 1.484375, "learning_rate": 2.2523478260869565e-06, "loss": 0.2135, "step": 15800 }, { "epoch": 1.09670062865814, "grad_norm": 1.171875, "learning_rate": 2.2506086956521743e-06, "loss": 0.2673, "step": 15810 }, { "epoch": 1.0973943203988727, "grad_norm": 1.0, "learning_rate": 2.2488695652173912e-06, "loss": 0.2108, "step": 15820 }, { "epoch": 1.0980880121396055, "grad_norm": 1.3046875, "learning_rate": 2.247130434782609e-06, "loss": 0.2335, "step": 15830 }, { "epoch": 1.0987817038803382, "grad_norm": 1.3828125, "learning_rate": 2.2453913043478264e-06, "loss": 0.2494, "step": 15840 }, { "epoch": 1.099475395621071, "grad_norm": 1.8125, "learning_rate": 2.2436521739130438e-06, "loss": 0.371, "step": 15850 }, { "epoch": 1.1001690873618035, "grad_norm": 1.1171875, "learning_rate": 2.241913043478261e-06, "loss": 0.3074, "step": 15860 }, { "epoch": 1.1008627791025363, "grad_norm": 1.15625, "learning_rate": 2.2401739130434785e-06, "loss": 0.2505, "step": 15870 }, { "epoch": 1.101556470843269, "grad_norm": 1.0859375, "learning_rate": 2.238434782608696e-06, "loss": 0.2206, "step": 15880 }, { "epoch": 1.1022501625840018, "grad_norm": 1.1015625, "learning_rate": 2.2366956521739132e-06, "loss": 0.28, "step": 15890 }, { "epoch": 1.1029438543247345, "grad_norm": 1.1484375, "learning_rate": 2.2349565217391306e-06, "loss": 0.2172, "step": 15900 }, { "epoch": 1.103637546065467, "grad_norm": 1.140625, "learning_rate": 2.233217391304348e-06, "loss": 0.2868, "step": 15910 }, { "epoch": 1.1043312378061998, "grad_norm": 0.91796875, "learning_rate": 2.2314782608695654e-06, "loss": 0.2454, "step": 15920 }, { "epoch": 1.1050249295469325, "grad_norm": 1.4609375, "learning_rate": 2.2297391304347827e-06, "loss": 0.2951, "step": 15930 }, { "epoch": 1.1057186212876653, "grad_norm": 1.1640625, "learning_rate": 2.228e-06, "loss": 0.2183, "step": 15940 }, { "epoch": 1.106412313028398, "grad_norm": 1.0546875, "learning_rate": 2.226260869565218e-06, "loss": 0.2182, "step": 15950 }, { "epoch": 1.1071060047691308, "grad_norm": 1.1875, "learning_rate": 2.224521739130435e-06, "loss": 0.1977, "step": 15960 }, { "epoch": 1.1077996965098635, "grad_norm": 1.4140625, "learning_rate": 2.2227826086956526e-06, "loss": 0.2098, "step": 15970 }, { "epoch": 1.108493388250596, "grad_norm": 0.9375, "learning_rate": 2.2210434782608696e-06, "loss": 0.2466, "step": 15980 }, { "epoch": 1.1091870799913288, "grad_norm": 1.0390625, "learning_rate": 2.219304347826087e-06, "loss": 0.2097, "step": 15990 }, { "epoch": 1.1098807717320616, "grad_norm": 1.125, "learning_rate": 2.2175652173913043e-06, "loss": 0.2058, "step": 16000 }, { "epoch": 1.1105744634727943, "grad_norm": 1.703125, "learning_rate": 2.2158260869565217e-06, "loss": 0.2597, "step": 16010 }, { "epoch": 1.111268155213527, "grad_norm": 1.28125, "learning_rate": 2.2140869565217395e-06, "loss": 0.2305, "step": 16020 }, { "epoch": 1.1119618469542596, "grad_norm": 1.2265625, "learning_rate": 2.2123478260869564e-06, "loss": 0.2129, "step": 16030 }, { "epoch": 1.1126555386949923, "grad_norm": 0.984375, "learning_rate": 2.2106086956521742e-06, "loss": 0.2537, "step": 16040 }, { "epoch": 1.113349230435725, "grad_norm": 1.0703125, "learning_rate": 2.208869565217391e-06, "loss": 0.2468, "step": 16050 }, { "epoch": 1.1140429221764578, "grad_norm": 1.078125, "learning_rate": 2.207130434782609e-06, "loss": 0.2024, "step": 16060 }, { "epoch": 1.1147366139171906, "grad_norm": 1.4375, "learning_rate": 2.2053913043478263e-06, "loss": 0.2803, "step": 16070 }, { "epoch": 1.1154303056579233, "grad_norm": 1.2421875, "learning_rate": 2.2036521739130437e-06, "loss": 0.2135, "step": 16080 }, { "epoch": 1.116123997398656, "grad_norm": 1.0078125, "learning_rate": 2.201913043478261e-06, "loss": 0.2603, "step": 16090 }, { "epoch": 1.1168176891393886, "grad_norm": 1.0703125, "learning_rate": 2.2001739130434785e-06, "loss": 0.2369, "step": 16100 }, { "epoch": 1.1175113808801214, "grad_norm": 1.1953125, "learning_rate": 2.198434782608696e-06, "loss": 0.2354, "step": 16110 }, { "epoch": 1.1182050726208541, "grad_norm": 1.109375, "learning_rate": 2.196695652173913e-06, "loss": 0.1965, "step": 16120 }, { "epoch": 1.1188987643615869, "grad_norm": 1.0546875, "learning_rate": 2.1949565217391306e-06, "loss": 0.2309, "step": 16130 }, { "epoch": 1.1195924561023196, "grad_norm": 1.140625, "learning_rate": 2.193217391304348e-06, "loss": 0.244, "step": 16140 }, { "epoch": 1.1202861478430521, "grad_norm": 1.3984375, "learning_rate": 2.1914782608695653e-06, "loss": 0.2881, "step": 16150 }, { "epoch": 1.1209798395837849, "grad_norm": 1.109375, "learning_rate": 2.1897391304347827e-06, "loss": 0.2484, "step": 16160 }, { "epoch": 1.1216735313245176, "grad_norm": 1.0703125, "learning_rate": 2.188e-06, "loss": 0.2577, "step": 16170 }, { "epoch": 1.1223672230652504, "grad_norm": 1.25, "learning_rate": 2.186260869565218e-06, "loss": 0.2607, "step": 16180 }, { "epoch": 1.1230609148059831, "grad_norm": 1.7109375, "learning_rate": 2.184521739130435e-06, "loss": 0.2481, "step": 16190 }, { "epoch": 1.1237546065467159, "grad_norm": 1.0234375, "learning_rate": 2.1827826086956526e-06, "loss": 0.2204, "step": 16200 }, { "epoch": 1.1244482982874484, "grad_norm": 1.328125, "learning_rate": 2.1810434782608695e-06, "loss": 0.2596, "step": 16210 }, { "epoch": 1.1251419900281812, "grad_norm": 1.109375, "learning_rate": 2.1793043478260873e-06, "loss": 0.2827, "step": 16220 }, { "epoch": 1.125835681768914, "grad_norm": 1.109375, "learning_rate": 2.1775652173913047e-06, "loss": 0.2533, "step": 16230 }, { "epoch": 1.1265293735096467, "grad_norm": 0.953125, "learning_rate": 2.175826086956522e-06, "loss": 0.2578, "step": 16240 }, { "epoch": 1.1272230652503794, "grad_norm": 1.125, "learning_rate": 2.1740869565217395e-06, "loss": 0.2142, "step": 16250 }, { "epoch": 1.1279167569911122, "grad_norm": 1.1953125, "learning_rate": 2.172347826086957e-06, "loss": 0.2088, "step": 16260 }, { "epoch": 1.1286104487318447, "grad_norm": 1.109375, "learning_rate": 2.170608695652174e-06, "loss": 0.2214, "step": 16270 }, { "epoch": 1.1293041404725774, "grad_norm": 1.0703125, "learning_rate": 2.1688695652173916e-06, "loss": 0.2313, "step": 16280 }, { "epoch": 1.1299978322133102, "grad_norm": 1.0859375, "learning_rate": 2.167130434782609e-06, "loss": 0.2488, "step": 16290 }, { "epoch": 1.130691523954043, "grad_norm": 1.1171875, "learning_rate": 2.1653913043478263e-06, "loss": 0.2474, "step": 16300 }, { "epoch": 1.1313852156947757, "grad_norm": 1.2421875, "learning_rate": 2.1636521739130437e-06, "loss": 0.2476, "step": 16310 }, { "epoch": 1.1320789074355084, "grad_norm": 1.03125, "learning_rate": 2.161913043478261e-06, "loss": 0.2353, "step": 16320 }, { "epoch": 1.1327725991762412, "grad_norm": 0.796875, "learning_rate": 2.1601739130434784e-06, "loss": 0.2156, "step": 16330 }, { "epoch": 1.1334662909169737, "grad_norm": 1.1640625, "learning_rate": 2.158434782608696e-06, "loss": 0.2233, "step": 16340 }, { "epoch": 1.1341599826577065, "grad_norm": 1.375, "learning_rate": 2.156695652173913e-06, "loss": 0.2461, "step": 16350 }, { "epoch": 1.1348536743984392, "grad_norm": 0.96875, "learning_rate": 2.1549565217391305e-06, "loss": 0.2388, "step": 16360 }, { "epoch": 1.135547366139172, "grad_norm": 1.140625, "learning_rate": 2.153217391304348e-06, "loss": 0.1843, "step": 16370 }, { "epoch": 1.1362410578799047, "grad_norm": 1.328125, "learning_rate": 2.1514782608695653e-06, "loss": 0.2327, "step": 16380 }, { "epoch": 1.1369347496206372, "grad_norm": 1.2421875, "learning_rate": 2.1497391304347826e-06, "loss": 0.364, "step": 16390 }, { "epoch": 1.13762844136137, "grad_norm": 1.078125, "learning_rate": 2.148e-06, "loss": 0.2377, "step": 16400 }, { "epoch": 1.1383221331021027, "grad_norm": 1.2734375, "learning_rate": 2.146260869565218e-06, "loss": 0.2195, "step": 16410 }, { "epoch": 1.1390158248428355, "grad_norm": 1.2265625, "learning_rate": 2.1445217391304348e-06, "loss": 0.2238, "step": 16420 }, { "epoch": 1.1397095165835682, "grad_norm": 1.1875, "learning_rate": 2.1427826086956526e-06, "loss": 0.2505, "step": 16430 }, { "epoch": 1.140403208324301, "grad_norm": 1.0625, "learning_rate": 2.1410434782608695e-06, "loss": 0.2871, "step": 16440 }, { "epoch": 1.1410969000650335, "grad_norm": 0.98828125, "learning_rate": 2.1393043478260873e-06, "loss": 0.248, "step": 16450 }, { "epoch": 1.1417905918057663, "grad_norm": 0.98828125, "learning_rate": 2.1375652173913047e-06, "loss": 0.2396, "step": 16460 }, { "epoch": 1.142484283546499, "grad_norm": 1.171875, "learning_rate": 2.135826086956522e-06, "loss": 0.225, "step": 16470 }, { "epoch": 1.1431779752872318, "grad_norm": 1.0390625, "learning_rate": 2.1340869565217394e-06, "loss": 0.2223, "step": 16480 }, { "epoch": 1.1438716670279645, "grad_norm": 1.2109375, "learning_rate": 2.1323478260869568e-06, "loss": 0.2289, "step": 16490 }, { "epoch": 1.1445653587686972, "grad_norm": 1.2734375, "learning_rate": 2.130608695652174e-06, "loss": 0.2196, "step": 16500 }, { "epoch": 1.1452590505094298, "grad_norm": 1.375, "learning_rate": 2.1288695652173915e-06, "loss": 0.3, "step": 16510 }, { "epoch": 1.1459527422501625, "grad_norm": 1.078125, "learning_rate": 2.127130434782609e-06, "loss": 0.2349, "step": 16520 }, { "epoch": 1.1466464339908953, "grad_norm": 1.2578125, "learning_rate": 2.1253913043478263e-06, "loss": 0.2661, "step": 16530 }, { "epoch": 1.147340125731628, "grad_norm": 1.0546875, "learning_rate": 2.1236521739130436e-06, "loss": 0.2279, "step": 16540 }, { "epoch": 1.1480338174723608, "grad_norm": 0.9375, "learning_rate": 2.121913043478261e-06, "loss": 0.1994, "step": 16550 }, { "epoch": 1.1487275092130935, "grad_norm": 1.1328125, "learning_rate": 2.1201739130434784e-06, "loss": 0.2183, "step": 16560 }, { "epoch": 1.149421200953826, "grad_norm": 1.2109375, "learning_rate": 2.1184347826086957e-06, "loss": 0.213, "step": 16570 }, { "epoch": 1.1501148926945588, "grad_norm": 1.140625, "learning_rate": 2.116695652173913e-06, "loss": 0.2149, "step": 16580 }, { "epoch": 1.1508085844352915, "grad_norm": 1.171875, "learning_rate": 2.114956521739131e-06, "loss": 0.2344, "step": 16590 }, { "epoch": 1.1515022761760243, "grad_norm": 1.1640625, "learning_rate": 2.113217391304348e-06, "loss": 0.307, "step": 16600 }, { "epoch": 1.152195967916757, "grad_norm": 1.125, "learning_rate": 2.1114782608695652e-06, "loss": 0.2201, "step": 16610 }, { "epoch": 1.1528896596574898, "grad_norm": 1.234375, "learning_rate": 2.1097391304347826e-06, "loss": 0.2571, "step": 16620 }, { "epoch": 1.1535833513982223, "grad_norm": 1.4140625, "learning_rate": 2.108e-06, "loss": 0.2089, "step": 16630 }, { "epoch": 1.154277043138955, "grad_norm": 1.25, "learning_rate": 2.1062608695652178e-06, "loss": 0.3009, "step": 16640 }, { "epoch": 1.1549707348796878, "grad_norm": 1.15625, "learning_rate": 2.1045217391304347e-06, "loss": 0.2993, "step": 16650 }, { "epoch": 1.1556644266204206, "grad_norm": 1.03125, "learning_rate": 2.1027826086956525e-06, "loss": 0.2074, "step": 16660 }, { "epoch": 1.1563581183611533, "grad_norm": 1.2578125, "learning_rate": 2.1010434782608695e-06, "loss": 0.2703, "step": 16670 }, { "epoch": 1.157051810101886, "grad_norm": 0.91796875, "learning_rate": 2.0993043478260873e-06, "loss": 0.2232, "step": 16680 }, { "epoch": 1.1577455018426186, "grad_norm": 1.1484375, "learning_rate": 2.0975652173913046e-06, "loss": 0.2249, "step": 16690 }, { "epoch": 1.1584391935833513, "grad_norm": 1.3984375, "learning_rate": 2.095826086956522e-06, "loss": 0.2514, "step": 16700 }, { "epoch": 1.159132885324084, "grad_norm": 1.6171875, "learning_rate": 2.0940869565217394e-06, "loss": 0.2142, "step": 16710 }, { "epoch": 1.1598265770648168, "grad_norm": 1.1640625, "learning_rate": 2.0923478260869567e-06, "loss": 0.2381, "step": 16720 }, { "epoch": 1.1605202688055496, "grad_norm": 1.359375, "learning_rate": 2.090608695652174e-06, "loss": 0.2305, "step": 16730 }, { "epoch": 1.1612139605462823, "grad_norm": 1.203125, "learning_rate": 2.0888695652173915e-06, "loss": 0.2416, "step": 16740 }, { "epoch": 1.1619076522870149, "grad_norm": 1.5625, "learning_rate": 2.087130434782609e-06, "loss": 0.3016, "step": 16750 }, { "epoch": 1.1626013440277476, "grad_norm": 1.9765625, "learning_rate": 2.0853913043478262e-06, "loss": 0.3109, "step": 16760 }, { "epoch": 1.1632950357684804, "grad_norm": 1.2734375, "learning_rate": 2.0836521739130436e-06, "loss": 0.2107, "step": 16770 }, { "epoch": 1.1639887275092131, "grad_norm": 1.2421875, "learning_rate": 2.081913043478261e-06, "loss": 0.2386, "step": 16780 }, { "epoch": 1.1646824192499459, "grad_norm": 1.1640625, "learning_rate": 2.0801739130434783e-06, "loss": 0.2904, "step": 16790 }, { "epoch": 1.1653761109906786, "grad_norm": 1.078125, "learning_rate": 2.0784347826086957e-06, "loss": 0.2027, "step": 16800 }, { "epoch": 1.1660698027314111, "grad_norm": 1.3046875, "learning_rate": 2.076695652173913e-06, "loss": 0.2178, "step": 16810 }, { "epoch": 1.166763494472144, "grad_norm": 1.3828125, "learning_rate": 2.074956521739131e-06, "loss": 0.2418, "step": 16820 }, { "epoch": 1.1674571862128766, "grad_norm": 1.4453125, "learning_rate": 2.073217391304348e-06, "loss": 0.2702, "step": 16830 }, { "epoch": 1.1681508779536094, "grad_norm": 1.484375, "learning_rate": 2.0714782608695656e-06, "loss": 0.2903, "step": 16840 }, { "epoch": 1.1688445696943421, "grad_norm": 1.7109375, "learning_rate": 2.0697391304347826e-06, "loss": 0.2985, "step": 16850 }, { "epoch": 1.1695382614350747, "grad_norm": 1.0625, "learning_rate": 2.0680000000000004e-06, "loss": 0.2587, "step": 16860 }, { "epoch": 1.1702319531758074, "grad_norm": 1.1171875, "learning_rate": 2.0662608695652177e-06, "loss": 0.2319, "step": 16870 }, { "epoch": 1.1709256449165402, "grad_norm": 1.1796875, "learning_rate": 2.064521739130435e-06, "loss": 0.2573, "step": 16880 }, { "epoch": 1.171619336657273, "grad_norm": 1.3515625, "learning_rate": 2.0627826086956525e-06, "loss": 0.2489, "step": 16890 }, { "epoch": 1.1723130283980057, "grad_norm": 1.3828125, "learning_rate": 2.06104347826087e-06, "loss": 0.2214, "step": 16900 }, { "epoch": 1.1730067201387384, "grad_norm": 1.15625, "learning_rate": 2.0593043478260872e-06, "loss": 0.2204, "step": 16910 }, { "epoch": 1.1737004118794712, "grad_norm": 1.234375, "learning_rate": 2.0575652173913046e-06, "loss": 0.2512, "step": 16920 }, { "epoch": 1.1743941036202037, "grad_norm": 1.3359375, "learning_rate": 2.055826086956522e-06, "loss": 0.2447, "step": 16930 }, { "epoch": 1.1750877953609364, "grad_norm": 1.296875, "learning_rate": 2.0540869565217393e-06, "loss": 0.2006, "step": 16940 }, { "epoch": 1.1757814871016692, "grad_norm": 1.09375, "learning_rate": 2.0523478260869567e-06, "loss": 0.2405, "step": 16950 }, { "epoch": 1.176475178842402, "grad_norm": 1.5703125, "learning_rate": 2.050608695652174e-06, "loss": 0.2739, "step": 16960 }, { "epoch": 1.1771688705831347, "grad_norm": 1.1171875, "learning_rate": 2.0488695652173914e-06, "loss": 0.2256, "step": 16970 }, { "epoch": 1.1778625623238672, "grad_norm": 1.1640625, "learning_rate": 2.047130434782609e-06, "loss": 0.2264, "step": 16980 }, { "epoch": 1.1785562540646, "grad_norm": 1.59375, "learning_rate": 2.045391304347826e-06, "loss": 0.2409, "step": 16990 }, { "epoch": 1.1792499458053327, "grad_norm": 1.265625, "learning_rate": 2.0436521739130436e-06, "loss": 0.2711, "step": 17000 }, { "epoch": 1.1799436375460655, "grad_norm": 1.1328125, "learning_rate": 2.041913043478261e-06, "loss": 0.2456, "step": 17010 }, { "epoch": 1.1806373292867982, "grad_norm": 1.171875, "learning_rate": 2.0401739130434783e-06, "loss": 0.244, "step": 17020 }, { "epoch": 1.181331021027531, "grad_norm": 1.5390625, "learning_rate": 2.0384347826086957e-06, "loss": 0.2305, "step": 17030 }, { "epoch": 1.1820247127682637, "grad_norm": 0.91796875, "learning_rate": 2.036695652173913e-06, "loss": 0.2297, "step": 17040 }, { "epoch": 1.1827184045089962, "grad_norm": 1.09375, "learning_rate": 2.034956521739131e-06, "loss": 0.2263, "step": 17050 }, { "epoch": 1.183412096249729, "grad_norm": 1.6328125, "learning_rate": 2.0332173913043478e-06, "loss": 0.2718, "step": 17060 }, { "epoch": 1.1841057879904617, "grad_norm": 1.3359375, "learning_rate": 2.0314782608695656e-06, "loss": 0.2617, "step": 17070 }, { "epoch": 1.1847994797311945, "grad_norm": 1.3515625, "learning_rate": 2.0297391304347825e-06, "loss": 0.2891, "step": 17080 }, { "epoch": 1.1854931714719272, "grad_norm": 1.25, "learning_rate": 2.0280000000000003e-06, "loss": 0.2944, "step": 17090 }, { "epoch": 1.1861868632126598, "grad_norm": 1.140625, "learning_rate": 2.0262608695652177e-06, "loss": 0.2248, "step": 17100 }, { "epoch": 1.1868805549533925, "grad_norm": 1.078125, "learning_rate": 2.024521739130435e-06, "loss": 0.2692, "step": 17110 }, { "epoch": 1.1875742466941253, "grad_norm": 1.328125, "learning_rate": 2.0227826086956524e-06, "loss": 0.2349, "step": 17120 }, { "epoch": 1.188267938434858, "grad_norm": 1.234375, "learning_rate": 2.02104347826087e-06, "loss": 0.2191, "step": 17130 }, { "epoch": 1.1889616301755908, "grad_norm": 1.0078125, "learning_rate": 2.019304347826087e-06, "loss": 0.2398, "step": 17140 }, { "epoch": 1.1896553219163235, "grad_norm": 1.2578125, "learning_rate": 2.0175652173913045e-06, "loss": 0.2761, "step": 17150 }, { "epoch": 1.1903490136570563, "grad_norm": 1.171875, "learning_rate": 2.015826086956522e-06, "loss": 0.2036, "step": 17160 }, { "epoch": 1.1910427053977888, "grad_norm": 1.609375, "learning_rate": 2.0140869565217393e-06, "loss": 0.2639, "step": 17170 }, { "epoch": 1.1917363971385215, "grad_norm": 1.25, "learning_rate": 2.0123478260869567e-06, "loss": 0.195, "step": 17180 }, { "epoch": 1.1924300888792543, "grad_norm": 1.3125, "learning_rate": 2.010608695652174e-06, "loss": 0.2145, "step": 17190 }, { "epoch": 1.193123780619987, "grad_norm": 1.84375, "learning_rate": 2.0088695652173914e-06, "loss": 0.3457, "step": 17200 }, { "epoch": 1.1938174723607198, "grad_norm": 1.28125, "learning_rate": 2.007130434782609e-06, "loss": 0.2548, "step": 17210 }, { "epoch": 1.1945111641014523, "grad_norm": 1.0234375, "learning_rate": 2.005391304347826e-06, "loss": 0.24, "step": 17220 }, { "epoch": 1.195204855842185, "grad_norm": 1.1640625, "learning_rate": 2.0036521739130435e-06, "loss": 0.2872, "step": 17230 }, { "epoch": 1.1958985475829178, "grad_norm": 1.296875, "learning_rate": 2.001913043478261e-06, "loss": 0.231, "step": 17240 }, { "epoch": 1.1965922393236506, "grad_norm": 1.3515625, "learning_rate": 2.0001739130434783e-06, "loss": 0.2923, "step": 17250 }, { "epoch": 1.1972859310643833, "grad_norm": 1.2109375, "learning_rate": 1.9984347826086956e-06, "loss": 0.2832, "step": 17260 }, { "epoch": 1.197979622805116, "grad_norm": 1.4921875, "learning_rate": 1.996695652173913e-06, "loss": 0.2875, "step": 17270 }, { "epoch": 1.1986733145458488, "grad_norm": 1.0390625, "learning_rate": 1.994956521739131e-06, "loss": 0.2527, "step": 17280 }, { "epoch": 1.1993670062865813, "grad_norm": 1.265625, "learning_rate": 1.9932173913043477e-06, "loss": 0.2425, "step": 17290 }, { "epoch": 1.200060698027314, "grad_norm": 1.265625, "learning_rate": 1.9914782608695655e-06, "loss": 0.2546, "step": 17300 }, { "epoch": 1.2007543897680468, "grad_norm": 1.4140625, "learning_rate": 1.9897391304347825e-06, "loss": 0.255, "step": 17310 }, { "epoch": 1.2014480815087796, "grad_norm": 0.9609375, "learning_rate": 1.9880000000000003e-06, "loss": 0.2478, "step": 17320 }, { "epoch": 1.2021417732495123, "grad_norm": 1.0390625, "learning_rate": 1.9862608695652176e-06, "loss": 0.2083, "step": 17330 }, { "epoch": 1.2028354649902449, "grad_norm": 1.5078125, "learning_rate": 1.984521739130435e-06, "loss": 0.2753, "step": 17340 }, { "epoch": 1.2035291567309776, "grad_norm": 1.2265625, "learning_rate": 1.9827826086956524e-06, "loss": 0.3128, "step": 17350 }, { "epoch": 1.2042228484717103, "grad_norm": 1.0, "learning_rate": 1.9810434782608698e-06, "loss": 0.2478, "step": 17360 }, { "epoch": 1.204916540212443, "grad_norm": 1.15625, "learning_rate": 1.979304347826087e-06, "loss": 0.328, "step": 17370 }, { "epoch": 1.2056102319531758, "grad_norm": 1.1875, "learning_rate": 1.9775652173913045e-06, "loss": 0.2781, "step": 17380 }, { "epoch": 1.2063039236939086, "grad_norm": 1.203125, "learning_rate": 1.975826086956522e-06, "loss": 0.3133, "step": 17390 }, { "epoch": 1.2069976154346413, "grad_norm": 1.140625, "learning_rate": 1.9740869565217392e-06, "loss": 0.212, "step": 17400 }, { "epoch": 1.2076913071753739, "grad_norm": 1.3515625, "learning_rate": 1.9723478260869566e-06, "loss": 0.2839, "step": 17410 }, { "epoch": 1.2083849989161066, "grad_norm": 1.34375, "learning_rate": 1.970608695652174e-06, "loss": 0.2442, "step": 17420 }, { "epoch": 1.2090786906568394, "grad_norm": 1.28125, "learning_rate": 1.9688695652173914e-06, "loss": 0.2751, "step": 17430 }, { "epoch": 1.2097723823975721, "grad_norm": 1.265625, "learning_rate": 1.967130434782609e-06, "loss": 0.2529, "step": 17440 }, { "epoch": 1.2104660741383049, "grad_norm": 1.3125, "learning_rate": 1.965391304347826e-06, "loss": 0.2791, "step": 17450 }, { "epoch": 1.2111597658790374, "grad_norm": 1.2109375, "learning_rate": 1.963652173913044e-06, "loss": 0.2276, "step": 17460 }, { "epoch": 1.2118534576197701, "grad_norm": 1.7890625, "learning_rate": 1.961913043478261e-06, "loss": 0.2515, "step": 17470 }, { "epoch": 1.212547149360503, "grad_norm": 1.140625, "learning_rate": 1.9601739130434786e-06, "loss": 0.2579, "step": 17480 }, { "epoch": 1.2132408411012356, "grad_norm": 0.98046875, "learning_rate": 1.9584347826086956e-06, "loss": 0.2222, "step": 17490 }, { "epoch": 1.2139345328419684, "grad_norm": 0.98046875, "learning_rate": 1.9566956521739134e-06, "loss": 0.2072, "step": 17500 }, { "epoch": 1.2146282245827011, "grad_norm": 1.203125, "learning_rate": 1.9549565217391308e-06, "loss": 0.2278, "step": 17510 }, { "epoch": 1.215321916323434, "grad_norm": 1.0390625, "learning_rate": 1.953217391304348e-06, "loss": 0.2338, "step": 17520 }, { "epoch": 1.2160156080641664, "grad_norm": 1.109375, "learning_rate": 1.9514782608695655e-06, "loss": 0.235, "step": 17530 }, { "epoch": 1.2167092998048992, "grad_norm": 1.40625, "learning_rate": 1.9497391304347824e-06, "loss": 0.2458, "step": 17540 }, { "epoch": 1.217402991545632, "grad_norm": 1.21875, "learning_rate": 1.9480000000000002e-06, "loss": 0.2322, "step": 17550 }, { "epoch": 1.2180966832863647, "grad_norm": 0.8671875, "learning_rate": 1.9462608695652176e-06, "loss": 0.2301, "step": 17560 }, { "epoch": 1.2187903750270974, "grad_norm": 1.015625, "learning_rate": 1.944521739130435e-06, "loss": 0.2707, "step": 17570 }, { "epoch": 1.21948406676783, "grad_norm": 1.3046875, "learning_rate": 1.9427826086956524e-06, "loss": 0.2158, "step": 17580 }, { "epoch": 1.2201777585085627, "grad_norm": 1.28125, "learning_rate": 1.9410434782608697e-06, "loss": 0.2404, "step": 17590 }, { "epoch": 1.2208714502492954, "grad_norm": 1.1484375, "learning_rate": 1.939304347826087e-06, "loss": 0.2562, "step": 17600 }, { "epoch": 1.2215651419900282, "grad_norm": 1.2109375, "learning_rate": 1.9375652173913045e-06, "loss": 0.2154, "step": 17610 }, { "epoch": 1.222258833730761, "grad_norm": 1.2890625, "learning_rate": 1.935826086956522e-06, "loss": 0.2309, "step": 17620 }, { "epoch": 1.2229525254714937, "grad_norm": 1.296875, "learning_rate": 1.934086956521739e-06, "loss": 0.229, "step": 17630 }, { "epoch": 1.2236462172122264, "grad_norm": 1.453125, "learning_rate": 1.9323478260869566e-06, "loss": 0.2428, "step": 17640 }, { "epoch": 1.224339908952959, "grad_norm": 1.1171875, "learning_rate": 1.930608695652174e-06, "loss": 0.2227, "step": 17650 }, { "epoch": 1.2250336006936917, "grad_norm": 1.59375, "learning_rate": 1.9288695652173913e-06, "loss": 0.3012, "step": 17660 }, { "epoch": 1.2257272924344245, "grad_norm": 0.9765625, "learning_rate": 1.927130434782609e-06, "loss": 0.2796, "step": 17670 }, { "epoch": 1.2264209841751572, "grad_norm": 0.953125, "learning_rate": 1.925391304347826e-06, "loss": 0.2393, "step": 17680 }, { "epoch": 1.22711467591589, "grad_norm": 1.3984375, "learning_rate": 1.923652173913044e-06, "loss": 0.2685, "step": 17690 }, { "epoch": 1.2278083676566225, "grad_norm": 1.15625, "learning_rate": 1.921913043478261e-06, "loss": 0.2598, "step": 17700 }, { "epoch": 1.2285020593973552, "grad_norm": 1.0625, "learning_rate": 1.9201739130434786e-06, "loss": 0.237, "step": 17710 }, { "epoch": 1.229195751138088, "grad_norm": 0.9765625, "learning_rate": 1.9184347826086955e-06, "loss": 0.2421, "step": 17720 }, { "epoch": 1.2298894428788207, "grad_norm": 1.1484375, "learning_rate": 1.9166956521739133e-06, "loss": 0.2501, "step": 17730 }, { "epoch": 1.2305831346195535, "grad_norm": 1.1640625, "learning_rate": 1.9149565217391307e-06, "loss": 0.21, "step": 17740 }, { "epoch": 1.2312768263602862, "grad_norm": 1.1328125, "learning_rate": 1.913217391304348e-06, "loss": 0.2558, "step": 17750 }, { "epoch": 1.2319705181010188, "grad_norm": 1.203125, "learning_rate": 1.9114782608695655e-06, "loss": 0.2314, "step": 17760 }, { "epoch": 1.2326642098417515, "grad_norm": 1.6171875, "learning_rate": 1.909739130434783e-06, "loss": 0.2358, "step": 17770 }, { "epoch": 1.2333579015824843, "grad_norm": 1.0234375, "learning_rate": 1.908e-06, "loss": 0.2434, "step": 17780 }, { "epoch": 1.234051593323217, "grad_norm": 1.3515625, "learning_rate": 1.9062608695652176e-06, "loss": 0.2677, "step": 17790 }, { "epoch": 1.2347452850639498, "grad_norm": 1.21875, "learning_rate": 1.904521739130435e-06, "loss": 0.2215, "step": 17800 }, { "epoch": 1.2354389768046825, "grad_norm": 1.0, "learning_rate": 1.9027826086956525e-06, "loss": 0.2329, "step": 17810 }, { "epoch": 1.236132668545415, "grad_norm": 1.203125, "learning_rate": 1.9010434782608697e-06, "loss": 0.2311, "step": 17820 }, { "epoch": 1.2368263602861478, "grad_norm": 1.0390625, "learning_rate": 1.8993043478260873e-06, "loss": 0.2237, "step": 17830 }, { "epoch": 1.2375200520268805, "grad_norm": 1.3984375, "learning_rate": 1.8975652173913044e-06, "loss": 0.254, "step": 17840 }, { "epoch": 1.2382137437676133, "grad_norm": 1.171875, "learning_rate": 1.8958260869565218e-06, "loss": 0.2176, "step": 17850 }, { "epoch": 1.238907435508346, "grad_norm": 1.3671875, "learning_rate": 1.8940869565217394e-06, "loss": 0.2342, "step": 17860 }, { "epoch": 1.2396011272490788, "grad_norm": 1.0703125, "learning_rate": 1.8923478260869565e-06, "loss": 0.2417, "step": 17870 }, { "epoch": 1.2402948189898113, "grad_norm": 1.2109375, "learning_rate": 1.8906086956521741e-06, "loss": 0.2707, "step": 17880 }, { "epoch": 1.240988510730544, "grad_norm": 1.484375, "learning_rate": 1.8888695652173913e-06, "loss": 0.2278, "step": 17890 }, { "epoch": 1.2416822024712768, "grad_norm": 1.203125, "learning_rate": 1.8871304347826089e-06, "loss": 0.2455, "step": 17900 }, { "epoch": 1.2423758942120096, "grad_norm": 1.34375, "learning_rate": 1.8853913043478262e-06, "loss": 0.2575, "step": 17910 }, { "epoch": 1.2430695859527423, "grad_norm": 1.5, "learning_rate": 1.8836521739130436e-06, "loss": 0.2959, "step": 17920 }, { "epoch": 1.243763277693475, "grad_norm": 1.234375, "learning_rate": 1.881913043478261e-06, "loss": 0.2936, "step": 17930 }, { "epoch": 1.2444569694342076, "grad_norm": 1.2265625, "learning_rate": 1.8801739130434786e-06, "loss": 0.3073, "step": 17940 }, { "epoch": 1.2451506611749403, "grad_norm": 1.3125, "learning_rate": 1.8784347826086957e-06, "loss": 0.2216, "step": 17950 }, { "epoch": 1.245844352915673, "grad_norm": 1.328125, "learning_rate": 1.8766956521739133e-06, "loss": 0.2443, "step": 17960 }, { "epoch": 1.2465380446564058, "grad_norm": 1.15625, "learning_rate": 1.8749565217391305e-06, "loss": 0.2522, "step": 17970 }, { "epoch": 1.2472317363971386, "grad_norm": 1.0546875, "learning_rate": 1.873217391304348e-06, "loss": 0.2404, "step": 17980 }, { "epoch": 1.2479254281378713, "grad_norm": 1.3046875, "learning_rate": 1.8714782608695652e-06, "loss": 0.2511, "step": 17990 }, { "epoch": 1.2486191198786039, "grad_norm": 0.95703125, "learning_rate": 1.8697391304347828e-06, "loss": 0.3144, "step": 18000 }, { "epoch": 1.2493128116193366, "grad_norm": 1.0703125, "learning_rate": 1.8680000000000002e-06, "loss": 0.2274, "step": 18010 }, { "epoch": 1.2500065033600694, "grad_norm": 1.2421875, "learning_rate": 1.8662608695652175e-06, "loss": 0.3148, "step": 18020 }, { "epoch": 1.250700195100802, "grad_norm": 1.0546875, "learning_rate": 1.864521739130435e-06, "loss": 0.2123, "step": 18030 }, { "epoch": 1.2513938868415349, "grad_norm": 1.25, "learning_rate": 1.8627826086956525e-06, "loss": 0.2383, "step": 18040 }, { "epoch": 1.2520875785822674, "grad_norm": 1.140625, "learning_rate": 1.8610434782608696e-06, "loss": 0.2308, "step": 18050 }, { "epoch": 1.2527812703230001, "grad_norm": 1.1328125, "learning_rate": 1.8593043478260872e-06, "loss": 0.2131, "step": 18060 }, { "epoch": 1.2534749620637329, "grad_norm": 1.1640625, "learning_rate": 1.8575652173913044e-06, "loss": 0.264, "step": 18070 }, { "epoch": 1.2541686538044656, "grad_norm": 1.1953125, "learning_rate": 1.855826086956522e-06, "loss": 0.2561, "step": 18080 }, { "epoch": 1.2548623455451984, "grad_norm": 1.5078125, "learning_rate": 1.8540869565217393e-06, "loss": 0.3174, "step": 18090 }, { "epoch": 1.2555560372859311, "grad_norm": 1.1015625, "learning_rate": 1.8523478260869567e-06, "loss": 0.2361, "step": 18100 }, { "epoch": 1.2562497290266639, "grad_norm": 1.4765625, "learning_rate": 1.850608695652174e-06, "loss": 0.2928, "step": 18110 }, { "epoch": 1.2569434207673966, "grad_norm": 1.21875, "learning_rate": 1.8488695652173917e-06, "loss": 0.1963, "step": 18120 }, { "epoch": 1.2576371125081292, "grad_norm": 1.25, "learning_rate": 1.8471304347826088e-06, "loss": 0.261, "step": 18130 }, { "epoch": 1.258330804248862, "grad_norm": 0.95703125, "learning_rate": 1.8453913043478264e-06, "loss": 0.2202, "step": 18140 }, { "epoch": 1.2590244959895946, "grad_norm": 1.21875, "learning_rate": 1.8436521739130436e-06, "loss": 0.3148, "step": 18150 }, { "epoch": 1.2597181877303274, "grad_norm": 1.1484375, "learning_rate": 1.841913043478261e-06, "loss": 0.2427, "step": 18160 }, { "epoch": 1.26041187947106, "grad_norm": 1.125, "learning_rate": 1.8401739130434785e-06, "loss": 0.2313, "step": 18170 }, { "epoch": 1.2611055712117927, "grad_norm": 1.40625, "learning_rate": 1.8384347826086957e-06, "loss": 0.2316, "step": 18180 }, { "epoch": 1.2617992629525254, "grad_norm": 1.3203125, "learning_rate": 1.8366956521739133e-06, "loss": 0.2298, "step": 18190 }, { "epoch": 1.2624929546932582, "grad_norm": 1.0234375, "learning_rate": 1.8349565217391304e-06, "loss": 0.2165, "step": 18200 }, { "epoch": 1.263186646433991, "grad_norm": 1.296875, "learning_rate": 1.833217391304348e-06, "loss": 0.2387, "step": 18210 }, { "epoch": 1.2638803381747237, "grad_norm": 1.0703125, "learning_rate": 1.8314782608695652e-06, "loss": 0.2883, "step": 18220 }, { "epoch": 1.2645740299154564, "grad_norm": 1.6015625, "learning_rate": 1.8297391304347827e-06, "loss": 0.2552, "step": 18230 }, { "epoch": 1.265267721656189, "grad_norm": 1.2734375, "learning_rate": 1.8280000000000001e-06, "loss": 0.2563, "step": 18240 }, { "epoch": 1.2659614133969217, "grad_norm": 1.0859375, "learning_rate": 1.8262608695652175e-06, "loss": 0.2093, "step": 18250 }, { "epoch": 1.2666551051376544, "grad_norm": 1.2265625, "learning_rate": 1.8245217391304349e-06, "loss": 0.241, "step": 18260 }, { "epoch": 1.2673487968783872, "grad_norm": 1.4140625, "learning_rate": 1.8227826086956524e-06, "loss": 0.2887, "step": 18270 }, { "epoch": 1.26804248861912, "grad_norm": 1.203125, "learning_rate": 1.8210434782608696e-06, "loss": 0.2639, "step": 18280 }, { "epoch": 1.2687361803598525, "grad_norm": 1.1640625, "learning_rate": 1.8193043478260872e-06, "loss": 0.2598, "step": 18290 }, { "epoch": 1.2694298721005852, "grad_norm": 1.1640625, "learning_rate": 1.8175652173913043e-06, "loss": 0.248, "step": 18300 }, { "epoch": 1.270123563841318, "grad_norm": 1.296875, "learning_rate": 1.815826086956522e-06, "loss": 0.1977, "step": 18310 }, { "epoch": 1.2708172555820507, "grad_norm": 1.5859375, "learning_rate": 1.8140869565217393e-06, "loss": 0.2536, "step": 18320 }, { "epoch": 1.2715109473227835, "grad_norm": 1.1484375, "learning_rate": 1.8123478260869567e-06, "loss": 0.2557, "step": 18330 }, { "epoch": 1.2722046390635162, "grad_norm": 1.3828125, "learning_rate": 1.810608695652174e-06, "loss": 0.3069, "step": 18340 }, { "epoch": 1.272898330804249, "grad_norm": 1.0234375, "learning_rate": 1.8088695652173916e-06, "loss": 0.2315, "step": 18350 }, { "epoch": 1.2735920225449815, "grad_norm": 1.2890625, "learning_rate": 1.8071304347826088e-06, "loss": 0.202, "step": 18360 }, { "epoch": 1.2742857142857142, "grad_norm": 1.1796875, "learning_rate": 1.8053913043478264e-06, "loss": 0.304, "step": 18370 }, { "epoch": 1.274979406026447, "grad_norm": 1.59375, "learning_rate": 1.8036521739130435e-06, "loss": 0.259, "step": 18380 }, { "epoch": 1.2756730977671797, "grad_norm": 1.2578125, "learning_rate": 1.8019130434782611e-06, "loss": 0.2263, "step": 18390 }, { "epoch": 1.2763667895079125, "grad_norm": 1.1328125, "learning_rate": 1.8001739130434785e-06, "loss": 0.2381, "step": 18400 }, { "epoch": 1.277060481248645, "grad_norm": 1.5, "learning_rate": 1.7984347826086958e-06, "loss": 0.2154, "step": 18410 }, { "epoch": 1.2777541729893778, "grad_norm": 1.28125, "learning_rate": 1.7966956521739132e-06, "loss": 0.2191, "step": 18420 }, { "epoch": 1.2784478647301105, "grad_norm": 1.0, "learning_rate": 1.7949565217391308e-06, "loss": 0.2516, "step": 18430 }, { "epoch": 1.2791415564708433, "grad_norm": 1.2109375, "learning_rate": 1.793217391304348e-06, "loss": 0.2271, "step": 18440 }, { "epoch": 1.279835248211576, "grad_norm": 1.1015625, "learning_rate": 1.7914782608695655e-06, "loss": 0.2308, "step": 18450 }, { "epoch": 1.2805289399523088, "grad_norm": 1.25, "learning_rate": 1.7897391304347827e-06, "loss": 0.3075, "step": 18460 }, { "epoch": 1.2812226316930415, "grad_norm": 1.4609375, "learning_rate": 1.788e-06, "loss": 0.2446, "step": 18470 }, { "epoch": 1.281916323433774, "grad_norm": 1.1953125, "learning_rate": 1.7862608695652174e-06, "loss": 0.2359, "step": 18480 }, { "epoch": 1.2826100151745068, "grad_norm": 2.171875, "learning_rate": 1.7845217391304348e-06, "loss": 0.3021, "step": 18490 }, { "epoch": 1.2833037069152395, "grad_norm": 1.4609375, "learning_rate": 1.7827826086956524e-06, "loss": 0.2255, "step": 18500 }, { "epoch": 1.2839973986559723, "grad_norm": 1.2578125, "learning_rate": 1.7810434782608696e-06, "loss": 0.2562, "step": 18510 }, { "epoch": 1.284691090396705, "grad_norm": 1.3515625, "learning_rate": 1.7793043478260871e-06, "loss": 0.2513, "step": 18520 }, { "epoch": 1.2853847821374376, "grad_norm": 1.4140625, "learning_rate": 1.7775652173913043e-06, "loss": 0.2296, "step": 18530 }, { "epoch": 1.2860784738781703, "grad_norm": 1.3046875, "learning_rate": 1.7758260869565219e-06, "loss": 0.2612, "step": 18540 }, { "epoch": 1.286772165618903, "grad_norm": 1.1171875, "learning_rate": 1.7740869565217393e-06, "loss": 0.2088, "step": 18550 }, { "epoch": 1.2874658573596358, "grad_norm": 1.046875, "learning_rate": 1.7723478260869566e-06, "loss": 0.2278, "step": 18560 }, { "epoch": 1.2881595491003686, "grad_norm": 1.015625, "learning_rate": 1.770608695652174e-06, "loss": 0.2695, "step": 18570 }, { "epoch": 1.2888532408411013, "grad_norm": 0.921875, "learning_rate": 1.7688695652173916e-06, "loss": 0.237, "step": 18580 }, { "epoch": 1.289546932581834, "grad_norm": 1.53125, "learning_rate": 1.7671304347826087e-06, "loss": 0.3094, "step": 18590 }, { "epoch": 1.2902406243225666, "grad_norm": 1.03125, "learning_rate": 1.7653913043478263e-06, "loss": 0.2231, "step": 18600 }, { "epoch": 1.2909343160632993, "grad_norm": 1.1328125, "learning_rate": 1.7636521739130435e-06, "loss": 0.2858, "step": 18610 }, { "epoch": 1.291628007804032, "grad_norm": 1.1484375, "learning_rate": 1.761913043478261e-06, "loss": 0.2832, "step": 18620 }, { "epoch": 1.2923216995447648, "grad_norm": 1.3359375, "learning_rate": 1.7601739130434784e-06, "loss": 0.2447, "step": 18630 }, { "epoch": 1.2930153912854976, "grad_norm": 1.1484375, "learning_rate": 1.7584347826086958e-06, "loss": 0.2911, "step": 18640 }, { "epoch": 1.2937090830262301, "grad_norm": 1.078125, "learning_rate": 1.7566956521739132e-06, "loss": 0.261, "step": 18650 }, { "epoch": 1.2944027747669629, "grad_norm": 1.5, "learning_rate": 1.7549565217391308e-06, "loss": 0.2486, "step": 18660 }, { "epoch": 1.2950964665076956, "grad_norm": 0.9765625, "learning_rate": 1.753217391304348e-06, "loss": 0.236, "step": 18670 }, { "epoch": 1.2957901582484284, "grad_norm": 1.390625, "learning_rate": 1.7514782608695655e-06, "loss": 0.2216, "step": 18680 }, { "epoch": 1.296483849989161, "grad_norm": 1.125, "learning_rate": 1.7497391304347827e-06, "loss": 0.2348, "step": 18690 }, { "epoch": 1.2971775417298939, "grad_norm": 1.1328125, "learning_rate": 1.7480000000000002e-06, "loss": 0.2821, "step": 18700 }, { "epoch": 1.2978712334706266, "grad_norm": 1.0703125, "learning_rate": 1.7462608695652174e-06, "loss": 0.2281, "step": 18710 }, { "epoch": 1.2985649252113591, "grad_norm": 1.21875, "learning_rate": 1.744521739130435e-06, "loss": 0.3438, "step": 18720 }, { "epoch": 1.2992586169520919, "grad_norm": 1.8515625, "learning_rate": 1.7427826086956524e-06, "loss": 0.307, "step": 18730 }, { "epoch": 1.2999523086928246, "grad_norm": 1.3125, "learning_rate": 1.7410434782608697e-06, "loss": 0.2792, "step": 18740 }, { "epoch": 1.3006460004335574, "grad_norm": 1.328125, "learning_rate": 1.739304347826087e-06, "loss": 0.246, "step": 18750 }, { "epoch": 1.3013396921742901, "grad_norm": 1.203125, "learning_rate": 1.7375652173913047e-06, "loss": 0.2229, "step": 18760 }, { "epoch": 1.3020333839150227, "grad_norm": 1.4921875, "learning_rate": 1.7358260869565218e-06, "loss": 0.2921, "step": 18770 }, { "epoch": 1.3027270756557554, "grad_norm": 0.9921875, "learning_rate": 1.7340869565217392e-06, "loss": 0.2882, "step": 18780 }, { "epoch": 1.3034207673964882, "grad_norm": 1.2265625, "learning_rate": 1.7323478260869566e-06, "loss": 0.2465, "step": 18790 }, { "epoch": 1.304114459137221, "grad_norm": 1.21875, "learning_rate": 1.730608695652174e-06, "loss": 0.2151, "step": 18800 }, { "epoch": 1.3048081508779537, "grad_norm": 1.1640625, "learning_rate": 1.7288695652173915e-06, "loss": 0.2568, "step": 18810 }, { "epoch": 1.3055018426186864, "grad_norm": 1.5703125, "learning_rate": 1.7271304347826087e-06, "loss": 0.2507, "step": 18820 }, { "epoch": 1.3061955343594192, "grad_norm": 1.21875, "learning_rate": 1.7253913043478263e-06, "loss": 0.2791, "step": 18830 }, { "epoch": 1.3068892261001517, "grad_norm": 1.5390625, "learning_rate": 1.7236521739130434e-06, "loss": 0.2841, "step": 18840 }, { "epoch": 1.3075829178408844, "grad_norm": 0.9609375, "learning_rate": 1.721913043478261e-06, "loss": 0.2459, "step": 18850 }, { "epoch": 1.3082766095816172, "grad_norm": 1.078125, "learning_rate": 1.7201739130434784e-06, "loss": 0.2317, "step": 18860 }, { "epoch": 1.30897030132235, "grad_norm": 1.328125, "learning_rate": 1.7184347826086958e-06, "loss": 0.2455, "step": 18870 }, { "epoch": 1.3096639930630825, "grad_norm": 1.6953125, "learning_rate": 1.7166956521739131e-06, "loss": 0.2476, "step": 18880 }, { "epoch": 1.3103576848038152, "grad_norm": 1.484375, "learning_rate": 1.7149565217391307e-06, "loss": 0.2529, "step": 18890 }, { "epoch": 1.311051376544548, "grad_norm": 1.2734375, "learning_rate": 1.7132173913043479e-06, "loss": 0.2066, "step": 18900 }, { "epoch": 1.3117450682852807, "grad_norm": 1.2109375, "learning_rate": 1.7114782608695655e-06, "loss": 0.2273, "step": 18910 }, { "epoch": 1.3124387600260135, "grad_norm": 1.1171875, "learning_rate": 1.7097391304347826e-06, "loss": 0.1843, "step": 18920 }, { "epoch": 1.3131324517667462, "grad_norm": 1.21875, "learning_rate": 1.7080000000000002e-06, "loss": 0.2144, "step": 18930 }, { "epoch": 1.313826143507479, "grad_norm": 1.1875, "learning_rate": 1.7062608695652174e-06, "loss": 0.2223, "step": 18940 }, { "epoch": 1.3145198352482117, "grad_norm": 1.296875, "learning_rate": 1.704521739130435e-06, "loss": 0.2149, "step": 18950 }, { "epoch": 1.3152135269889442, "grad_norm": 1.28125, "learning_rate": 1.7027826086956523e-06, "loss": 0.2343, "step": 18960 }, { "epoch": 1.315907218729677, "grad_norm": 1.59375, "learning_rate": 1.7010434782608697e-06, "loss": 0.2878, "step": 18970 }, { "epoch": 1.3166009104704097, "grad_norm": 1.484375, "learning_rate": 1.699304347826087e-06, "loss": 0.2503, "step": 18980 }, { "epoch": 1.3172946022111425, "grad_norm": 1.328125, "learning_rate": 1.6975652173913046e-06, "loss": 0.2683, "step": 18990 }, { "epoch": 1.317988293951875, "grad_norm": 0.9453125, "learning_rate": 1.6958260869565218e-06, "loss": 0.226, "step": 19000 }, { "epoch": 1.3186819856926077, "grad_norm": 1.3203125, "learning_rate": 1.6940869565217394e-06, "loss": 0.2354, "step": 19010 }, { "epoch": 1.3193756774333405, "grad_norm": 1.3359375, "learning_rate": 1.6923478260869565e-06, "loss": 0.2512, "step": 19020 }, { "epoch": 1.3200693691740732, "grad_norm": 1.1171875, "learning_rate": 1.6906086956521741e-06, "loss": 0.215, "step": 19030 }, { "epoch": 1.320763060914806, "grad_norm": 1.0703125, "learning_rate": 1.6888695652173915e-06, "loss": 0.2324, "step": 19040 }, { "epoch": 1.3214567526555387, "grad_norm": 1.0390625, "learning_rate": 1.6871304347826089e-06, "loss": 0.2898, "step": 19050 }, { "epoch": 1.3221504443962715, "grad_norm": 1.4453125, "learning_rate": 1.6853913043478262e-06, "loss": 0.2415, "step": 19060 }, { "epoch": 1.3228441361370042, "grad_norm": 1.46875, "learning_rate": 1.6836521739130438e-06, "loss": 0.2365, "step": 19070 }, { "epoch": 1.3235378278777368, "grad_norm": 1.2421875, "learning_rate": 1.681913043478261e-06, "loss": 0.2251, "step": 19080 }, { "epoch": 1.3242315196184695, "grad_norm": 0.9921875, "learning_rate": 1.6801739130434784e-06, "loss": 0.2137, "step": 19090 }, { "epoch": 1.3249252113592023, "grad_norm": 1.3203125, "learning_rate": 1.6784347826086957e-06, "loss": 0.2239, "step": 19100 }, { "epoch": 1.325618903099935, "grad_norm": 1.4609375, "learning_rate": 1.676695652173913e-06, "loss": 0.2748, "step": 19110 }, { "epoch": 1.3263125948406675, "grad_norm": 1.21875, "learning_rate": 1.6749565217391307e-06, "loss": 0.2748, "step": 19120 }, { "epoch": 1.3270062865814003, "grad_norm": 1.0078125, "learning_rate": 1.6732173913043478e-06, "loss": 0.2845, "step": 19130 }, { "epoch": 1.327699978322133, "grad_norm": 1.265625, "learning_rate": 1.6714782608695654e-06, "loss": 0.2227, "step": 19140 }, { "epoch": 1.3283936700628658, "grad_norm": 1.140625, "learning_rate": 1.6697391304347826e-06, "loss": 0.2701, "step": 19150 }, { "epoch": 1.3290873618035985, "grad_norm": 1.1796875, "learning_rate": 1.6680000000000002e-06, "loss": 0.3026, "step": 19160 }, { "epoch": 1.3297810535443313, "grad_norm": 1.34375, "learning_rate": 1.6662608695652175e-06, "loss": 0.2401, "step": 19170 }, { "epoch": 1.330474745285064, "grad_norm": 1.28125, "learning_rate": 1.664521739130435e-06, "loss": 0.2907, "step": 19180 }, { "epoch": 1.3311684370257968, "grad_norm": 1.09375, "learning_rate": 1.6627826086956523e-06, "loss": 0.2404, "step": 19190 }, { "epoch": 1.3318621287665293, "grad_norm": 1.3359375, "learning_rate": 1.6610434782608699e-06, "loss": 0.2548, "step": 19200 }, { "epoch": 1.332555820507262, "grad_norm": 1.2109375, "learning_rate": 1.659304347826087e-06, "loss": 0.3054, "step": 19210 }, { "epoch": 1.3332495122479948, "grad_norm": 1.0859375, "learning_rate": 1.6575652173913046e-06, "loss": 0.2174, "step": 19220 }, { "epoch": 1.3339432039887276, "grad_norm": 1.2734375, "learning_rate": 1.6558260869565218e-06, "loss": 0.2712, "step": 19230 }, { "epoch": 1.33463689572946, "grad_norm": 1.5234375, "learning_rate": 1.6540869565217393e-06, "loss": 0.2191, "step": 19240 }, { "epoch": 1.3353305874701928, "grad_norm": 1.0859375, "learning_rate": 1.6523478260869565e-06, "loss": 0.2889, "step": 19250 }, { "epoch": 1.3360242792109256, "grad_norm": 1.21875, "learning_rate": 1.650608695652174e-06, "loss": 0.2662, "step": 19260 }, { "epoch": 1.3367179709516583, "grad_norm": 1.1328125, "learning_rate": 1.6488695652173915e-06, "loss": 0.2417, "step": 19270 }, { "epoch": 1.337411662692391, "grad_norm": 1.0703125, "learning_rate": 1.6471304347826088e-06, "loss": 0.2668, "step": 19280 }, { "epoch": 1.3381053544331238, "grad_norm": 1.0234375, "learning_rate": 1.6453913043478262e-06, "loss": 0.2028, "step": 19290 }, { "epoch": 1.3387990461738566, "grad_norm": 1.3203125, "learning_rate": 1.6436521739130438e-06, "loss": 0.2831, "step": 19300 }, { "epoch": 1.3394927379145893, "grad_norm": 1.0625, "learning_rate": 1.641913043478261e-06, "loss": 0.2186, "step": 19310 }, { "epoch": 1.3401864296553219, "grad_norm": 1.359375, "learning_rate": 1.6401739130434785e-06, "loss": 0.2243, "step": 19320 }, { "epoch": 1.3408801213960546, "grad_norm": 1.09375, "learning_rate": 1.6384347826086957e-06, "loss": 0.2012, "step": 19330 }, { "epoch": 1.3415738131367874, "grad_norm": 1.3046875, "learning_rate": 1.6366956521739133e-06, "loss": 0.2255, "step": 19340 }, { "epoch": 1.3422675048775201, "grad_norm": 1.046875, "learning_rate": 1.6349565217391306e-06, "loss": 0.2735, "step": 19350 }, { "epoch": 1.3429611966182526, "grad_norm": 1.90625, "learning_rate": 1.633217391304348e-06, "loss": 0.3033, "step": 19360 }, { "epoch": 1.3436548883589854, "grad_norm": 1.3046875, "learning_rate": 1.6314782608695654e-06, "loss": 0.2539, "step": 19370 }, { "epoch": 1.3443485800997181, "grad_norm": 1.1171875, "learning_rate": 1.629739130434783e-06, "loss": 0.2277, "step": 19380 }, { "epoch": 1.3450422718404509, "grad_norm": 1.09375, "learning_rate": 1.6280000000000001e-06, "loss": 0.2215, "step": 19390 }, { "epoch": 1.3457359635811836, "grad_norm": 1.03125, "learning_rate": 1.6262608695652175e-06, "loss": 0.2031, "step": 19400 }, { "epoch": 1.3464296553219164, "grad_norm": 1.2734375, "learning_rate": 1.6245217391304349e-06, "loss": 0.2154, "step": 19410 }, { "epoch": 1.3471233470626491, "grad_norm": 1.359375, "learning_rate": 1.6227826086956522e-06, "loss": 0.2311, "step": 19420 }, { "epoch": 1.3478170388033819, "grad_norm": 1.0546875, "learning_rate": 1.6210434782608698e-06, "loss": 0.2294, "step": 19430 }, { "epoch": 1.3485107305441144, "grad_norm": 1.2421875, "learning_rate": 1.619304347826087e-06, "loss": 0.3163, "step": 19440 }, { "epoch": 1.3492044222848472, "grad_norm": 1.0078125, "learning_rate": 1.6175652173913046e-06, "loss": 0.1935, "step": 19450 }, { "epoch": 1.34989811402558, "grad_norm": 1.453125, "learning_rate": 1.6158260869565217e-06, "loss": 0.2372, "step": 19460 }, { "epoch": 1.3505918057663127, "grad_norm": 1.1484375, "learning_rate": 1.6140869565217393e-06, "loss": 0.3196, "step": 19470 }, { "epoch": 1.3512854975070452, "grad_norm": 1.21875, "learning_rate": 1.6123478260869565e-06, "loss": 0.2137, "step": 19480 }, { "epoch": 1.351979189247778, "grad_norm": 1.0078125, "learning_rate": 1.610608695652174e-06, "loss": 0.2553, "step": 19490 }, { "epoch": 1.3526728809885107, "grad_norm": 1.0859375, "learning_rate": 1.6088695652173914e-06, "loss": 0.2622, "step": 19500 }, { "epoch": 1.3533665727292434, "grad_norm": 0.98046875, "learning_rate": 1.6071304347826088e-06, "loss": 0.2121, "step": 19510 }, { "epoch": 1.3540602644699762, "grad_norm": 1.953125, "learning_rate": 1.6053913043478262e-06, "loss": 0.2425, "step": 19520 }, { "epoch": 1.354753956210709, "grad_norm": 1.0625, "learning_rate": 1.6036521739130437e-06, "loss": 0.2537, "step": 19530 }, { "epoch": 1.3554476479514417, "grad_norm": 1.7890625, "learning_rate": 1.601913043478261e-06, "loss": 0.3255, "step": 19540 }, { "epoch": 1.3561413396921742, "grad_norm": 1.46875, "learning_rate": 1.6001739130434785e-06, "loss": 0.2601, "step": 19550 }, { "epoch": 1.356835031432907, "grad_norm": 1.296875, "learning_rate": 1.5984347826086956e-06, "loss": 0.2258, "step": 19560 }, { "epoch": 1.3575287231736397, "grad_norm": 1.3203125, "learning_rate": 1.5966956521739132e-06, "loss": 0.2909, "step": 19570 }, { "epoch": 1.3582224149143725, "grad_norm": 1.1328125, "learning_rate": 1.5949565217391306e-06, "loss": 0.254, "step": 19580 }, { "epoch": 1.3589161066551052, "grad_norm": 0.94140625, "learning_rate": 1.593217391304348e-06, "loss": 0.2798, "step": 19590 }, { "epoch": 1.3596097983958377, "grad_norm": 1.2890625, "learning_rate": 1.5914782608695653e-06, "loss": 0.2097, "step": 19600 }, { "epoch": 1.3603034901365705, "grad_norm": 1.171875, "learning_rate": 1.589739130434783e-06, "loss": 0.3109, "step": 19610 }, { "epoch": 1.3609971818773032, "grad_norm": 0.94140625, "learning_rate": 1.588e-06, "loss": 0.3035, "step": 19620 }, { "epoch": 1.361690873618036, "grad_norm": 1.3359375, "learning_rate": 1.5862608695652177e-06, "loss": 0.3036, "step": 19630 }, { "epoch": 1.3623845653587687, "grad_norm": 1.203125, "learning_rate": 1.5845217391304348e-06, "loss": 0.2218, "step": 19640 }, { "epoch": 1.3630782570995015, "grad_norm": 1.2890625, "learning_rate": 1.5827826086956524e-06, "loss": 0.2462, "step": 19650 }, { "epoch": 1.3637719488402342, "grad_norm": 0.984375, "learning_rate": 1.5810434782608698e-06, "loss": 0.2062, "step": 19660 }, { "epoch": 1.3644656405809668, "grad_norm": 1.1875, "learning_rate": 1.5793043478260872e-06, "loss": 0.2235, "step": 19670 }, { "epoch": 1.3651593323216995, "grad_norm": 1.5703125, "learning_rate": 1.5775652173913045e-06, "loss": 0.2577, "step": 19680 }, { "epoch": 1.3658530240624323, "grad_norm": 1.0625, "learning_rate": 1.5758260869565221e-06, "loss": 0.2769, "step": 19690 }, { "epoch": 1.366546715803165, "grad_norm": 1.1171875, "learning_rate": 1.5740869565217393e-06, "loss": 0.222, "step": 19700 }, { "epoch": 1.3672404075438978, "grad_norm": 1.1640625, "learning_rate": 1.5723478260869564e-06, "loss": 0.2199, "step": 19710 }, { "epoch": 1.3679340992846303, "grad_norm": 1.1015625, "learning_rate": 1.570608695652174e-06, "loss": 0.2524, "step": 19720 }, { "epoch": 1.368627791025363, "grad_norm": 1.0390625, "learning_rate": 1.5688695652173914e-06, "loss": 0.2201, "step": 19730 }, { "epoch": 1.3693214827660958, "grad_norm": 1.0078125, "learning_rate": 1.5671304347826088e-06, "loss": 0.3256, "step": 19740 }, { "epoch": 1.3700151745068285, "grad_norm": 1.546875, "learning_rate": 1.5653913043478261e-06, "loss": 0.2929, "step": 19750 }, { "epoch": 1.3707088662475613, "grad_norm": 1.2421875, "learning_rate": 1.5636521739130437e-06, "loss": 0.2536, "step": 19760 }, { "epoch": 1.371402557988294, "grad_norm": 1.3828125, "learning_rate": 1.5619130434782609e-06, "loss": 0.2349, "step": 19770 }, { "epoch": 1.3720962497290268, "grad_norm": 1.203125, "learning_rate": 1.5601739130434784e-06, "loss": 0.2195, "step": 19780 }, { "epoch": 1.3727899414697593, "grad_norm": 1.171875, "learning_rate": 1.5584347826086956e-06, "loss": 0.278, "step": 19790 }, { "epoch": 1.373483633210492, "grad_norm": 1.6953125, "learning_rate": 1.5566956521739132e-06, "loss": 0.2423, "step": 19800 }, { "epoch": 1.3741773249512248, "grad_norm": 1.140625, "learning_rate": 1.5549565217391306e-06, "loss": 0.2768, "step": 19810 }, { "epoch": 1.3748710166919575, "grad_norm": 1.0078125, "learning_rate": 1.553217391304348e-06, "loss": 0.2441, "step": 19820 }, { "epoch": 1.3755647084326903, "grad_norm": 1.6015625, "learning_rate": 1.5514782608695653e-06, "loss": 0.2551, "step": 19830 }, { "epoch": 1.3762584001734228, "grad_norm": 1.90625, "learning_rate": 1.5497391304347829e-06, "loss": 0.2861, "step": 19840 }, { "epoch": 1.3769520919141556, "grad_norm": 1.5, "learning_rate": 1.548e-06, "loss": 0.2289, "step": 19850 }, { "epoch": 1.3776457836548883, "grad_norm": 1.3828125, "learning_rate": 1.5462608695652176e-06, "loss": 0.2622, "step": 19860 }, { "epoch": 1.378339475395621, "grad_norm": 1.046875, "learning_rate": 1.5445217391304348e-06, "loss": 0.3027, "step": 19870 }, { "epoch": 1.3790331671363538, "grad_norm": 1.2421875, "learning_rate": 1.5427826086956524e-06, "loss": 0.2128, "step": 19880 }, { "epoch": 1.3797268588770866, "grad_norm": 1.1015625, "learning_rate": 1.5410434782608697e-06, "loss": 0.2341, "step": 19890 }, { "epoch": 1.3804205506178193, "grad_norm": 1.078125, "learning_rate": 1.5393043478260871e-06, "loss": 0.2186, "step": 19900 }, { "epoch": 1.3811142423585518, "grad_norm": 1.21875, "learning_rate": 1.5375652173913045e-06, "loss": 0.2346, "step": 19910 }, { "epoch": 1.3818079340992846, "grad_norm": 1.15625, "learning_rate": 1.535826086956522e-06, "loss": 0.2231, "step": 19920 }, { "epoch": 1.3825016258400173, "grad_norm": 1.1796875, "learning_rate": 1.5340869565217392e-06, "loss": 0.2264, "step": 19930 }, { "epoch": 1.38319531758075, "grad_norm": 1.1640625, "learning_rate": 1.5323478260869568e-06, "loss": 0.2376, "step": 19940 }, { "epoch": 1.3838890093214828, "grad_norm": 1.34375, "learning_rate": 1.530608695652174e-06, "loss": 0.2588, "step": 19950 }, { "epoch": 1.3845827010622154, "grad_norm": 1.2578125, "learning_rate": 1.5288695652173916e-06, "loss": 0.2747, "step": 19960 }, { "epoch": 1.3852763928029481, "grad_norm": 1.421875, "learning_rate": 1.5271304347826087e-06, "loss": 0.2317, "step": 19970 }, { "epoch": 1.3859700845436809, "grad_norm": 1.296875, "learning_rate": 1.5253913043478263e-06, "loss": 0.2596, "step": 19980 }, { "epoch": 1.3866637762844136, "grad_norm": 1.328125, "learning_rate": 1.5236521739130437e-06, "loss": 0.2658, "step": 19990 }, { "epoch": 1.3873574680251464, "grad_norm": 1.140625, "learning_rate": 1.521913043478261e-06, "loss": 0.2576, "step": 20000 }, { "epoch": 1.3880511597658791, "grad_norm": 1.5390625, "learning_rate": 1.5201739130434784e-06, "loss": 0.237, "step": 20010 }, { "epoch": 1.3887448515066119, "grad_norm": 1.1953125, "learning_rate": 1.5184347826086956e-06, "loss": 0.2249, "step": 20020 }, { "epoch": 1.3894385432473444, "grad_norm": 1.3359375, "learning_rate": 1.5166956521739131e-06, "loss": 0.2541, "step": 20030 }, { "epoch": 1.3901322349880771, "grad_norm": 0.87109375, "learning_rate": 1.5149565217391305e-06, "loss": 0.2328, "step": 20040 }, { "epoch": 1.39082592672881, "grad_norm": 0.8515625, "learning_rate": 1.5132173913043479e-06, "loss": 0.2299, "step": 20050 }, { "epoch": 1.3915196184695426, "grad_norm": 1.21875, "learning_rate": 1.5114782608695653e-06, "loss": 0.2728, "step": 20060 }, { "epoch": 1.3922133102102754, "grad_norm": 0.94140625, "learning_rate": 1.5097391304347828e-06, "loss": 0.2113, "step": 20070 }, { "epoch": 1.392907001951008, "grad_norm": 1.1953125, "learning_rate": 1.508e-06, "loss": 0.2075, "step": 20080 }, { "epoch": 1.3936006936917407, "grad_norm": 1.1484375, "learning_rate": 1.5062608695652176e-06, "loss": 0.2242, "step": 20090 }, { "epoch": 1.3942943854324734, "grad_norm": 0.90625, "learning_rate": 1.5045217391304347e-06, "loss": 0.2404, "step": 20100 }, { "epoch": 1.3949880771732062, "grad_norm": 1.25, "learning_rate": 1.5027826086956523e-06, "loss": 0.2921, "step": 20110 }, { "epoch": 1.395681768913939, "grad_norm": 1.3359375, "learning_rate": 1.5010434782608697e-06, "loss": 0.2469, "step": 20120 }, { "epoch": 1.3963754606546717, "grad_norm": 1.53125, "learning_rate": 1.499304347826087e-06, "loss": 0.2326, "step": 20130 }, { "epoch": 1.3970691523954044, "grad_norm": 1.140625, "learning_rate": 1.4975652173913044e-06, "loss": 0.2378, "step": 20140 }, { "epoch": 1.397762844136137, "grad_norm": 1.34375, "learning_rate": 1.495826086956522e-06, "loss": 0.2893, "step": 20150 }, { "epoch": 1.3984565358768697, "grad_norm": 0.7890625, "learning_rate": 1.4940869565217392e-06, "loss": 0.2289, "step": 20160 }, { "epoch": 1.3991502276176024, "grad_norm": 1.3046875, "learning_rate": 1.4923478260869568e-06, "loss": 0.2284, "step": 20170 }, { "epoch": 1.3998439193583352, "grad_norm": 0.953125, "learning_rate": 1.490608695652174e-06, "loss": 0.2175, "step": 20180 }, { "epoch": 1.4005376110990677, "grad_norm": 1.2421875, "learning_rate": 1.4888695652173915e-06, "loss": 0.213, "step": 20190 }, { "epoch": 1.4012313028398005, "grad_norm": 1.6484375, "learning_rate": 1.4871304347826087e-06, "loss": 0.2365, "step": 20200 }, { "epoch": 1.4019249945805332, "grad_norm": 1.515625, "learning_rate": 1.4853913043478263e-06, "loss": 0.238, "step": 20210 }, { "epoch": 1.402618686321266, "grad_norm": 1.4609375, "learning_rate": 1.4836521739130436e-06, "loss": 0.227, "step": 20220 }, { "epoch": 1.4033123780619987, "grad_norm": 1.0546875, "learning_rate": 1.481913043478261e-06, "loss": 0.2311, "step": 20230 }, { "epoch": 1.4040060698027315, "grad_norm": 1.0390625, "learning_rate": 1.4801739130434784e-06, "loss": 0.2447, "step": 20240 }, { "epoch": 1.4046997615434642, "grad_norm": 1.4140625, "learning_rate": 1.478434782608696e-06, "loss": 0.2528, "step": 20250 }, { "epoch": 1.405393453284197, "grad_norm": 1.2734375, "learning_rate": 1.4766956521739131e-06, "loss": 0.2466, "step": 20260 }, { "epoch": 1.4060871450249295, "grad_norm": 1.515625, "learning_rate": 1.4749565217391307e-06, "loss": 0.2668, "step": 20270 }, { "epoch": 1.4067808367656622, "grad_norm": 1.25, "learning_rate": 1.4732173913043478e-06, "loss": 0.2382, "step": 20280 }, { "epoch": 1.407474528506395, "grad_norm": 1.2265625, "learning_rate": 1.4714782608695654e-06, "loss": 0.2393, "step": 20290 }, { "epoch": 1.4081682202471277, "grad_norm": 0.828125, "learning_rate": 1.4697391304347828e-06, "loss": 0.2099, "step": 20300 }, { "epoch": 1.4088619119878603, "grad_norm": 1.1328125, "learning_rate": 1.4680000000000002e-06, "loss": 0.2382, "step": 20310 }, { "epoch": 1.409555603728593, "grad_norm": 1.5703125, "learning_rate": 1.4662608695652175e-06, "loss": 0.3246, "step": 20320 }, { "epoch": 1.4102492954693258, "grad_norm": 1.0703125, "learning_rate": 1.4645217391304347e-06, "loss": 0.3255, "step": 20330 }, { "epoch": 1.4109429872100585, "grad_norm": 1.0234375, "learning_rate": 1.4627826086956523e-06, "loss": 0.1941, "step": 20340 }, { "epoch": 1.4116366789507913, "grad_norm": 1.0859375, "learning_rate": 1.4610434782608697e-06, "loss": 0.2243, "step": 20350 }, { "epoch": 1.412330370691524, "grad_norm": 1.140625, "learning_rate": 1.459304347826087e-06, "loss": 0.219, "step": 20360 }, { "epoch": 1.4130240624322568, "grad_norm": 1.203125, "learning_rate": 1.4575652173913044e-06, "loss": 0.225, "step": 20370 }, { "epoch": 1.4137177541729895, "grad_norm": 1.140625, "learning_rate": 1.455826086956522e-06, "loss": 0.2449, "step": 20380 }, { "epoch": 1.414411445913722, "grad_norm": 0.984375, "learning_rate": 1.4540869565217391e-06, "loss": 0.2189, "step": 20390 }, { "epoch": 1.4151051376544548, "grad_norm": 1.390625, "learning_rate": 1.4523478260869567e-06, "loss": 0.2627, "step": 20400 }, { "epoch": 1.4157988293951875, "grad_norm": 1.046875, "learning_rate": 1.4506086956521739e-06, "loss": 0.2063, "step": 20410 }, { "epoch": 1.4164925211359203, "grad_norm": 1.0078125, "learning_rate": 1.4488695652173915e-06, "loss": 0.213, "step": 20420 }, { "epoch": 1.4171862128766528, "grad_norm": 1.3671875, "learning_rate": 1.4471304347826086e-06, "loss": 0.2139, "step": 20430 }, { "epoch": 1.4178799046173856, "grad_norm": 1.2265625, "learning_rate": 1.4453913043478262e-06, "loss": 0.2276, "step": 20440 }, { "epoch": 1.4185735963581183, "grad_norm": 1.234375, "learning_rate": 1.4436521739130436e-06, "loss": 0.2084, "step": 20450 }, { "epoch": 1.419267288098851, "grad_norm": 0.9765625, "learning_rate": 1.441913043478261e-06, "loss": 0.2437, "step": 20460 }, { "epoch": 1.4199609798395838, "grad_norm": 1.15625, "learning_rate": 1.4401739130434783e-06, "loss": 0.2394, "step": 20470 }, { "epoch": 1.4206546715803166, "grad_norm": 1.4765625, "learning_rate": 1.438434782608696e-06, "loss": 0.251, "step": 20480 }, { "epoch": 1.4213483633210493, "grad_norm": 1.3125, "learning_rate": 1.436695652173913e-06, "loss": 0.288, "step": 20490 }, { "epoch": 1.422042055061782, "grad_norm": 1.0390625, "learning_rate": 1.4349565217391306e-06, "loss": 0.2525, "step": 20500 }, { "epoch": 1.4227357468025146, "grad_norm": 1.2578125, "learning_rate": 1.4332173913043478e-06, "loss": 0.2392, "step": 20510 }, { "epoch": 1.4234294385432473, "grad_norm": 0.96484375, "learning_rate": 1.4314782608695654e-06, "loss": 0.2057, "step": 20520 }, { "epoch": 1.42412313028398, "grad_norm": 1.5, "learning_rate": 1.4297391304347828e-06, "loss": 0.2368, "step": 20530 }, { "epoch": 1.4248168220247128, "grad_norm": 1.265625, "learning_rate": 1.4280000000000001e-06, "loss": 0.2351, "step": 20540 }, { "epoch": 1.4255105137654454, "grad_norm": 1.234375, "learning_rate": 1.4262608695652175e-06, "loss": 0.2066, "step": 20550 }, { "epoch": 1.426204205506178, "grad_norm": 1.6328125, "learning_rate": 1.424521739130435e-06, "loss": 0.2405, "step": 20560 }, { "epoch": 1.4268978972469109, "grad_norm": 1.328125, "learning_rate": 1.4227826086956522e-06, "loss": 0.2749, "step": 20570 }, { "epoch": 1.4275915889876436, "grad_norm": 1.5078125, "learning_rate": 1.4210434782608698e-06, "loss": 0.2632, "step": 20580 }, { "epoch": 1.4282852807283763, "grad_norm": 1.2109375, "learning_rate": 1.419304347826087e-06, "loss": 0.3155, "step": 20590 }, { "epoch": 1.428978972469109, "grad_norm": 1.40625, "learning_rate": 1.4175652173913046e-06, "loss": 0.2016, "step": 20600 }, { "epoch": 1.4296726642098418, "grad_norm": 1.1484375, "learning_rate": 1.415826086956522e-06, "loss": 0.2906, "step": 20610 }, { "epoch": 1.4303663559505746, "grad_norm": 1.7890625, "learning_rate": 1.4140869565217393e-06, "loss": 0.2672, "step": 20620 }, { "epoch": 1.4310600476913071, "grad_norm": 1.609375, "learning_rate": 1.4123478260869567e-06, "loss": 0.2527, "step": 20630 }, { "epoch": 1.4317537394320399, "grad_norm": 1.1796875, "learning_rate": 1.4106086956521738e-06, "loss": 0.3494, "step": 20640 }, { "epoch": 1.4324474311727726, "grad_norm": 0.8984375, "learning_rate": 1.4088695652173914e-06, "loss": 0.2156, "step": 20650 }, { "epoch": 1.4331411229135054, "grad_norm": 1.2421875, "learning_rate": 1.4071304347826086e-06, "loss": 0.2444, "step": 20660 }, { "epoch": 1.433834814654238, "grad_norm": 0.8828125, "learning_rate": 1.4053913043478262e-06, "loss": 0.2617, "step": 20670 }, { "epoch": 1.4345285063949706, "grad_norm": 1.15625, "learning_rate": 1.4036521739130435e-06, "loss": 0.2533, "step": 20680 }, { "epoch": 1.4352221981357034, "grad_norm": 1.1328125, "learning_rate": 1.401913043478261e-06, "loss": 0.2659, "step": 20690 }, { "epoch": 1.4359158898764361, "grad_norm": 1.484375, "learning_rate": 1.4001739130434783e-06, "loss": 0.2591, "step": 20700 }, { "epoch": 1.436609581617169, "grad_norm": 1.1953125, "learning_rate": 1.3984347826086959e-06, "loss": 0.2581, "step": 20710 }, { "epoch": 1.4373032733579016, "grad_norm": 0.96484375, "learning_rate": 1.396695652173913e-06, "loss": 0.273, "step": 20720 }, { "epoch": 1.4379969650986344, "grad_norm": 1.125, "learning_rate": 1.3949565217391306e-06, "loss": 0.2176, "step": 20730 }, { "epoch": 1.4386906568393671, "grad_norm": 1.3359375, "learning_rate": 1.3932173913043478e-06, "loss": 0.2122, "step": 20740 }, { "epoch": 1.4393843485800997, "grad_norm": 1.4140625, "learning_rate": 1.3914782608695654e-06, "loss": 0.2135, "step": 20750 }, { "epoch": 1.4400780403208324, "grad_norm": 0.75, "learning_rate": 1.3897391304347827e-06, "loss": 0.2196, "step": 20760 }, { "epoch": 1.4407717320615652, "grad_norm": 0.82421875, "learning_rate": 1.388e-06, "loss": 0.2397, "step": 20770 }, { "epoch": 1.441465423802298, "grad_norm": 1.2578125, "learning_rate": 1.3862608695652175e-06, "loss": 0.2441, "step": 20780 }, { "epoch": 1.4421591155430304, "grad_norm": 1.1484375, "learning_rate": 1.384521739130435e-06, "loss": 0.2584, "step": 20790 }, { "epoch": 1.4428528072837632, "grad_norm": 1.15625, "learning_rate": 1.3827826086956522e-06, "loss": 0.2123, "step": 20800 }, { "epoch": 1.443546499024496, "grad_norm": 0.96484375, "learning_rate": 1.3810434782608698e-06, "loss": 0.2157, "step": 20810 }, { "epoch": 1.4442401907652287, "grad_norm": 1.4609375, "learning_rate": 1.379304347826087e-06, "loss": 0.2407, "step": 20820 }, { "epoch": 1.4449338825059614, "grad_norm": 1.3203125, "learning_rate": 1.3775652173913045e-06, "loss": 0.2347, "step": 20830 }, { "epoch": 1.4456275742466942, "grad_norm": 1.234375, "learning_rate": 1.375826086956522e-06, "loss": 0.2429, "step": 20840 }, { "epoch": 1.446321265987427, "grad_norm": 1.1328125, "learning_rate": 1.3740869565217393e-06, "loss": 0.2441, "step": 20850 }, { "epoch": 1.4470149577281595, "grad_norm": 1.421875, "learning_rate": 1.3723478260869566e-06, "loss": 0.2429, "step": 20860 }, { "epoch": 1.4477086494688922, "grad_norm": 1.0859375, "learning_rate": 1.3706086956521742e-06, "loss": 0.2133, "step": 20870 }, { "epoch": 1.448402341209625, "grad_norm": 1.0546875, "learning_rate": 1.3688695652173914e-06, "loss": 0.232, "step": 20880 }, { "epoch": 1.4490960329503577, "grad_norm": 1.4375, "learning_rate": 1.367130434782609e-06, "loss": 0.2202, "step": 20890 }, { "epoch": 1.4497897246910905, "grad_norm": 1.109375, "learning_rate": 1.3653913043478261e-06, "loss": 0.2187, "step": 20900 }, { "epoch": 1.450483416431823, "grad_norm": 1.0078125, "learning_rate": 1.3636521739130437e-06, "loss": 0.2306, "step": 20910 }, { "epoch": 1.4511771081725557, "grad_norm": 1.40625, "learning_rate": 1.3619130434782609e-06, "loss": 0.2472, "step": 20920 }, { "epoch": 1.4518707999132885, "grad_norm": 1.78125, "learning_rate": 1.3601739130434782e-06, "loss": 0.2569, "step": 20930 }, { "epoch": 1.4525644916540212, "grad_norm": 0.85546875, "learning_rate": 1.3584347826086958e-06, "loss": 0.2277, "step": 20940 }, { "epoch": 1.453258183394754, "grad_norm": 1.140625, "learning_rate": 1.356695652173913e-06, "loss": 0.2238, "step": 20950 }, { "epoch": 1.4539518751354867, "grad_norm": 1.375, "learning_rate": 1.3549565217391306e-06, "loss": 0.2185, "step": 20960 }, { "epoch": 1.4546455668762195, "grad_norm": 1.2734375, "learning_rate": 1.3532173913043477e-06, "loss": 0.2834, "step": 20970 }, { "epoch": 1.455339258616952, "grad_norm": 1.25, "learning_rate": 1.3514782608695653e-06, "loss": 0.2911, "step": 20980 }, { "epoch": 1.4560329503576848, "grad_norm": 1.4453125, "learning_rate": 1.3497391304347827e-06, "loss": 0.2226, "step": 20990 }, { "epoch": 1.4567266420984175, "grad_norm": 1.0546875, "learning_rate": 1.348e-06, "loss": 0.2248, "step": 21000 }, { "epoch": 1.4574203338391503, "grad_norm": 1.1171875, "learning_rate": 1.3462608695652174e-06, "loss": 0.2437, "step": 21010 }, { "epoch": 1.458114025579883, "grad_norm": 1.3125, "learning_rate": 1.344521739130435e-06, "loss": 0.2421, "step": 21020 }, { "epoch": 1.4588077173206155, "grad_norm": 1.328125, "learning_rate": 1.3427826086956522e-06, "loss": 0.2161, "step": 21030 }, { "epoch": 1.4595014090613483, "grad_norm": 1.34375, "learning_rate": 1.3410434782608697e-06, "loss": 0.245, "step": 21040 }, { "epoch": 1.460195100802081, "grad_norm": 1.4140625, "learning_rate": 1.339304347826087e-06, "loss": 0.2432, "step": 21050 }, { "epoch": 1.4608887925428138, "grad_norm": 1.5625, "learning_rate": 1.3375652173913045e-06, "loss": 0.2379, "step": 21060 }, { "epoch": 1.4615824842835465, "grad_norm": 1.5859375, "learning_rate": 1.3358260869565219e-06, "loss": 0.2513, "step": 21070 }, { "epoch": 1.4622761760242793, "grad_norm": 1.203125, "learning_rate": 1.3340869565217392e-06, "loss": 0.3091, "step": 21080 }, { "epoch": 1.462969867765012, "grad_norm": 1.4296875, "learning_rate": 1.3323478260869566e-06, "loss": 0.2542, "step": 21090 }, { "epoch": 1.4636635595057446, "grad_norm": 1.296875, "learning_rate": 1.3306086956521742e-06, "loss": 0.2138, "step": 21100 }, { "epoch": 1.4643572512464773, "grad_norm": 1.1484375, "learning_rate": 1.3288695652173913e-06, "loss": 0.252, "step": 21110 }, { "epoch": 1.46505094298721, "grad_norm": 1.25, "learning_rate": 1.327130434782609e-06, "loss": 0.2159, "step": 21120 }, { "epoch": 1.4657446347279428, "grad_norm": 1.3359375, "learning_rate": 1.325391304347826e-06, "loss": 0.2535, "step": 21130 }, { "epoch": 1.4664383264686756, "grad_norm": 1.6328125, "learning_rate": 1.3236521739130437e-06, "loss": 0.3247, "step": 21140 }, { "epoch": 1.467132018209408, "grad_norm": 1.1328125, "learning_rate": 1.3219130434782608e-06, "loss": 0.2415, "step": 21150 }, { "epoch": 1.4678257099501408, "grad_norm": 1.125, "learning_rate": 1.3201739130434784e-06, "loss": 0.2295, "step": 21160 }, { "epoch": 1.4685194016908736, "grad_norm": 1.3828125, "learning_rate": 1.3184347826086958e-06, "loss": 0.2348, "step": 21170 }, { "epoch": 1.4692130934316063, "grad_norm": 1.1796875, "learning_rate": 1.3166956521739134e-06, "loss": 0.2122, "step": 21180 }, { "epoch": 1.469906785172339, "grad_norm": 1.078125, "learning_rate": 1.3149565217391305e-06, "loss": 0.2565, "step": 21190 }, { "epoch": 1.4706004769130718, "grad_norm": 1.390625, "learning_rate": 1.3132173913043481e-06, "loss": 0.2724, "step": 21200 }, { "epoch": 1.4712941686538046, "grad_norm": 1.140625, "learning_rate": 1.3114782608695653e-06, "loss": 0.2413, "step": 21210 }, { "epoch": 1.471987860394537, "grad_norm": 1.7890625, "learning_rate": 1.3097391304347829e-06, "loss": 0.235, "step": 21220 }, { "epoch": 1.4726815521352699, "grad_norm": 0.92578125, "learning_rate": 1.308e-06, "loss": 0.2664, "step": 21230 }, { "epoch": 1.4733752438760026, "grad_norm": 1.2578125, "learning_rate": 1.3062608695652174e-06, "loss": 0.2151, "step": 21240 }, { "epoch": 1.4740689356167354, "grad_norm": 1.34375, "learning_rate": 1.304521739130435e-06, "loss": 0.2598, "step": 21250 }, { "epoch": 1.474762627357468, "grad_norm": 1.7265625, "learning_rate": 1.3027826086956521e-06, "loss": 0.2444, "step": 21260 }, { "epoch": 1.4754563190982006, "grad_norm": 1.671875, "learning_rate": 1.3010434782608697e-06, "loss": 0.3088, "step": 21270 }, { "epoch": 1.4761500108389334, "grad_norm": 1.1484375, "learning_rate": 1.2993043478260869e-06, "loss": 0.2525, "step": 21280 }, { "epoch": 1.4768437025796661, "grad_norm": 1.2109375, "learning_rate": 1.2975652173913045e-06, "loss": 0.2769, "step": 21290 }, { "epoch": 1.4775373943203989, "grad_norm": 1.125, "learning_rate": 1.2958260869565218e-06, "loss": 0.2298, "step": 21300 }, { "epoch": 1.4782310860611316, "grad_norm": 2.3125, "learning_rate": 1.2940869565217392e-06, "loss": 0.3014, "step": 21310 }, { "epoch": 1.4789247778018644, "grad_norm": 2.015625, "learning_rate": 1.2923478260869566e-06, "loss": 0.2391, "step": 21320 }, { "epoch": 1.4796184695425971, "grad_norm": 1.1640625, "learning_rate": 1.2906086956521741e-06, "loss": 0.2179, "step": 21330 }, { "epoch": 1.4803121612833297, "grad_norm": 1.25, "learning_rate": 1.2888695652173913e-06, "loss": 0.2062, "step": 21340 }, { "epoch": 1.4810058530240624, "grad_norm": 1.453125, "learning_rate": 1.2871304347826089e-06, "loss": 0.2396, "step": 21350 }, { "epoch": 1.4816995447647952, "grad_norm": 1.09375, "learning_rate": 1.285391304347826e-06, "loss": 0.2306, "step": 21360 }, { "epoch": 1.482393236505528, "grad_norm": 1.0390625, "learning_rate": 1.2836521739130436e-06, "loss": 0.3046, "step": 21370 }, { "epoch": 1.4830869282462606, "grad_norm": 1.1796875, "learning_rate": 1.281913043478261e-06, "loss": 0.2427, "step": 21380 }, { "epoch": 1.4837806199869932, "grad_norm": 1.96875, "learning_rate": 1.2801739130434784e-06, "loss": 0.2876, "step": 21390 }, { "epoch": 1.484474311727726, "grad_norm": 1.25, "learning_rate": 1.2784347826086957e-06, "loss": 0.2684, "step": 21400 }, { "epoch": 1.4851680034684587, "grad_norm": 1.34375, "learning_rate": 1.2766956521739133e-06, "loss": 0.2441, "step": 21410 }, { "epoch": 1.4858616952091914, "grad_norm": 1.15625, "learning_rate": 1.2749565217391305e-06, "loss": 0.2629, "step": 21420 }, { "epoch": 1.4865553869499242, "grad_norm": 1.265625, "learning_rate": 1.273217391304348e-06, "loss": 0.3055, "step": 21430 }, { "epoch": 1.487249078690657, "grad_norm": 1.7265625, "learning_rate": 1.2714782608695652e-06, "loss": 0.2489, "step": 21440 }, { "epoch": 1.4879427704313897, "grad_norm": 1.0703125, "learning_rate": 1.2697391304347828e-06, "loss": 0.2639, "step": 21450 }, { "epoch": 1.4886364621721222, "grad_norm": 1.2578125, "learning_rate": 1.268e-06, "loss": 0.2326, "step": 21460 }, { "epoch": 1.489330153912855, "grad_norm": 1.2109375, "learning_rate": 1.2662608695652176e-06, "loss": 0.2768, "step": 21470 }, { "epoch": 1.4900238456535877, "grad_norm": 1.3671875, "learning_rate": 1.264521739130435e-06, "loss": 0.2324, "step": 21480 }, { "epoch": 1.4907175373943204, "grad_norm": 1.1953125, "learning_rate": 1.2627826086956523e-06, "loss": 0.2632, "step": 21490 }, { "epoch": 1.491411229135053, "grad_norm": 1.34375, "learning_rate": 1.2610434782608697e-06, "loss": 0.1857, "step": 21500 }, { "epoch": 1.4921049208757857, "grad_norm": 1.0078125, "learning_rate": 1.2593043478260873e-06, "loss": 0.2186, "step": 21510 }, { "epoch": 1.4927986126165185, "grad_norm": 1.234375, "learning_rate": 1.2575652173913044e-06, "loss": 0.2557, "step": 21520 }, { "epoch": 1.4934923043572512, "grad_norm": 1.234375, "learning_rate": 1.255826086956522e-06, "loss": 0.2264, "step": 21530 }, { "epoch": 1.494185996097984, "grad_norm": 0.99609375, "learning_rate": 1.2540869565217392e-06, "loss": 0.2318, "step": 21540 }, { "epoch": 1.4948796878387167, "grad_norm": 1.21875, "learning_rate": 1.2523478260869565e-06, "loss": 0.2262, "step": 21550 }, { "epoch": 1.4955733795794495, "grad_norm": 1.3515625, "learning_rate": 1.2506086956521741e-06, "loss": 0.2534, "step": 21560 }, { "epoch": 1.4962670713201822, "grad_norm": 1.328125, "learning_rate": 1.2488695652173915e-06, "loss": 0.2823, "step": 21570 }, { "epoch": 1.4969607630609147, "grad_norm": 1.3671875, "learning_rate": 1.2471304347826088e-06, "loss": 0.2572, "step": 21580 }, { "epoch": 1.4976544548016475, "grad_norm": 1.109375, "learning_rate": 1.2453913043478262e-06, "loss": 0.2499, "step": 21590 }, { "epoch": 1.4983481465423802, "grad_norm": 0.96484375, "learning_rate": 1.2436521739130436e-06, "loss": 0.2307, "step": 21600 }, { "epoch": 1.499041838283113, "grad_norm": 1.21875, "learning_rate": 1.241913043478261e-06, "loss": 0.2116, "step": 21610 }, { "epoch": 1.4997355300238455, "grad_norm": 1.2109375, "learning_rate": 1.2401739130434783e-06, "loss": 0.2305, "step": 21620 }, { "epoch": 1.5004292217645783, "grad_norm": 1.3203125, "learning_rate": 1.2384347826086957e-06, "loss": 0.3057, "step": 21630 }, { "epoch": 1.501122913505311, "grad_norm": 1.3203125, "learning_rate": 1.2366956521739133e-06, "loss": 0.24, "step": 21640 }, { "epoch": 1.5018166052460438, "grad_norm": 1.40625, "learning_rate": 1.2349565217391307e-06, "loss": 0.2924, "step": 21650 }, { "epoch": 1.5025102969867765, "grad_norm": 1.125, "learning_rate": 1.233217391304348e-06, "loss": 0.2801, "step": 21660 }, { "epoch": 1.5032039887275093, "grad_norm": 1.203125, "learning_rate": 1.2314782608695654e-06, "loss": 0.2273, "step": 21670 }, { "epoch": 1.503897680468242, "grad_norm": 1.1640625, "learning_rate": 1.2297391304347828e-06, "loss": 0.2464, "step": 21680 }, { "epoch": 1.5045913722089748, "grad_norm": 1.1953125, "learning_rate": 1.2280000000000001e-06, "loss": 0.284, "step": 21690 }, { "epoch": 1.5052850639497075, "grad_norm": 1.140625, "learning_rate": 1.2262608695652175e-06, "loss": 0.2401, "step": 21700 }, { "epoch": 1.50597875569044, "grad_norm": 1.1328125, "learning_rate": 1.2245217391304349e-06, "loss": 0.2709, "step": 21710 }, { "epoch": 1.5066724474311728, "grad_norm": 1.1328125, "learning_rate": 1.2227826086956523e-06, "loss": 0.2462, "step": 21720 }, { "epoch": 1.5073661391719055, "grad_norm": 1.28125, "learning_rate": 1.2210434782608696e-06, "loss": 0.2562, "step": 21730 }, { "epoch": 1.508059830912638, "grad_norm": 0.9765625, "learning_rate": 1.219304347826087e-06, "loss": 0.2227, "step": 21740 }, { "epoch": 1.5087535226533708, "grad_norm": 1.265625, "learning_rate": 1.2175652173913044e-06, "loss": 0.2564, "step": 21750 }, { "epoch": 1.5094472143941036, "grad_norm": 0.921875, "learning_rate": 1.2158260869565217e-06, "loss": 0.2372, "step": 21760 }, { "epoch": 1.5101409061348363, "grad_norm": 0.99609375, "learning_rate": 1.2140869565217391e-06, "loss": 0.2139, "step": 21770 }, { "epoch": 1.510834597875569, "grad_norm": 1.3359375, "learning_rate": 1.2123478260869565e-06, "loss": 0.2311, "step": 21780 }, { "epoch": 1.5115282896163018, "grad_norm": 1.1796875, "learning_rate": 1.210608695652174e-06, "loss": 0.2667, "step": 21790 }, { "epoch": 1.5122219813570346, "grad_norm": 1.1171875, "learning_rate": 1.2088695652173914e-06, "loss": 0.2117, "step": 21800 }, { "epoch": 1.5129156730977673, "grad_norm": 1.03125, "learning_rate": 1.2071304347826088e-06, "loss": 0.2567, "step": 21810 }, { "epoch": 1.5136093648384998, "grad_norm": 1.3671875, "learning_rate": 1.2053913043478262e-06, "loss": 0.2249, "step": 21820 }, { "epoch": 1.5143030565792326, "grad_norm": 1.125, "learning_rate": 1.2036521739130436e-06, "loss": 0.2845, "step": 21830 }, { "epoch": 1.5149967483199653, "grad_norm": 1.359375, "learning_rate": 1.201913043478261e-06, "loss": 0.2194, "step": 21840 }, { "epoch": 1.5156904400606979, "grad_norm": 1.2734375, "learning_rate": 1.2001739130434783e-06, "loss": 0.2333, "step": 21850 }, { "epoch": 1.5163841318014306, "grad_norm": 1.234375, "learning_rate": 1.1984347826086957e-06, "loss": 0.2, "step": 21860 }, { "epoch": 1.5170778235421634, "grad_norm": 1.1328125, "learning_rate": 1.1966956521739132e-06, "loss": 0.2649, "step": 21870 }, { "epoch": 1.517771515282896, "grad_norm": 1.6171875, "learning_rate": 1.1949565217391306e-06, "loss": 0.2606, "step": 21880 }, { "epoch": 1.5184652070236289, "grad_norm": 1.109375, "learning_rate": 1.193217391304348e-06, "loss": 0.2429, "step": 21890 }, { "epoch": 1.5191588987643616, "grad_norm": 1.359375, "learning_rate": 1.1914782608695654e-06, "loss": 0.2327, "step": 21900 }, { "epoch": 1.5198525905050944, "grad_norm": 1.0, "learning_rate": 1.1897391304347827e-06, "loss": 0.2417, "step": 21910 }, { "epoch": 1.520546282245827, "grad_norm": 1.046875, "learning_rate": 1.188e-06, "loss": 0.2538, "step": 21920 }, { "epoch": 1.5212399739865599, "grad_norm": 1.078125, "learning_rate": 1.1862608695652175e-06, "loss": 0.2746, "step": 21930 }, { "epoch": 1.5219336657272924, "grad_norm": 1.3359375, "learning_rate": 1.1845217391304348e-06, "loss": 0.2102, "step": 21940 }, { "epoch": 1.5226273574680251, "grad_norm": 1.2421875, "learning_rate": 1.1827826086956522e-06, "loss": 0.2367, "step": 21950 }, { "epoch": 1.5233210492087579, "grad_norm": 0.85546875, "learning_rate": 1.1810434782608698e-06, "loss": 0.2139, "step": 21960 }, { "epoch": 1.5240147409494904, "grad_norm": 1.15625, "learning_rate": 1.1793043478260872e-06, "loss": 0.3149, "step": 21970 }, { "epoch": 1.5247084326902232, "grad_norm": 1.1796875, "learning_rate": 1.1775652173913045e-06, "loss": 0.2062, "step": 21980 }, { "epoch": 1.525402124430956, "grad_norm": 1.46875, "learning_rate": 1.175826086956522e-06, "loss": 0.2454, "step": 21990 }, { "epoch": 1.5260958161716887, "grad_norm": 1.0078125, "learning_rate": 1.1740869565217393e-06, "loss": 0.1977, "step": 22000 }, { "epoch": 1.5267895079124214, "grad_norm": 1.5546875, "learning_rate": 1.1723478260869567e-06, "loss": 0.26, "step": 22010 }, { "epoch": 1.5274831996531542, "grad_norm": 1.015625, "learning_rate": 1.170608695652174e-06, "loss": 0.2235, "step": 22020 }, { "epoch": 1.528176891393887, "grad_norm": 1.265625, "learning_rate": 1.1688695652173914e-06, "loss": 0.2297, "step": 22030 }, { "epoch": 1.5288705831346197, "grad_norm": 1.25, "learning_rate": 1.1671304347826088e-06, "loss": 0.2571, "step": 22040 }, { "epoch": 1.5295642748753524, "grad_norm": 1.4453125, "learning_rate": 1.1653913043478261e-06, "loss": 0.2845, "step": 22050 }, { "epoch": 1.530257966616085, "grad_norm": 1.453125, "learning_rate": 1.1636521739130435e-06, "loss": 0.2432, "step": 22060 }, { "epoch": 1.5309516583568177, "grad_norm": 1.96875, "learning_rate": 1.1619130434782609e-06, "loss": 0.2807, "step": 22070 }, { "epoch": 1.5316453500975504, "grad_norm": 1.09375, "learning_rate": 1.1601739130434783e-06, "loss": 0.222, "step": 22080 }, { "epoch": 1.532339041838283, "grad_norm": 1.1875, "learning_rate": 1.1584347826086956e-06, "loss": 0.2292, "step": 22090 }, { "epoch": 1.5330327335790157, "grad_norm": 1.8203125, "learning_rate": 1.1566956521739132e-06, "loss": 0.2663, "step": 22100 }, { "epoch": 1.5337264253197485, "grad_norm": 1.3359375, "learning_rate": 1.1549565217391306e-06, "loss": 0.2528, "step": 22110 }, { "epoch": 1.5344201170604812, "grad_norm": 1.484375, "learning_rate": 1.153217391304348e-06, "loss": 0.2552, "step": 22120 }, { "epoch": 1.535113808801214, "grad_norm": 1.0859375, "learning_rate": 1.1514782608695653e-06, "loss": 0.1995, "step": 22130 }, { "epoch": 1.5358075005419467, "grad_norm": 1.453125, "learning_rate": 1.1497391304347827e-06, "loss": 0.2302, "step": 22140 }, { "epoch": 1.5365011922826795, "grad_norm": 1.1640625, "learning_rate": 1.148e-06, "loss": 0.2027, "step": 22150 }, { "epoch": 1.5371948840234122, "grad_norm": 1.1875, "learning_rate": 1.1462608695652174e-06, "loss": 0.234, "step": 22160 }, { "epoch": 1.537888575764145, "grad_norm": 1.3359375, "learning_rate": 1.1445217391304348e-06, "loss": 0.2325, "step": 22170 }, { "epoch": 1.5385822675048775, "grad_norm": 1.703125, "learning_rate": 1.1427826086956522e-06, "loss": 0.2567, "step": 22180 }, { "epoch": 1.5392759592456102, "grad_norm": 1.21875, "learning_rate": 1.1410434782608698e-06, "loss": 0.2226, "step": 22190 }, { "epoch": 1.539969650986343, "grad_norm": 1.1015625, "learning_rate": 1.1393043478260871e-06, "loss": 0.2206, "step": 22200 }, { "epoch": 1.5406633427270755, "grad_norm": 1.0234375, "learning_rate": 1.1375652173913045e-06, "loss": 0.2342, "step": 22210 }, { "epoch": 1.5413570344678083, "grad_norm": 1.1875, "learning_rate": 1.1358260869565219e-06, "loss": 0.2695, "step": 22220 }, { "epoch": 1.542050726208541, "grad_norm": 1.3671875, "learning_rate": 1.1340869565217392e-06, "loss": 0.2812, "step": 22230 }, { "epoch": 1.5427444179492737, "grad_norm": 0.95703125, "learning_rate": 1.1323478260869566e-06, "loss": 0.2701, "step": 22240 }, { "epoch": 1.5434381096900065, "grad_norm": 1.3046875, "learning_rate": 1.130608695652174e-06, "loss": 0.2258, "step": 22250 }, { "epoch": 1.5441318014307392, "grad_norm": 0.96484375, "learning_rate": 1.1288695652173914e-06, "loss": 0.2496, "step": 22260 }, { "epoch": 1.544825493171472, "grad_norm": 1.1796875, "learning_rate": 1.127130434782609e-06, "loss": 0.2058, "step": 22270 }, { "epoch": 1.5455191849122047, "grad_norm": 1.265625, "learning_rate": 1.1253913043478263e-06, "loss": 0.2076, "step": 22280 }, { "epoch": 1.5462128766529375, "grad_norm": 0.8984375, "learning_rate": 1.1236521739130437e-06, "loss": 0.2568, "step": 22290 }, { "epoch": 1.54690656839367, "grad_norm": 1.2578125, "learning_rate": 1.121913043478261e-06, "loss": 0.2804, "step": 22300 }, { "epoch": 1.5476002601344028, "grad_norm": 1.109375, "learning_rate": 1.1201739130434784e-06, "loss": 0.2401, "step": 22310 }, { "epoch": 1.5482939518751355, "grad_norm": 0.953125, "learning_rate": 1.1184347826086958e-06, "loss": 0.2465, "step": 22320 }, { "epoch": 1.548987643615868, "grad_norm": 1.171875, "learning_rate": 1.1166956521739132e-06, "loss": 0.3361, "step": 22330 }, { "epoch": 1.5496813353566008, "grad_norm": 1.3359375, "learning_rate": 1.1149565217391305e-06, "loss": 0.2217, "step": 22340 }, { "epoch": 1.5503750270973335, "grad_norm": 1.1875, "learning_rate": 1.113217391304348e-06, "loss": 0.2402, "step": 22350 }, { "epoch": 1.5510687188380663, "grad_norm": 1.09375, "learning_rate": 1.1114782608695653e-06, "loss": 0.2065, "step": 22360 }, { "epoch": 1.551762410578799, "grad_norm": 0.921875, "learning_rate": 1.1097391304347827e-06, "loss": 0.237, "step": 22370 }, { "epoch": 1.5524561023195318, "grad_norm": 1.03125, "learning_rate": 1.108e-06, "loss": 0.3074, "step": 22380 }, { "epoch": 1.5531497940602645, "grad_norm": 1.1796875, "learning_rate": 1.1062608695652174e-06, "loss": 0.2363, "step": 22390 }, { "epoch": 1.5538434858009973, "grad_norm": 1.328125, "learning_rate": 1.1045217391304348e-06, "loss": 0.3468, "step": 22400 }, { "epoch": 1.55453717754173, "grad_norm": 1.578125, "learning_rate": 1.1027826086956521e-06, "loss": 0.2699, "step": 22410 }, { "epoch": 1.5552308692824626, "grad_norm": 1.375, "learning_rate": 1.1010434782608697e-06, "loss": 0.2687, "step": 22420 }, { "epoch": 1.5559245610231953, "grad_norm": 0.9375, "learning_rate": 1.099304347826087e-06, "loss": 0.2097, "step": 22430 }, { "epoch": 1.556618252763928, "grad_norm": 1.109375, "learning_rate": 1.0975652173913045e-06, "loss": 0.2544, "step": 22440 }, { "epoch": 1.5573119445046606, "grad_norm": 1.1015625, "learning_rate": 1.0958260869565218e-06, "loss": 0.229, "step": 22450 }, { "epoch": 1.5580056362453933, "grad_norm": 1.15625, "learning_rate": 1.0940869565217392e-06, "loss": 0.3094, "step": 22460 }, { "epoch": 1.558699327986126, "grad_norm": 0.9296875, "learning_rate": 1.0923478260869566e-06, "loss": 0.1948, "step": 22470 }, { "epoch": 1.5593930197268588, "grad_norm": 1.4375, "learning_rate": 1.090608695652174e-06, "loss": 0.2203, "step": 22480 }, { "epoch": 1.5600867114675916, "grad_norm": 1.1328125, "learning_rate": 1.0888695652173913e-06, "loss": 0.2523, "step": 22490 }, { "epoch": 1.5607804032083243, "grad_norm": 1.203125, "learning_rate": 1.087130434782609e-06, "loss": 0.2415, "step": 22500 }, { "epoch": 1.561474094949057, "grad_norm": 1.15625, "learning_rate": 1.0853913043478263e-06, "loss": 0.268, "step": 22510 }, { "epoch": 1.5621677866897898, "grad_norm": 1.4453125, "learning_rate": 1.0836521739130436e-06, "loss": 0.2474, "step": 22520 }, { "epoch": 1.5628614784305226, "grad_norm": 1.15625, "learning_rate": 1.081913043478261e-06, "loss": 0.3104, "step": 22530 }, { "epoch": 1.5635551701712551, "grad_norm": 1.0703125, "learning_rate": 1.0801739130434784e-06, "loss": 0.2487, "step": 22540 }, { "epoch": 1.5642488619119879, "grad_norm": 1.21875, "learning_rate": 1.0784347826086958e-06, "loss": 0.2224, "step": 22550 }, { "epoch": 1.5649425536527206, "grad_norm": 1.046875, "learning_rate": 1.0766956521739131e-06, "loss": 0.2211, "step": 22560 }, { "epoch": 1.5656362453934531, "grad_norm": 0.99609375, "learning_rate": 1.0749565217391305e-06, "loss": 0.2382, "step": 22570 }, { "epoch": 1.566329937134186, "grad_norm": 1.5078125, "learning_rate": 1.0732173913043479e-06, "loss": 0.2285, "step": 22580 }, { "epoch": 1.5670236288749186, "grad_norm": 1.5078125, "learning_rate": 1.0714782608695655e-06, "loss": 0.3084, "step": 22590 }, { "epoch": 1.5677173206156514, "grad_norm": 1.140625, "learning_rate": 1.0697391304347828e-06, "loss": 0.2316, "step": 22600 }, { "epoch": 1.5684110123563841, "grad_norm": 1.203125, "learning_rate": 1.0680000000000002e-06, "loss": 0.2359, "step": 22610 }, { "epoch": 1.5691047040971169, "grad_norm": 1.0390625, "learning_rate": 1.0662608695652176e-06, "loss": 0.3312, "step": 22620 }, { "epoch": 1.5697983958378496, "grad_norm": 1.2734375, "learning_rate": 1.064521739130435e-06, "loss": 0.2094, "step": 22630 }, { "epoch": 1.5704920875785824, "grad_norm": 1.2265625, "learning_rate": 1.062782608695652e-06, "loss": 0.2005, "step": 22640 }, { "epoch": 1.5711857793193151, "grad_norm": 1.3671875, "learning_rate": 1.0610434782608697e-06, "loss": 0.2151, "step": 22650 }, { "epoch": 1.5718794710600477, "grad_norm": 1.28125, "learning_rate": 1.059304347826087e-06, "loss": 0.226, "step": 22660 }, { "epoch": 1.5725731628007804, "grad_norm": 1.09375, "learning_rate": 1.0575652173913044e-06, "loss": 0.2764, "step": 22670 }, { "epoch": 1.5732668545415132, "grad_norm": 1.1484375, "learning_rate": 1.0558260869565218e-06, "loss": 0.2149, "step": 22680 }, { "epoch": 1.5739605462822457, "grad_norm": 0.8046875, "learning_rate": 1.0540869565217392e-06, "loss": 0.2572, "step": 22690 }, { "epoch": 1.5746542380229784, "grad_norm": 1.359375, "learning_rate": 1.0523478260869565e-06, "loss": 0.2572, "step": 22700 }, { "epoch": 1.5753479297637112, "grad_norm": 1.0703125, "learning_rate": 1.050608695652174e-06, "loss": 0.241, "step": 22710 }, { "epoch": 1.576041621504444, "grad_norm": 1.078125, "learning_rate": 1.0488695652173913e-06, "loss": 0.2464, "step": 22720 }, { "epoch": 1.5767353132451767, "grad_norm": 1.5, "learning_rate": 1.0471304347826089e-06, "loss": 0.2494, "step": 22730 }, { "epoch": 1.5774290049859094, "grad_norm": 1.1640625, "learning_rate": 1.0453913043478262e-06, "loss": 0.314, "step": 22740 }, { "epoch": 1.5781226967266422, "grad_norm": 1.203125, "learning_rate": 1.0436521739130436e-06, "loss": 0.231, "step": 22750 }, { "epoch": 1.578816388467375, "grad_norm": 1.4453125, "learning_rate": 1.041913043478261e-06, "loss": 0.3283, "step": 22760 }, { "epoch": 1.5795100802081077, "grad_norm": 1.0859375, "learning_rate": 1.0401739130434783e-06, "loss": 0.3071, "step": 22770 }, { "epoch": 1.5802037719488402, "grad_norm": 1.125, "learning_rate": 1.0384347826086957e-06, "loss": 0.2743, "step": 22780 }, { "epoch": 1.580897463689573, "grad_norm": 1.2890625, "learning_rate": 1.036695652173913e-06, "loss": 0.253, "step": 22790 }, { "epoch": 1.5815911554303057, "grad_norm": 1.046875, "learning_rate": 1.0349565217391305e-06, "loss": 0.2143, "step": 22800 }, { "epoch": 1.5822848471710382, "grad_norm": 1.140625, "learning_rate": 1.0332173913043478e-06, "loss": 0.2481, "step": 22810 }, { "epoch": 1.582978538911771, "grad_norm": 1.375, "learning_rate": 1.0314782608695654e-06, "loss": 0.2572, "step": 22820 }, { "epoch": 1.5836722306525037, "grad_norm": 1.171875, "learning_rate": 1.0297391304347828e-06, "loss": 0.2987, "step": 22830 }, { "epoch": 1.5843659223932365, "grad_norm": 1.375, "learning_rate": 1.0280000000000002e-06, "loss": 0.2331, "step": 22840 }, { "epoch": 1.5850596141339692, "grad_norm": 0.95703125, "learning_rate": 1.0262608695652175e-06, "loss": 0.2076, "step": 22850 }, { "epoch": 1.585753305874702, "grad_norm": 1.125, "learning_rate": 1.024521739130435e-06, "loss": 0.2365, "step": 22860 }, { "epoch": 1.5864469976154347, "grad_norm": 1.8828125, "learning_rate": 1.0227826086956523e-06, "loss": 0.2808, "step": 22870 }, { "epoch": 1.5871406893561675, "grad_norm": 1.2265625, "learning_rate": 1.0210434782608696e-06, "loss": 0.2484, "step": 22880 }, { "epoch": 1.5878343810969002, "grad_norm": 1.625, "learning_rate": 1.019304347826087e-06, "loss": 0.2585, "step": 22890 }, { "epoch": 1.5885280728376328, "grad_norm": 1.1015625, "learning_rate": 1.0175652173913044e-06, "loss": 0.2316, "step": 22900 }, { "epoch": 1.5892217645783655, "grad_norm": 1.6015625, "learning_rate": 1.015826086956522e-06, "loss": 0.2097, "step": 22910 }, { "epoch": 1.5899154563190983, "grad_norm": 1.3203125, "learning_rate": 1.0140869565217393e-06, "loss": 0.275, "step": 22920 }, { "epoch": 1.5906091480598308, "grad_norm": 1.359375, "learning_rate": 1.0123478260869567e-06, "loss": 0.2504, "step": 22930 }, { "epoch": 1.5913028398005635, "grad_norm": 1.1953125, "learning_rate": 1.0106086956521739e-06, "loss": 0.2771, "step": 22940 }, { "epoch": 1.5919965315412963, "grad_norm": 1.375, "learning_rate": 1.0088695652173912e-06, "loss": 0.2555, "step": 22950 }, { "epoch": 1.592690223282029, "grad_norm": 1.2421875, "learning_rate": 1.0071304347826088e-06, "loss": 0.2447, "step": 22960 }, { "epoch": 1.5933839150227618, "grad_norm": 1.015625, "learning_rate": 1.0053913043478262e-06, "loss": 0.2536, "step": 22970 }, { "epoch": 1.5940776067634945, "grad_norm": 1.1328125, "learning_rate": 1.0036521739130436e-06, "loss": 0.2336, "step": 22980 }, { "epoch": 1.5947712985042273, "grad_norm": 1.4765625, "learning_rate": 1.001913043478261e-06, "loss": 0.2328, "step": 22990 }, { "epoch": 1.59546499024496, "grad_norm": 1.703125, "learning_rate": 1.0001739130434783e-06, "loss": 0.2339, "step": 23000 }, { "epoch": 1.5961586819856928, "grad_norm": 1.4609375, "learning_rate": 9.984347826086957e-07, "loss": 0.2749, "step": 23010 }, { "epoch": 1.5968523737264253, "grad_norm": 1.390625, "learning_rate": 9.96695652173913e-07, "loss": 0.2484, "step": 23020 }, { "epoch": 1.597546065467158, "grad_norm": 1.2734375, "learning_rate": 9.949565217391304e-07, "loss": 0.2099, "step": 23030 }, { "epoch": 1.5982397572078908, "grad_norm": 2.0, "learning_rate": 9.932173913043478e-07, "loss": 0.2897, "step": 23040 }, { "epoch": 1.5989334489486233, "grad_norm": 1.296875, "learning_rate": 9.914782608695654e-07, "loss": 0.2628, "step": 23050 }, { "epoch": 1.599627140689356, "grad_norm": 1.15625, "learning_rate": 9.897391304347827e-07, "loss": 0.1823, "step": 23060 }, { "epoch": 1.6003208324300888, "grad_norm": 0.796875, "learning_rate": 9.880000000000001e-07, "loss": 0.2547, "step": 23070 }, { "epoch": 1.6010145241708216, "grad_norm": 1.1328125, "learning_rate": 9.862608695652175e-07, "loss": 0.2082, "step": 23080 }, { "epoch": 1.6017082159115543, "grad_norm": 0.78515625, "learning_rate": 9.845217391304349e-07, "loss": 0.2021, "step": 23090 }, { "epoch": 1.602401907652287, "grad_norm": 1.0859375, "learning_rate": 9.827826086956522e-07, "loss": 0.2168, "step": 23100 }, { "epoch": 1.6030955993930198, "grad_norm": 1.3984375, "learning_rate": 9.810434782608696e-07, "loss": 0.3124, "step": 23110 }, { "epoch": 1.6037892911337526, "grad_norm": 1.234375, "learning_rate": 9.79304347826087e-07, "loss": 0.2166, "step": 23120 }, { "epoch": 1.604482982874485, "grad_norm": 0.9609375, "learning_rate": 9.775652173913043e-07, "loss": 0.2531, "step": 23130 }, { "epoch": 1.6051766746152178, "grad_norm": 1.109375, "learning_rate": 9.75826086956522e-07, "loss": 0.3279, "step": 23140 }, { "epoch": 1.6058703663559506, "grad_norm": 1.453125, "learning_rate": 9.740869565217393e-07, "loss": 0.242, "step": 23150 }, { "epoch": 1.6065640580966831, "grad_norm": 1.078125, "learning_rate": 9.723478260869567e-07, "loss": 0.1916, "step": 23160 }, { "epoch": 1.6072577498374159, "grad_norm": 1.53125, "learning_rate": 9.70608695652174e-07, "loss": 0.3144, "step": 23170 }, { "epoch": 1.6079514415781486, "grad_norm": 1.15625, "learning_rate": 9.688695652173914e-07, "loss": 0.251, "step": 23180 }, { "epoch": 1.6086451333188814, "grad_norm": 1.359375, "learning_rate": 9.671304347826088e-07, "loss": 0.2514, "step": 23190 }, { "epoch": 1.6093388250596141, "grad_norm": 1.8984375, "learning_rate": 9.653913043478261e-07, "loss": 0.3102, "step": 23200 }, { "epoch": 1.6100325168003469, "grad_norm": 1.2421875, "learning_rate": 9.636521739130435e-07, "loss": 0.2325, "step": 23210 }, { "epoch": 1.6107262085410796, "grad_norm": 0.984375, "learning_rate": 9.61913043478261e-07, "loss": 0.2195, "step": 23220 }, { "epoch": 1.6114199002818124, "grad_norm": 1.28125, "learning_rate": 9.601739130434785e-07, "loss": 0.2705, "step": 23230 }, { "epoch": 1.6121135920225451, "grad_norm": 1.0234375, "learning_rate": 9.584347826086958e-07, "loss": 0.2394, "step": 23240 }, { "epoch": 1.6128072837632776, "grad_norm": 1.2578125, "learning_rate": 9.56695652173913e-07, "loss": 0.2567, "step": 23250 }, { "epoch": 1.6135009755040104, "grad_norm": 0.9453125, "learning_rate": 9.549565217391304e-07, "loss": 0.2116, "step": 23260 }, { "epoch": 1.6141946672447431, "grad_norm": 1.125, "learning_rate": 9.532173913043479e-07, "loss": 0.2329, "step": 23270 }, { "epoch": 1.6148883589854757, "grad_norm": 1.4765625, "learning_rate": 9.514782608695652e-07, "loss": 0.2868, "step": 23280 }, { "epoch": 1.6155820507262084, "grad_norm": 1.1796875, "learning_rate": 9.497391304347826e-07, "loss": 0.289, "step": 23290 }, { "epoch": 1.6162757424669412, "grad_norm": 1.171875, "learning_rate": 9.480000000000001e-07, "loss": 0.2032, "step": 23300 }, { "epoch": 1.616969434207674, "grad_norm": 1.2890625, "learning_rate": 9.462608695652174e-07, "loss": 0.257, "step": 23310 }, { "epoch": 1.6176631259484067, "grad_norm": 1.3046875, "learning_rate": 9.445217391304348e-07, "loss": 0.2119, "step": 23320 }, { "epoch": 1.6183568176891394, "grad_norm": 1.296875, "learning_rate": 9.427826086956522e-07, "loss": 0.247, "step": 23330 }, { "epoch": 1.6190505094298722, "grad_norm": 1.296875, "learning_rate": 9.410434782608697e-07, "loss": 0.2446, "step": 23340 }, { "epoch": 1.619744201170605, "grad_norm": 1.5625, "learning_rate": 9.39304347826087e-07, "loss": 0.2619, "step": 23350 }, { "epoch": 1.6204378929113377, "grad_norm": 1.390625, "learning_rate": 9.375652173913044e-07, "loss": 0.257, "step": 23360 }, { "epoch": 1.6211315846520702, "grad_norm": 1.296875, "learning_rate": 9.358260869565218e-07, "loss": 0.2905, "step": 23370 }, { "epoch": 1.621825276392803, "grad_norm": 1.0703125, "learning_rate": 9.340869565217391e-07, "loss": 0.2398, "step": 23380 }, { "epoch": 1.6225189681335357, "grad_norm": 1.2421875, "learning_rate": 9.323478260869566e-07, "loss": 0.2423, "step": 23390 }, { "epoch": 1.6232126598742682, "grad_norm": 1.3046875, "learning_rate": 9.30608695652174e-07, "loss": 0.2582, "step": 23400 }, { "epoch": 1.623906351615001, "grad_norm": 1.3359375, "learning_rate": 9.288695652173914e-07, "loss": 0.2746, "step": 23410 }, { "epoch": 1.6246000433557337, "grad_norm": 1.125, "learning_rate": 9.271304347826087e-07, "loss": 0.2465, "step": 23420 }, { "epoch": 1.6252937350964665, "grad_norm": 1.296875, "learning_rate": 9.253913043478262e-07, "loss": 0.223, "step": 23430 }, { "epoch": 1.6259874268371992, "grad_norm": 1.203125, "learning_rate": 9.236521739130436e-07, "loss": 0.2014, "step": 23440 }, { "epoch": 1.626681118577932, "grad_norm": 1.21875, "learning_rate": 9.21913043478261e-07, "loss": 0.2196, "step": 23450 }, { "epoch": 1.6273748103186647, "grad_norm": 0.9765625, "learning_rate": 9.201739130434783e-07, "loss": 0.2466, "step": 23460 }, { "epoch": 1.6280685020593975, "grad_norm": 1.03125, "learning_rate": 9.184347826086958e-07, "loss": 0.2713, "step": 23470 }, { "epoch": 1.6287621938001302, "grad_norm": 0.7734375, "learning_rate": 9.166956521739132e-07, "loss": 0.2448, "step": 23480 }, { "epoch": 1.6294558855408627, "grad_norm": 1.8203125, "learning_rate": 9.149565217391305e-07, "loss": 0.2153, "step": 23490 }, { "epoch": 1.6301495772815955, "grad_norm": 1.1484375, "learning_rate": 9.132173913043479e-07, "loss": 0.2345, "step": 23500 }, { "epoch": 1.6308432690223282, "grad_norm": 1.484375, "learning_rate": 9.114782608695653e-07, "loss": 0.2398, "step": 23510 }, { "epoch": 1.6315369607630608, "grad_norm": 1.546875, "learning_rate": 9.097391304347828e-07, "loss": 0.2641, "step": 23520 }, { "epoch": 1.6322306525037935, "grad_norm": 1.265625, "learning_rate": 9.080000000000001e-07, "loss": 0.216, "step": 23530 }, { "epoch": 1.6329243442445263, "grad_norm": 1.109375, "learning_rate": 9.062608695652175e-07, "loss": 0.2306, "step": 23540 }, { "epoch": 1.633618035985259, "grad_norm": 1.2109375, "learning_rate": 9.045217391304349e-07, "loss": 0.233, "step": 23550 }, { "epoch": 1.6343117277259918, "grad_norm": 1.1953125, "learning_rate": 9.027826086956521e-07, "loss": 0.2194, "step": 23560 }, { "epoch": 1.6350054194667245, "grad_norm": 0.96484375, "learning_rate": 9.010434782608696e-07, "loss": 0.1982, "step": 23570 }, { "epoch": 1.6356991112074573, "grad_norm": 1.40625, "learning_rate": 8.99304347826087e-07, "loss": 0.2591, "step": 23580 }, { "epoch": 1.63639280294819, "grad_norm": 1.3046875, "learning_rate": 8.975652173913044e-07, "loss": 0.2702, "step": 23590 }, { "epoch": 1.6370864946889228, "grad_norm": 1.5625, "learning_rate": 8.958260869565217e-07, "loss": 0.2622, "step": 23600 }, { "epoch": 1.6377801864296553, "grad_norm": 0.9453125, "learning_rate": 8.940869565217391e-07, "loss": 0.2386, "step": 23610 }, { "epoch": 1.638473878170388, "grad_norm": 1.0234375, "learning_rate": 8.923478260869566e-07, "loss": 0.2167, "step": 23620 }, { "epoch": 1.6391675699111208, "grad_norm": 1.109375, "learning_rate": 8.90608695652174e-07, "loss": 0.2375, "step": 23630 }, { "epoch": 1.6398612616518533, "grad_norm": 1.21875, "learning_rate": 8.888695652173913e-07, "loss": 0.2261, "step": 23640 }, { "epoch": 1.640554953392586, "grad_norm": 1.1953125, "learning_rate": 8.871304347826087e-07, "loss": 0.2658, "step": 23650 }, { "epoch": 1.6412486451333188, "grad_norm": 1.0625, "learning_rate": 8.853913043478262e-07, "loss": 0.2705, "step": 23660 }, { "epoch": 1.6419423368740516, "grad_norm": 1.0078125, "learning_rate": 8.836521739130435e-07, "loss": 0.2295, "step": 23670 }, { "epoch": 1.6426360286147843, "grad_norm": 0.99609375, "learning_rate": 8.819130434782609e-07, "loss": 0.2608, "step": 23680 }, { "epoch": 1.643329720355517, "grad_norm": 0.984375, "learning_rate": 8.801739130434783e-07, "loss": 0.2073, "step": 23690 }, { "epoch": 1.6440234120962498, "grad_norm": 1.25, "learning_rate": 8.784347826086958e-07, "loss": 0.2729, "step": 23700 }, { "epoch": 1.6447171038369826, "grad_norm": 1.09375, "learning_rate": 8.766956521739131e-07, "loss": 0.2614, "step": 23710 }, { "epoch": 1.6454107955777153, "grad_norm": 1.09375, "learning_rate": 8.749565217391305e-07, "loss": 0.2278, "step": 23720 }, { "epoch": 1.6461044873184478, "grad_norm": 1.2109375, "learning_rate": 8.732173913043479e-07, "loss": 0.2365, "step": 23730 }, { "epoch": 1.6467981790591806, "grad_norm": 1.3125, "learning_rate": 8.714782608695654e-07, "loss": 0.2512, "step": 23740 }, { "epoch": 1.6474918707999133, "grad_norm": 1.46875, "learning_rate": 8.697391304347827e-07, "loss": 0.3113, "step": 23750 }, { "epoch": 1.6481855625406459, "grad_norm": 1.375, "learning_rate": 8.680000000000001e-07, "loss": 0.2599, "step": 23760 }, { "epoch": 1.6488792542813786, "grad_norm": 0.9921875, "learning_rate": 8.662608695652175e-07, "loss": 0.2318, "step": 23770 }, { "epoch": 1.6495729460221114, "grad_norm": 0.96484375, "learning_rate": 8.645217391304348e-07, "loss": 0.2484, "step": 23780 }, { "epoch": 1.650266637762844, "grad_norm": 1.609375, "learning_rate": 8.627826086956523e-07, "loss": 0.2319, "step": 23790 }, { "epoch": 1.6509603295035769, "grad_norm": 1.1953125, "learning_rate": 8.610434782608697e-07, "loss": 0.2098, "step": 23800 }, { "epoch": 1.6516540212443096, "grad_norm": 1.0078125, "learning_rate": 8.593043478260871e-07, "loss": 0.2516, "step": 23810 }, { "epoch": 1.6523477129850423, "grad_norm": 0.9765625, "learning_rate": 8.575652173913044e-07, "loss": 0.205, "step": 23820 }, { "epoch": 1.653041404725775, "grad_norm": 1.4375, "learning_rate": 8.558260869565219e-07, "loss": 0.2468, "step": 23830 }, { "epoch": 1.6537350964665078, "grad_norm": 1.375, "learning_rate": 8.540869565217393e-07, "loss": 0.2423, "step": 23840 }, { "epoch": 1.6544287882072404, "grad_norm": 1.171875, "learning_rate": 8.523478260869566e-07, "loss": 0.2577, "step": 23850 }, { "epoch": 1.6551224799479731, "grad_norm": 1.296875, "learning_rate": 8.50608695652174e-07, "loss": 0.2681, "step": 23860 }, { "epoch": 1.6558161716887059, "grad_norm": 1.234375, "learning_rate": 8.488695652173913e-07, "loss": 0.2362, "step": 23870 }, { "epoch": 1.6565098634294384, "grad_norm": 1.3203125, "learning_rate": 8.471304347826087e-07, "loss": 0.2296, "step": 23880 }, { "epoch": 1.6572035551701711, "grad_norm": 1.265625, "learning_rate": 8.453913043478261e-07, "loss": 0.2451, "step": 23890 }, { "epoch": 1.657897246910904, "grad_norm": 1.140625, "learning_rate": 8.436521739130435e-07, "loss": 0.2586, "step": 23900 }, { "epoch": 1.6585909386516366, "grad_norm": 1.3203125, "learning_rate": 8.419130434782609e-07, "loss": 0.1875, "step": 23910 }, { "epoch": 1.6592846303923694, "grad_norm": 1.2109375, "learning_rate": 8.401739130434782e-07, "loss": 0.2252, "step": 23920 }, { "epoch": 1.6599783221331021, "grad_norm": 1.4375, "learning_rate": 8.384347826086957e-07, "loss": 0.2808, "step": 23930 }, { "epoch": 1.660672013873835, "grad_norm": 1.3125, "learning_rate": 8.366956521739131e-07, "loss": 0.2576, "step": 23940 }, { "epoch": 1.6613657056145676, "grad_norm": 1.1484375, "learning_rate": 8.349565217391305e-07, "loss": 0.2447, "step": 23950 }, { "epoch": 1.6620593973553004, "grad_norm": 1.4921875, "learning_rate": 8.332173913043478e-07, "loss": 0.2325, "step": 23960 }, { "epoch": 1.662753089096033, "grad_norm": 1.265625, "learning_rate": 8.314782608695653e-07, "loss": 0.1994, "step": 23970 }, { "epoch": 1.6634467808367657, "grad_norm": 1.109375, "learning_rate": 8.297391304347827e-07, "loss": 0.2345, "step": 23980 }, { "epoch": 1.6641404725774984, "grad_norm": 1.4453125, "learning_rate": 8.280000000000001e-07, "loss": 0.3115, "step": 23990 }, { "epoch": 1.664834164318231, "grad_norm": 1.09375, "learning_rate": 8.262608695652174e-07, "loss": 0.2616, "step": 24000 }, { "epoch": 1.6655278560589637, "grad_norm": 1.390625, "learning_rate": 8.245217391304348e-07, "loss": 0.2968, "step": 24010 }, { "epoch": 1.6662215477996964, "grad_norm": 1.28125, "learning_rate": 8.227826086956523e-07, "loss": 0.2652, "step": 24020 }, { "epoch": 1.6669152395404292, "grad_norm": 1.0625, "learning_rate": 8.210434782608696e-07, "loss": 0.275, "step": 24030 }, { "epoch": 1.667608931281162, "grad_norm": 1.1171875, "learning_rate": 8.19304347826087e-07, "loss": 0.202, "step": 24040 }, { "epoch": 1.6683026230218947, "grad_norm": 1.1796875, "learning_rate": 8.175652173913044e-07, "loss": 0.2373, "step": 24050 }, { "epoch": 1.6689963147626274, "grad_norm": 1.21875, "learning_rate": 8.158260869565219e-07, "loss": 0.2368, "step": 24060 }, { "epoch": 1.6696900065033602, "grad_norm": 1.546875, "learning_rate": 8.140869565217392e-07, "loss": 0.2361, "step": 24070 }, { "epoch": 1.670383698244093, "grad_norm": 1.5078125, "learning_rate": 8.123478260869566e-07, "loss": 0.2362, "step": 24080 }, { "epoch": 1.6710773899848255, "grad_norm": 1.28125, "learning_rate": 8.10608695652174e-07, "loss": 0.2434, "step": 24090 }, { "epoch": 1.6717710817255582, "grad_norm": 1.2578125, "learning_rate": 8.088695652173915e-07, "loss": 0.2348, "step": 24100 }, { "epoch": 1.672464773466291, "grad_norm": 1.0625, "learning_rate": 8.071304347826088e-07, "loss": 0.2239, "step": 24110 }, { "epoch": 1.6731584652070235, "grad_norm": 1.3359375, "learning_rate": 8.053913043478262e-07, "loss": 0.2476, "step": 24120 }, { "epoch": 1.6738521569477562, "grad_norm": 1.5234375, "learning_rate": 8.036521739130436e-07, "loss": 0.3045, "step": 24130 }, { "epoch": 1.674545848688489, "grad_norm": 1.03125, "learning_rate": 8.019130434782609e-07, "loss": 0.2166, "step": 24140 }, { "epoch": 1.6752395404292217, "grad_norm": 1.25, "learning_rate": 8.001739130434784e-07, "loss": 0.2401, "step": 24150 }, { "epoch": 1.6759332321699545, "grad_norm": 1.0, "learning_rate": 7.984347826086958e-07, "loss": 0.2398, "step": 24160 }, { "epoch": 1.6766269239106872, "grad_norm": 1.21875, "learning_rate": 7.966956521739132e-07, "loss": 0.2428, "step": 24170 }, { "epoch": 1.67732061565142, "grad_norm": 1.0859375, "learning_rate": 7.949565217391304e-07, "loss": 0.2544, "step": 24180 }, { "epoch": 1.6780143073921527, "grad_norm": 0.9765625, "learning_rate": 7.932173913043478e-07, "loss": 0.1938, "step": 24190 }, { "epoch": 1.6787079991328855, "grad_norm": 1.2265625, "learning_rate": 7.914782608695653e-07, "loss": 0.2606, "step": 24200 }, { "epoch": 1.679401690873618, "grad_norm": 1.125, "learning_rate": 7.897391304347826e-07, "loss": 0.2166, "step": 24210 }, { "epoch": 1.6800953826143508, "grad_norm": 1.3125, "learning_rate": 7.88e-07, "loss": 0.2149, "step": 24220 }, { "epoch": 1.6807890743550835, "grad_norm": 0.875, "learning_rate": 7.862608695652174e-07, "loss": 0.2509, "step": 24230 }, { "epoch": 1.681482766095816, "grad_norm": 1.2734375, "learning_rate": 7.845217391304348e-07, "loss": 0.255, "step": 24240 }, { "epoch": 1.6821764578365488, "grad_norm": 0.84375, "learning_rate": 7.827826086956522e-07, "loss": 0.2084, "step": 24250 }, { "epoch": 1.6828701495772815, "grad_norm": 1.3671875, "learning_rate": 7.810434782608696e-07, "loss": 0.2378, "step": 24260 }, { "epoch": 1.6835638413180143, "grad_norm": 1.2265625, "learning_rate": 7.79304347826087e-07, "loss": 0.2314, "step": 24270 }, { "epoch": 1.684257533058747, "grad_norm": 1.3359375, "learning_rate": 7.775652173913043e-07, "loss": 0.2701, "step": 24280 }, { "epoch": 1.6849512247994798, "grad_norm": 1.171875, "learning_rate": 7.758260869565218e-07, "loss": 0.2682, "step": 24290 }, { "epoch": 1.6856449165402125, "grad_norm": 1.40625, "learning_rate": 7.740869565217392e-07, "loss": 0.235, "step": 24300 }, { "epoch": 1.6863386082809453, "grad_norm": 1.4609375, "learning_rate": 7.723478260869566e-07, "loss": 0.2136, "step": 24310 }, { "epoch": 1.687032300021678, "grad_norm": 1.4140625, "learning_rate": 7.706086956521739e-07, "loss": 0.2159, "step": 24320 }, { "epoch": 1.6877259917624106, "grad_norm": 1.25, "learning_rate": 7.688695652173914e-07, "loss": 0.1848, "step": 24330 }, { "epoch": 1.6884196835031433, "grad_norm": 1.3359375, "learning_rate": 7.671304347826088e-07, "loss": 0.2464, "step": 24340 }, { "epoch": 1.6891133752438758, "grad_norm": 1.2109375, "learning_rate": 7.653913043478262e-07, "loss": 0.2249, "step": 24350 }, { "epoch": 1.6898070669846086, "grad_norm": 1.3046875, "learning_rate": 7.636521739130435e-07, "loss": 0.2389, "step": 24360 }, { "epoch": 1.6905007587253413, "grad_norm": 1.125, "learning_rate": 7.619130434782609e-07, "loss": 0.2296, "step": 24370 }, { "epoch": 1.691194450466074, "grad_norm": 1.3984375, "learning_rate": 7.601739130434784e-07, "loss": 0.2403, "step": 24380 }, { "epoch": 1.6918881422068068, "grad_norm": 1.515625, "learning_rate": 7.584347826086957e-07, "loss": 0.3061, "step": 24390 }, { "epoch": 1.6925818339475396, "grad_norm": 1.4140625, "learning_rate": 7.566956521739131e-07, "loss": 0.225, "step": 24400 }, { "epoch": 1.6932755256882723, "grad_norm": 1.21875, "learning_rate": 7.549565217391305e-07, "loss": 0.234, "step": 24410 }, { "epoch": 1.693969217429005, "grad_norm": 1.046875, "learning_rate": 7.53217391304348e-07, "loss": 0.2418, "step": 24420 }, { "epoch": 1.6946629091697378, "grad_norm": 1.375, "learning_rate": 7.514782608695653e-07, "loss": 0.3734, "step": 24430 }, { "epoch": 1.6953566009104704, "grad_norm": 1.234375, "learning_rate": 7.497391304347827e-07, "loss": 0.2375, "step": 24440 }, { "epoch": 1.696050292651203, "grad_norm": 1.3046875, "learning_rate": 7.480000000000001e-07, "loss": 0.2323, "step": 24450 }, { "epoch": 1.6967439843919359, "grad_norm": 1.125, "learning_rate": 7.462608695652176e-07, "loss": 0.3004, "step": 24460 }, { "epoch": 1.6974376761326684, "grad_norm": 1.28125, "learning_rate": 7.445217391304349e-07, "loss": 0.2572, "step": 24470 }, { "epoch": 1.6981313678734011, "grad_norm": 1.4375, "learning_rate": 7.427826086956523e-07, "loss": 0.2329, "step": 24480 }, { "epoch": 1.6988250596141339, "grad_norm": 1.125, "learning_rate": 7.410434782608696e-07, "loss": 0.2483, "step": 24490 }, { "epoch": 1.6995187513548666, "grad_norm": 1.578125, "learning_rate": 7.393043478260869e-07, "loss": 0.205, "step": 24500 }, { "epoch": 1.7002124430955994, "grad_norm": 1.25, "learning_rate": 7.375652173913043e-07, "loss": 0.2956, "step": 24510 }, { "epoch": 1.7009061348363321, "grad_norm": 1.15625, "learning_rate": 7.358260869565218e-07, "loss": 0.2632, "step": 24520 }, { "epoch": 1.7015998265770649, "grad_norm": 1.234375, "learning_rate": 7.340869565217392e-07, "loss": 0.244, "step": 24530 }, { "epoch": 1.7022935183177976, "grad_norm": 1.1875, "learning_rate": 7.323478260869565e-07, "loss": 0.2039, "step": 24540 }, { "epoch": 1.7029872100585304, "grad_norm": 1.734375, "learning_rate": 7.306086956521739e-07, "loss": 0.3392, "step": 24550 }, { "epoch": 1.703680901799263, "grad_norm": 1.4140625, "learning_rate": 7.288695652173914e-07, "loss": 0.2325, "step": 24560 }, { "epoch": 1.7043745935399957, "grad_norm": 1.171875, "learning_rate": 7.271304347826087e-07, "loss": 0.219, "step": 24570 }, { "epoch": 1.7050682852807284, "grad_norm": 1.1875, "learning_rate": 7.253913043478261e-07, "loss": 0.229, "step": 24580 }, { "epoch": 1.705761977021461, "grad_norm": 1.1328125, "learning_rate": 7.236521739130435e-07, "loss": 0.232, "step": 24590 }, { "epoch": 1.7064556687621937, "grad_norm": 1.015625, "learning_rate": 7.219130434782609e-07, "loss": 0.2213, "step": 24600 }, { "epoch": 1.7071493605029264, "grad_norm": 1.125, "learning_rate": 7.201739130434783e-07, "loss": 0.2818, "step": 24610 }, { "epoch": 1.7078430522436592, "grad_norm": 1.375, "learning_rate": 7.184347826086957e-07, "loss": 0.2612, "step": 24620 }, { "epoch": 1.708536743984392, "grad_norm": 1.609375, "learning_rate": 7.166956521739131e-07, "loss": 0.2474, "step": 24630 }, { "epoch": 1.7092304357251247, "grad_norm": 1.484375, "learning_rate": 7.149565217391304e-07, "loss": 0.2656, "step": 24640 }, { "epoch": 1.7099241274658574, "grad_norm": 1.1328125, "learning_rate": 7.132173913043479e-07, "loss": 0.2126, "step": 24650 }, { "epoch": 1.7106178192065902, "grad_norm": 1.3203125, "learning_rate": 7.114782608695653e-07, "loss": 0.2265, "step": 24660 }, { "epoch": 1.711311510947323, "grad_norm": 1.4609375, "learning_rate": 7.097391304347827e-07, "loss": 0.2468, "step": 24670 }, { "epoch": 1.7120052026880554, "grad_norm": 1.1015625, "learning_rate": 7.08e-07, "loss": 0.2395, "step": 24680 }, { "epoch": 1.7126988944287882, "grad_norm": 1.21875, "learning_rate": 7.062608695652175e-07, "loss": 0.2194, "step": 24690 }, { "epoch": 1.713392586169521, "grad_norm": 1.265625, "learning_rate": 7.045217391304349e-07, "loss": 0.2355, "step": 24700 }, { "epoch": 1.7140862779102535, "grad_norm": 1.125, "learning_rate": 7.027826086956523e-07, "loss": 0.2146, "step": 24710 }, { "epoch": 1.7147799696509862, "grad_norm": 1.3125, "learning_rate": 7.010434782608696e-07, "loss": 0.2325, "step": 24720 }, { "epoch": 1.715473661391719, "grad_norm": 1.046875, "learning_rate": 6.99304347826087e-07, "loss": 0.252, "step": 24730 }, { "epoch": 1.7161673531324517, "grad_norm": 0.9921875, "learning_rate": 6.975652173913045e-07, "loss": 0.2504, "step": 24740 }, { "epoch": 1.7168610448731845, "grad_norm": 0.97265625, "learning_rate": 6.958260869565218e-07, "loss": 0.2483, "step": 24750 }, { "epoch": 1.7175547366139172, "grad_norm": 1.765625, "learning_rate": 6.940869565217392e-07, "loss": 0.2785, "step": 24760 }, { "epoch": 1.71824842835465, "grad_norm": 1.828125, "learning_rate": 6.923478260869566e-07, "loss": 0.3625, "step": 24770 }, { "epoch": 1.7189421200953827, "grad_norm": 1.21875, "learning_rate": 6.906086956521741e-07, "loss": 0.2435, "step": 24780 }, { "epoch": 1.7196358118361155, "grad_norm": 1.5390625, "learning_rate": 6.888695652173914e-07, "loss": 0.2389, "step": 24790 }, { "epoch": 1.720329503576848, "grad_norm": 1.25, "learning_rate": 6.871304347826087e-07, "loss": 0.2638, "step": 24800 }, { "epoch": 1.7210231953175807, "grad_norm": 1.109375, "learning_rate": 6.853913043478261e-07, "loss": 0.2871, "step": 24810 }, { "epoch": 1.7217168870583135, "grad_norm": 1.015625, "learning_rate": 6.836521739130434e-07, "loss": 0.2482, "step": 24820 }, { "epoch": 1.722410578799046, "grad_norm": 1.6484375, "learning_rate": 6.819130434782609e-07, "loss": 0.2159, "step": 24830 }, { "epoch": 1.7231042705397788, "grad_norm": 1.2421875, "learning_rate": 6.801739130434783e-07, "loss": 0.2426, "step": 24840 }, { "epoch": 1.7237979622805115, "grad_norm": 1.6484375, "learning_rate": 6.784347826086957e-07, "loss": 0.2381, "step": 24850 }, { "epoch": 1.7244916540212443, "grad_norm": 1.046875, "learning_rate": 6.76695652173913e-07, "loss": 0.2482, "step": 24860 }, { "epoch": 1.725185345761977, "grad_norm": 1.09375, "learning_rate": 6.749565217391304e-07, "loss": 0.2337, "step": 24870 }, { "epoch": 1.7258790375027098, "grad_norm": 0.7578125, "learning_rate": 6.732173913043479e-07, "loss": 0.2348, "step": 24880 }, { "epoch": 1.7265727292434425, "grad_norm": 1.578125, "learning_rate": 6.714782608695653e-07, "loss": 0.271, "step": 24890 }, { "epoch": 1.7272664209841753, "grad_norm": 0.96875, "learning_rate": 6.697391304347826e-07, "loss": 0.2612, "step": 24900 }, { "epoch": 1.727960112724908, "grad_norm": 1.1796875, "learning_rate": 6.68e-07, "loss": 0.2523, "step": 24910 }, { "epoch": 1.7286538044656405, "grad_norm": 1.3828125, "learning_rate": 6.662608695652175e-07, "loss": 0.2531, "step": 24920 }, { "epoch": 1.7293474962063733, "grad_norm": 1.171875, "learning_rate": 6.645217391304348e-07, "loss": 0.226, "step": 24930 }, { "epoch": 1.730041187947106, "grad_norm": 1.15625, "learning_rate": 6.627826086956522e-07, "loss": 0.2511, "step": 24940 }, { "epoch": 1.7307348796878386, "grad_norm": 1.1328125, "learning_rate": 6.610434782608696e-07, "loss": 0.2648, "step": 24950 }, { "epoch": 1.7314285714285713, "grad_norm": 1.4296875, "learning_rate": 6.593043478260871e-07, "loss": 0.2495, "step": 24960 }, { "epoch": 1.732122263169304, "grad_norm": 1.03125, "learning_rate": 6.575652173913044e-07, "loss": 0.2491, "step": 24970 }, { "epoch": 1.7328159549100368, "grad_norm": 1.1875, "learning_rate": 6.558260869565218e-07, "loss": 0.1996, "step": 24980 }, { "epoch": 1.7335096466507696, "grad_norm": 1.265625, "learning_rate": 6.540869565217392e-07, "loss": 0.2316, "step": 24990 }, { "epoch": 1.7342033383915023, "grad_norm": 1.1484375, "learning_rate": 6.523478260869566e-07, "loss": 0.2417, "step": 25000 }, { "epoch": 1.734897030132235, "grad_norm": 1.0078125, "learning_rate": 6.50608695652174e-07, "loss": 0.2705, "step": 25010 }, { "epoch": 1.7355907218729678, "grad_norm": 1.265625, "learning_rate": 6.488695652173914e-07, "loss": 0.234, "step": 25020 }, { "epoch": 1.7362844136137006, "grad_norm": 1.3515625, "learning_rate": 6.471304347826088e-07, "loss": 0.2018, "step": 25030 }, { "epoch": 1.736978105354433, "grad_norm": 1.71875, "learning_rate": 6.453913043478261e-07, "loss": 0.26, "step": 25040 }, { "epoch": 1.7376717970951658, "grad_norm": 1.4375, "learning_rate": 6.436521739130436e-07, "loss": 0.2964, "step": 25050 }, { "epoch": 1.7383654888358986, "grad_norm": 1.1484375, "learning_rate": 6.41913043478261e-07, "loss": 0.2349, "step": 25060 }, { "epoch": 1.7390591805766311, "grad_norm": 1.359375, "learning_rate": 6.401739130434784e-07, "loss": 0.249, "step": 25070 }, { "epoch": 1.7397528723173639, "grad_norm": 1.953125, "learning_rate": 6.384347826086957e-07, "loss": 0.2878, "step": 25080 }, { "epoch": 1.7404465640580966, "grad_norm": 1.3671875, "learning_rate": 6.366956521739132e-07, "loss": 0.2922, "step": 25090 }, { "epoch": 1.7411402557988294, "grad_norm": 1.328125, "learning_rate": 6.349565217391306e-07, "loss": 0.2339, "step": 25100 }, { "epoch": 1.741833947539562, "grad_norm": 1.3671875, "learning_rate": 6.332173913043478e-07, "loss": 0.2677, "step": 25110 }, { "epoch": 1.7425276392802949, "grad_norm": 1.5546875, "learning_rate": 6.314782608695652e-07, "loss": 0.2195, "step": 25120 }, { "epoch": 1.7432213310210276, "grad_norm": 1.234375, "learning_rate": 6.297391304347826e-07, "loss": 0.2228, "step": 25130 }, { "epoch": 1.7439150227617604, "grad_norm": 1.15625, "learning_rate": 6.28e-07, "loss": 0.1983, "step": 25140 }, { "epoch": 1.744608714502493, "grad_norm": 1.3359375, "learning_rate": 6.262608695652174e-07, "loss": 0.2045, "step": 25150 }, { "epoch": 1.7453024062432256, "grad_norm": 1.53125, "learning_rate": 6.245217391304348e-07, "loss": 0.2562, "step": 25160 }, { "epoch": 1.7459960979839584, "grad_norm": 1.3984375, "learning_rate": 6.227826086956523e-07, "loss": 0.2394, "step": 25170 }, { "epoch": 1.7466897897246911, "grad_norm": 0.98046875, "learning_rate": 6.210434782608697e-07, "loss": 0.2807, "step": 25180 }, { "epoch": 1.7473834814654237, "grad_norm": 1.21875, "learning_rate": 6.19304347826087e-07, "loss": 0.2084, "step": 25190 }, { "epoch": 1.7480771732061564, "grad_norm": 1.125, "learning_rate": 6.175652173913044e-07, "loss": 0.2275, "step": 25200 }, { "epoch": 1.7487708649468892, "grad_norm": 1.9921875, "learning_rate": 6.158260869565218e-07, "loss": 0.3696, "step": 25210 }, { "epoch": 1.749464556687622, "grad_norm": 1.6484375, "learning_rate": 6.140869565217391e-07, "loss": 0.3133, "step": 25220 }, { "epoch": 1.7501582484283547, "grad_norm": 1.2265625, "learning_rate": 6.123478260869565e-07, "loss": 0.2205, "step": 25230 }, { "epoch": 1.7508519401690874, "grad_norm": 0.9765625, "learning_rate": 6.10608695652174e-07, "loss": 0.2292, "step": 25240 }, { "epoch": 1.7515456319098202, "grad_norm": 1.1640625, "learning_rate": 6.088695652173914e-07, "loss": 0.2705, "step": 25250 }, { "epoch": 1.752239323650553, "grad_norm": 1.0625, "learning_rate": 6.071304347826087e-07, "loss": 0.2202, "step": 25260 }, { "epoch": 1.7529330153912857, "grad_norm": 1.546875, "learning_rate": 6.053913043478261e-07, "loss": 0.2228, "step": 25270 }, { "epoch": 1.7536267071320182, "grad_norm": 1.3046875, "learning_rate": 6.036521739130436e-07, "loss": 0.242, "step": 25280 }, { "epoch": 1.754320398872751, "grad_norm": 0.96875, "learning_rate": 6.01913043478261e-07, "loss": 0.2173, "step": 25290 }, { "epoch": 1.7550140906134837, "grad_norm": 1.375, "learning_rate": 6.001739130434783e-07, "loss": 0.2418, "step": 25300 }, { "epoch": 1.7557077823542162, "grad_norm": 1.375, "learning_rate": 5.984347826086957e-07, "loss": 0.3065, "step": 25310 }, { "epoch": 1.756401474094949, "grad_norm": 1.1875, "learning_rate": 5.966956521739132e-07, "loss": 0.2139, "step": 25320 }, { "epoch": 1.7570951658356817, "grad_norm": 1.03125, "learning_rate": 5.949565217391305e-07, "loss": 0.2255, "step": 25330 }, { "epoch": 1.7577888575764145, "grad_norm": 0.953125, "learning_rate": 5.932173913043478e-07, "loss": 0.2343, "step": 25340 }, { "epoch": 1.7584825493171472, "grad_norm": 1.265625, "learning_rate": 5.914782608695653e-07, "loss": 0.2387, "step": 25350 }, { "epoch": 1.75917624105788, "grad_norm": 1.171875, "learning_rate": 5.897391304347827e-07, "loss": 0.2096, "step": 25360 }, { "epoch": 1.7598699327986127, "grad_norm": 1.6796875, "learning_rate": 5.88e-07, "loss": 0.2429, "step": 25370 }, { "epoch": 1.7605636245393455, "grad_norm": 1.03125, "learning_rate": 5.862608695652174e-07, "loss": 0.2416, "step": 25380 }, { "epoch": 1.7612573162800782, "grad_norm": 1.28125, "learning_rate": 5.845217391304349e-07, "loss": 0.2747, "step": 25390 }, { "epoch": 1.7619510080208107, "grad_norm": 1.2265625, "learning_rate": 5.827826086956522e-07, "loss": 0.2036, "step": 25400 }, { "epoch": 1.7626446997615435, "grad_norm": 1.1328125, "learning_rate": 5.810434782608696e-07, "loss": 0.2278, "step": 25410 }, { "epoch": 1.7633383915022762, "grad_norm": 1.2890625, "learning_rate": 5.79304347826087e-07, "loss": 0.2085, "step": 25420 }, { "epoch": 1.7640320832430088, "grad_norm": 0.98828125, "learning_rate": 5.775652173913044e-07, "loss": 0.2329, "step": 25430 }, { "epoch": 1.7647257749837415, "grad_norm": 1.3203125, "learning_rate": 5.758260869565218e-07, "loss": 0.2339, "step": 25440 }, { "epoch": 1.7654194667244743, "grad_norm": 1.140625, "learning_rate": 5.740869565217392e-07, "loss": 0.2597, "step": 25450 }, { "epoch": 1.766113158465207, "grad_norm": 1.1796875, "learning_rate": 5.723478260869566e-07, "loss": 0.2197, "step": 25460 }, { "epoch": 1.7668068502059397, "grad_norm": 1.0859375, "learning_rate": 5.70608695652174e-07, "loss": 0.2542, "step": 25470 }, { "epoch": 1.7675005419466725, "grad_norm": 1.15625, "learning_rate": 5.688695652173914e-07, "loss": 0.2738, "step": 25480 }, { "epoch": 1.7681942336874052, "grad_norm": 1.4140625, "learning_rate": 5.671304347826087e-07, "loss": 0.2285, "step": 25490 }, { "epoch": 1.768887925428138, "grad_norm": 1.2265625, "learning_rate": 5.653913043478261e-07, "loss": 0.2296, "step": 25500 }, { "epoch": 1.7695816171688707, "grad_norm": 1.6015625, "learning_rate": 5.636521739130435e-07, "loss": 0.2071, "step": 25510 }, { "epoch": 1.7702753089096033, "grad_norm": 1.1953125, "learning_rate": 5.619130434782609e-07, "loss": 0.2482, "step": 25520 }, { "epoch": 1.770969000650336, "grad_norm": 1.109375, "learning_rate": 5.601739130434783e-07, "loss": 0.3006, "step": 25530 }, { "epoch": 1.7716626923910688, "grad_norm": 1.21875, "learning_rate": 5.584347826086957e-07, "loss": 0.2639, "step": 25540 }, { "epoch": 1.7723563841318013, "grad_norm": 1.5546875, "learning_rate": 5.566956521739131e-07, "loss": 0.2591, "step": 25550 }, { "epoch": 1.773050075872534, "grad_norm": 1.1875, "learning_rate": 5.549565217391305e-07, "loss": 0.3238, "step": 25560 }, { "epoch": 1.7737437676132668, "grad_norm": 1.8515625, "learning_rate": 5.532173913043479e-07, "loss": 0.2856, "step": 25570 }, { "epoch": 1.7744374593539995, "grad_norm": 1.1640625, "learning_rate": 5.514782608695652e-07, "loss": 0.3195, "step": 25580 }, { "epoch": 1.7751311510947323, "grad_norm": 1.59375, "learning_rate": 5.497391304347826e-07, "loss": 0.2509, "step": 25590 }, { "epoch": 1.775824842835465, "grad_norm": 1.140625, "learning_rate": 5.480000000000001e-07, "loss": 0.2397, "step": 25600 }, { "epoch": 1.7765185345761978, "grad_norm": 1.1328125, "learning_rate": 5.462608695652175e-07, "loss": 0.2355, "step": 25610 }, { "epoch": 1.7772122263169305, "grad_norm": 1.21875, "learning_rate": 5.445217391304348e-07, "loss": 0.1725, "step": 25620 }, { "epoch": 1.777905918057663, "grad_norm": 1.1484375, "learning_rate": 5.427826086956522e-07, "loss": 0.3111, "step": 25630 }, { "epoch": 1.7785996097983958, "grad_norm": 1.3046875, "learning_rate": 5.410434782608697e-07, "loss": 0.2527, "step": 25640 }, { "epoch": 1.7792933015391286, "grad_norm": 1.28125, "learning_rate": 5.393043478260869e-07, "loss": 0.2612, "step": 25650 }, { "epoch": 1.779986993279861, "grad_norm": 1.5390625, "learning_rate": 5.375652173913043e-07, "loss": 0.2509, "step": 25660 }, { "epoch": 1.7806806850205938, "grad_norm": 1.390625, "learning_rate": 5.358260869565218e-07, "loss": 0.2457, "step": 25670 }, { "epoch": 1.7813743767613266, "grad_norm": 1.15625, "learning_rate": 5.340869565217392e-07, "loss": 0.2486, "step": 25680 }, { "epoch": 1.7820680685020593, "grad_norm": 1.3125, "learning_rate": 5.323478260869565e-07, "loss": 0.2046, "step": 25690 }, { "epoch": 1.782761760242792, "grad_norm": 1.125, "learning_rate": 5.306086956521739e-07, "loss": 0.2119, "step": 25700 }, { "epoch": 1.7834554519835248, "grad_norm": 1.0, "learning_rate": 5.288695652173914e-07, "loss": 0.2471, "step": 25710 }, { "epoch": 1.7841491437242576, "grad_norm": 0.93359375, "learning_rate": 5.271304347826088e-07, "loss": 0.2273, "step": 25720 }, { "epoch": 1.7848428354649903, "grad_norm": 0.95703125, "learning_rate": 5.253913043478261e-07, "loss": 0.2592, "step": 25730 }, { "epoch": 1.785536527205723, "grad_norm": 1.3359375, "learning_rate": 5.236521739130435e-07, "loss": 0.2367, "step": 25740 }, { "epoch": 1.7862302189464556, "grad_norm": 1.3046875, "learning_rate": 5.21913043478261e-07, "loss": 0.2357, "step": 25750 }, { "epoch": 1.7869239106871884, "grad_norm": 1.2734375, "learning_rate": 5.201739130434783e-07, "loss": 0.219, "step": 25760 }, { "epoch": 1.7876176024279211, "grad_norm": 1.171875, "learning_rate": 5.184347826086957e-07, "loss": 0.2937, "step": 25770 }, { "epoch": 1.7883112941686536, "grad_norm": 0.984375, "learning_rate": 5.166956521739131e-07, "loss": 0.2343, "step": 25780 }, { "epoch": 1.7890049859093864, "grad_norm": 1.3515625, "learning_rate": 5.149565217391305e-07, "loss": 0.226, "step": 25790 }, { "epoch": 1.7896986776501191, "grad_norm": 0.921875, "learning_rate": 5.132173913043478e-07, "loss": 0.2387, "step": 25800 }, { "epoch": 1.790392369390852, "grad_norm": 1.1015625, "learning_rate": 5.114782608695652e-07, "loss": 0.2074, "step": 25810 }, { "epoch": 1.7910860611315846, "grad_norm": 1.1328125, "learning_rate": 5.097391304347827e-07, "loss": 0.243, "step": 25820 }, { "epoch": 1.7917797528723174, "grad_norm": 1.21875, "learning_rate": 5.08e-07, "loss": 0.2289, "step": 25830 }, { "epoch": 1.7924734446130501, "grad_norm": 1.1875, "learning_rate": 5.062608695652174e-07, "loss": 0.2942, "step": 25840 }, { "epoch": 1.7931671363537829, "grad_norm": 1.1953125, "learning_rate": 5.045217391304348e-07, "loss": 0.2439, "step": 25850 }, { "epoch": 1.7938608280945156, "grad_norm": 0.98046875, "learning_rate": 5.027826086956522e-07, "loss": 0.2651, "step": 25860 }, { "epoch": 1.7945545198352482, "grad_norm": 1.4375, "learning_rate": 5.010434782608696e-07, "loss": 0.2474, "step": 25870 }, { "epoch": 1.795248211575981, "grad_norm": 1.578125, "learning_rate": 4.99304347826087e-07, "loss": 0.2417, "step": 25880 }, { "epoch": 1.7959419033167137, "grad_norm": 1.234375, "learning_rate": 4.975652173913044e-07, "loss": 0.2635, "step": 25890 }, { "epoch": 1.7966355950574462, "grad_norm": 1.84375, "learning_rate": 4.958260869565218e-07, "loss": 0.3054, "step": 25900 }, { "epoch": 1.797329286798179, "grad_norm": 1.1953125, "learning_rate": 4.940869565217392e-07, "loss": 0.2265, "step": 25910 }, { "epoch": 1.7980229785389117, "grad_norm": 1.2109375, "learning_rate": 4.923478260869566e-07, "loss": 0.2727, "step": 25920 }, { "epoch": 1.7987166702796444, "grad_norm": 1.0390625, "learning_rate": 4.90608695652174e-07, "loss": 0.2491, "step": 25930 }, { "epoch": 1.7994103620203772, "grad_norm": 1.2578125, "learning_rate": 4.888695652173913e-07, "loss": 0.2025, "step": 25940 }, { "epoch": 1.80010405376111, "grad_norm": 0.96484375, "learning_rate": 4.871304347826088e-07, "loss": 0.2422, "step": 25950 }, { "epoch": 1.8007977455018427, "grad_norm": 1.4453125, "learning_rate": 4.853913043478261e-07, "loss": 0.2329, "step": 25960 }, { "epoch": 1.8014914372425754, "grad_norm": 1.609375, "learning_rate": 4.836521739130435e-07, "loss": 0.2516, "step": 25970 }, { "epoch": 1.8021851289833082, "grad_norm": 1.1953125, "learning_rate": 4.819130434782609e-07, "loss": 0.1947, "step": 25980 }, { "epoch": 1.8028788207240407, "grad_norm": 1.390625, "learning_rate": 4.801739130434783e-07, "loss": 0.2751, "step": 25990 }, { "epoch": 1.8035725124647735, "grad_norm": 1.5234375, "learning_rate": 4.784347826086957e-07, "loss": 0.2421, "step": 26000 }, { "epoch": 1.8042662042055062, "grad_norm": 1.4609375, "learning_rate": 4.7669565217391305e-07, "loss": 0.2389, "step": 26010 }, { "epoch": 1.8049598959462387, "grad_norm": 1.21875, "learning_rate": 4.7495652173913047e-07, "loss": 0.2199, "step": 26020 }, { "epoch": 1.8056535876869715, "grad_norm": 1.71875, "learning_rate": 4.7321739130434784e-07, "loss": 0.2385, "step": 26030 }, { "epoch": 1.8063472794277042, "grad_norm": 1.25, "learning_rate": 4.7147826086956527e-07, "loss": 0.2346, "step": 26040 }, { "epoch": 1.807040971168437, "grad_norm": 1.3203125, "learning_rate": 4.6973913043478264e-07, "loss": 0.2409, "step": 26050 }, { "epoch": 1.8077346629091697, "grad_norm": 1.2890625, "learning_rate": 4.6800000000000006e-07, "loss": 0.2329, "step": 26060 }, { "epoch": 1.8084283546499025, "grad_norm": 1.28125, "learning_rate": 4.6626086956521743e-07, "loss": 0.2252, "step": 26070 }, { "epoch": 1.8091220463906352, "grad_norm": 1.28125, "learning_rate": 4.6452173913043486e-07, "loss": 0.2564, "step": 26080 }, { "epoch": 1.809815738131368, "grad_norm": 1.5078125, "learning_rate": 4.6278260869565223e-07, "loss": 0.2508, "step": 26090 }, { "epoch": 1.8105094298721007, "grad_norm": 1.3828125, "learning_rate": 4.6104347826086965e-07, "loss": 0.2399, "step": 26100 }, { "epoch": 1.8112031216128333, "grad_norm": 1.5234375, "learning_rate": 4.5930434782608697e-07, "loss": 0.2153, "step": 26110 }, { "epoch": 1.811896813353566, "grad_norm": 1.3828125, "learning_rate": 4.5756521739130434e-07, "loss": 0.2163, "step": 26120 }, { "epoch": 1.8125905050942988, "grad_norm": 1.3515625, "learning_rate": 4.5582608695652177e-07, "loss": 0.2719, "step": 26130 }, { "epoch": 1.8132841968350313, "grad_norm": 0.9609375, "learning_rate": 4.5408695652173914e-07, "loss": 0.1888, "step": 26140 }, { "epoch": 1.813977888575764, "grad_norm": 1.2734375, "learning_rate": 4.5234782608695656e-07, "loss": 0.3202, "step": 26150 }, { "epoch": 1.8146715803164968, "grad_norm": 1.1171875, "learning_rate": 4.5060869565217393e-07, "loss": 0.21, "step": 26160 }, { "epoch": 1.8153652720572295, "grad_norm": 1.1171875, "learning_rate": 4.4886956521739136e-07, "loss": 0.194, "step": 26170 }, { "epoch": 1.8160589637979623, "grad_norm": 1.296875, "learning_rate": 4.4713043478260873e-07, "loss": 0.2186, "step": 26180 }, { "epoch": 1.816752655538695, "grad_norm": 0.74609375, "learning_rate": 4.4539130434782615e-07, "loss": 0.2346, "step": 26190 }, { "epoch": 1.8174463472794278, "grad_norm": 1.375, "learning_rate": 4.436521739130435e-07, "loss": 0.2373, "step": 26200 }, { "epoch": 1.8181400390201605, "grad_norm": 1.0, "learning_rate": 4.419130434782609e-07, "loss": 0.262, "step": 26210 }, { "epoch": 1.8188337307608933, "grad_norm": 0.97265625, "learning_rate": 4.401739130434783e-07, "loss": 0.2297, "step": 26220 }, { "epoch": 1.8195274225016258, "grad_norm": 1.1171875, "learning_rate": 4.384347826086957e-07, "loss": 0.2302, "step": 26230 }, { "epoch": 1.8202211142423586, "grad_norm": 0.94921875, "learning_rate": 4.366956521739131e-07, "loss": 0.2239, "step": 26240 }, { "epoch": 1.8209148059830913, "grad_norm": 1.1796875, "learning_rate": 4.349565217391305e-07, "loss": 0.2234, "step": 26250 }, { "epoch": 1.8216084977238238, "grad_norm": 1.09375, "learning_rate": 4.332173913043479e-07, "loss": 0.242, "step": 26260 }, { "epoch": 1.8223021894645566, "grad_norm": 1.9609375, "learning_rate": 4.314782608695652e-07, "loss": 0.2617, "step": 26270 }, { "epoch": 1.8229958812052893, "grad_norm": 1.4765625, "learning_rate": 4.297391304347826e-07, "loss": 0.2493, "step": 26280 }, { "epoch": 1.823689572946022, "grad_norm": 1.3046875, "learning_rate": 4.28e-07, "loss": 0.2419, "step": 26290 }, { "epoch": 1.8243832646867548, "grad_norm": 1.2265625, "learning_rate": 4.262608695652174e-07, "loss": 0.2478, "step": 26300 }, { "epoch": 1.8250769564274876, "grad_norm": 1.3046875, "learning_rate": 4.245217391304348e-07, "loss": 0.2801, "step": 26310 }, { "epoch": 1.8257706481682203, "grad_norm": 1.03125, "learning_rate": 4.227826086956522e-07, "loss": 0.2001, "step": 26320 }, { "epoch": 1.826464339908953, "grad_norm": 0.890625, "learning_rate": 4.210434782608696e-07, "loss": 0.2197, "step": 26330 }, { "epoch": 1.8271580316496858, "grad_norm": 1.28125, "learning_rate": 4.19304347826087e-07, "loss": 0.2616, "step": 26340 }, { "epoch": 1.8278517233904183, "grad_norm": 1.40625, "learning_rate": 4.175652173913044e-07, "loss": 0.2663, "step": 26350 }, { "epoch": 1.828545415131151, "grad_norm": 1.3515625, "learning_rate": 4.158260869565218e-07, "loss": 0.2307, "step": 26360 }, { "epoch": 1.8292391068718838, "grad_norm": 1.3671875, "learning_rate": 4.140869565217392e-07, "loss": 0.2576, "step": 26370 }, { "epoch": 1.8299327986126164, "grad_norm": 1.109375, "learning_rate": 4.1234782608695657e-07, "loss": 0.1986, "step": 26380 }, { "epoch": 1.8306264903533491, "grad_norm": 1.3359375, "learning_rate": 4.1060869565217394e-07, "loss": 0.2378, "step": 26390 }, { "epoch": 1.8313201820940819, "grad_norm": 1.78125, "learning_rate": 4.0886956521739137e-07, "loss": 0.2416, "step": 26400 }, { "epoch": 1.8320138738348146, "grad_norm": 1.7890625, "learning_rate": 4.0713043478260874e-07, "loss": 0.3052, "step": 26410 }, { "epoch": 1.8327075655755474, "grad_norm": 1.15625, "learning_rate": 4.053913043478261e-07, "loss": 0.2222, "step": 26420 }, { "epoch": 1.8334012573162801, "grad_norm": 1.2890625, "learning_rate": 4.036521739130435e-07, "loss": 0.2901, "step": 26430 }, { "epoch": 1.8340949490570129, "grad_norm": 1.4921875, "learning_rate": 4.0191304347826085e-07, "loss": 0.2526, "step": 26440 }, { "epoch": 1.8347886407977456, "grad_norm": 1.234375, "learning_rate": 4.001739130434783e-07, "loss": 0.2955, "step": 26450 }, { "epoch": 1.8354823325384784, "grad_norm": 1.8359375, "learning_rate": 3.9843478260869565e-07, "loss": 0.301, "step": 26460 }, { "epoch": 1.836176024279211, "grad_norm": 1.28125, "learning_rate": 3.9669565217391307e-07, "loss": 0.2689, "step": 26470 }, { "epoch": 1.8368697160199436, "grad_norm": 0.921875, "learning_rate": 3.9495652173913044e-07, "loss": 0.2165, "step": 26480 }, { "epoch": 1.8375634077606764, "grad_norm": 1.96875, "learning_rate": 3.9321739130434787e-07, "loss": 0.3363, "step": 26490 }, { "epoch": 1.838257099501409, "grad_norm": 1.5625, "learning_rate": 3.9147826086956524e-07, "loss": 0.2889, "step": 26500 }, { "epoch": 1.8389507912421417, "grad_norm": 1.140625, "learning_rate": 3.8973913043478266e-07, "loss": 0.205, "step": 26510 }, { "epoch": 1.8396444829828744, "grad_norm": 1.265625, "learning_rate": 3.8800000000000003e-07, "loss": 0.2951, "step": 26520 }, { "epoch": 1.8403381747236072, "grad_norm": 1.25, "learning_rate": 3.8626086956521746e-07, "loss": 0.2299, "step": 26530 }, { "epoch": 1.84103186646434, "grad_norm": 1.40625, "learning_rate": 3.8452173913043483e-07, "loss": 0.233, "step": 26540 }, { "epoch": 1.8417255582050727, "grad_norm": 1.234375, "learning_rate": 3.8278260869565225e-07, "loss": 0.2723, "step": 26550 }, { "epoch": 1.8424192499458054, "grad_norm": 1.21875, "learning_rate": 3.810434782608696e-07, "loss": 0.2245, "step": 26560 }, { "epoch": 1.8431129416865382, "grad_norm": 1.0, "learning_rate": 3.7930434782608705e-07, "loss": 0.2378, "step": 26570 }, { "epoch": 1.843806633427271, "grad_norm": 1.2734375, "learning_rate": 3.7756521739130437e-07, "loss": 0.2427, "step": 26580 }, { "epoch": 1.8445003251680034, "grad_norm": 1.0703125, "learning_rate": 3.7582608695652174e-07, "loss": 0.2357, "step": 26590 }, { "epoch": 1.8451940169087362, "grad_norm": 1.359375, "learning_rate": 3.7408695652173916e-07, "loss": 0.3183, "step": 26600 }, { "epoch": 1.845887708649469, "grad_norm": 1.03125, "learning_rate": 3.7234782608695653e-07, "loss": 0.3006, "step": 26610 }, { "epoch": 1.8465814003902015, "grad_norm": 1.5546875, "learning_rate": 3.7060869565217396e-07, "loss": 0.2224, "step": 26620 }, { "epoch": 1.8472750921309342, "grad_norm": 1.140625, "learning_rate": 3.6886956521739133e-07, "loss": 0.221, "step": 26630 }, { "epoch": 1.847968783871667, "grad_norm": 1.109375, "learning_rate": 3.671304347826087e-07, "loss": 0.2246, "step": 26640 }, { "epoch": 1.8486624756123997, "grad_norm": 1.53125, "learning_rate": 3.653913043478261e-07, "loss": 0.2503, "step": 26650 }, { "epoch": 1.8493561673531325, "grad_norm": 1.34375, "learning_rate": 3.636521739130435e-07, "loss": 0.1998, "step": 26660 }, { "epoch": 1.8500498590938652, "grad_norm": 1.171875, "learning_rate": 3.619130434782609e-07, "loss": 0.2254, "step": 26670 }, { "epoch": 1.850743550834598, "grad_norm": 1.25, "learning_rate": 3.601739130434783e-07, "loss": 0.2482, "step": 26680 }, { "epoch": 1.8514372425753307, "grad_norm": 1.0, "learning_rate": 3.584347826086957e-07, "loss": 0.2333, "step": 26690 }, { "epoch": 1.8521309343160635, "grad_norm": 1.515625, "learning_rate": 3.566956521739131e-07, "loss": 0.2732, "step": 26700 }, { "epoch": 1.852824626056796, "grad_norm": 1.15625, "learning_rate": 3.549565217391305e-07, "loss": 0.2439, "step": 26710 }, { "epoch": 1.8535183177975287, "grad_norm": 1.25, "learning_rate": 3.532173913043479e-07, "loss": 0.196, "step": 26720 }, { "epoch": 1.8542120095382615, "grad_norm": 1.625, "learning_rate": 3.514782608695652e-07, "loss": 0.2994, "step": 26730 }, { "epoch": 1.854905701278994, "grad_norm": 1.2421875, "learning_rate": 3.497391304347826e-07, "loss": 0.2272, "step": 26740 }, { "epoch": 1.8555993930197268, "grad_norm": 1.2734375, "learning_rate": 3.48e-07, "loss": 0.2331, "step": 26750 }, { "epoch": 1.8562930847604595, "grad_norm": 1.4453125, "learning_rate": 3.462608695652174e-07, "loss": 0.2274, "step": 26760 }, { "epoch": 1.8569867765011923, "grad_norm": 1.53125, "learning_rate": 3.445217391304348e-07, "loss": 0.2571, "step": 26770 }, { "epoch": 1.857680468241925, "grad_norm": 1.453125, "learning_rate": 3.427826086956522e-07, "loss": 0.2547, "step": 26780 }, { "epoch": 1.8583741599826578, "grad_norm": 1.140625, "learning_rate": 3.410434782608696e-07, "loss": 0.2549, "step": 26790 }, { "epoch": 1.8590678517233905, "grad_norm": 1.140625, "learning_rate": 3.39304347826087e-07, "loss": 0.278, "step": 26800 }, { "epoch": 1.8597615434641233, "grad_norm": 0.97265625, "learning_rate": 3.375652173913044e-07, "loss": 0.2091, "step": 26810 }, { "epoch": 1.860455235204856, "grad_norm": 2.015625, "learning_rate": 3.3582608695652175e-07, "loss": 0.2894, "step": 26820 }, { "epoch": 1.8611489269455885, "grad_norm": 0.85546875, "learning_rate": 3.3408695652173917e-07, "loss": 0.2035, "step": 26830 }, { "epoch": 1.8618426186863213, "grad_norm": 1.65625, "learning_rate": 3.3234782608695654e-07, "loss": 0.2096, "step": 26840 }, { "epoch": 1.862536310427054, "grad_norm": 1.296875, "learning_rate": 3.3060869565217397e-07, "loss": 0.2182, "step": 26850 }, { "epoch": 1.8632300021677866, "grad_norm": 1.2109375, "learning_rate": 3.2886956521739134e-07, "loss": 0.2323, "step": 26860 }, { "epoch": 1.8639236939085193, "grad_norm": 0.96875, "learning_rate": 3.2713043478260876e-07, "loss": 0.307, "step": 26870 }, { "epoch": 1.864617385649252, "grad_norm": 1.125, "learning_rate": 3.2539130434782614e-07, "loss": 0.2165, "step": 26880 }, { "epoch": 1.8653110773899848, "grad_norm": 1.1015625, "learning_rate": 3.2365217391304345e-07, "loss": 0.2663, "step": 26890 }, { "epoch": 1.8660047691307176, "grad_norm": 0.9765625, "learning_rate": 3.219130434782609e-07, "loss": 0.2491, "step": 26900 }, { "epoch": 1.8666984608714503, "grad_norm": 1.609375, "learning_rate": 3.2017391304347825e-07, "loss": 0.3144, "step": 26910 }, { "epoch": 1.867392152612183, "grad_norm": 1.578125, "learning_rate": 3.1843478260869567e-07, "loss": 0.2495, "step": 26920 }, { "epoch": 1.8680858443529158, "grad_norm": 1.28125, "learning_rate": 3.1669565217391304e-07, "loss": 0.3146, "step": 26930 }, { "epoch": 1.8687795360936483, "grad_norm": 1.2734375, "learning_rate": 3.1495652173913047e-07, "loss": 0.2254, "step": 26940 }, { "epoch": 1.869473227834381, "grad_norm": 1.2890625, "learning_rate": 3.1321739130434784e-07, "loss": 0.2286, "step": 26950 }, { "epoch": 1.8701669195751138, "grad_norm": 1.328125, "learning_rate": 3.1147826086956526e-07, "loss": 0.238, "step": 26960 }, { "epoch": 1.8708606113158464, "grad_norm": 1.1015625, "learning_rate": 3.0973913043478263e-07, "loss": 0.2336, "step": 26970 }, { "epoch": 1.871554303056579, "grad_norm": 1.15625, "learning_rate": 3.0800000000000006e-07, "loss": 0.2426, "step": 26980 }, { "epoch": 1.8722479947973119, "grad_norm": 1.0390625, "learning_rate": 3.0626086956521743e-07, "loss": 0.2485, "step": 26990 }, { "epoch": 1.8729416865380446, "grad_norm": 1.0625, "learning_rate": 3.045217391304348e-07, "loss": 0.2637, "step": 27000 }, { "epoch": 1.8736353782787774, "grad_norm": 1.0859375, "learning_rate": 3.0278260869565217e-07, "loss": 0.2391, "step": 27010 }, { "epoch": 1.87432907001951, "grad_norm": 1.3359375, "learning_rate": 3.010434782608696e-07, "loss": 0.2819, "step": 27020 }, { "epoch": 1.8750227617602429, "grad_norm": 1.21875, "learning_rate": 2.9930434782608697e-07, "loss": 0.238, "step": 27030 }, { "epoch": 1.8757164535009756, "grad_norm": 1.3125, "learning_rate": 2.975652173913044e-07, "loss": 0.2429, "step": 27040 }, { "epoch": 1.8764101452417083, "grad_norm": 1.53125, "learning_rate": 2.9582608695652176e-07, "loss": 0.2977, "step": 27050 }, { "epoch": 1.8771038369824409, "grad_norm": 1.375, "learning_rate": 2.940869565217392e-07, "loss": 0.2399, "step": 27060 }, { "epoch": 1.8777975287231736, "grad_norm": 1.5234375, "learning_rate": 2.9234782608695656e-07, "loss": 0.2749, "step": 27070 }, { "epoch": 1.8784912204639064, "grad_norm": 1.3203125, "learning_rate": 2.9060869565217393e-07, "loss": 0.2985, "step": 27080 }, { "epoch": 1.879184912204639, "grad_norm": 1.3046875, "learning_rate": 2.888695652173913e-07, "loss": 0.228, "step": 27090 }, { "epoch": 1.8798786039453717, "grad_norm": 1.3828125, "learning_rate": 2.871304347826087e-07, "loss": 0.2449, "step": 27100 }, { "epoch": 1.8805722956861044, "grad_norm": 1.171875, "learning_rate": 2.853913043478261e-07, "loss": 0.1995, "step": 27110 }, { "epoch": 1.8812659874268371, "grad_norm": 0.9609375, "learning_rate": 2.836521739130435e-07, "loss": 0.2751, "step": 27120 }, { "epoch": 1.88195967916757, "grad_norm": 1.234375, "learning_rate": 2.819130434782609e-07, "loss": 0.2217, "step": 27130 }, { "epoch": 1.8826533709083026, "grad_norm": 1.359375, "learning_rate": 2.801739130434783e-07, "loss": 0.2758, "step": 27140 }, { "epoch": 1.8833470626490354, "grad_norm": 1.296875, "learning_rate": 2.784347826086957e-07, "loss": 0.2817, "step": 27150 }, { "epoch": 1.8840407543897681, "grad_norm": 1.703125, "learning_rate": 2.7669565217391306e-07, "loss": 0.2296, "step": 27160 }, { "epoch": 1.884734446130501, "grad_norm": 1.3046875, "learning_rate": 2.7495652173913043e-07, "loss": 0.219, "step": 27170 }, { "epoch": 1.8854281378712334, "grad_norm": 1.2421875, "learning_rate": 2.7321739130434785e-07, "loss": 0.2468, "step": 27180 }, { "epoch": 1.8861218296119662, "grad_norm": 1.6953125, "learning_rate": 2.714782608695652e-07, "loss": 0.2361, "step": 27190 }, { "epoch": 1.886815521352699, "grad_norm": 1.296875, "learning_rate": 2.6973913043478265e-07, "loss": 0.2341, "step": 27200 }, { "epoch": 1.8875092130934314, "grad_norm": 1.4453125, "learning_rate": 2.68e-07, "loss": 0.258, "step": 27210 }, { "epoch": 1.8882029048341642, "grad_norm": 1.390625, "learning_rate": 2.6626086956521744e-07, "loss": 0.2434, "step": 27220 }, { "epoch": 1.888896596574897, "grad_norm": 1.1171875, "learning_rate": 2.645217391304348e-07, "loss": 0.2194, "step": 27230 }, { "epoch": 1.8895902883156297, "grad_norm": 1.1328125, "learning_rate": 2.627826086956522e-07, "loss": 0.2651, "step": 27240 }, { "epoch": 1.8902839800563624, "grad_norm": 1.25, "learning_rate": 2.6104347826086955e-07, "loss": 0.2527, "step": 27250 }, { "epoch": 1.8909776717970952, "grad_norm": 1.21875, "learning_rate": 2.59304347826087e-07, "loss": 0.2297, "step": 27260 }, { "epoch": 1.891671363537828, "grad_norm": 1.34375, "learning_rate": 2.5756521739130435e-07, "loss": 0.2108, "step": 27270 }, { "epoch": 1.8923650552785607, "grad_norm": 1.0234375, "learning_rate": 2.558260869565218e-07, "loss": 0.2001, "step": 27280 }, { "epoch": 1.8930587470192934, "grad_norm": 0.9921875, "learning_rate": 2.5408695652173915e-07, "loss": 0.2906, "step": 27290 }, { "epoch": 1.893752438760026, "grad_norm": 1.6796875, "learning_rate": 2.5234782608695657e-07, "loss": 0.2958, "step": 27300 }, { "epoch": 1.8944461305007587, "grad_norm": 1.140625, "learning_rate": 2.5060869565217394e-07, "loss": 0.32, "step": 27310 }, { "epoch": 1.8951398222414915, "grad_norm": 1.25, "learning_rate": 2.488695652173913e-07, "loss": 0.2223, "step": 27320 }, { "epoch": 1.895833513982224, "grad_norm": 1.421875, "learning_rate": 2.4713043478260874e-07, "loss": 0.323, "step": 27330 }, { "epoch": 1.8965272057229567, "grad_norm": 1.34375, "learning_rate": 2.453913043478261e-07, "loss": 0.2378, "step": 27340 }, { "epoch": 1.8972208974636895, "grad_norm": 1.2890625, "learning_rate": 2.436521739130435e-07, "loss": 0.26, "step": 27350 }, { "epoch": 1.8979145892044222, "grad_norm": 1.2421875, "learning_rate": 2.419130434782609e-07, "loss": 0.2459, "step": 27360 }, { "epoch": 1.898608280945155, "grad_norm": 1.125, "learning_rate": 2.4017391304347827e-07, "loss": 0.2385, "step": 27370 }, { "epoch": 1.8993019726858877, "grad_norm": 1.390625, "learning_rate": 2.384347826086957e-07, "loss": 0.2089, "step": 27380 }, { "epoch": 1.8999956644266205, "grad_norm": 0.99609375, "learning_rate": 2.3669565217391304e-07, "loss": 0.2246, "step": 27390 }, { "epoch": 1.9006893561673532, "grad_norm": 1.3515625, "learning_rate": 2.3495652173913044e-07, "loss": 0.2281, "step": 27400 }, { "epoch": 1.901383047908086, "grad_norm": 1.4765625, "learning_rate": 2.3321739130434784e-07, "loss": 0.2276, "step": 27410 }, { "epoch": 1.9020767396488185, "grad_norm": 1.0234375, "learning_rate": 2.3147826086956523e-07, "loss": 0.3057, "step": 27420 }, { "epoch": 1.9027704313895513, "grad_norm": 1.203125, "learning_rate": 2.2973913043478263e-07, "loss": 0.237, "step": 27430 }, { "epoch": 1.903464123130284, "grad_norm": 1.3828125, "learning_rate": 2.2800000000000003e-07, "loss": 0.2445, "step": 27440 }, { "epoch": 1.9041578148710165, "grad_norm": 1.171875, "learning_rate": 2.2626086956521743e-07, "loss": 0.2948, "step": 27450 }, { "epoch": 1.9048515066117493, "grad_norm": 1.203125, "learning_rate": 2.2452173913043483e-07, "loss": 0.2938, "step": 27460 }, { "epoch": 1.905545198352482, "grad_norm": 1.21875, "learning_rate": 2.2278260869565217e-07, "loss": 0.3153, "step": 27470 }, { "epoch": 1.9062388900932148, "grad_norm": 1.234375, "learning_rate": 2.2104347826086957e-07, "loss": 0.2448, "step": 27480 }, { "epoch": 1.9069325818339475, "grad_norm": 1.046875, "learning_rate": 2.1930434782608696e-07, "loss": 0.2489, "step": 27490 }, { "epoch": 1.9076262735746803, "grad_norm": 0.95703125, "learning_rate": 2.1756521739130436e-07, "loss": 0.2393, "step": 27500 }, { "epoch": 1.908319965315413, "grad_norm": 0.90234375, "learning_rate": 2.1582608695652176e-07, "loss": 0.2245, "step": 27510 }, { "epoch": 1.9090136570561458, "grad_norm": 1.3359375, "learning_rate": 2.1408695652173916e-07, "loss": 0.2445, "step": 27520 }, { "epoch": 1.9097073487968785, "grad_norm": 1.078125, "learning_rate": 2.1234782608695656e-07, "loss": 0.2321, "step": 27530 }, { "epoch": 1.910401040537611, "grad_norm": 1.03125, "learning_rate": 2.1060869565217393e-07, "loss": 0.2311, "step": 27540 }, { "epoch": 1.9110947322783438, "grad_norm": 1.3359375, "learning_rate": 2.088695652173913e-07, "loss": 0.2688, "step": 27550 }, { "epoch": 1.9117884240190766, "grad_norm": 1.1875, "learning_rate": 2.071304347826087e-07, "loss": 0.2381, "step": 27560 }, { "epoch": 1.912482115759809, "grad_norm": 1.09375, "learning_rate": 2.053913043478261e-07, "loss": 0.2505, "step": 27570 }, { "epoch": 1.9131758075005418, "grad_norm": 1.1796875, "learning_rate": 2.036521739130435e-07, "loss": 0.2384, "step": 27580 }, { "epoch": 1.9138694992412746, "grad_norm": 1.3359375, "learning_rate": 2.019130434782609e-07, "loss": 0.2604, "step": 27590 }, { "epoch": 1.9145631909820073, "grad_norm": 1.2734375, "learning_rate": 2.0017391304347829e-07, "loss": 0.2229, "step": 27600 }, { "epoch": 1.91525688272274, "grad_norm": 1.7421875, "learning_rate": 1.9843478260869568e-07, "loss": 0.3559, "step": 27610 }, { "epoch": 1.9159505744634728, "grad_norm": 1.5390625, "learning_rate": 1.9669565217391305e-07, "loss": 0.2457, "step": 27620 }, { "epoch": 1.9166442662042056, "grad_norm": 1.21875, "learning_rate": 1.9495652173913045e-07, "loss": 0.2954, "step": 27630 }, { "epoch": 1.9173379579449383, "grad_norm": 1.1640625, "learning_rate": 1.9321739130434782e-07, "loss": 0.3314, "step": 27640 }, { "epoch": 1.918031649685671, "grad_norm": 1.0546875, "learning_rate": 1.9147826086956522e-07, "loss": 0.2632, "step": 27650 }, { "epoch": 1.9187253414264036, "grad_norm": 1.3046875, "learning_rate": 1.8973913043478262e-07, "loss": 0.2361, "step": 27660 }, { "epoch": 1.9194190331671364, "grad_norm": 1.34375, "learning_rate": 1.8800000000000002e-07, "loss": 0.2164, "step": 27670 }, { "epoch": 1.920112724907869, "grad_norm": 1.265625, "learning_rate": 1.8626086956521741e-07, "loss": 0.2366, "step": 27680 }, { "epoch": 1.9208064166486016, "grad_norm": 1.421875, "learning_rate": 1.845217391304348e-07, "loss": 0.2236, "step": 27690 }, { "epoch": 1.9215001083893344, "grad_norm": 1.0078125, "learning_rate": 1.8278260869565218e-07, "loss": 0.2692, "step": 27700 }, { "epoch": 1.9221938001300671, "grad_norm": 1.96875, "learning_rate": 1.8104347826086958e-07, "loss": 0.3485, "step": 27710 }, { "epoch": 1.9228874918707999, "grad_norm": 1.171875, "learning_rate": 1.7930434782608698e-07, "loss": 0.3303, "step": 27720 }, { "epoch": 1.9235811836115326, "grad_norm": 1.3828125, "learning_rate": 1.7756521739130437e-07, "loss": 0.2459, "step": 27730 }, { "epoch": 1.9242748753522654, "grad_norm": 1.609375, "learning_rate": 1.7582608695652175e-07, "loss": 0.2606, "step": 27740 }, { "epoch": 1.9249685670929981, "grad_norm": 1.3359375, "learning_rate": 1.7408695652173914e-07, "loss": 0.2583, "step": 27750 }, { "epoch": 1.9256622588337309, "grad_norm": 1.015625, "learning_rate": 1.7234782608695654e-07, "loss": 0.2222, "step": 27760 }, { "epoch": 1.9263559505744636, "grad_norm": 1.1171875, "learning_rate": 1.706086956521739e-07, "loss": 0.2098, "step": 27770 }, { "epoch": 1.9270496423151962, "grad_norm": 1.1796875, "learning_rate": 1.688695652173913e-07, "loss": 0.2314, "step": 27780 }, { "epoch": 1.927743334055929, "grad_norm": 1.265625, "learning_rate": 1.671304347826087e-07, "loss": 0.2344, "step": 27790 }, { "epoch": 1.9284370257966617, "grad_norm": 1.234375, "learning_rate": 1.653913043478261e-07, "loss": 0.2579, "step": 27800 }, { "epoch": 1.9291307175373942, "grad_norm": 1.078125, "learning_rate": 1.636521739130435e-07, "loss": 0.2024, "step": 27810 }, { "epoch": 1.929824409278127, "grad_norm": 1.2890625, "learning_rate": 1.619130434782609e-07, "loss": 0.2661, "step": 27820 }, { "epoch": 1.9305181010188597, "grad_norm": 1.0859375, "learning_rate": 1.6017391304347827e-07, "loss": 0.2161, "step": 27830 }, { "epoch": 1.9312117927595924, "grad_norm": 1.1171875, "learning_rate": 1.5843478260869567e-07, "loss": 0.2477, "step": 27840 }, { "epoch": 1.9319054845003252, "grad_norm": 1.140625, "learning_rate": 1.5669565217391304e-07, "loss": 0.2482, "step": 27850 }, { "epoch": 1.932599176241058, "grad_norm": 1.7265625, "learning_rate": 1.5495652173913046e-07, "loss": 0.2296, "step": 27860 }, { "epoch": 1.9332928679817907, "grad_norm": 1.09375, "learning_rate": 1.5321739130434784e-07, "loss": 0.2083, "step": 27870 }, { "epoch": 1.9339865597225234, "grad_norm": 1.2109375, "learning_rate": 1.5147826086956523e-07, "loss": 0.2222, "step": 27880 }, { "epoch": 1.9346802514632562, "grad_norm": 0.9453125, "learning_rate": 1.4973913043478263e-07, "loss": 0.2657, "step": 27890 }, { "epoch": 1.9353739432039887, "grad_norm": 1.0078125, "learning_rate": 1.4800000000000003e-07, "loss": 0.2672, "step": 27900 }, { "epoch": 1.9360676349447214, "grad_norm": 0.9609375, "learning_rate": 1.462608695652174e-07, "loss": 0.2201, "step": 27910 }, { "epoch": 1.9367613266854542, "grad_norm": 1.078125, "learning_rate": 1.445217391304348e-07, "loss": 0.2807, "step": 27920 }, { "epoch": 1.9374550184261867, "grad_norm": 1.4375, "learning_rate": 1.427826086956522e-07, "loss": 0.2759, "step": 27930 }, { "epoch": 1.9381487101669195, "grad_norm": 1.15625, "learning_rate": 1.410434782608696e-07, "loss": 0.2086, "step": 27940 }, { "epoch": 1.9388424019076522, "grad_norm": 1.3984375, "learning_rate": 1.3930434782608696e-07, "loss": 0.2209, "step": 27950 }, { "epoch": 1.939536093648385, "grad_norm": 2.015625, "learning_rate": 1.3756521739130436e-07, "loss": 0.2617, "step": 27960 }, { "epoch": 1.9402297853891177, "grad_norm": 1.3046875, "learning_rate": 1.3582608695652176e-07, "loss": 0.25, "step": 27970 }, { "epoch": 1.9409234771298505, "grad_norm": 1.2109375, "learning_rate": 1.3408695652173916e-07, "loss": 0.1882, "step": 27980 }, { "epoch": 1.9416171688705832, "grad_norm": 1.078125, "learning_rate": 1.3234782608695653e-07, "loss": 0.279, "step": 27990 }, { "epoch": 1.942310860611316, "grad_norm": 1.25, "learning_rate": 1.3060869565217392e-07, "loss": 0.2479, "step": 28000 }, { "epoch": 1.9430045523520487, "grad_norm": 1.140625, "learning_rate": 1.2886956521739132e-07, "loss": 0.2482, "step": 28010 }, { "epoch": 1.9436982440927812, "grad_norm": 1.1328125, "learning_rate": 1.2713043478260872e-07, "loss": 0.2375, "step": 28020 }, { "epoch": 1.944391935833514, "grad_norm": 1.0390625, "learning_rate": 1.253913043478261e-07, "loss": 0.215, "step": 28030 }, { "epoch": 1.9450856275742467, "grad_norm": 1.3125, "learning_rate": 1.236521739130435e-07, "loss": 0.2363, "step": 28040 }, { "epoch": 1.9457793193149793, "grad_norm": 0.93359375, "learning_rate": 1.2191304347826089e-07, "loss": 0.3115, "step": 28050 }, { "epoch": 1.946473011055712, "grad_norm": 1.1328125, "learning_rate": 1.2017391304347826e-07, "loss": 0.2421, "step": 28060 }, { "epoch": 1.9471667027964448, "grad_norm": 1.1328125, "learning_rate": 1.1843478260869566e-07, "loss": 0.204, "step": 28070 }, { "epoch": 1.9478603945371775, "grad_norm": 1.0859375, "learning_rate": 1.1669565217391305e-07, "loss": 0.2407, "step": 28080 }, { "epoch": 1.9485540862779103, "grad_norm": 1.3828125, "learning_rate": 1.1495652173913045e-07, "loss": 0.2806, "step": 28090 }, { "epoch": 1.949247778018643, "grad_norm": 1.0703125, "learning_rate": 1.1321739130434782e-07, "loss": 0.205, "step": 28100 }, { "epoch": 1.9499414697593758, "grad_norm": 1.4609375, "learning_rate": 1.1147826086956522e-07, "loss": 0.29, "step": 28110 }, { "epoch": 1.9506351615001085, "grad_norm": 1.1484375, "learning_rate": 1.0973913043478262e-07, "loss": 0.256, "step": 28120 }, { "epoch": 1.9513288532408413, "grad_norm": 1.328125, "learning_rate": 1.0800000000000001e-07, "loss": 0.2424, "step": 28130 }, { "epoch": 1.9520225449815738, "grad_norm": 1.15625, "learning_rate": 1.062608695652174e-07, "loss": 0.2434, "step": 28140 }, { "epoch": 1.9527162367223065, "grad_norm": 1.34375, "learning_rate": 1.0452173913043478e-07, "loss": 0.273, "step": 28150 }, { "epoch": 1.9534099284630393, "grad_norm": 1.265625, "learning_rate": 1.0278260869565218e-07, "loss": 0.2397, "step": 28160 }, { "epoch": 1.9541036202037718, "grad_norm": 1.28125, "learning_rate": 1.0104347826086958e-07, "loss": 0.2082, "step": 28170 }, { "epoch": 1.9547973119445046, "grad_norm": 1.140625, "learning_rate": 9.930434782608696e-08, "loss": 0.2713, "step": 28180 }, { "epoch": 1.9554910036852373, "grad_norm": 1.25, "learning_rate": 9.756521739130436e-08, "loss": 0.2822, "step": 28190 }, { "epoch": 1.95618469542597, "grad_norm": 1.296875, "learning_rate": 9.582608695652174e-08, "loss": 0.2366, "step": 28200 }, { "epoch": 1.9568783871667028, "grad_norm": 1.734375, "learning_rate": 9.408695652173914e-08, "loss": 0.2482, "step": 28210 }, { "epoch": 1.9575720789074356, "grad_norm": 1.1171875, "learning_rate": 9.234782608695653e-08, "loss": 0.2706, "step": 28220 }, { "epoch": 1.9582657706481683, "grad_norm": 1.203125, "learning_rate": 9.060869565217392e-08, "loss": 0.2504, "step": 28230 }, { "epoch": 1.958959462388901, "grad_norm": 0.8984375, "learning_rate": 8.886956521739131e-08, "loss": 0.3229, "step": 28240 }, { "epoch": 1.9596531541296336, "grad_norm": 1.09375, "learning_rate": 8.71304347826087e-08, "loss": 0.2716, "step": 28250 }, { "epoch": 1.9603468458703663, "grad_norm": 1.6640625, "learning_rate": 8.539130434782609e-08, "loss": 0.2757, "step": 28260 }, { "epoch": 1.961040537611099, "grad_norm": 1.21875, "learning_rate": 8.365217391304349e-08, "loss": 0.2666, "step": 28270 }, { "epoch": 1.9617342293518316, "grad_norm": 1.203125, "learning_rate": 8.191304347826089e-08, "loss": 0.2189, "step": 28280 }, { "epoch": 1.9624279210925644, "grad_norm": 1.3828125, "learning_rate": 8.017391304347827e-08, "loss": 0.2109, "step": 28290 }, { "epoch": 1.9631216128332971, "grad_norm": 1.1640625, "learning_rate": 7.843478260869565e-08, "loss": 0.2154, "step": 28300 }, { "epoch": 1.9638153045740299, "grad_norm": 1.046875, "learning_rate": 7.669565217391305e-08, "loss": 0.2053, "step": 28310 }, { "epoch": 1.9645089963147626, "grad_norm": 1.171875, "learning_rate": 7.495652173913045e-08, "loss": 0.3179, "step": 28320 }, { "epoch": 1.9652026880554954, "grad_norm": 1.234375, "learning_rate": 7.321739130434783e-08, "loss": 0.2356, "step": 28330 }, { "epoch": 1.965896379796228, "grad_norm": 1.3203125, "learning_rate": 7.147826086956522e-08, "loss": 0.223, "step": 28340 }, { "epoch": 1.9665900715369609, "grad_norm": 1.2109375, "learning_rate": 6.973913043478262e-08, "loss": 0.27, "step": 28350 }, { "epoch": 1.9672837632776936, "grad_norm": 1.0859375, "learning_rate": 6.8e-08, "loss": 0.2565, "step": 28360 }, { "epoch": 1.9679774550184261, "grad_norm": 1.015625, "learning_rate": 6.62608695652174e-08, "loss": 0.2209, "step": 28370 }, { "epoch": 1.9686711467591589, "grad_norm": 1.1484375, "learning_rate": 6.452173913043478e-08, "loss": 0.2149, "step": 28380 }, { "epoch": 1.9693648384998916, "grad_norm": 1.3046875, "learning_rate": 6.278260869565218e-08, "loss": 0.2276, "step": 28390 }, { "epoch": 1.9700585302406242, "grad_norm": 1.5, "learning_rate": 6.104347826086956e-08, "loss": 0.2533, "step": 28400 }, { "epoch": 1.970752221981357, "grad_norm": 1.296875, "learning_rate": 5.930434782608696e-08, "loss": 0.2304, "step": 28410 }, { "epoch": 1.9714459137220897, "grad_norm": 0.97265625, "learning_rate": 5.756521739130435e-08, "loss": 0.2486, "step": 28420 }, { "epoch": 1.9721396054628224, "grad_norm": 1.0, "learning_rate": 5.5826086956521744e-08, "loss": 0.234, "step": 28430 }, { "epoch": 1.9728332972035552, "grad_norm": 1.421875, "learning_rate": 5.4086956521739135e-08, "loss": 0.2633, "step": 28440 }, { "epoch": 1.973526988944288, "grad_norm": 1.046875, "learning_rate": 5.2347826086956526e-08, "loss": 0.2266, "step": 28450 }, { "epoch": 1.9742206806850207, "grad_norm": 1.3359375, "learning_rate": 5.0608695652173917e-08, "loss": 0.2356, "step": 28460 }, { "epoch": 1.9749143724257534, "grad_norm": 1.0234375, "learning_rate": 4.886956521739131e-08, "loss": 0.2128, "step": 28470 }, { "epoch": 1.9756080641664862, "grad_norm": 1.09375, "learning_rate": 4.71304347826087e-08, "loss": 0.2174, "step": 28480 }, { "epoch": 1.9763017559072187, "grad_norm": 1.3046875, "learning_rate": 4.5391304347826096e-08, "loss": 0.2517, "step": 28490 }, { "epoch": 1.9769954476479514, "grad_norm": 1.25, "learning_rate": 4.365217391304348e-08, "loss": 0.227, "step": 28500 }, { "epoch": 1.9776891393886842, "grad_norm": 1.296875, "learning_rate": 4.191304347826088e-08, "loss": 0.2883, "step": 28510 }, { "epoch": 1.9783828311294167, "grad_norm": 1.265625, "learning_rate": 4.017391304347826e-08, "loss": 0.2969, "step": 28520 }, { "epoch": 1.9790765228701495, "grad_norm": 1.359375, "learning_rate": 3.8434782608695653e-08, "loss": 0.2424, "step": 28530 }, { "epoch": 1.9797702146108822, "grad_norm": 1.1015625, "learning_rate": 3.6695652173913044e-08, "loss": 0.261, "step": 28540 }, { "epoch": 1.980463906351615, "grad_norm": 1.015625, "learning_rate": 3.4956521739130435e-08, "loss": 0.2087, "step": 28550 }, { "epoch": 1.9811575980923477, "grad_norm": 1.21875, "learning_rate": 3.3217391304347826e-08, "loss": 0.1823, "step": 28560 }, { "epoch": 1.9818512898330805, "grad_norm": 1.375, "learning_rate": 3.147826086956522e-08, "loss": 0.3054, "step": 28570 }, { "epoch": 1.9825449815738132, "grad_norm": 0.984375, "learning_rate": 2.973913043478261e-08, "loss": 0.2313, "step": 28580 }, { "epoch": 1.983238673314546, "grad_norm": 1.4609375, "learning_rate": 2.8000000000000003e-08, "loss": 0.2757, "step": 28590 }, { "epoch": 1.9839323650552787, "grad_norm": 1.046875, "learning_rate": 2.6260869565217394e-08, "loss": 0.1985, "step": 28600 }, { "epoch": 1.9846260567960112, "grad_norm": 1.421875, "learning_rate": 2.4521739130434785e-08, "loss": 0.3137, "step": 28610 }, { "epoch": 1.985319748536744, "grad_norm": 1.125, "learning_rate": 2.2782608695652176e-08, "loss": 0.2441, "step": 28620 }, { "epoch": 1.9860134402774767, "grad_norm": 1.359375, "learning_rate": 2.1043478260869566e-08, "loss": 0.2487, "step": 28630 }, { "epoch": 1.9867071320182093, "grad_norm": 1.5234375, "learning_rate": 1.9304347826086957e-08, "loss": 0.3294, "step": 28640 }, { "epoch": 1.987400823758942, "grad_norm": 0.9140625, "learning_rate": 1.756521739130435e-08, "loss": 0.2312, "step": 28650 }, { "epoch": 1.9880945154996748, "grad_norm": 1.015625, "learning_rate": 1.5826086956521743e-08, "loss": 0.2543, "step": 28660 }, { "epoch": 1.9887882072404075, "grad_norm": 1.2890625, "learning_rate": 1.4086956521739132e-08, "loss": 0.2649, "step": 28670 }, { "epoch": 1.9894818989811403, "grad_norm": 1.1484375, "learning_rate": 1.2347826086956521e-08, "loss": 0.2906, "step": 28680 }, { "epoch": 1.990175590721873, "grad_norm": 1.28125, "learning_rate": 1.0608695652173912e-08, "loss": 0.2419, "step": 28690 }, { "epoch": 1.9908692824626057, "grad_norm": 1.140625, "learning_rate": 8.869565217391305e-09, "loss": 0.2254, "step": 28700 }, { "epoch": 1.9915629742033385, "grad_norm": 1.421875, "learning_rate": 7.130434782608697e-09, "loss": 0.3015, "step": 28710 }, { "epoch": 1.9922566659440712, "grad_norm": 1.40625, "learning_rate": 5.391304347826087e-09, "loss": 0.342, "step": 28720 }, { "epoch": 1.9929503576848038, "grad_norm": 1.8984375, "learning_rate": 3.6521739130434788e-09, "loss": 0.2908, "step": 28730 }, { "epoch": 1.9936440494255365, "grad_norm": 1.2421875, "learning_rate": 1.9130434782608698e-09, "loss": 0.2137, "step": 28740 }, { "epoch": 1.9943377411662693, "grad_norm": 1.2265625, "learning_rate": 1.7391304347826087e-10, "loss": 0.2152, "step": 28750 } ], "logging_steps": 10, "max_steps": 28750, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.439488076925934e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }