{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9987673228493517, "eval_steps": 0, "global_step": 6690, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0004482462365993052, "learning_rate": 2.4875621890547267e-07, "lm_loss": 0.8477, "loss": 0.8477, "step": 1 }, { "epoch": 0.0008964924731986104, "learning_rate": 4.975124378109453e-07, "lm_loss": 2.4237, "loss": 2.4237, "step": 2 }, { "epoch": 0.0013447387097979156, "learning_rate": 7.462686567164179e-07, "lm_loss": 1.666, "loss": 1.666, "step": 3 }, { "epoch": 0.0017929849463972209, "learning_rate": 9.950248756218907e-07, "lm_loss": 3.5064, "loss": 3.5064, "step": 4 }, { "epoch": 0.002241231182996526, "learning_rate": 1.2437810945273632e-06, "lm_loss": 2.2933, "loss": 2.2933, "step": 5 }, { "epoch": 0.002689477419595831, "learning_rate": 1.4925373134328358e-06, "lm_loss": 1.018, "loss": 1.018, "step": 6 }, { "epoch": 0.0031377236561951367, "learning_rate": 1.7412935323383086e-06, "lm_loss": 1.6908, "loss": 1.6908, "step": 7 }, { "epoch": 0.0035859698927944417, "learning_rate": 1.9900497512437813e-06, "lm_loss": 2.0471, "loss": 2.0471, "step": 8 }, { "epoch": 0.004034216129393747, "learning_rate": 2.238805970149254e-06, "lm_loss": 1.2072, "loss": 1.2072, "step": 9 }, { "epoch": 0.004482462365993052, "learning_rate": 2.4875621890547264e-06, "lm_loss": 0.8005, "loss": 0.8005, "step": 10 }, { "epoch": 0.004930708602592358, "learning_rate": 2.736318407960199e-06, "lm_loss": 1.3151, "loss": 1.3151, "step": 11 }, { "epoch": 0.005378954839191662, "learning_rate": 2.9850746268656716e-06, "lm_loss": 0.8607, "loss": 0.8607, "step": 12 }, { "epoch": 0.005827201075790968, "learning_rate": 3.233830845771144e-06, "lm_loss": 0.9526, "loss": 0.9526, "step": 13 }, { "epoch": 0.006275447312390273, "learning_rate": 3.482587064676617e-06, "lm_loss": 0.8963, "loss": 0.8963, "step": 14 }, { "epoch": 0.006723693548989578, "learning_rate": 3.7313432835820893e-06, "lm_loss": 1.2219, "loss": 1.2219, "step": 15 }, { "epoch": 0.0071719397855888834, "learning_rate": 3.980099502487563e-06, "lm_loss": 0.9405, "loss": 0.9405, "step": 16 }, { "epoch": 0.007620186022188189, "learning_rate": 4.228855721393035e-06, "lm_loss": 0.7857, "loss": 0.7857, "step": 17 }, { "epoch": 0.008068432258787494, "learning_rate": 4.477611940298508e-06, "lm_loss": 1.2216, "loss": 1.2216, "step": 18 }, { "epoch": 0.008516678495386799, "learning_rate": 4.72636815920398e-06, "lm_loss": 1.156, "loss": 1.156, "step": 19 }, { "epoch": 0.008964924731986105, "learning_rate": 4.975124378109453e-06, "lm_loss": 0.7098, "loss": 0.7098, "step": 20 }, { "epoch": 0.00941317096858541, "learning_rate": 5.2238805970149255e-06, "lm_loss": 1.1599, "loss": 1.1599, "step": 21 }, { "epoch": 0.009861417205184715, "learning_rate": 5.472636815920398e-06, "lm_loss": 3.1218, "loss": 3.1218, "step": 22 }, { "epoch": 0.01030966344178402, "learning_rate": 5.7213930348258714e-06, "lm_loss": 0.9702, "loss": 0.9702, "step": 23 }, { "epoch": 0.010757909678383325, "learning_rate": 5.970149253731343e-06, "lm_loss": 0.9046, "loss": 0.9046, "step": 24 }, { "epoch": 0.01120615591498263, "learning_rate": 6.2189054726368165e-06, "lm_loss": 1.2674, "loss": 1.2674, "step": 25 }, { "epoch": 0.011654402151581936, "learning_rate": 6.467661691542288e-06, "lm_loss": 0.6784, "loss": 0.6784, "step": 26 }, { "epoch": 0.012102648388181241, "learning_rate": 6.716417910447762e-06, "lm_loss": 1.1761, "loss": 1.1761, "step": 27 }, { "epoch": 0.012550894624780547, "learning_rate": 6.965174129353234e-06, "lm_loss": 0.8816, "loss": 0.8816, "step": 28 }, { "epoch": 0.012999140861379852, "learning_rate": 7.213930348258708e-06, "lm_loss": 1.1602, "loss": 1.1602, "step": 29 }, { "epoch": 0.013447387097979156, "learning_rate": 7.4626865671641785e-06, "lm_loss": 0.9599, "loss": 0.9599, "step": 30 }, { "epoch": 0.013895633334578461, "learning_rate": 7.711442786069652e-06, "lm_loss": 0.9902, "loss": 0.9902, "step": 31 }, { "epoch": 0.014343879571177767, "learning_rate": 7.960199004975125e-06, "lm_loss": 2.4182, "loss": 2.4182, "step": 32 }, { "epoch": 0.014792125807777072, "learning_rate": 8.208955223880597e-06, "lm_loss": 1.6294, "loss": 1.6294, "step": 33 }, { "epoch": 0.015240372044376378, "learning_rate": 8.45771144278607e-06, "lm_loss": 1.2723, "loss": 1.2723, "step": 34 }, { "epoch": 0.01568861828097568, "learning_rate": 8.706467661691544e-06, "lm_loss": 0.7502, "loss": 0.7502, "step": 35 }, { "epoch": 0.016136864517574987, "learning_rate": 8.955223880597016e-06, "lm_loss": 1.1901, "loss": 1.1901, "step": 36 }, { "epoch": 0.016585110754174293, "learning_rate": 9.203980099502487e-06, "lm_loss": 3.0621, "loss": 3.0621, "step": 37 }, { "epoch": 0.017033356990773598, "learning_rate": 9.45273631840796e-06, "lm_loss": 1.046, "loss": 1.046, "step": 38 }, { "epoch": 0.017481603227372904, "learning_rate": 9.701492537313434e-06, "lm_loss": 0.9476, "loss": 0.9476, "step": 39 }, { "epoch": 0.01792984946397221, "learning_rate": 9.950248756218906e-06, "lm_loss": 1.0012, "loss": 1.0012, "step": 40 }, { "epoch": 0.018378095700571515, "learning_rate": 1.0199004975124378e-05, "lm_loss": 2.3945, "loss": 2.3945, "step": 41 }, { "epoch": 0.01882634193717082, "learning_rate": 1.0447761194029851e-05, "lm_loss": 1.6336, "loss": 1.6336, "step": 42 }, { "epoch": 0.019274588173770126, "learning_rate": 1.0696517412935324e-05, "lm_loss": 3.1348, "loss": 3.1348, "step": 43 }, { "epoch": 0.01972283441036943, "learning_rate": 1.0945273631840796e-05, "lm_loss": 0.8664, "loss": 0.8664, "step": 44 }, { "epoch": 0.020171080646968736, "learning_rate": 1.119402985074627e-05, "lm_loss": 1.1933, "loss": 1.1933, "step": 45 }, { "epoch": 0.02061932688356804, "learning_rate": 1.1442786069651743e-05, "lm_loss": 1.747, "loss": 1.747, "step": 46 }, { "epoch": 0.021067573120167344, "learning_rate": 1.1691542288557215e-05, "lm_loss": 2.2028, "loss": 2.2028, "step": 47 }, { "epoch": 0.02151581935676665, "learning_rate": 1.1940298507462686e-05, "lm_loss": 1.2476, "loss": 1.2476, "step": 48 }, { "epoch": 0.021964065593365955, "learning_rate": 1.218905472636816e-05, "lm_loss": 1.1883, "loss": 1.1883, "step": 49 }, { "epoch": 0.02241231182996526, "learning_rate": 1.2437810945273633e-05, "lm_loss": 0.9375, "loss": 0.9375, "step": 50 }, { "epoch": 0.022860558066564566, "learning_rate": 1.2686567164179105e-05, "lm_loss": 0.9729, "loss": 0.9729, "step": 51 }, { "epoch": 0.02330880430316387, "learning_rate": 1.2935323383084577e-05, "lm_loss": 1.2714, "loss": 1.2714, "step": 52 }, { "epoch": 0.023757050539763177, "learning_rate": 1.3184079601990052e-05, "lm_loss": 0.9954, "loss": 0.9954, "step": 53 }, { "epoch": 0.024205296776362482, "learning_rate": 1.3432835820895523e-05, "lm_loss": 0.91, "loss": 0.91, "step": 54 }, { "epoch": 0.024653543012961788, "learning_rate": 1.3681592039800995e-05, "lm_loss": 0.976, "loss": 0.976, "step": 55 }, { "epoch": 0.025101789249561093, "learning_rate": 1.3930348258706468e-05, "lm_loss": 0.9116, "loss": 0.9116, "step": 56 }, { "epoch": 0.0255500354861604, "learning_rate": 1.417910447761194e-05, "lm_loss": 1.2213, "loss": 1.2213, "step": 57 }, { "epoch": 0.025998281722759704, "learning_rate": 1.4427860696517415e-05, "lm_loss": 1.0197, "loss": 1.0197, "step": 58 }, { "epoch": 0.026446527959359006, "learning_rate": 1.4676616915422885e-05, "lm_loss": 1.1689, "loss": 1.1689, "step": 59 }, { "epoch": 0.026894774195958312, "learning_rate": 1.4925373134328357e-05, "lm_loss": 1.0107, "loss": 1.0107, "step": 60 }, { "epoch": 0.027343020432557617, "learning_rate": 1.5174129353233832e-05, "lm_loss": 0.991, "loss": 0.991, "step": 61 }, { "epoch": 0.027791266669156923, "learning_rate": 1.5422885572139304e-05, "lm_loss": 1.0126, "loss": 1.0126, "step": 62 }, { "epoch": 0.02823951290575623, "learning_rate": 1.5671641791044777e-05, "lm_loss": 1.0936, "loss": 1.0936, "step": 63 }, { "epoch": 0.028687759142355534, "learning_rate": 1.592039800995025e-05, "lm_loss": 1.1581, "loss": 1.1581, "step": 64 }, { "epoch": 0.02913600537895484, "learning_rate": 1.6169154228855724e-05, "lm_loss": 0.7173, "loss": 0.7173, "step": 65 }, { "epoch": 0.029584251615554145, "learning_rate": 1.6417910447761194e-05, "lm_loss": 1.1966, "loss": 1.1966, "step": 66 }, { "epoch": 0.03003249785215345, "learning_rate": 1.6666666666666667e-05, "lm_loss": 0.986, "loss": 0.986, "step": 67 }, { "epoch": 0.030480744088752756, "learning_rate": 1.691542288557214e-05, "lm_loss": 1.3068, "loss": 1.3068, "step": 68 }, { "epoch": 0.03092899032535206, "learning_rate": 1.716417910447761e-05, "lm_loss": 0.8987, "loss": 0.8987, "step": 69 }, { "epoch": 0.03137723656195136, "learning_rate": 1.7412935323383088e-05, "lm_loss": 0.9233, "loss": 0.9233, "step": 70 }, { "epoch": 0.03182548279855067, "learning_rate": 1.7661691542288558e-05, "lm_loss": 1.1826, "loss": 1.1826, "step": 71 }, { "epoch": 0.032273729035149974, "learning_rate": 1.791044776119403e-05, "lm_loss": 1.0096, "loss": 1.0096, "step": 72 }, { "epoch": 0.03272197527174928, "learning_rate": 1.8159203980099505e-05, "lm_loss": 1.154, "loss": 1.154, "step": 73 }, { "epoch": 0.033170221508348585, "learning_rate": 1.8407960199004975e-05, "lm_loss": 0.9784, "loss": 0.9784, "step": 74 }, { "epoch": 0.033618467744947894, "learning_rate": 1.865671641791045e-05, "lm_loss": 0.987, "loss": 0.987, "step": 75 }, { "epoch": 0.034066713981547196, "learning_rate": 1.890547263681592e-05, "lm_loss": 0.9964, "loss": 0.9964, "step": 76 }, { "epoch": 0.034514960218146505, "learning_rate": 1.915422885572139e-05, "lm_loss": 1.2521, "loss": 1.2521, "step": 77 }, { "epoch": 0.03496320645474581, "learning_rate": 1.9402985074626868e-05, "lm_loss": 2.7707, "loss": 2.7707, "step": 78 }, { "epoch": 0.03541145269134511, "learning_rate": 1.9651741293532338e-05, "lm_loss": 1.1578, "loss": 1.1578, "step": 79 }, { "epoch": 0.03585969892794442, "learning_rate": 1.990049751243781e-05, "lm_loss": 0.9307, "loss": 0.9307, "step": 80 }, { "epoch": 0.03630794516454372, "learning_rate": 2.0149253731343285e-05, "lm_loss": 0.938, "loss": 0.938, "step": 81 }, { "epoch": 0.03675619140114303, "learning_rate": 2.0398009950248755e-05, "lm_loss": 1.0231, "loss": 1.0231, "step": 82 }, { "epoch": 0.03720443763774233, "learning_rate": 2.0646766169154232e-05, "lm_loss": 1.1302, "loss": 1.1302, "step": 83 }, { "epoch": 0.03765268387434164, "learning_rate": 2.0895522388059702e-05, "lm_loss": 0.9158, "loss": 0.9158, "step": 84 }, { "epoch": 0.03810093011094094, "learning_rate": 2.1144278606965175e-05, "lm_loss": 1.1789, "loss": 1.1789, "step": 85 }, { "epoch": 0.03854917634754025, "learning_rate": 2.139303482587065e-05, "lm_loss": 0.7348, "loss": 0.7348, "step": 86 }, { "epoch": 0.03899742258413955, "learning_rate": 2.164179104477612e-05, "lm_loss": 1.2335, "loss": 1.2335, "step": 87 }, { "epoch": 0.03944566882073886, "learning_rate": 2.1890547263681592e-05, "lm_loss": 0.9882, "loss": 0.9882, "step": 88 }, { "epoch": 0.039893915057338164, "learning_rate": 2.2139303482587065e-05, "lm_loss": 1.0851, "loss": 1.0851, "step": 89 }, { "epoch": 0.04034216129393747, "learning_rate": 2.238805970149254e-05, "lm_loss": 0.9468, "loss": 0.9468, "step": 90 }, { "epoch": 0.040790407530536775, "learning_rate": 2.2636815920398012e-05, "lm_loss": 0.9099, "loss": 0.9099, "step": 91 }, { "epoch": 0.04123865376713608, "learning_rate": 2.2885572139303486e-05, "lm_loss": 1.0052, "loss": 1.0052, "step": 92 }, { "epoch": 0.041686900003735386, "learning_rate": 2.3134328358208956e-05, "lm_loss": 1.1194, "loss": 1.1194, "step": 93 }, { "epoch": 0.04213514624033469, "learning_rate": 2.338308457711443e-05, "lm_loss": 0.928, "loss": 0.928, "step": 94 }, { "epoch": 0.042583392476934, "learning_rate": 2.3631840796019903e-05, "lm_loss": 0.8911, "loss": 0.8911, "step": 95 }, { "epoch": 0.0430316387135333, "learning_rate": 2.3880597014925373e-05, "lm_loss": 1.2947, "loss": 1.2947, "step": 96 }, { "epoch": 0.04347988495013261, "learning_rate": 2.412935323383085e-05, "lm_loss": 3.2589, "loss": 3.2589, "step": 97 }, { "epoch": 0.04392813118673191, "learning_rate": 2.437810945273632e-05, "lm_loss": 0.8976, "loss": 0.8976, "step": 98 }, { "epoch": 0.04437637742333122, "learning_rate": 2.4626865671641793e-05, "lm_loss": 0.9971, "loss": 0.9971, "step": 99 }, { "epoch": 0.04482462365993052, "learning_rate": 2.4875621890547266e-05, "lm_loss": 1.2339, "loss": 1.2339, "step": 100 }, { "epoch": 0.04527286989652983, "learning_rate": 2.512437810945274e-05, "lm_loss": 0.9393, "loss": 0.9393, "step": 101 }, { "epoch": 0.04572111613312913, "learning_rate": 2.537313432835821e-05, "lm_loss": 0.9379, "loss": 0.9379, "step": 102 }, { "epoch": 0.04616936236972844, "learning_rate": 2.5621890547263683e-05, "lm_loss": 0.9625, "loss": 0.9625, "step": 103 }, { "epoch": 0.04661760860632774, "learning_rate": 2.5870646766169153e-05, "lm_loss": 1.2381, "loss": 1.2381, "step": 104 }, { "epoch": 0.047065854842927045, "learning_rate": 2.6119402985074626e-05, "lm_loss": 0.9656, "loss": 0.9656, "step": 105 }, { "epoch": 0.047514101079526354, "learning_rate": 2.6368159203980103e-05, "lm_loss": 1.7467, "loss": 1.7467, "step": 106 }, { "epoch": 0.047962347316125656, "learning_rate": 2.6616915422885573e-05, "lm_loss": 2.2274, "loss": 2.2274, "step": 107 }, { "epoch": 0.048410593552724965, "learning_rate": 2.6865671641791047e-05, "lm_loss": 3.2017, "loss": 3.2017, "step": 108 }, { "epoch": 0.04885883978932427, "learning_rate": 2.7114427860696517e-05, "lm_loss": 0.9813, "loss": 0.9813, "step": 109 }, { "epoch": 0.049307086025923576, "learning_rate": 2.736318407960199e-05, "lm_loss": 1.1757, "loss": 1.1757, "step": 110 }, { "epoch": 0.04975533226252288, "learning_rate": 2.7611940298507467e-05, "lm_loss": 2.9859, "loss": 2.9859, "step": 111 }, { "epoch": 0.05020357849912219, "learning_rate": 2.7860696517412937e-05, "lm_loss": 1.1823, "loss": 1.1823, "step": 112 }, { "epoch": 0.05065182473572149, "learning_rate": 2.810945273631841e-05, "lm_loss": 0.9526, "loss": 0.9526, "step": 113 }, { "epoch": 0.0511000709723208, "learning_rate": 2.835820895522388e-05, "lm_loss": 0.9722, "loss": 0.9722, "step": 114 }, { "epoch": 0.0515483172089201, "learning_rate": 2.8606965174129354e-05, "lm_loss": 0.9307, "loss": 0.9307, "step": 115 }, { "epoch": 0.05199656344551941, "learning_rate": 2.885572139303483e-05, "lm_loss": 0.9779, "loss": 0.9779, "step": 116 }, { "epoch": 0.05244480968211871, "learning_rate": 2.91044776119403e-05, "lm_loss": 1.2262, "loss": 1.2262, "step": 117 }, { "epoch": 0.05289305591871801, "learning_rate": 2.935323383084577e-05, "lm_loss": 1.8008, "loss": 1.8008, "step": 118 }, { "epoch": 0.05334130215531732, "learning_rate": 2.9601990049751244e-05, "lm_loss": 2.4706, "loss": 2.4706, "step": 119 }, { "epoch": 0.053789548391916624, "learning_rate": 2.9850746268656714e-05, "lm_loss": 1.0519, "loss": 1.0519, "step": 120 }, { "epoch": 0.05423779462851593, "learning_rate": 3.009950248756219e-05, "lm_loss": 0.9805, "loss": 0.9805, "step": 121 }, { "epoch": 0.054686040865115235, "learning_rate": 3.0348258706467664e-05, "lm_loss": 0.8522, "loss": 0.8522, "step": 122 }, { "epoch": 0.055134287101714544, "learning_rate": 3.059701492537314e-05, "lm_loss": 1.2252, "loss": 1.2252, "step": 123 }, { "epoch": 0.055582533338313846, "learning_rate": 3.084577114427861e-05, "lm_loss": 0.967, "loss": 0.967, "step": 124 }, { "epoch": 0.056030779574913155, "learning_rate": 3.109452736318408e-05, "lm_loss": 1.2659, "loss": 1.2659, "step": 125 }, { "epoch": 0.05647902581151246, "learning_rate": 3.1343283582089554e-05, "lm_loss": 0.7863, "loss": 0.7863, "step": 126 }, { "epoch": 0.056927272048111766, "learning_rate": 3.1592039800995024e-05, "lm_loss": 1.0772, "loss": 1.0772, "step": 127 }, { "epoch": 0.05737551828471107, "learning_rate": 3.18407960199005e-05, "lm_loss": 1.2181, "loss": 1.2181, "step": 128 }, { "epoch": 0.057823764521310377, "learning_rate": 3.208955223880597e-05, "lm_loss": 2.8547, "loss": 2.8547, "step": 129 }, { "epoch": 0.05827201075790968, "learning_rate": 3.233830845771145e-05, "lm_loss": 1.6716, "loss": 1.6716, "step": 130 }, { "epoch": 0.05872025699450898, "learning_rate": 3.258706467661692e-05, "lm_loss": 2.4402, "loss": 2.4402, "step": 131 }, { "epoch": 0.05916850323110829, "learning_rate": 3.283582089552239e-05, "lm_loss": 2.9356, "loss": 2.9356, "step": 132 }, { "epoch": 0.05961674946770759, "learning_rate": 3.3084577114427865e-05, "lm_loss": 1.0299, "loss": 1.0299, "step": 133 }, { "epoch": 0.0600649957043069, "learning_rate": 3.3333333333333335e-05, "lm_loss": 0.8864, "loss": 0.8864, "step": 134 }, { "epoch": 0.0605132419409062, "learning_rate": 3.358208955223881e-05, "lm_loss": 1.2499, "loss": 1.2499, "step": 135 }, { "epoch": 0.06096148817750551, "learning_rate": 3.383084577114428e-05, "lm_loss": 1.0645, "loss": 1.0645, "step": 136 }, { "epoch": 0.061409734414104813, "learning_rate": 3.407960199004975e-05, "lm_loss": 2.4196, "loss": 2.4196, "step": 137 }, { "epoch": 0.06185798065070412, "learning_rate": 3.432835820895522e-05, "lm_loss": 1.7099, "loss": 1.7099, "step": 138 }, { "epoch": 0.062306226887303424, "learning_rate": 3.45771144278607e-05, "lm_loss": 0.9435, "loss": 0.9435, "step": 139 }, { "epoch": 0.06275447312390273, "learning_rate": 3.4825870646766175e-05, "lm_loss": 1.1451, "loss": 1.1451, "step": 140 }, { "epoch": 0.06320271936050204, "learning_rate": 3.5074626865671645e-05, "lm_loss": 1.0325, "loss": 1.0325, "step": 141 }, { "epoch": 0.06365096559710134, "learning_rate": 3.5323383084577115e-05, "lm_loss": 0.9281, "loss": 0.9281, "step": 142 }, { "epoch": 0.06409921183370064, "learning_rate": 3.5572139303482585e-05, "lm_loss": 1.2127, "loss": 1.2127, "step": 143 }, { "epoch": 0.06454745807029995, "learning_rate": 3.582089552238806e-05, "lm_loss": 0.9433, "loss": 0.9433, "step": 144 }, { "epoch": 0.06499570430689926, "learning_rate": 3.606965174129354e-05, "lm_loss": 0.9804, "loss": 0.9804, "step": 145 }, { "epoch": 0.06544395054349857, "learning_rate": 3.631840796019901e-05, "lm_loss": 1.1394, "loss": 1.1394, "step": 146 }, { "epoch": 0.06589219678009786, "learning_rate": 3.656716417910448e-05, "lm_loss": 0.9588, "loss": 0.9588, "step": 147 }, { "epoch": 0.06634044301669717, "learning_rate": 3.681592039800995e-05, "lm_loss": 0.9327, "loss": 0.9327, "step": 148 }, { "epoch": 0.06678868925329648, "learning_rate": 3.706467661691542e-05, "lm_loss": 1.172, "loss": 1.172, "step": 149 }, { "epoch": 0.06723693548989579, "learning_rate": 3.73134328358209e-05, "lm_loss": 2.9804, "loss": 2.9804, "step": 150 }, { "epoch": 0.06768518172649508, "learning_rate": 3.756218905472637e-05, "lm_loss": 0.8841, "loss": 0.8841, "step": 151 }, { "epoch": 0.06813342796309439, "learning_rate": 3.781094527363184e-05, "lm_loss": 1.0057, "loss": 1.0057, "step": 152 }, { "epoch": 0.0685816741996937, "learning_rate": 3.805970149253731e-05, "lm_loss": 1.2455, "loss": 1.2455, "step": 153 }, { "epoch": 0.06902992043629301, "learning_rate": 3.830845771144278e-05, "lm_loss": 0.9352, "loss": 0.9352, "step": 154 }, { "epoch": 0.0694781666728923, "learning_rate": 3.855721393034826e-05, "lm_loss": 1.1889, "loss": 1.1889, "step": 155 }, { "epoch": 0.06992641290949161, "learning_rate": 3.8805970149253736e-05, "lm_loss": 0.9815, "loss": 0.9815, "step": 156 }, { "epoch": 0.07037465914609092, "learning_rate": 3.9054726368159206e-05, "lm_loss": 0.9575, "loss": 0.9575, "step": 157 }, { "epoch": 0.07082290538269022, "learning_rate": 3.9303482587064676e-05, "lm_loss": 1.198, "loss": 1.198, "step": 158 }, { "epoch": 0.07127115161928953, "learning_rate": 3.9552238805970146e-05, "lm_loss": 1.0076, "loss": 1.0076, "step": 159 }, { "epoch": 0.07171939785588884, "learning_rate": 3.980099502487562e-05, "lm_loss": 0.8765, "loss": 0.8765, "step": 160 }, { "epoch": 0.07216764409248815, "learning_rate": 4.00497512437811e-05, "lm_loss": 1.2097, "loss": 1.2097, "step": 161 }, { "epoch": 0.07261589032908744, "learning_rate": 4.029850746268657e-05, "lm_loss": 0.9724, "loss": 0.9724, "step": 162 }, { "epoch": 0.07306413656568675, "learning_rate": 4.054726368159204e-05, "lm_loss": 1.7402, "loss": 1.7402, "step": 163 }, { "epoch": 0.07351238280228606, "learning_rate": 4.079601990049751e-05, "lm_loss": 2.1996, "loss": 2.1996, "step": 164 }, { "epoch": 0.07396062903888537, "learning_rate": 4.104477611940299e-05, "lm_loss": 1.1287, "loss": 1.1287, "step": 165 }, { "epoch": 0.07440887527548466, "learning_rate": 4.1293532338308464e-05, "lm_loss": 1.0233, "loss": 1.0233, "step": 166 }, { "epoch": 0.07485712151208397, "learning_rate": 4.1542288557213934e-05, "lm_loss": 0.9676, "loss": 0.9676, "step": 167 }, { "epoch": 0.07530536774868328, "learning_rate": 4.1791044776119404e-05, "lm_loss": 1.2416, "loss": 1.2416, "step": 168 }, { "epoch": 0.07575361398528258, "learning_rate": 4.2039800995024874e-05, "lm_loss": 3.0165, "loss": 3.0165, "step": 169 }, { "epoch": 0.07620186022188188, "learning_rate": 4.228855721393035e-05, "lm_loss": 1.3601, "loss": 1.3601, "step": 170 }, { "epoch": 0.0766501064584812, "learning_rate": 4.253731343283582e-05, "lm_loss": 0.9934, "loss": 0.9934, "step": 171 }, { "epoch": 0.0770983526950805, "learning_rate": 4.27860696517413e-05, "lm_loss": 1.2856, "loss": 1.2856, "step": 172 }, { "epoch": 0.0775465989316798, "learning_rate": 4.303482587064677e-05, "lm_loss": 0.9679, "loss": 0.9679, "step": 173 }, { "epoch": 0.0779948451682791, "learning_rate": 4.328358208955224e-05, "lm_loss": 3.1183, "loss": 3.1183, "step": 174 }, { "epoch": 0.07844309140487842, "learning_rate": 4.3532338308457714e-05, "lm_loss": 1.0453, "loss": 1.0453, "step": 175 }, { "epoch": 0.07889133764147772, "learning_rate": 4.3781094527363184e-05, "lm_loss": 1.1865, "loss": 1.1865, "step": 176 }, { "epoch": 0.07933958387807702, "learning_rate": 4.402985074626866e-05, "lm_loss": 1.0521, "loss": 1.0521, "step": 177 }, { "epoch": 0.07978783011467633, "learning_rate": 4.427860696517413e-05, "lm_loss": 1.8332, "loss": 1.8332, "step": 178 }, { "epoch": 0.08023607635127564, "learning_rate": 4.452736318407961e-05, "lm_loss": 2.4487, "loss": 2.4487, "step": 179 }, { "epoch": 0.08068432258787495, "learning_rate": 4.477611940298508e-05, "lm_loss": 1.2171, "loss": 1.2171, "step": 180 }, { "epoch": 0.08113256882447424, "learning_rate": 4.502487562189055e-05, "lm_loss": 1.0996, "loss": 1.0996, "step": 181 }, { "epoch": 0.08158081506107355, "learning_rate": 4.5273631840796025e-05, "lm_loss": 1.1227, "loss": 1.1227, "step": 182 }, { "epoch": 0.08202906129767286, "learning_rate": 4.5522388059701495e-05, "lm_loss": 3.2464, "loss": 3.2464, "step": 183 }, { "epoch": 0.08247730753427215, "learning_rate": 4.577114427860697e-05, "lm_loss": 1.4619, "loss": 1.4619, "step": 184 }, { "epoch": 0.08292555377087146, "learning_rate": 4.601990049751244e-05, "lm_loss": 0.8803, "loss": 0.8803, "step": 185 }, { "epoch": 0.08337380000747077, "learning_rate": 4.626865671641791e-05, "lm_loss": 1.0311, "loss": 1.0311, "step": 186 }, { "epoch": 0.08382204624407008, "learning_rate": 4.651741293532338e-05, "lm_loss": 1.0523, "loss": 1.0523, "step": 187 }, { "epoch": 0.08427029248066938, "learning_rate": 4.676616915422886e-05, "lm_loss": 1.2821, "loss": 1.2821, "step": 188 }, { "epoch": 0.08471853871726868, "learning_rate": 4.7014925373134335e-05, "lm_loss": 1.0189, "loss": 1.0189, "step": 189 }, { "epoch": 0.085166784953868, "learning_rate": 4.7263681592039805e-05, "lm_loss": 1.0132, "loss": 1.0132, "step": 190 }, { "epoch": 0.0856150311904673, "learning_rate": 4.7512437810945275e-05, "lm_loss": 0.8935, "loss": 0.8935, "step": 191 }, { "epoch": 0.0860632774270666, "learning_rate": 4.7761194029850745e-05, "lm_loss": 1.319, "loss": 1.319, "step": 192 }, { "epoch": 0.0865115236636659, "learning_rate": 4.800995024875622e-05, "lm_loss": 1.7305, "loss": 1.7305, "step": 193 }, { "epoch": 0.08695976990026522, "learning_rate": 4.82587064676617e-05, "lm_loss": 2.4404, "loss": 2.4404, "step": 194 }, { "epoch": 0.08740801613686451, "learning_rate": 4.850746268656717e-05, "lm_loss": 1.023, "loss": 1.023, "step": 195 }, { "epoch": 0.08785626237346382, "learning_rate": 4.875621890547264e-05, "lm_loss": 3.1692, "loss": 3.1692, "step": 196 }, { "epoch": 0.08830450861006313, "learning_rate": 4.900497512437811e-05, "lm_loss": 1.1043, "loss": 1.1043, "step": 197 }, { "epoch": 0.08875275484666244, "learning_rate": 4.9253731343283586e-05, "lm_loss": 1.2998, "loss": 1.2998, "step": 198 }, { "epoch": 0.08920100108326173, "learning_rate": 4.950248756218906e-05, "lm_loss": 0.9801, "loss": 0.9801, "step": 199 }, { "epoch": 0.08964924731986104, "learning_rate": 4.975124378109453e-05, "lm_loss": 1.2518, "loss": 1.2518, "step": 200 }, { "epoch": 0.09009749355646035, "learning_rate": 5e-05, "lm_loss": 1.071, "loss": 1.071, "step": 201 }, { "epoch": 0.09054573979305966, "learning_rate": 4.9999997070090534e-05, "lm_loss": 3.3835, "loss": 3.3835, "step": 202 }, { "epoch": 0.09099398602965895, "learning_rate": 4.9999988280362806e-05, "lm_loss": 0.5892, "loss": 0.5892, "step": 203 }, { "epoch": 0.09144223226625826, "learning_rate": 4.9999973630818885e-05, "lm_loss": 1.3328, "loss": 1.3328, "step": 204 }, { "epoch": 0.09189047850285757, "learning_rate": 4.999995312146221e-05, "lm_loss": 3.1368, "loss": 3.1368, "step": 205 }, { "epoch": 0.09233872473945688, "learning_rate": 4.999992675229757e-05, "lm_loss": 0.951, "loss": 0.951, "step": 206 }, { "epoch": 0.09278697097605618, "learning_rate": 4.999989452333117e-05, "lm_loss": 1.4119, "loss": 1.4119, "step": 207 }, { "epoch": 0.09323521721265549, "learning_rate": 4.999985643457053e-05, "lm_loss": 1.0694, "loss": 1.0694, "step": 208 }, { "epoch": 0.0936834634492548, "learning_rate": 4.999981248602462e-05, "lm_loss": 1.1543, "loss": 1.1543, "step": 209 }, { "epoch": 0.09413170968585409, "learning_rate": 4.9999762677703714e-05, "lm_loss": 1.4168, "loss": 1.4168, "step": 210 }, { "epoch": 0.0945799559224534, "learning_rate": 4.9999707009619497e-05, "lm_loss": 1.0671, "loss": 1.0671, "step": 211 }, { "epoch": 0.09502820215905271, "learning_rate": 4.9999645481785006e-05, "lm_loss": 1.3492, "loss": 1.3492, "step": 212 }, { "epoch": 0.09547644839565202, "learning_rate": 4.999957809421467e-05, "lm_loss": 1.0598, "loss": 1.0598, "step": 213 }, { "epoch": 0.09592469463225131, "learning_rate": 4.9999504846924296e-05, "lm_loss": 1.3387, "loss": 1.3387, "step": 214 }, { "epoch": 0.09637294086885062, "learning_rate": 4.999942573993103e-05, "lm_loss": 1.0558, "loss": 1.0558, "step": 215 }, { "epoch": 0.09682118710544993, "learning_rate": 4.999934077325343e-05, "lm_loss": 0.9966, "loss": 0.9966, "step": 216 }, { "epoch": 0.09726943334204924, "learning_rate": 4.9999249946911394e-05, "lm_loss": 1.1018, "loss": 1.1018, "step": 217 }, { "epoch": 0.09771767957864853, "learning_rate": 4.999915326092624e-05, "lm_loss": 2.5597, "loss": 2.5597, "step": 218 }, { "epoch": 0.09816592581524784, "learning_rate": 4.999905071532061e-05, "lm_loss": 1.6944, "loss": 1.6944, "step": 219 }, { "epoch": 0.09861417205184715, "learning_rate": 4.999894231011854e-05, "lm_loss": 1.0622, "loss": 1.0622, "step": 220 }, { "epoch": 0.09906241828844645, "learning_rate": 4.9998828045345444e-05, "lm_loss": 1.3189, "loss": 1.3189, "step": 221 }, { "epoch": 0.09951066452504576, "learning_rate": 4.9998707921028104e-05, "lm_loss": 3.1314, "loss": 3.1314, "step": 222 }, { "epoch": 0.09995891076164506, "learning_rate": 4.999858193719468e-05, "lm_loss": 1.0367, "loss": 1.0367, "step": 223 }, { "epoch": 0.10040715699824437, "learning_rate": 4.999845009387469e-05, "lm_loss": 1.4084, "loss": 1.4084, "step": 224 }, { "epoch": 0.10085540323484367, "learning_rate": 4.999831239109906e-05, "lm_loss": 0.9985, "loss": 0.9985, "step": 225 }, { "epoch": 0.10130364947144298, "learning_rate": 4.9998168828900036e-05, "lm_loss": 1.0071, "loss": 1.0071, "step": 226 }, { "epoch": 0.10175189570804229, "learning_rate": 4.9998019407311293e-05, "lm_loss": 1.346, "loss": 1.346, "step": 227 }, { "epoch": 0.1022001419446416, "learning_rate": 4.999786412636784e-05, "lm_loss": 0.9539, "loss": 0.9539, "step": 228 }, { "epoch": 0.10264838818124089, "learning_rate": 4.9997702986106084e-05, "lm_loss": 0.986, "loss": 0.986, "step": 229 }, { "epoch": 0.1030966344178402, "learning_rate": 4.999753598656378e-05, "lm_loss": 0.9733, "loss": 0.9733, "step": 230 }, { "epoch": 0.10354488065443951, "learning_rate": 4.999736312778009e-05, "lm_loss": 1.3403, "loss": 1.3403, "step": 231 }, { "epoch": 0.10399312689103882, "learning_rate": 4.999718440979553e-05, "lm_loss": 1.4203, "loss": 1.4203, "step": 232 }, { "epoch": 0.10444137312763811, "learning_rate": 4.999699983265197e-05, "lm_loss": 0.6611, "loss": 0.6611, "step": 233 }, { "epoch": 0.10488961936423742, "learning_rate": 4.999680939639268e-05, "lm_loss": 1.2254, "loss": 1.2254, "step": 234 }, { "epoch": 0.10533786560083673, "learning_rate": 4.999661310106232e-05, "lm_loss": 0.9385, "loss": 0.9385, "step": 235 }, { "epoch": 0.10578611183743603, "learning_rate": 4.9996410946706874e-05, "lm_loss": 1.2665, "loss": 1.2665, "step": 236 }, { "epoch": 0.10623435807403533, "learning_rate": 4.999620293337374e-05, "lm_loss": 1.0631, "loss": 1.0631, "step": 237 }, { "epoch": 0.10668260431063464, "learning_rate": 4.9995989061111655e-05, "lm_loss": 1.0448, "loss": 1.0448, "step": 238 }, { "epoch": 0.10713085054723395, "learning_rate": 4.9995769329970773e-05, "lm_loss": 0.9649, "loss": 0.9649, "step": 239 }, { "epoch": 0.10757909678383325, "learning_rate": 4.999554374000259e-05, "lm_loss": 1.2675, "loss": 1.2675, "step": 240 }, { "epoch": 0.10802734302043256, "learning_rate": 4.999531229125999e-05, "lm_loss": 0.972, "loss": 0.972, "step": 241 }, { "epoch": 0.10847558925703187, "learning_rate": 4.99950749837972e-05, "lm_loss": 0.9247, "loss": 0.9247, "step": 242 }, { "epoch": 0.10892383549363117, "learning_rate": 4.9994831817669855e-05, "lm_loss": 1.463, "loss": 1.463, "step": 243 }, { "epoch": 0.10937208173023047, "learning_rate": 4.999458279293496e-05, "lm_loss": 1.1055, "loss": 1.1055, "step": 244 }, { "epoch": 0.10982032796682978, "learning_rate": 4.999432790965087e-05, "lm_loss": 1.1794, "loss": 1.1794, "step": 245 }, { "epoch": 0.11026857420342909, "learning_rate": 4.999406716787734e-05, "lm_loss": 3.1152, "loss": 3.1152, "step": 246 }, { "epoch": 0.11071682044002838, "learning_rate": 4.9993800567675487e-05, "lm_loss": 1.0742, "loss": 1.0742, "step": 247 }, { "epoch": 0.11116506667662769, "learning_rate": 4.999352810910779e-05, "lm_loss": 1.1161, "loss": 1.1161, "step": 248 }, { "epoch": 0.111613312913227, "learning_rate": 4.9993249792238117e-05, "lm_loss": 1.0778, "loss": 1.0778, "step": 249 }, { "epoch": 0.11206155914982631, "learning_rate": 4.9992965617131697e-05, "lm_loss": 1.3526, "loss": 1.3526, "step": 250 }, { "epoch": 0.1125098053864256, "learning_rate": 4.999267558385515e-05, "lm_loss": 1.1025, "loss": 1.1025, "step": 251 }, { "epoch": 0.11295805162302491, "learning_rate": 4.9992379692476446e-05, "lm_loss": 3.0958, "loss": 3.0958, "step": 252 }, { "epoch": 0.11340629785962422, "learning_rate": 4.999207794306495e-05, "lm_loss": 0.9772, "loss": 0.9772, "step": 253 }, { "epoch": 0.11385454409622353, "learning_rate": 4.999177033569138e-05, "lm_loss": 2.728, "loss": 2.728, "step": 254 }, { "epoch": 0.11430279033282283, "learning_rate": 4.999145687042785e-05, "lm_loss": 1.8699, "loss": 1.8699, "step": 255 }, { "epoch": 0.11475103656942214, "learning_rate": 4.999113754734782e-05, "lm_loss": 1.3354, "loss": 1.3354, "step": 256 }, { "epoch": 0.11519928280602144, "learning_rate": 4.999081236652615e-05, "lm_loss": 0.95, "loss": 0.95, "step": 257 }, { "epoch": 0.11564752904262075, "learning_rate": 4.9990481328039045e-05, "lm_loss": 0.9884, "loss": 0.9884, "step": 258 }, { "epoch": 0.11609577527922005, "learning_rate": 4.999014443196411e-05, "lm_loss": 1.2167, "loss": 1.2167, "step": 259 }, { "epoch": 0.11654402151581936, "learning_rate": 4.998980167838031e-05, "lm_loss": 0.7918, "loss": 0.7918, "step": 260 }, { "epoch": 0.11699226775241867, "learning_rate": 4.998945306736798e-05, "lm_loss": 1.2418, "loss": 1.2418, "step": 261 }, { "epoch": 0.11744051398901796, "learning_rate": 4.9989098599008825e-05, "lm_loss": 1.4909, "loss": 1.4909, "step": 262 }, { "epoch": 0.11788876022561727, "learning_rate": 4.998873827338595e-05, "lm_loss": 0.7894, "loss": 0.7894, "step": 263 }, { "epoch": 0.11833700646221658, "learning_rate": 4.998837209058379e-05, "lm_loss": 1.2113, "loss": 1.2113, "step": 264 }, { "epoch": 0.11878525269881589, "learning_rate": 4.998800005068819e-05, "lm_loss": 1.0622, "loss": 1.0622, "step": 265 }, { "epoch": 0.11923349893541518, "learning_rate": 4.998762215378635e-05, "lm_loss": 1.3582, "loss": 1.3582, "step": 266 }, { "epoch": 0.11968174517201449, "learning_rate": 4.998723839996685e-05, "lm_loss": 0.9119, "loss": 0.9119, "step": 267 }, { "epoch": 0.1201299914086138, "learning_rate": 4.998684878931963e-05, "lm_loss": 0.9824, "loss": 0.9824, "step": 268 }, { "epoch": 0.12057823764521311, "learning_rate": 4.9986453321936025e-05, "lm_loss": 1.0411, "loss": 1.0411, "step": 269 }, { "epoch": 0.1210264838818124, "learning_rate": 4.998605199790871e-05, "lm_loss": 1.2467, "loss": 1.2467, "step": 270 }, { "epoch": 0.12147473011841171, "learning_rate": 4.9985644817331776e-05, "lm_loss": 1.2936, "loss": 1.2936, "step": 271 }, { "epoch": 0.12192297635501102, "learning_rate": 4.9985231780300644e-05, "lm_loss": 0.9776, "loss": 0.9776, "step": 272 }, { "epoch": 0.12237122259161032, "learning_rate": 4.9984812886912135e-05, "lm_loss": 3.094, "loss": 3.094, "step": 273 }, { "epoch": 0.12281946882820963, "learning_rate": 4.9984388137264436e-05, "lm_loss": 1.2796, "loss": 1.2796, "step": 274 }, { "epoch": 0.12326771506480894, "learning_rate": 4.99839575314571e-05, "lm_loss": 1.0611, "loss": 1.0611, "step": 275 }, { "epoch": 0.12371596130140824, "learning_rate": 4.998352106959106e-05, "lm_loss": 0.971, "loss": 0.971, "step": 276 }, { "epoch": 0.12416420753800754, "learning_rate": 4.998307875176863e-05, "lm_loss": 1.2447, "loss": 1.2447, "step": 277 }, { "epoch": 0.12461245377460685, "learning_rate": 4.998263057809346e-05, "lm_loss": 1.0001, "loss": 1.0001, "step": 278 }, { "epoch": 0.12506070001120614, "learning_rate": 4.998217654867063e-05, "lm_loss": 1.1868, "loss": 1.1868, "step": 279 }, { "epoch": 0.12550894624780545, "learning_rate": 4.998171666360654e-05, "lm_loss": 1.0139, "loss": 1.0139, "step": 280 }, { "epoch": 0.12595719248440476, "learning_rate": 4.9981250923008996e-05, "lm_loss": 2.6063, "loss": 2.6063, "step": 281 }, { "epoch": 0.12640543872100407, "learning_rate": 4.9980779326987146e-05, "lm_loss": 1.8192, "loss": 1.8192, "step": 282 }, { "epoch": 0.12685368495760338, "learning_rate": 4.998030187565155e-05, "lm_loss": 0.9592, "loss": 0.9592, "step": 283 }, { "epoch": 0.1273019311942027, "learning_rate": 4.9979818569114113e-05, "lm_loss": 1.1373, "loss": 1.1373, "step": 284 }, { "epoch": 0.127750177430802, "learning_rate": 4.997932940748811e-05, "lm_loss": 1.2789, "loss": 1.2789, "step": 285 }, { "epoch": 0.12819842366740128, "learning_rate": 4.9978834390888205e-05, "lm_loss": 1.1023, "loss": 1.1023, "step": 286 }, { "epoch": 0.1286466699040006, "learning_rate": 4.997833351943042e-05, "lm_loss": 0.9923, "loss": 0.9923, "step": 287 }, { "epoch": 0.1290949161405999, "learning_rate": 4.9977826793232165e-05, "lm_loss": 1.2012, "loss": 1.2012, "step": 288 }, { "epoch": 0.1295431623771992, "learning_rate": 4.9977314212412206e-05, "lm_loss": 1.0063, "loss": 1.0063, "step": 289 }, { "epoch": 0.12999140861379851, "learning_rate": 4.997679577709069e-05, "lm_loss": 1.0827, "loss": 1.0827, "step": 290 }, { "epoch": 0.13043965485039782, "learning_rate": 4.997627148738913e-05, "lm_loss": 1.2393, "loss": 1.2393, "step": 291 }, { "epoch": 0.13088790108699713, "learning_rate": 4.9975741343430425e-05, "lm_loss": 1.1946, "loss": 1.1946, "step": 292 }, { "epoch": 0.13133614732359644, "learning_rate": 4.997520534533883e-05, "lm_loss": 1.2083, "loss": 1.2083, "step": 293 }, { "epoch": 0.13178439356019572, "learning_rate": 4.9974663493239984e-05, "lm_loss": 0.9964, "loss": 0.9964, "step": 294 }, { "epoch": 0.13223263979679503, "learning_rate": 4.997411578726089e-05, "lm_loss": 0.9247, "loss": 0.9247, "step": 295 }, { "epoch": 0.13268088603339434, "learning_rate": 4.9973562227529924e-05, "lm_loss": 1.0065, "loss": 1.0065, "step": 296 }, { "epoch": 0.13312913226999365, "learning_rate": 4.997300281417684e-05, "lm_loss": 1.4296, "loss": 1.4296, "step": 297 }, { "epoch": 0.13357737850659296, "learning_rate": 4.997243754733275e-05, "lm_loss": 1.2631, "loss": 1.2631, "step": 298 }, { "epoch": 0.13402562474319227, "learning_rate": 4.997186642713017e-05, "lm_loss": 1.0162, "loss": 1.0162, "step": 299 }, { "epoch": 0.13447387097979158, "learning_rate": 4.997128945370295e-05, "lm_loss": 0.9744, "loss": 0.9744, "step": 300 }, { "epoch": 0.13492211721639086, "learning_rate": 4.9970706627186334e-05, "lm_loss": 1.068, "loss": 1.068, "step": 301 }, { "epoch": 0.13537036345299017, "learning_rate": 4.997011794771693e-05, "lm_loss": 1.3536, "loss": 1.3536, "step": 302 }, { "epoch": 0.13581860968958948, "learning_rate": 4.9969523415432716e-05, "lm_loss": 0.8315, "loss": 0.8315, "step": 303 }, { "epoch": 0.13626685592618878, "learning_rate": 4.996892303047306e-05, "lm_loss": 1.321, "loss": 1.321, "step": 304 }, { "epoch": 0.1367151021627881, "learning_rate": 4.996831679297868e-05, "lm_loss": 0.8848, "loss": 0.8848, "step": 305 }, { "epoch": 0.1371633483993874, "learning_rate": 4.996770470309167e-05, "lm_loss": 3.179, "loss": 3.179, "step": 306 }, { "epoch": 0.1376115946359867, "learning_rate": 4.9967086760955494e-05, "lm_loss": 1.0607, "loss": 1.0607, "step": 307 }, { "epoch": 0.13805984087258602, "learning_rate": 4.996646296671501e-05, "lm_loss": 1.1803, "loss": 1.1803, "step": 308 }, { "epoch": 0.1385080871091853, "learning_rate": 4.996583332051642e-05, "lm_loss": 1.2419, "loss": 1.2419, "step": 309 }, { "epoch": 0.1389563333457846, "learning_rate": 4.996519782250731e-05, "lm_loss": 1.0335, "loss": 1.0335, "step": 310 }, { "epoch": 0.13940457958238392, "learning_rate": 4.996455647283664e-05, "lm_loss": 2.5607, "loss": 2.5607, "step": 311 }, { "epoch": 0.13985282581898323, "learning_rate": 4.996390927165473e-05, "lm_loss": 3.9407, "loss": 3.9407, "step": 312 }, { "epoch": 0.14030107205558254, "learning_rate": 4.9963256219113284e-05, "lm_loss": 1.295, "loss": 1.295, "step": 313 }, { "epoch": 0.14074931829218185, "learning_rate": 4.9962597315365376e-05, "lm_loss": 1.0507, "loss": 1.0507, "step": 314 }, { "epoch": 0.14119756452878116, "learning_rate": 4.996193256056544e-05, "lm_loss": 1.0432, "loss": 1.0432, "step": 315 }, { "epoch": 0.14164581076538044, "learning_rate": 4.996126195486931e-05, "lm_loss": 3.4372, "loss": 3.4372, "step": 316 }, { "epoch": 0.14209405700197975, "learning_rate": 4.9960585498434135e-05, "lm_loss": 0.7978, "loss": 0.7978, "step": 317 }, { "epoch": 0.14254230323857905, "learning_rate": 4.99599031914185e-05, "lm_loss": 1.3847, "loss": 1.3847, "step": 318 }, { "epoch": 0.14299054947517836, "learning_rate": 4.9959215033982334e-05, "lm_loss": 1.3707, "loss": 1.3707, "step": 319 }, { "epoch": 0.14343879571177767, "learning_rate": 4.995852102628691e-05, "lm_loss": 1.0093, "loss": 1.0093, "step": 320 }, { "epoch": 0.14388704194837698, "learning_rate": 4.995782116849493e-05, "lm_loss": 0.9368, "loss": 0.9368, "step": 321 }, { "epoch": 0.1443352881849763, "learning_rate": 4.995711546077042e-05, "lm_loss": 1.1845, "loss": 1.1845, "step": 322 }, { "epoch": 0.1447835344215756, "learning_rate": 4.9956403903278784e-05, "lm_loss": 0.9092, "loss": 0.9092, "step": 323 }, { "epoch": 0.14523178065817488, "learning_rate": 4.995568649618683e-05, "lm_loss": 1.1346, "loss": 1.1346, "step": 324 }, { "epoch": 0.1456800268947742, "learning_rate": 4.9954963239662684e-05, "lm_loss": 1.1156, "loss": 1.1156, "step": 325 }, { "epoch": 0.1461282731313735, "learning_rate": 4.995423413387589e-05, "lm_loss": 2.5997, "loss": 2.5997, "step": 326 }, { "epoch": 0.1465765193679728, "learning_rate": 4.9953499178997346e-05, "lm_loss": 1.8037, "loss": 1.8037, "step": 327 }, { "epoch": 0.14702476560457212, "learning_rate": 4.9952758375199314e-05, "lm_loss": 1.301, "loss": 1.301, "step": 328 }, { "epoch": 0.14747301184117143, "learning_rate": 4.9952011722655445e-05, "lm_loss": 1.164, "loss": 1.164, "step": 329 }, { "epoch": 0.14792125807777073, "learning_rate": 4.995125922154073e-05, "lm_loss": 1.0111, "loss": 1.0111, "step": 330 }, { "epoch": 0.14836950431437002, "learning_rate": 4.9950500872031555e-05, "lm_loss": 1.0726, "loss": 1.0726, "step": 331 }, { "epoch": 0.14881775055096932, "learning_rate": 4.994973667430568e-05, "lm_loss": 2.5176, "loss": 2.5176, "step": 332 }, { "epoch": 0.14926599678756863, "learning_rate": 4.994896662854223e-05, "lm_loss": 1.8215, "loss": 1.8215, "step": 333 }, { "epoch": 0.14971424302416794, "learning_rate": 4.994819073492168e-05, "lm_loss": 1.8185, "loss": 1.8185, "step": 334 }, { "epoch": 0.15016248926076725, "learning_rate": 4.994740899362591e-05, "lm_loss": 2.6543, "loss": 2.6543, "step": 335 }, { "epoch": 0.15061073549736656, "learning_rate": 4.994662140483815e-05, "lm_loss": 0.9681, "loss": 0.9681, "step": 336 }, { "epoch": 0.15105898173396587, "learning_rate": 4.994582796874301e-05, "lm_loss": 1.8027, "loss": 1.8027, "step": 337 }, { "epoch": 0.15150722797056515, "learning_rate": 4.994502868552645e-05, "lm_loss": 2.4647, "loss": 2.4647, "step": 338 }, { "epoch": 0.15195547420716446, "learning_rate": 4.994422355537583e-05, "lm_loss": 1.0377, "loss": 1.0377, "step": 339 }, { "epoch": 0.15240372044376377, "learning_rate": 4.9943412578479864e-05, "lm_loss": 1.3377, "loss": 1.3377, "step": 340 }, { "epoch": 0.15285196668036308, "learning_rate": 4.994259575502864e-05, "lm_loss": 0.7814, "loss": 0.7814, "step": 341 }, { "epoch": 0.1533002129169624, "learning_rate": 4.994177308521361e-05, "lm_loss": 3.3395, "loss": 3.3395, "step": 342 }, { "epoch": 0.1537484591535617, "learning_rate": 4.994094456922761e-05, "lm_loss": 1.7582, "loss": 1.7582, "step": 343 }, { "epoch": 0.154196705390161, "learning_rate": 4.9940110207264836e-05, "lm_loss": 2.6579, "loss": 2.6579, "step": 344 }, { "epoch": 0.1546449516267603, "learning_rate": 4.9939269999520846e-05, "lm_loss": 0.9208, "loss": 0.9208, "step": 345 }, { "epoch": 0.1550931978633596, "learning_rate": 4.993842394619259e-05, "lm_loss": 1.0019, "loss": 1.0019, "step": 346 }, { "epoch": 0.1555414440999589, "learning_rate": 4.9937572047478376e-05, "lm_loss": 1.0466, "loss": 1.0466, "step": 347 }, { "epoch": 0.1559896903365582, "learning_rate": 4.993671430357787e-05, "lm_loss": 1.23, "loss": 1.23, "step": 348 }, { "epoch": 0.15643793657315752, "learning_rate": 4.993585071469215e-05, "lm_loss": 0.944, "loss": 0.944, "step": 349 }, { "epoch": 0.15688618280975683, "learning_rate": 4.99349812810236e-05, "lm_loss": 0.8848, "loss": 0.8848, "step": 350 }, { "epoch": 0.15733442904635614, "learning_rate": 4.9934106002776035e-05, "lm_loss": 1.2724, "loss": 1.2724, "step": 351 }, { "epoch": 0.15778267528295545, "learning_rate": 4.99332248801546e-05, "lm_loss": 1.1126, "loss": 1.1126, "step": 352 }, { "epoch": 0.15823092151955473, "learning_rate": 4.993233791336583e-05, "lm_loss": 0.8468, "loss": 0.8468, "step": 353 }, { "epoch": 0.15867916775615404, "learning_rate": 4.993144510261762e-05, "lm_loss": 1.378, "loss": 1.378, "step": 354 }, { "epoch": 0.15912741399275335, "learning_rate": 4.993054644811924e-05, "lm_loss": 3.3895, "loss": 3.3895, "step": 355 }, { "epoch": 0.15957566022935266, "learning_rate": 4.992964195008133e-05, "lm_loss": 1.1535, "loss": 1.1535, "step": 356 }, { "epoch": 0.16002390646595196, "learning_rate": 4.99287316087159e-05, "lm_loss": 0.9987, "loss": 0.9987, "step": 357 }, { "epoch": 0.16047215270255127, "learning_rate": 4.992781542423631e-05, "lm_loss": 0.956, "loss": 0.956, "step": 358 }, { "epoch": 0.16092039893915058, "learning_rate": 4.992689339685733e-05, "lm_loss": 2.5701, "loss": 2.5701, "step": 359 }, { "epoch": 0.1613686451757499, "learning_rate": 4.992596552679506e-05, "lm_loss": 1.8186, "loss": 1.8186, "step": 360 }, { "epoch": 0.16181689141234917, "learning_rate": 4.9925031814267e-05, "lm_loss": 1.3058, "loss": 1.3058, "step": 361 }, { "epoch": 0.16226513764894848, "learning_rate": 4.9924092259491996e-05, "lm_loss": 1.0799, "loss": 1.0799, "step": 362 }, { "epoch": 0.1627133838855478, "learning_rate": 4.992314686269027e-05, "lm_loss": 0.9616, "loss": 0.9616, "step": 363 }, { "epoch": 0.1631616301221471, "learning_rate": 4.992219562408343e-05, "lm_loss": 1.8086, "loss": 1.8086, "step": 364 }, { "epoch": 0.1636098763587464, "learning_rate": 4.992123854389443e-05, "lm_loss": 2.7439, "loss": 2.7439, "step": 365 }, { "epoch": 0.16405812259534572, "learning_rate": 4.99202756223476e-05, "lm_loss": 0.9711, "loss": 0.9711, "step": 366 }, { "epoch": 0.16450636883194503, "learning_rate": 4.9919306859668644e-05, "lm_loss": 1.3179, "loss": 1.3179, "step": 367 }, { "epoch": 0.1649546150685443, "learning_rate": 4.991833225608463e-05, "lm_loss": 1.0379, "loss": 1.0379, "step": 368 }, { "epoch": 0.16540286130514362, "learning_rate": 4.991735181182401e-05, "lm_loss": 0.9831, "loss": 0.9831, "step": 369 }, { "epoch": 0.16585110754174293, "learning_rate": 4.9916365527116585e-05, "lm_loss": 3.1056, "loss": 3.1056, "step": 370 }, { "epoch": 0.16629935377834223, "learning_rate": 4.991537340219353e-05, "lm_loss": 1.0676, "loss": 1.0676, "step": 371 }, { "epoch": 0.16674760001494154, "learning_rate": 4.9914375437287396e-05, "lm_loss": 1.2313, "loss": 1.2313, "step": 372 }, { "epoch": 0.16719584625154085, "learning_rate": 4.99133716326321e-05, "lm_loss": 0.9675, "loss": 0.9675, "step": 373 }, { "epoch": 0.16764409248814016, "learning_rate": 4.9912361988462926e-05, "lm_loss": 1.3703, "loss": 1.3703, "step": 374 }, { "epoch": 0.16809233872473947, "learning_rate": 4.9911346505016524e-05, "lm_loss": 1.0782, "loss": 1.0782, "step": 375 }, { "epoch": 0.16854058496133875, "learning_rate": 4.9910325182530915e-05, "lm_loss": 1.2528, "loss": 1.2528, "step": 376 }, { "epoch": 0.16898883119793806, "learning_rate": 4.9909298021245485e-05, "lm_loss": 1.1722, "loss": 1.1722, "step": 377 }, { "epoch": 0.16943707743453737, "learning_rate": 4.9908265021401015e-05, "lm_loss": 0.9679, "loss": 0.9679, "step": 378 }, { "epoch": 0.16988532367113668, "learning_rate": 4.990722618323961e-05, "lm_loss": 1.0761, "loss": 1.0761, "step": 379 }, { "epoch": 0.170333569907736, "learning_rate": 4.990618150700478e-05, "lm_loss": 1.016, "loss": 1.016, "step": 380 }, { "epoch": 0.1707818161443353, "learning_rate": 4.990513099294138e-05, "lm_loss": 1.4353, "loss": 1.4353, "step": 381 }, { "epoch": 0.1712300623809346, "learning_rate": 4.990407464129564e-05, "lm_loss": 3.1421, "loss": 3.1421, "step": 382 }, { "epoch": 0.1716783086175339, "learning_rate": 4.990301245231518e-05, "lm_loss": 1.2773, "loss": 1.2773, "step": 383 }, { "epoch": 0.1721265548541332, "learning_rate": 4.990194442624895e-05, "lm_loss": 0.9656, "loss": 0.9656, "step": 384 }, { "epoch": 0.1725748010907325, "learning_rate": 4.99008705633473e-05, "lm_loss": 1.2519, "loss": 1.2519, "step": 385 }, { "epoch": 0.1730230473273318, "learning_rate": 4.9899790863861925e-05, "lm_loss": 0.7967, "loss": 0.7967, "step": 386 }, { "epoch": 0.17347129356393112, "learning_rate": 4.989870532804591e-05, "lm_loss": 1.3188, "loss": 1.3188, "step": 387 }, { "epoch": 0.17391953980053043, "learning_rate": 4.9897613956153685e-05, "lm_loss": 1.7456, "loss": 1.7456, "step": 388 }, { "epoch": 0.17436778603712974, "learning_rate": 4.9896516748441066e-05, "lm_loss": 2.3386, "loss": 2.3386, "step": 389 }, { "epoch": 0.17481603227372902, "learning_rate": 4.9895413705165234e-05, "lm_loss": 1.2632, "loss": 1.2632, "step": 390 }, { "epoch": 0.17526427851032833, "learning_rate": 4.9894304826584723e-05, "lm_loss": 1.0121, "loss": 1.0121, "step": 391 }, { "epoch": 0.17571252474692764, "learning_rate": 4.9893190112959465e-05, "lm_loss": 1.1028, "loss": 1.1028, "step": 392 }, { "epoch": 0.17616077098352695, "learning_rate": 4.9892069564550724e-05, "lm_loss": 0.9833, "loss": 0.9833, "step": 393 }, { "epoch": 0.17660901722012626, "learning_rate": 4.9890943181621155e-05, "lm_loss": 0.8293, "loss": 0.8293, "step": 394 }, { "epoch": 0.17705726345672557, "learning_rate": 4.988981096443477e-05, "lm_loss": 1.0096, "loss": 1.0096, "step": 395 }, { "epoch": 0.17750550969332488, "learning_rate": 4.9888672913256964e-05, "lm_loss": 1.4024, "loss": 1.4024, "step": 396 }, { "epoch": 0.17795375592992418, "learning_rate": 4.988752902835447e-05, "lm_loss": 1.0232, "loss": 1.0232, "step": 397 }, { "epoch": 0.17840200216652347, "learning_rate": 4.988637930999542e-05, "lm_loss": 1.3565, "loss": 1.3565, "step": 398 }, { "epoch": 0.17885024840312277, "learning_rate": 4.988522375844929e-05, "lm_loss": 0.9804, "loss": 0.9804, "step": 399 }, { "epoch": 0.17929849463972208, "learning_rate": 4.988406237398694e-05, "lm_loss": 1.1211, "loss": 1.1211, "step": 400 }, { "epoch": 0.1797467408763214, "learning_rate": 4.9882895156880594e-05, "lm_loss": 1.2249, "loss": 1.2249, "step": 401 }, { "epoch": 0.1801949871129207, "learning_rate": 4.9881722107403826e-05, "lm_loss": 1.0104, "loss": 1.0104, "step": 402 }, { "epoch": 0.18064323334952, "learning_rate": 4.98805432258316e-05, "lm_loss": 1.0516, "loss": 1.0516, "step": 403 }, { "epoch": 0.18109147958611932, "learning_rate": 4.987935851244024e-05, "lm_loss": 1.3737, "loss": 1.3737, "step": 404 }, { "epoch": 0.1815397258227186, "learning_rate": 4.987816796750743e-05, "lm_loss": 1.0376, "loss": 1.0376, "step": 405 }, { "epoch": 0.1819879720593179, "learning_rate": 4.9876971591312214e-05, "lm_loss": 3.2468, "loss": 3.2468, "step": 406 }, { "epoch": 0.18243621829591722, "learning_rate": 4.987576938413504e-05, "lm_loss": 1.1643, "loss": 1.1643, "step": 407 }, { "epoch": 0.18288446453251653, "learning_rate": 4.987456134625767e-05, "lm_loss": 0.9067, "loss": 0.9067, "step": 408 }, { "epoch": 0.18333271076911584, "learning_rate": 4.9873347477963275e-05, "lm_loss": 1.0444, "loss": 1.0444, "step": 409 }, { "epoch": 0.18378095700571515, "learning_rate": 4.987212777953637e-05, "lm_loss": 1.3272, "loss": 1.3272, "step": 410 }, { "epoch": 0.18422920324231445, "learning_rate": 4.9870902251262847e-05, "lm_loss": 0.9207, "loss": 0.9207, "step": 411 }, { "epoch": 0.18467744947891376, "learning_rate": 4.986967089342996e-05, "lm_loss": 1.231, "loss": 1.231, "step": 412 }, { "epoch": 0.18512569571551304, "learning_rate": 4.986843370632633e-05, "lm_loss": 1.2405, "loss": 1.2405, "step": 413 }, { "epoch": 0.18557394195211235, "learning_rate": 4.9867190690241946e-05, "lm_loss": 1.0535, "loss": 1.0535, "step": 414 }, { "epoch": 0.18602218818871166, "learning_rate": 4.986594184546816e-05, "lm_loss": 1.0794, "loss": 1.0794, "step": 415 }, { "epoch": 0.18647043442531097, "learning_rate": 4.9864687172297684e-05, "lm_loss": 1.1574, "loss": 1.1574, "step": 416 }, { "epoch": 0.18691868066191028, "learning_rate": 4.986342667102463e-05, "lm_loss": 1.0858, "loss": 1.0858, "step": 417 }, { "epoch": 0.1873669268985096, "learning_rate": 4.986216034194442e-05, "lm_loss": 1.0807, "loss": 1.0807, "step": 418 }, { "epoch": 0.1878151731351089, "learning_rate": 4.986088818535389e-05, "lm_loss": 0.8395, "loss": 0.8395, "step": 419 }, { "epoch": 0.18826341937170818, "learning_rate": 4.985961020155122e-05, "lm_loss": 1.2723, "loss": 1.2723, "step": 420 }, { "epoch": 0.1887116656083075, "learning_rate": 4.9858326390835965e-05, "lm_loss": 1.8111, "loss": 1.8111, "step": 421 }, { "epoch": 0.1891599118449068, "learning_rate": 4.985703675350904e-05, "lm_loss": 2.5632, "loss": 2.5632, "step": 422 }, { "epoch": 0.1896081580815061, "learning_rate": 4.9855741289872716e-05, "lm_loss": 0.9631, "loss": 0.9631, "step": 423 }, { "epoch": 0.19005640431810542, "learning_rate": 4.985444000023065e-05, "lm_loss": 1.0992, "loss": 1.0992, "step": 424 }, { "epoch": 0.19050465055470472, "learning_rate": 4.9853132884887845e-05, "lm_loss": 2.4202, "loss": 2.4202, "step": 425 }, { "epoch": 0.19095289679130403, "learning_rate": 4.985181994415069e-05, "lm_loss": 1.7745, "loss": 1.7745, "step": 426 }, { "epoch": 0.19140114302790331, "learning_rate": 4.985050117832693e-05, "lm_loss": 1.1359, "loss": 1.1359, "step": 427 }, { "epoch": 0.19184938926450262, "learning_rate": 4.984917658772567e-05, "lm_loss": 0.8675, "loss": 0.8675, "step": 428 }, { "epoch": 0.19229763550110193, "learning_rate": 4.984784617265737e-05, "lm_loss": 3.4193, "loss": 3.4193, "step": 429 }, { "epoch": 0.19274588173770124, "learning_rate": 4.98465099334339e-05, "lm_loss": 1.1643, "loss": 1.1643, "step": 430 }, { "epoch": 0.19319412797430055, "learning_rate": 4.9845167870368446e-05, "lm_loss": 2.4129, "loss": 2.4129, "step": 431 }, { "epoch": 0.19364237421089986, "learning_rate": 4.984381998377557e-05, "lm_loss": 1.8678, "loss": 1.8678, "step": 432 }, { "epoch": 0.19409062044749917, "learning_rate": 4.984246627397123e-05, "lm_loss": 0.9259, "loss": 0.9259, "step": 433 }, { "epoch": 0.19453886668409848, "learning_rate": 4.9841106741272714e-05, "lm_loss": 1.4868, "loss": 1.4868, "step": 434 }, { "epoch": 0.19498711292069776, "learning_rate": 4.9839741385998676e-05, "lm_loss": 3.0065, "loss": 3.0065, "step": 435 }, { "epoch": 0.19543535915729707, "learning_rate": 4.983837020846916e-05, "lm_loss": 1.2931, "loss": 1.2931, "step": 436 }, { "epoch": 0.19588360539389638, "learning_rate": 4.9836993209005554e-05, "lm_loss": 1.0224, "loss": 1.0224, "step": 437 }, { "epoch": 0.19633185163049569, "learning_rate": 4.983561038793062e-05, "lm_loss": 0.9186, "loss": 0.9186, "step": 438 }, { "epoch": 0.196780097867095, "learning_rate": 4.983422174556847e-05, "lm_loss": 1.7947, "loss": 1.7947, "step": 439 }, { "epoch": 0.1972283441036943, "learning_rate": 4.983282728224461e-05, "lm_loss": 2.6377, "loss": 2.6377, "step": 440 }, { "epoch": 0.1976765903402936, "learning_rate": 4.983142699828588e-05, "lm_loss": 0.9615, "loss": 0.9615, "step": 441 }, { "epoch": 0.1981248365768929, "learning_rate": 4.9830020894020505e-05, "lm_loss": 1.0168, "loss": 1.0168, "step": 442 }, { "epoch": 0.1985730828134922, "learning_rate": 4.9828608969778045e-05, "lm_loss": 1.2672, "loss": 1.2672, "step": 443 }, { "epoch": 0.1990213290500915, "learning_rate": 4.982719122588947e-05, "lm_loss": 1.0589, "loss": 1.0589, "step": 444 }, { "epoch": 0.19946957528669082, "learning_rate": 4.982576766268708e-05, "lm_loss": 1.0041, "loss": 1.0041, "step": 445 }, { "epoch": 0.19991782152329013, "learning_rate": 4.982433828050454e-05, "lm_loss": 1.3688, "loss": 1.3688, "step": 446 }, { "epoch": 0.20036606775988944, "learning_rate": 4.9822903079676895e-05, "lm_loss": 3.312, "loss": 3.312, "step": 447 }, { "epoch": 0.20081431399648875, "learning_rate": 4.982146206054055e-05, "lm_loss": 1.2686, "loss": 1.2686, "step": 448 }, { "epoch": 0.20126256023308806, "learning_rate": 4.982001522343326e-05, "lm_loss": 2.2811, "loss": 2.2811, "step": 449 }, { "epoch": 0.20171080646968734, "learning_rate": 4.9818562568694154e-05, "lm_loss": 1.832, "loss": 1.832, "step": 450 }, { "epoch": 0.20215905270628665, "learning_rate": 4.981710409666373e-05, "lm_loss": 0.9585, "loss": 0.9585, "step": 451 }, { "epoch": 0.20260729894288595, "learning_rate": 4.981563980768384e-05, "lm_loss": 1.3744, "loss": 1.3744, "step": 452 }, { "epoch": 0.20305554517948526, "learning_rate": 4.98141697020977e-05, "lm_loss": 1.1584, "loss": 1.1584, "step": 453 }, { "epoch": 0.20350379141608457, "learning_rate": 4.9812693780249894e-05, "lm_loss": 1.0298, "loss": 1.0298, "step": 454 }, { "epoch": 0.20395203765268388, "learning_rate": 4.981121204248637e-05, "lm_loss": 1.3103, "loss": 1.3103, "step": 455 }, { "epoch": 0.2044002838892832, "learning_rate": 4.980972448915443e-05, "lm_loss": 0.9572, "loss": 0.9572, "step": 456 }, { "epoch": 0.20484853012588247, "learning_rate": 4.980823112060275e-05, "lm_loss": 0.9217, "loss": 0.9217, "step": 457 }, { "epoch": 0.20529677636248178, "learning_rate": 4.980673193718137e-05, "lm_loss": 0.9395, "loss": 0.9395, "step": 458 }, { "epoch": 0.2057450225990811, "learning_rate": 4.980522693924169e-05, "lm_loss": 1.3685, "loss": 1.3685, "step": 459 }, { "epoch": 0.2061932688356804, "learning_rate": 4.980371612713645e-05, "lm_loss": 3.034, "loss": 3.034, "step": 460 }, { "epoch": 0.2066415150722797, "learning_rate": 4.980219950121979e-05, "lm_loss": 1.296, "loss": 1.296, "step": 461 }, { "epoch": 0.20708976130887902, "learning_rate": 4.98006770618472e-05, "lm_loss": 1.0297, "loss": 1.0297, "step": 462 }, { "epoch": 0.20753800754547833, "learning_rate": 4.9799148809375516e-05, "lm_loss": 1.0713, "loss": 1.0713, "step": 463 }, { "epoch": 0.20798625378207763, "learning_rate": 4.9797614744162955e-05, "lm_loss": 1.0917, "loss": 1.0917, "step": 464 }, { "epoch": 0.20843450001867692, "learning_rate": 4.979607486656909e-05, "lm_loss": 1.1524, "loss": 1.1524, "step": 465 }, { "epoch": 0.20888274625527622, "learning_rate": 4.979452917695486e-05, "lm_loss": 1.3266, "loss": 1.3266, "step": 466 }, { "epoch": 0.20933099249187553, "learning_rate": 4.979297767568256e-05, "lm_loss": 2.304, "loss": 2.304, "step": 467 }, { "epoch": 0.20977923872847484, "learning_rate": 4.979142036311585e-05, "lm_loss": 1.7387, "loss": 1.7387, "step": 468 }, { "epoch": 0.21022748496507415, "learning_rate": 4.978985723961975e-05, "lm_loss": 1.2778, "loss": 1.2778, "step": 469 }, { "epoch": 0.21067573120167346, "learning_rate": 4.978828830556066e-05, "lm_loss": 0.6299, "loss": 0.6299, "step": 470 }, { "epoch": 0.21112397743827277, "learning_rate": 4.9786713561306316e-05, "lm_loss": 1.3495, "loss": 1.3495, "step": 471 }, { "epoch": 0.21157222367487205, "learning_rate": 4.9785133007225824e-05, "lm_loss": 1.3686, "loss": 1.3686, "step": 472 }, { "epoch": 0.21202046991147136, "learning_rate": 4.978354664368965e-05, "lm_loss": 0.889, "loss": 0.889, "step": 473 }, { "epoch": 0.21246871614807067, "learning_rate": 4.978195447106965e-05, "lm_loss": 0.9095, "loss": 0.9095, "step": 474 }, { "epoch": 0.21291696238466998, "learning_rate": 4.978035648973899e-05, "lm_loss": 0.9789, "loss": 0.9789, "step": 475 }, { "epoch": 0.2133652086212693, "learning_rate": 4.977875270007224e-05, "lm_loss": 1.3318, "loss": 1.3318, "step": 476 }, { "epoch": 0.2138134548578686, "learning_rate": 4.9777143102445314e-05, "lm_loss": 1.0383, "loss": 1.0383, "step": 477 }, { "epoch": 0.2142617010944679, "learning_rate": 4.977552769723548e-05, "lm_loss": 3.0732, "loss": 3.0732, "step": 478 }, { "epoch": 0.21470994733106719, "learning_rate": 4.9773906484821396e-05, "lm_loss": 1.3484, "loss": 1.3484, "step": 479 }, { "epoch": 0.2151581935676665, "learning_rate": 4.977227946558305e-05, "lm_loss": 0.9579, "loss": 0.9579, "step": 480 }, { "epoch": 0.2156064398042658, "learning_rate": 4.977064663990181e-05, "lm_loss": 1.7803, "loss": 1.7803, "step": 481 }, { "epoch": 0.2160546860408651, "learning_rate": 4.976900800816039e-05, "lm_loss": 2.571, "loss": 2.571, "step": 482 }, { "epoch": 0.21650293227746442, "learning_rate": 4.976736357074288e-05, "lm_loss": 1.0662, "loss": 1.0662, "step": 483 }, { "epoch": 0.21695117851406373, "learning_rate": 4.9765713328034724e-05, "lm_loss": 1.0171, "loss": 1.0171, "step": 484 }, { "epoch": 0.21739942475066304, "learning_rate": 4.976405728042273e-05, "lm_loss": 1.3573, "loss": 1.3573, "step": 485 }, { "epoch": 0.21784767098726235, "learning_rate": 4.976239542829505e-05, "lm_loss": 0.8943, "loss": 0.8943, "step": 486 }, { "epoch": 0.21829591722386163, "learning_rate": 4.9760727772041224e-05, "lm_loss": 0.9609, "loss": 0.9609, "step": 487 }, { "epoch": 0.21874416346046094, "learning_rate": 4.975905431205213e-05, "lm_loss": 1.4223, "loss": 1.4223, "step": 488 }, { "epoch": 0.21919240969706025, "learning_rate": 4.9757375048720025e-05, "lm_loss": 0.9762, "loss": 0.9762, "step": 489 }, { "epoch": 0.21964065593365956, "learning_rate": 4.975568998243851e-05, "lm_loss": 0.9475, "loss": 0.9475, "step": 490 }, { "epoch": 0.22008890217025887, "learning_rate": 4.975399911360254e-05, "lm_loss": 2.7273, "loss": 2.7273, "step": 491 }, { "epoch": 0.22053714840685817, "learning_rate": 4.9752302442608474e-05, "lm_loss": 1.7792, "loss": 1.7792, "step": 492 }, { "epoch": 0.22098539464345748, "learning_rate": 4.9750599969853966e-05, "lm_loss": 1.8372, "loss": 1.8372, "step": 493 }, { "epoch": 0.22143364088005676, "learning_rate": 4.9748891695738087e-05, "lm_loss": 3.9465, "loss": 3.9465, "step": 494 }, { "epoch": 0.22188188711665607, "learning_rate": 4.9747177620661224e-05, "lm_loss": 1.7682, "loss": 1.7682, "step": 495 }, { "epoch": 0.22233013335325538, "learning_rate": 4.974545774502516e-05, "lm_loss": 0.9761, "loss": 0.9761, "step": 496 }, { "epoch": 0.2227783795898547, "learning_rate": 4.974373206923302e-05, "lm_loss": 2.7495, "loss": 2.7495, "step": 497 }, { "epoch": 0.223226625826454, "learning_rate": 4.9742000593689275e-05, "lm_loss": 1.8348, "loss": 1.8348, "step": 498 }, { "epoch": 0.2236748720630533, "learning_rate": 4.9740263318799784e-05, "lm_loss": 1.2431, "loss": 1.2431, "step": 499 }, { "epoch": 0.22412311829965262, "learning_rate": 4.9738520244971754e-05, "lm_loss": 0.9184, "loss": 0.9184, "step": 500 }, { "epoch": 0.22457136453625193, "learning_rate": 4.973677137261374e-05, "lm_loss": 1.0393, "loss": 1.0393, "step": 501 }, { "epoch": 0.2250196107728512, "learning_rate": 4.9735016702135664e-05, "lm_loss": 0.9115, "loss": 0.9115, "step": 502 }, { "epoch": 0.22546785700945052, "learning_rate": 4.973325623394882e-05, "lm_loss": 1.4262, "loss": 1.4262, "step": 503 }, { "epoch": 0.22591610324604983, "learning_rate": 4.973148996846584e-05, "lm_loss": 1.0062, "loss": 1.0062, "step": 504 }, { "epoch": 0.22636434948264914, "learning_rate": 4.972971790610072e-05, "lm_loss": 0.9723, "loss": 0.9723, "step": 505 }, { "epoch": 0.22681259571924844, "learning_rate": 4.972794004726883e-05, "lm_loss": 1.1269, "loss": 1.1269, "step": 506 }, { "epoch": 0.22726084195584775, "learning_rate": 4.972615639238687e-05, "lm_loss": 1.0298, "loss": 1.0298, "step": 507 }, { "epoch": 0.22770908819244706, "learning_rate": 4.972436694187294e-05, "lm_loss": 1.2097, "loss": 1.2097, "step": 508 }, { "epoch": 0.22815733442904634, "learning_rate": 4.972257169614646e-05, "lm_loss": 1.0487, "loss": 1.0487, "step": 509 }, { "epoch": 0.22860558066564565, "learning_rate": 4.972077065562821e-05, "lm_loss": 3.0888, "loss": 3.0888, "step": 510 }, { "epoch": 0.22905382690224496, "learning_rate": 4.971896382074037e-05, "lm_loss": 0.8864, "loss": 0.8864, "step": 511 }, { "epoch": 0.22950207313884427, "learning_rate": 4.971715119190643e-05, "lm_loss": 1.342, "loss": 1.342, "step": 512 }, { "epoch": 0.22995031937544358, "learning_rate": 4.971533276955126e-05, "lm_loss": 1.1918, "loss": 1.1918, "step": 513 }, { "epoch": 0.2303985656120429, "learning_rate": 4.971350855410108e-05, "lm_loss": 0.9667, "loss": 0.9667, "step": 514 }, { "epoch": 0.2308468118486422, "learning_rate": 4.9711678545983486e-05, "lm_loss": 2.7214, "loss": 2.7214, "step": 515 }, { "epoch": 0.2312950580852415, "learning_rate": 4.970984274562741e-05, "lm_loss": 1.7141, "loss": 1.7141, "step": 516 }, { "epoch": 0.2317433043218408, "learning_rate": 4.970800115346315e-05, "lm_loss": 0.9833, "loss": 0.9833, "step": 517 }, { "epoch": 0.2321915505584401, "learning_rate": 4.970615376992236e-05, "lm_loss": 2.6874, "loss": 2.6874, "step": 518 }, { "epoch": 0.2326397967950394, "learning_rate": 4.970430059543806e-05, "lm_loss": 1.8472, "loss": 1.8472, "step": 519 }, { "epoch": 0.23308804303163871, "learning_rate": 4.9702441630444616e-05, "lm_loss": 3.1237, "loss": 3.1237, "step": 520 }, { "epoch": 0.23353628926823802, "learning_rate": 4.970057687537776e-05, "lm_loss": 2.4164, "loss": 2.4164, "step": 521 }, { "epoch": 0.23398453550483733, "learning_rate": 4.969870633067457e-05, "lm_loss": 1.7986, "loss": 1.7986, "step": 522 }, { "epoch": 0.23443278174143664, "learning_rate": 4.96968299967735e-05, "lm_loss": 1.1425, "loss": 1.1425, "step": 523 }, { "epoch": 0.23488102797803592, "learning_rate": 4.969494787411433e-05, "lm_loss": 1.2104, "loss": 1.2104, "step": 524 }, { "epoch": 0.23532927421463523, "learning_rate": 4.9693059963138235e-05, "lm_loss": 0.9369, "loss": 0.9369, "step": 525 }, { "epoch": 0.23577752045123454, "learning_rate": 4.969116626428772e-05, "lm_loss": 1.0547, "loss": 1.0547, "step": 526 }, { "epoch": 0.23622576668783385, "learning_rate": 4.968926677800665e-05, "lm_loss": 2.5229, "loss": 2.5229, "step": 527 }, { "epoch": 0.23667401292443316, "learning_rate": 4.968736150474026e-05, "lm_loss": 1.9253, "loss": 1.9253, "step": 528 }, { "epoch": 0.23712225916103247, "learning_rate": 4.968545044493513e-05, "lm_loss": 0.9311, "loss": 0.9311, "step": 529 }, { "epoch": 0.23757050539763178, "learning_rate": 4.9683533599039186e-05, "lm_loss": 1.3549, "loss": 1.3549, "step": 530 }, { "epoch": 0.23801875163423106, "learning_rate": 4.9681610967501744e-05, "lm_loss": 0.914, "loss": 0.914, "step": 531 }, { "epoch": 0.23846699787083037, "learning_rate": 4.9679682550773445e-05, "lm_loss": 0.9746, "loss": 0.9746, "step": 532 }, { "epoch": 0.23891524410742968, "learning_rate": 4.967774834930628e-05, "lm_loss": 1.3105, "loss": 1.3105, "step": 533 }, { "epoch": 0.23936349034402898, "learning_rate": 4.967580836355365e-05, "lm_loss": 1.0634, "loss": 1.0634, "step": 534 }, { "epoch": 0.2398117365806283, "learning_rate": 4.9673862593970235e-05, "lm_loss": 1.1147, "loss": 1.1147, "step": 535 }, { "epoch": 0.2402599828172276, "learning_rate": 4.967191104101212e-05, "lm_loss": 0.8378, "loss": 0.8378, "step": 536 }, { "epoch": 0.2407082290538269, "learning_rate": 4.966995370513674e-05, "lm_loss": 1.2024, "loss": 1.2024, "step": 537 }, { "epoch": 0.24115647529042622, "learning_rate": 4.966799058680289e-05, "lm_loss": 1.7936, "loss": 1.7936, "step": 538 }, { "epoch": 0.2416047215270255, "learning_rate": 4.9666021686470696e-05, "lm_loss": 2.679, "loss": 2.679, "step": 539 }, { "epoch": 0.2420529677636248, "learning_rate": 4.966404700460165e-05, "lm_loss": 0.8532, "loss": 0.8532, "step": 540 }, { "epoch": 0.24250121400022412, "learning_rate": 4.9662066541658624e-05, "lm_loss": 1.0819, "loss": 1.0819, "step": 541 }, { "epoch": 0.24294946023682343, "learning_rate": 4.9660080298105807e-05, "lm_loss": 1.1595, "loss": 1.1595, "step": 542 }, { "epoch": 0.24339770647342274, "learning_rate": 4.965808827440876e-05, "lm_loss": 1.1813, "loss": 1.1813, "step": 543 }, { "epoch": 0.24384595271002205, "learning_rate": 4.965609047103441e-05, "lm_loss": 1.0106, "loss": 1.0106, "step": 544 }, { "epoch": 0.24429419894662135, "learning_rate": 4.965408688845103e-05, "lm_loss": 1.1279, "loss": 1.1279, "step": 545 }, { "epoch": 0.24474244518322064, "learning_rate": 4.965207752712822e-05, "lm_loss": 1.0163, "loss": 1.0163, "step": 546 }, { "epoch": 0.24519069141981994, "learning_rate": 4.9650062387536984e-05, "lm_loss": 0.9736, "loss": 0.9736, "step": 547 }, { "epoch": 0.24563893765641925, "learning_rate": 4.9648041470149644e-05, "lm_loss": 1.0416, "loss": 1.0416, "step": 548 }, { "epoch": 0.24608718389301856, "learning_rate": 4.96460147754399e-05, "lm_loss": 1.2072, "loss": 1.2072, "step": 549 }, { "epoch": 0.24653543012961787, "learning_rate": 4.9643982303882785e-05, "lm_loss": 3.325, "loss": 3.325, "step": 550 }, { "epoch": 0.24698367636621718, "learning_rate": 4.9641944055954695e-05, "lm_loss": 1.0716, "loss": 1.0716, "step": 551 }, { "epoch": 0.2474319226028165, "learning_rate": 4.9639900032133386e-05, "lm_loss": 3.0705, "loss": 3.0705, "step": 552 }, { "epoch": 0.2478801688394158, "learning_rate": 4.9637850232897954e-05, "lm_loss": 3.322, "loss": 3.322, "step": 553 }, { "epoch": 0.24832841507601508, "learning_rate": 4.963579465872888e-05, "lm_loss": 0.9799, "loss": 0.9799, "step": 554 }, { "epoch": 0.2487766613126144, "learning_rate": 4.963373331010794e-05, "lm_loss": 1.1028, "loss": 1.1028, "step": 555 }, { "epoch": 0.2492249075492137, "learning_rate": 4.9631666187518324e-05, "lm_loss": 3.3514, "loss": 3.3514, "step": 556 }, { "epoch": 0.249673153785813, "learning_rate": 4.9629593291444546e-05, "lm_loss": 1.0117, "loss": 1.0117, "step": 557 }, { "epoch": 0.2501214000224123, "learning_rate": 4.962751462237247e-05, "lm_loss": 1.1827, "loss": 1.1827, "step": 558 }, { "epoch": 0.2505696462590116, "learning_rate": 4.962543018078934e-05, "lm_loss": 1.0193, "loss": 1.0193, "step": 559 }, { "epoch": 0.2510178924956109, "learning_rate": 4.962333996718371e-05, "lm_loss": 1.2159, "loss": 1.2159, "step": 560 }, { "epoch": 0.25146613873221024, "learning_rate": 4.962124398204553e-05, "lm_loss": 0.9404, "loss": 0.9404, "step": 561 }, { "epoch": 0.2519143849688095, "learning_rate": 4.9619142225866074e-05, "lm_loss": 1.1288, "loss": 1.1288, "step": 562 }, { "epoch": 0.25236263120540886, "learning_rate": 4.9617034699137975e-05, "lm_loss": 0.8069, "loss": 0.8069, "step": 563 }, { "epoch": 0.25281087744200814, "learning_rate": 4.9614921402355235e-05, "lm_loss": 1.276, "loss": 1.276, "step": 564 }, { "epoch": 0.2532591236786074, "learning_rate": 4.9612802336013185e-05, "lm_loss": 1.0953, "loss": 1.0953, "step": 565 }, { "epoch": 0.25370736991520676, "learning_rate": 4.9610677500608526e-05, "lm_loss": 1.2159, "loss": 1.2159, "step": 566 }, { "epoch": 0.25415561615180604, "learning_rate": 4.960854689663929e-05, "lm_loss": 3.0872, "loss": 3.0872, "step": 567 }, { "epoch": 0.2546038623884054, "learning_rate": 4.9606410524604896e-05, "lm_loss": 1.2352, "loss": 1.2352, "step": 568 }, { "epoch": 0.25505210862500466, "learning_rate": 4.9604268385006074e-05, "lm_loss": 0.6285, "loss": 0.6285, "step": 569 }, { "epoch": 0.255500354861604, "learning_rate": 4.9602120478344935e-05, "lm_loss": 1.3738, "loss": 1.3738, "step": 570 }, { "epoch": 0.2559486010982033, "learning_rate": 4.959996680512494e-05, "lm_loss": 1.0499, "loss": 1.0499, "step": 571 }, { "epoch": 0.25639684733480256, "learning_rate": 4.9597807365850884e-05, "lm_loss": 0.9775, "loss": 0.9775, "step": 572 }, { "epoch": 0.2568450935714019, "learning_rate": 4.9595642161028924e-05, "lm_loss": 1.2388, "loss": 1.2388, "step": 573 }, { "epoch": 0.2572933398080012, "learning_rate": 4.9593471191166574e-05, "lm_loss": 0.8845, "loss": 0.8845, "step": 574 }, { "epoch": 0.2577415860446005, "learning_rate": 4.95912944567727e-05, "lm_loss": 1.1824, "loss": 1.1824, "step": 575 }, { "epoch": 0.2581898322811998, "learning_rate": 4.958911195835749e-05, "lm_loss": 1.2895, "loss": 1.2895, "step": 576 }, { "epoch": 0.25863807851779913, "learning_rate": 4.9586923696432516e-05, "lm_loss": 0.9426, "loss": 0.9426, "step": 577 }, { "epoch": 0.2590863247543984, "learning_rate": 4.958472967151071e-05, "lm_loss": 1.3065, "loss": 1.3065, "step": 578 }, { "epoch": 0.25953457099099775, "learning_rate": 4.958252988410631e-05, "lm_loss": 1.0973, "loss": 1.0973, "step": 579 }, { "epoch": 0.25998281722759703, "learning_rate": 4.958032433473495e-05, "lm_loss": 1.1098, "loss": 1.1098, "step": 580 }, { "epoch": 0.2604310634641963, "learning_rate": 4.957811302391358e-05, "lm_loss": 1.2895, "loss": 1.2895, "step": 581 }, { "epoch": 0.26087930970079565, "learning_rate": 4.957589595216051e-05, "lm_loss": 0.8782, "loss": 0.8782, "step": 582 }, { "epoch": 0.26132755593739493, "learning_rate": 4.9573673119995426e-05, "lm_loss": 0.9057, "loss": 0.9057, "step": 583 }, { "epoch": 0.26177580217399427, "learning_rate": 4.957144452793934e-05, "lm_loss": 1.3638, "loss": 1.3638, "step": 584 }, { "epoch": 0.26222404841059355, "learning_rate": 4.95692101765146e-05, "lm_loss": 0.8857, "loss": 0.8857, "step": 585 }, { "epoch": 0.2626722946471929, "learning_rate": 4.9566970066244935e-05, "lm_loss": 1.0049, "loss": 1.0049, "step": 586 }, { "epoch": 0.26312054088379216, "learning_rate": 4.956472419765541e-05, "lm_loss": 1.4665, "loss": 1.4665, "step": 587 }, { "epoch": 0.26356878712039145, "learning_rate": 4.9562472571272435e-05, "lm_loss": 0.9252, "loss": 0.9252, "step": 588 }, { "epoch": 0.2640170333569908, "learning_rate": 4.956021518762379e-05, "lm_loss": 0.9838, "loss": 0.9838, "step": 589 }, { "epoch": 0.26446527959359006, "learning_rate": 4.9557952047238564e-05, "lm_loss": 1.2133, "loss": 1.2133, "step": 590 }, { "epoch": 0.2649135258301894, "learning_rate": 4.9555683150647246e-05, "lm_loss": 1.0354, "loss": 1.0354, "step": 591 }, { "epoch": 0.2653617720667887, "learning_rate": 4.955340849838164e-05, "lm_loss": 0.9758, "loss": 0.9758, "step": 592 }, { "epoch": 0.265810018303388, "learning_rate": 4.955112809097489e-05, "lm_loss": 1.2022, "loss": 1.2022, "step": 593 }, { "epoch": 0.2662582645399873, "learning_rate": 4.954884192896154e-05, "lm_loss": 1.1155, "loss": 1.1155, "step": 594 }, { "epoch": 0.2667065107765866, "learning_rate": 4.954655001287743e-05, "lm_loss": 0.9888, "loss": 0.9888, "step": 595 }, { "epoch": 0.2671547570131859, "learning_rate": 4.9544252343259764e-05, "lm_loss": 0.9759, "loss": 0.9759, "step": 596 }, { "epoch": 0.2676030032497852, "learning_rate": 4.9541948920647116e-05, "lm_loss": 1.3623, "loss": 1.3623, "step": 597 }, { "epoch": 0.26805124948638454, "learning_rate": 4.953963974557938e-05, "lm_loss": 1.2646, "loss": 1.2646, "step": 598 }, { "epoch": 0.2684994957229838, "learning_rate": 4.953732481859781e-05, "lm_loss": 0.9359, "loss": 0.9359, "step": 599 }, { "epoch": 0.26894774195958315, "learning_rate": 4.9535004140245004e-05, "lm_loss": 1.0652, "loss": 1.0652, "step": 600 }, { "epoch": 0.26939598819618243, "learning_rate": 4.9532677711064926e-05, "lm_loss": 1.0588, "loss": 1.0588, "step": 601 }, { "epoch": 0.2698442344327817, "learning_rate": 4.9530345531602864e-05, "lm_loss": 2.5822, "loss": 2.5822, "step": 602 }, { "epoch": 0.27029248066938105, "learning_rate": 4.952800760240547e-05, "lm_loss": 3.8093, "loss": 3.8093, "step": 603 }, { "epoch": 0.27074072690598033, "learning_rate": 4.9525663924020725e-05, "lm_loss": 3.2679, "loss": 3.2679, "step": 604 }, { "epoch": 0.27118897314257967, "learning_rate": 4.952331449699799e-05, "lm_loss": 2.236, "loss": 2.236, "step": 605 }, { "epoch": 0.27163721937917895, "learning_rate": 4.952095932188794e-05, "lm_loss": 1.8502, "loss": 1.8502, "step": 606 }, { "epoch": 0.2720854656157783, "learning_rate": 4.951859839924261e-05, "lm_loss": 1.1356, "loss": 1.1356, "step": 607 }, { "epoch": 0.27253371185237757, "learning_rate": 4.9516231729615394e-05, "lm_loss": 1.035, "loss": 1.035, "step": 608 }, { "epoch": 0.27298195808897685, "learning_rate": 4.951385931356101e-05, "lm_loss": 1.2625, "loss": 1.2625, "step": 609 }, { "epoch": 0.2734302043255762, "learning_rate": 4.951148115163554e-05, "lm_loss": 1.0551, "loss": 1.0551, "step": 610 }, { "epoch": 0.27387845056217547, "learning_rate": 4.950909724439641e-05, "lm_loss": 1.1347, "loss": 1.1347, "step": 611 }, { "epoch": 0.2743266967987748, "learning_rate": 4.950670759240239e-05, "lm_loss": 1.1354, "loss": 1.1354, "step": 612 }, { "epoch": 0.2747749430353741, "learning_rate": 4.9504312196213596e-05, "lm_loss": 1.1603, "loss": 1.1603, "step": 613 }, { "epoch": 0.2752231892719734, "learning_rate": 4.9501911056391494e-05, "lm_loss": 0.8447, "loss": 0.8447, "step": 614 }, { "epoch": 0.2756714355085727, "learning_rate": 4.949950417349889e-05, "lm_loss": 1.2237, "loss": 1.2237, "step": 615 }, { "epoch": 0.27611968174517204, "learning_rate": 4.9497091548099936e-05, "lm_loss": 1.1018, "loss": 1.1018, "step": 616 }, { "epoch": 0.2765679279817713, "learning_rate": 4.949467318076015e-05, "lm_loss": 1.1748, "loss": 1.1748, "step": 617 }, { "epoch": 0.2770161742183706, "learning_rate": 4.949224907204636e-05, "lm_loss": 0.9844, "loss": 0.9844, "step": 618 }, { "epoch": 0.27746442045496994, "learning_rate": 4.948981922252677e-05, "lm_loss": 1.012, "loss": 1.012, "step": 619 }, { "epoch": 0.2779126666915692, "learning_rate": 4.9487383632770925e-05, "lm_loss": 1.2348, "loss": 1.2348, "step": 620 }, { "epoch": 0.27836091292816856, "learning_rate": 4.94849423033497e-05, "lm_loss": 3.1223, "loss": 3.1223, "step": 621 }, { "epoch": 0.27880915916476784, "learning_rate": 4.948249523483532e-05, "lm_loss": 1.0593, "loss": 1.0593, "step": 622 }, { "epoch": 0.2792574054013672, "learning_rate": 4.948004242780138e-05, "lm_loss": 1.0062, "loss": 1.0062, "step": 623 }, { "epoch": 0.27970565163796646, "learning_rate": 4.9477583882822775e-05, "lm_loss": 1.274, "loss": 1.274, "step": 624 }, { "epoch": 0.28015389787456574, "learning_rate": 4.947511960047578e-05, "lm_loss": 1.0911, "loss": 1.0911, "step": 625 }, { "epoch": 0.2806021441111651, "learning_rate": 4.947264958133803e-05, "lm_loss": 0.8551, "loss": 0.8551, "step": 626 }, { "epoch": 0.28105039034776436, "learning_rate": 4.947017382598843e-05, "lm_loss": 1.2754, "loss": 1.2754, "step": 627 }, { "epoch": 0.2814986365843637, "learning_rate": 4.946769233500732e-05, "lm_loss": 1.021, "loss": 1.021, "step": 628 }, { "epoch": 0.281946882820963, "learning_rate": 4.9465205108976334e-05, "lm_loss": 1.258, "loss": 1.258, "step": 629 }, { "epoch": 0.2823951290575623, "learning_rate": 4.9462712148478455e-05, "lm_loss": 1.0123, "loss": 1.0123, "step": 630 }, { "epoch": 0.2828433752941616, "learning_rate": 4.946021345409801e-05, "lm_loss": 1.7297, "loss": 1.7297, "step": 631 }, { "epoch": 0.2832916215307609, "learning_rate": 4.945770902642068e-05, "lm_loss": 2.6216, "loss": 2.6216, "step": 632 }, { "epoch": 0.2837398677673602, "learning_rate": 4.945519886603349e-05, "lm_loss": 0.8708, "loss": 0.8708, "step": 633 }, { "epoch": 0.2841881140039595, "learning_rate": 4.94526829735248e-05, "lm_loss": 3.3765, "loss": 3.3765, "step": 634 }, { "epoch": 0.28463636024055883, "learning_rate": 4.9450161349484306e-05, "lm_loss": 0.8087, "loss": 0.8087, "step": 635 }, { "epoch": 0.2850846064771581, "learning_rate": 4.944763399450308e-05, "lm_loss": 1.1287, "loss": 1.1287, "step": 636 }, { "epoch": 0.28553285271375745, "learning_rate": 4.944510090917349e-05, "lm_loss": 0.8783, "loss": 0.8783, "step": 637 }, { "epoch": 0.2859810989503567, "learning_rate": 4.94425620940893e-05, "lm_loss": 1.3889, "loss": 1.3889, "step": 638 }, { "epoch": 0.286429345186956, "learning_rate": 4.944001754984558e-05, "lm_loss": 1.0323, "loss": 1.0323, "step": 639 }, { "epoch": 0.28687759142355534, "learning_rate": 4.943746727703873e-05, "lm_loss": 1.1694, "loss": 1.1694, "step": 640 }, { "epoch": 0.2873258376601546, "learning_rate": 4.943491127626655e-05, "lm_loss": 0.9754, "loss": 0.9754, "step": 641 }, { "epoch": 0.28777408389675396, "learning_rate": 4.9432349548128124e-05, "lm_loss": 3.0432, "loss": 3.0432, "step": 642 }, { "epoch": 0.28822233013335324, "learning_rate": 4.942978209322392e-05, "lm_loss": 3.3721, "loss": 3.3721, "step": 643 }, { "epoch": 0.2886705763699526, "learning_rate": 4.9427208912155715e-05, "lm_loss": 0.9602, "loss": 0.9602, "step": 644 }, { "epoch": 0.28911882260655186, "learning_rate": 4.942463000552665e-05, "lm_loss": 0.9511, "loss": 0.9511, "step": 645 }, { "epoch": 0.2895670688431512, "learning_rate": 4.9422045373941206e-05, "lm_loss": 0.9569, "loss": 0.9569, "step": 646 }, { "epoch": 0.2900153150797505, "learning_rate": 4.9419455018005203e-05, "lm_loss": 1.1091, "loss": 1.1091, "step": 647 }, { "epoch": 0.29046356131634976, "learning_rate": 4.9416858938325795e-05, "lm_loss": 1.2556, "loss": 1.2556, "step": 648 }, { "epoch": 0.2909118075529491, "learning_rate": 4.941425713551149e-05, "lm_loss": 0.9644, "loss": 0.9644, "step": 649 }, { "epoch": 0.2913600537895484, "learning_rate": 4.941164961017212e-05, "lm_loss": 1.1925, "loss": 1.1925, "step": 650 }, { "epoch": 0.2918083000261477, "learning_rate": 4.940903636291888e-05, "lm_loss": 1.0084, "loss": 1.0084, "step": 651 }, { "epoch": 0.292256546262747, "learning_rate": 4.9406417394364305e-05, "lm_loss": 1.2159, "loss": 1.2159, "step": 652 }, { "epoch": 0.29270479249934633, "learning_rate": 4.940379270512224e-05, "lm_loss": 0.9584, "loss": 0.9584, "step": 653 }, { "epoch": 0.2931530387359456, "learning_rate": 4.940116229580792e-05, "lm_loss": 1.1344, "loss": 1.1344, "step": 654 }, { "epoch": 0.2936012849725449, "learning_rate": 4.9398526167037875e-05, "lm_loss": 1.0503, "loss": 1.0503, "step": 655 }, { "epoch": 0.29404953120914423, "learning_rate": 4.9395884319429984e-05, "lm_loss": 1.1199, "loss": 1.1199, "step": 656 }, { "epoch": 0.2944977774457435, "learning_rate": 4.939323675360351e-05, "lm_loss": 1.1179, "loss": 1.1179, "step": 657 }, { "epoch": 0.29494602368234285, "learning_rate": 4.9390583470179e-05, "lm_loss": 1.0657, "loss": 1.0657, "step": 658 }, { "epoch": 0.29539426991894213, "learning_rate": 4.938792446977837e-05, "lm_loss": 0.8922, "loss": 0.8922, "step": 659 }, { "epoch": 0.29584251615554147, "learning_rate": 4.938525975302486e-05, "lm_loss": 1.289, "loss": 1.289, "step": 660 }, { "epoch": 0.29629076239214075, "learning_rate": 4.9382589320543085e-05, "lm_loss": 1.0769, "loss": 1.0769, "step": 661 }, { "epoch": 0.29673900862874003, "learning_rate": 4.9379913172958944e-05, "lm_loss": 0.9768, "loss": 0.9768, "step": 662 }, { "epoch": 0.29718725486533937, "learning_rate": 4.937723131089974e-05, "lm_loss": 1.2198, "loss": 1.2198, "step": 663 }, { "epoch": 0.29763550110193865, "learning_rate": 4.9374543734994053e-05, "lm_loss": 0.9232, "loss": 0.9232, "step": 664 }, { "epoch": 0.298083747338538, "learning_rate": 4.9371850445871846e-05, "lm_loss": 1.2542, "loss": 1.2542, "step": 665 }, { "epoch": 0.29853199357513727, "learning_rate": 4.936915144416441e-05, "lm_loss": 1.0928, "loss": 1.0928, "step": 666 }, { "epoch": 0.2989802398117366, "learning_rate": 4.9366446730504356e-05, "lm_loss": 0.9461, "loss": 0.9461, "step": 667 }, { "epoch": 0.2994284860483359, "learning_rate": 4.9363736305525667e-05, "lm_loss": 1.455, "loss": 1.455, "step": 668 }, { "epoch": 0.29987673228493517, "learning_rate": 4.9361020169863633e-05, "lm_loss": 2.928, "loss": 2.928, "step": 669 }, { "epoch": 0.3003249785215345, "learning_rate": 4.93582983241549e-05, "lm_loss": 1.7612, "loss": 1.7612, "step": 670 }, { "epoch": 0.3007732247581338, "learning_rate": 4.935557076903745e-05, "lm_loss": 2.2928, "loss": 2.2928, "step": 671 }, { "epoch": 0.3012214709947331, "learning_rate": 4.935283750515061e-05, "lm_loss": 1.2263, "loss": 1.2263, "step": 672 }, { "epoch": 0.3016697172313324, "learning_rate": 4.9350098533135034e-05, "lm_loss": 3.8358, "loss": 3.8358, "step": 673 }, { "epoch": 0.30211796346793174, "learning_rate": 4.93473538536327e-05, "lm_loss": 2.2452, "loss": 2.2452, "step": 674 }, { "epoch": 0.302566209704531, "learning_rate": 4.9344603467286966e-05, "lm_loss": 3.3449, "loss": 3.3449, "step": 675 }, { "epoch": 0.3030144559411303, "learning_rate": 4.9341847374742485e-05, "lm_loss": 0.9817, "loss": 0.9817, "step": 676 }, { "epoch": 0.30346270217772964, "learning_rate": 4.9339085576645274e-05, "lm_loss": 1.0244, "loss": 1.0244, "step": 677 }, { "epoch": 0.3039109484143289, "learning_rate": 4.933631807364267e-05, "lm_loss": 1.2889, "loss": 1.2889, "step": 678 }, { "epoch": 0.30435919465092826, "learning_rate": 4.9333544866383366e-05, "lm_loss": 1.7853, "loss": 1.7853, "step": 679 }, { "epoch": 0.30480744088752754, "learning_rate": 4.933076595551738e-05, "lm_loss": 2.4823, "loss": 2.4823, "step": 680 }, { "epoch": 0.3052556871241269, "learning_rate": 4.932798134169606e-05, "lm_loss": 0.9848, "loss": 0.9848, "step": 681 }, { "epoch": 0.30570393336072615, "learning_rate": 4.932519102557211e-05, "lm_loss": 1.2634, "loss": 1.2634, "step": 682 }, { "epoch": 0.3061521795973255, "learning_rate": 4.9322395007799554e-05, "lm_loss": 0.6988, "loss": 0.6988, "step": 683 }, { "epoch": 0.3066004258339248, "learning_rate": 4.931959328903376e-05, "lm_loss": 1.2111, "loss": 1.2111, "step": 684 }, { "epoch": 0.30704867207052405, "learning_rate": 4.931678586993143e-05, "lm_loss": 1.7704, "loss": 1.7704, "step": 685 }, { "epoch": 0.3074969183071234, "learning_rate": 4.931397275115059e-05, "lm_loss": 2.5955, "loss": 2.5955, "step": 686 }, { "epoch": 0.30794516454372267, "learning_rate": 4.931115393335064e-05, "lm_loss": 1.12, "loss": 1.12, "step": 687 }, { "epoch": 0.308393410780322, "learning_rate": 4.930832941719228e-05, "lm_loss": 1.7966, "loss": 1.7966, "step": 688 }, { "epoch": 0.3088416570169213, "learning_rate": 4.9305499203337545e-05, "lm_loss": 2.294, "loss": 2.294, "step": 689 }, { "epoch": 0.3092899032535206, "learning_rate": 4.930266329244984e-05, "lm_loss": 3.3277, "loss": 3.3277, "step": 690 }, { "epoch": 0.3097381494901199, "learning_rate": 4.929982168519385e-05, "lm_loss": 0.9801, "loss": 0.9801, "step": 691 }, { "epoch": 0.3101863957267192, "learning_rate": 4.929697438223565e-05, "lm_loss": 1.2501, "loss": 1.2501, "step": 692 }, { "epoch": 0.3106346419633185, "learning_rate": 4.929412138424262e-05, "lm_loss": 0.9912, "loss": 0.9912, "step": 693 }, { "epoch": 0.3110828881999178, "learning_rate": 4.929126269188349e-05, "lm_loss": 1.0388, "loss": 1.0388, "step": 694 }, { "epoch": 0.31153113443651714, "learning_rate": 4.928839830582831e-05, "lm_loss": 1.3268, "loss": 1.3268, "step": 695 }, { "epoch": 0.3119793806731164, "learning_rate": 4.928552822674847e-05, "lm_loss": 0.99, "loss": 0.99, "step": 696 }, { "epoch": 0.31242762690971576, "learning_rate": 4.928265245531669e-05, "lm_loss": 0.9643, "loss": 0.9643, "step": 697 }, { "epoch": 0.31287587314631504, "learning_rate": 4.9279770992207056e-05, "lm_loss": 1.2081, "loss": 1.2081, "step": 698 }, { "epoch": 0.3133241193829143, "learning_rate": 4.927688383809492e-05, "lm_loss": 1.1083, "loss": 1.1083, "step": 699 }, { "epoch": 0.31377236561951366, "learning_rate": 4.9273990993657056e-05, "lm_loss": 1.0105, "loss": 1.0105, "step": 700 }, { "epoch": 0.31422061185611294, "learning_rate": 4.927109245957149e-05, "lm_loss": 1.2825, "loss": 1.2825, "step": 701 }, { "epoch": 0.3146688580927123, "learning_rate": 4.926818823651764e-05, "lm_loss": 0.9046, "loss": 0.9046, "step": 702 }, { "epoch": 0.31511710432931156, "learning_rate": 4.926527832517622e-05, "lm_loss": 1.2355, "loss": 1.2355, "step": 703 }, { "epoch": 0.3155653505659109, "learning_rate": 4.926236272622929e-05, "lm_loss": 0.951, "loss": 0.951, "step": 704 }, { "epoch": 0.3160135968025102, "learning_rate": 4.925944144036026e-05, "lm_loss": 1.0504, "loss": 1.0504, "step": 705 }, { "epoch": 0.31646184303910946, "learning_rate": 4.9256514468253864e-05, "lm_loss": 3.8407, "loss": 3.8407, "step": 706 }, { "epoch": 0.3169100892757088, "learning_rate": 4.925358181059614e-05, "lm_loss": 2.4207, "loss": 2.4207, "step": 707 }, { "epoch": 0.3173583355123081, "learning_rate": 4.925064346807449e-05, "lm_loss": 1.1044, "loss": 1.1044, "step": 708 }, { "epoch": 0.3178065817489074, "learning_rate": 4.924769944137765e-05, "lm_loss": 0.9581, "loss": 0.9581, "step": 709 }, { "epoch": 0.3182548279855067, "learning_rate": 4.924474973119566e-05, "lm_loss": 0.8439, "loss": 0.8439, "step": 710 }, { "epoch": 0.31870307422210603, "learning_rate": 4.924179433821993e-05, "lm_loss": 1.2872, "loss": 1.2872, "step": 711 }, { "epoch": 0.3191513204587053, "learning_rate": 4.923883326314317e-05, "lm_loss": 1.1595, "loss": 1.1595, "step": 712 }, { "epoch": 0.3195995666953046, "learning_rate": 4.9235866506659447e-05, "lm_loss": 0.9121, "loss": 0.9121, "step": 713 }, { "epoch": 0.32004781293190393, "learning_rate": 4.923289406946413e-05, "lm_loss": 3.2361, "loss": 3.2361, "step": 714 }, { "epoch": 0.3204960591685032, "learning_rate": 4.9229915952253944e-05, "lm_loss": 1.0163, "loss": 1.0163, "step": 715 }, { "epoch": 0.32094430540510255, "learning_rate": 4.922693215572695e-05, "lm_loss": 1.0704, "loss": 1.0704, "step": 716 }, { "epoch": 0.32139255164170183, "learning_rate": 4.922394268058251e-05, "lm_loss": 1.0499, "loss": 1.0499, "step": 717 }, { "epoch": 0.32184079787830117, "learning_rate": 4.9220947527521356e-05, "lm_loss": 1.2494, "loss": 1.2494, "step": 718 }, { "epoch": 0.32228904411490045, "learning_rate": 4.921794669724551e-05, "lm_loss": 0.6653, "loss": 0.6653, "step": 719 }, { "epoch": 0.3227372903514998, "learning_rate": 4.9214940190458345e-05, "lm_loss": 1.2818, "loss": 1.2818, "step": 720 }, { "epoch": 0.32318553658809906, "learning_rate": 4.9211928007864585e-05, "lm_loss": 0.9748, "loss": 0.9748, "step": 721 }, { "epoch": 0.32363378282469835, "learning_rate": 4.920891015017024e-05, "lm_loss": 1.1276, "loss": 1.1276, "step": 722 }, { "epoch": 0.3240820290612977, "learning_rate": 4.92058866180827e-05, "lm_loss": 1.0322, "loss": 1.0322, "step": 723 }, { "epoch": 0.32453027529789696, "learning_rate": 4.9202857412310626e-05, "lm_loss": 1.8166, "loss": 1.8166, "step": 724 }, { "epoch": 0.3249785215344963, "learning_rate": 4.9199822533564075e-05, "lm_loss": 3.8956, "loss": 3.8956, "step": 725 }, { "epoch": 0.3254267677710956, "learning_rate": 4.9196781982554374e-05, "lm_loss": 1.7319, "loss": 1.7319, "step": 726 }, { "epoch": 0.3258750140076949, "learning_rate": 4.919373575999422e-05, "lm_loss": 1.373, "loss": 1.373, "step": 727 }, { "epoch": 0.3263232602442942, "learning_rate": 4.919068386659763e-05, "lm_loss": 2.2414, "loss": 2.2414, "step": 728 }, { "epoch": 0.3267715064808935, "learning_rate": 4.918762630307993e-05, "lm_loss": 3.8174, "loss": 3.8174, "step": 729 }, { "epoch": 0.3272197527174928, "learning_rate": 4.918456307015781e-05, "lm_loss": 0.9355, "loss": 0.9355, "step": 730 }, { "epoch": 0.3276679989540921, "learning_rate": 4.918149416854925e-05, "lm_loss": 2.4223, "loss": 2.4223, "step": 731 }, { "epoch": 0.32811624519069144, "learning_rate": 4.917841959897359e-05, "lm_loss": 1.8019, "loss": 1.8019, "step": 732 }, { "epoch": 0.3285644914272907, "learning_rate": 4.917533936215149e-05, "lm_loss": 1.3223, "loss": 1.3223, "step": 733 }, { "epoch": 0.32901273766389005, "learning_rate": 4.917225345880492e-05, "lm_loss": 0.6997, "loss": 0.6997, "step": 734 }, { "epoch": 0.32946098390048933, "learning_rate": 4.916916188965721e-05, "lm_loss": 1.2883, "loss": 1.2883, "step": 735 }, { "epoch": 0.3299092301370886, "learning_rate": 4.916606465543299e-05, "lm_loss": 1.3056, "loss": 1.3056, "step": 736 }, { "epoch": 0.33035747637368795, "learning_rate": 4.9162961756858236e-05, "lm_loss": 2.1682, "loss": 2.1682, "step": 737 }, { "epoch": 0.33080572261028723, "learning_rate": 4.915985319466024e-05, "lm_loss": 1.7577, "loss": 1.7577, "step": 738 }, { "epoch": 0.33125396884688657, "learning_rate": 4.915673896956763e-05, "lm_loss": 3.2555, "loss": 3.2555, "step": 739 }, { "epoch": 0.33170221508348585, "learning_rate": 4.915361908231036e-05, "lm_loss": 1.1084, "loss": 1.1084, "step": 740 }, { "epoch": 0.3321504613200852, "learning_rate": 4.91504935336197e-05, "lm_loss": 0.9034, "loss": 0.9034, "step": 741 }, { "epoch": 0.33259870755668447, "learning_rate": 4.914736232422826e-05, "lm_loss": 1.2011, "loss": 1.2011, "step": 742 }, { "epoch": 0.33304695379328375, "learning_rate": 4.9144225454869986e-05, "lm_loss": 1.1584, "loss": 1.1584, "step": 743 }, { "epoch": 0.3334952000298831, "learning_rate": 4.9141082926280124e-05, "lm_loss": 0.9861, "loss": 0.9861, "step": 744 }, { "epoch": 0.33394344626648237, "learning_rate": 4.9137934739195254e-05, "lm_loss": 0.9607, "loss": 0.9607, "step": 745 }, { "epoch": 0.3343916925030817, "learning_rate": 4.9134780894353305e-05, "lm_loss": 1.1744, "loss": 1.1744, "step": 746 }, { "epoch": 0.334839938739681, "learning_rate": 4.91316213924935e-05, "lm_loss": 0.9324, "loss": 0.9324, "step": 747 }, { "epoch": 0.3352881849762803, "learning_rate": 4.9128456234356414e-05, "lm_loss": 1.0578, "loss": 1.0578, "step": 748 }, { "epoch": 0.3357364312128796, "learning_rate": 4.912528542068393e-05, "lm_loss": 0.9366, "loss": 0.9366, "step": 749 }, { "epoch": 0.33618467744947894, "learning_rate": 4.912210895221927e-05, "lm_loss": 1.1894, "loss": 1.1894, "step": 750 }, { "epoch": 0.3366329236860782, "learning_rate": 4.911892682970697e-05, "lm_loss": 1.0039, "loss": 1.0039, "step": 751 }, { "epoch": 0.3370811699226775, "learning_rate": 4.911573905389289e-05, "lm_loss": 2.649, "loss": 2.649, "step": 752 }, { "epoch": 0.33752941615927684, "learning_rate": 4.911254562552424e-05, "lm_loss": 1.7192, "loss": 1.7192, "step": 753 }, { "epoch": 0.3379776623958761, "learning_rate": 4.910934654534953e-05, "lm_loss": 3.7101, "loss": 3.7101, "step": 754 }, { "epoch": 0.33842590863247546, "learning_rate": 4.9106141814118595e-05, "lm_loss": 2.3115, "loss": 2.3115, "step": 755 }, { "epoch": 0.33887415486907474, "learning_rate": 4.91029314325826e-05, "lm_loss": 1.1458, "loss": 1.1458, "step": 756 }, { "epoch": 0.3393224011056741, "learning_rate": 4.909971540149404e-05, "lm_loss": 1.0825, "loss": 1.0825, "step": 757 }, { "epoch": 0.33977064734227336, "learning_rate": 4.909649372160673e-05, "lm_loss": 1.264, "loss": 1.264, "step": 758 }, { "epoch": 0.34021889357887264, "learning_rate": 4.909326639367581e-05, "lm_loss": 0.8276, "loss": 0.8276, "step": 759 }, { "epoch": 0.340667139815472, "learning_rate": 4.909003341845773e-05, "lm_loss": 0.8383, "loss": 0.8383, "step": 760 }, { "epoch": 0.34111538605207126, "learning_rate": 4.908679479671029e-05, "lm_loss": 1.4647, "loss": 1.4647, "step": 761 }, { "epoch": 0.3415636322886706, "learning_rate": 4.90835505291926e-05, "lm_loss": 0.9367, "loss": 0.9367, "step": 762 }, { "epoch": 0.3420118785252699, "learning_rate": 4.9080300616665076e-05, "lm_loss": 3.2931, "loss": 3.2931, "step": 763 }, { "epoch": 0.3424601247618692, "learning_rate": 4.907704505988949e-05, "lm_loss": 1.0844, "loss": 1.0844, "step": 764 }, { "epoch": 0.3429083709984685, "learning_rate": 4.907378385962892e-05, "lm_loss": 1.1104, "loss": 1.1104, "step": 765 }, { "epoch": 0.3433566172350678, "learning_rate": 4.907051701664776e-05, "lm_loss": 1.0231, "loss": 1.0231, "step": 766 }, { "epoch": 0.3438048634716671, "learning_rate": 4.9067244531711736e-05, "lm_loss": 1.0523, "loss": 1.0523, "step": 767 }, { "epoch": 0.3442531097082664, "learning_rate": 4.90639664055879e-05, "lm_loss": 1.0999, "loss": 1.0999, "step": 768 }, { "epoch": 0.34470135594486573, "learning_rate": 4.906068263904462e-05, "lm_loss": 3.3407, "loss": 3.3407, "step": 769 }, { "epoch": 0.345149602181465, "learning_rate": 4.9057393232851576e-05, "lm_loss": 0.8153, "loss": 0.8153, "step": 770 }, { "epoch": 0.34559784841806435, "learning_rate": 4.90540981877798e-05, "lm_loss": 1.1539, "loss": 1.1539, "step": 771 }, { "epoch": 0.3460460946546636, "learning_rate": 4.9050797504601616e-05, "lm_loss": 1.1312, "loss": 1.1312, "step": 772 }, { "epoch": 0.3464943408912629, "learning_rate": 4.9047491184090674e-05, "lm_loss": 0.9138, "loss": 0.9138, "step": 773 }, { "epoch": 0.34694258712786225, "learning_rate": 4.904417922702197e-05, "lm_loss": 1.1163, "loss": 1.1163, "step": 774 }, { "epoch": 0.3473908333644615, "learning_rate": 4.904086163417178e-05, "lm_loss": 1.1007, "loss": 1.1007, "step": 775 }, { "epoch": 0.34783907960106086, "learning_rate": 4.903753840631774e-05, "lm_loss": 1.0635, "loss": 1.0635, "step": 776 }, { "epoch": 0.34828732583766014, "learning_rate": 4.9034209544238784e-05, "lm_loss": 3.1969, "loss": 3.1969, "step": 777 }, { "epoch": 0.3487355720742595, "learning_rate": 4.903087504871517e-05, "lm_loss": 1.128, "loss": 1.128, "step": 778 }, { "epoch": 0.34918381831085876, "learning_rate": 4.9027534920528496e-05, "lm_loss": 1.0552, "loss": 1.0552, "step": 779 }, { "epoch": 0.34963206454745804, "learning_rate": 4.902418916046165e-05, "lm_loss": 1.0627, "loss": 1.0627, "step": 780 }, { "epoch": 0.3500803107840574, "learning_rate": 4.902083776929885e-05, "lm_loss": 1.1171, "loss": 1.1171, "step": 781 }, { "epoch": 0.35052855702065666, "learning_rate": 4.9017480747825645e-05, "lm_loss": 1.0537, "loss": 1.0537, "step": 782 }, { "epoch": 0.350976803257256, "learning_rate": 4.90141180968289e-05, "lm_loss": 0.9556, "loss": 0.9556, "step": 783 }, { "epoch": 0.3514250494938553, "learning_rate": 4.9010749817096786e-05, "lm_loss": 0.8933, "loss": 0.8933, "step": 784 }, { "epoch": 0.3518732957304546, "learning_rate": 4.9007375909418814e-05, "lm_loss": 1.3013, "loss": 1.3013, "step": 785 }, { "epoch": 0.3523215419670539, "learning_rate": 4.9003996374585795e-05, "lm_loss": 0.9348, "loss": 0.9348, "step": 786 }, { "epoch": 0.35276978820365323, "learning_rate": 4.900061121338987e-05, "lm_loss": 0.9187, "loss": 0.9187, "step": 787 }, { "epoch": 0.3532180344402525, "learning_rate": 4.89972204266245e-05, "lm_loss": 1.0006, "loss": 1.0006, "step": 788 }, { "epoch": 0.3536662806768518, "learning_rate": 4.8993824015084455e-05, "lm_loss": 1.3899, "loss": 1.3899, "step": 789 }, { "epoch": 0.35411452691345113, "learning_rate": 4.8990421979565836e-05, "lm_loss": 0.9688, "loss": 0.9688, "step": 790 }, { "epoch": 0.3545627731500504, "learning_rate": 4.898701432086605e-05, "lm_loss": 1.3469, "loss": 1.3469, "step": 791 }, { "epoch": 0.35501101938664975, "learning_rate": 4.898360103978383e-05, "lm_loss": 0.8482, "loss": 0.8482, "step": 792 }, { "epoch": 0.35545926562324903, "learning_rate": 4.898018213711922e-05, "lm_loss": 1.7129, "loss": 1.7129, "step": 793 }, { "epoch": 0.35590751185984837, "learning_rate": 4.897675761367359e-05, "lm_loss": 2.4872, "loss": 2.4872, "step": 794 }, { "epoch": 0.35635575809644765, "learning_rate": 4.897332747024963e-05, "lm_loss": 3.061, "loss": 3.061, "step": 795 }, { "epoch": 0.35680400433304693, "learning_rate": 4.896989170765133e-05, "lm_loss": 0.8997, "loss": 0.8997, "step": 796 }, { "epoch": 0.35725225056964627, "learning_rate": 4.896645032668401e-05, "lm_loss": 1.3832, "loss": 1.3832, "step": 797 }, { "epoch": 0.35770049680624555, "learning_rate": 4.8963003328154306e-05, "lm_loss": 0.8843, "loss": 0.8843, "step": 798 }, { "epoch": 0.3581487430428449, "learning_rate": 4.8959550712870173e-05, "lm_loss": 1.2153, "loss": 1.2153, "step": 799 }, { "epoch": 0.35859698927944417, "learning_rate": 4.895609248164087e-05, "lm_loss": 1.0459, "loss": 1.0459, "step": 800 }, { "epoch": 0.3590452355160435, "learning_rate": 4.8952628635277e-05, "lm_loss": 0.849, "loss": 0.849, "step": 801 }, { "epoch": 0.3594934817526428, "learning_rate": 4.894915917459044e-05, "lm_loss": 1.2243, "loss": 1.2243, "step": 802 }, { "epoch": 0.35994172798924207, "learning_rate": 4.8945684100394415e-05, "lm_loss": 0.7479, "loss": 0.7479, "step": 803 }, { "epoch": 0.3603899742258414, "learning_rate": 4.8942203413503476e-05, "lm_loss": 1.3233, "loss": 1.3233, "step": 804 }, { "epoch": 0.3608382204624407, "learning_rate": 4.893871711473344e-05, "lm_loss": 1.045, "loss": 1.045, "step": 805 }, { "epoch": 0.36128646669904, "learning_rate": 4.893522520490149e-05, "lm_loss": 2.5997, "loss": 2.5997, "step": 806 }, { "epoch": 0.3617347129356393, "learning_rate": 4.89317276848261e-05, "lm_loss": 1.7938, "loss": 1.7938, "step": 807 }, { "epoch": 0.36218295917223864, "learning_rate": 4.8928224555327054e-05, "lm_loss": 1.7467, "loss": 1.7467, "step": 808 }, { "epoch": 0.3626312054088379, "learning_rate": 4.8924715817225475e-05, "lm_loss": 2.2485, "loss": 2.2485, "step": 809 }, { "epoch": 0.3630794516454372, "learning_rate": 4.892120147134378e-05, "lm_loss": 1.2421, "loss": 1.2421, "step": 810 }, { "epoch": 0.36352769788203654, "learning_rate": 4.891768151850571e-05, "lm_loss": 0.9448, "loss": 0.9448, "step": 811 }, { "epoch": 0.3639759441186358, "learning_rate": 4.89141559595363e-05, "lm_loss": 1.2902, "loss": 1.2902, "step": 812 }, { "epoch": 0.36442419035523516, "learning_rate": 4.891062479526194e-05, "lm_loss": 0.9316, "loss": 0.9316, "step": 813 }, { "epoch": 0.36487243659183444, "learning_rate": 4.890708802651029e-05, "lm_loss": 1.7935, "loss": 1.7935, "step": 814 }, { "epoch": 0.3653206828284338, "learning_rate": 4.890354565411035e-05, "lm_loss": 2.5034, "loss": 2.5034, "step": 815 }, { "epoch": 0.36576892906503305, "learning_rate": 4.889999767889243e-05, "lm_loss": 1.0191, "loss": 1.0191, "step": 816 }, { "epoch": 0.36621717530163234, "learning_rate": 4.8896444101688144e-05, "lm_loss": 0.9912, "loss": 0.9912, "step": 817 }, { "epoch": 0.3666654215382317, "learning_rate": 4.8892884923330426e-05, "lm_loss": 1.0129, "loss": 1.0129, "step": 818 }, { "epoch": 0.36711366777483095, "learning_rate": 4.888932014465352e-05, "lm_loss": 1.1476, "loss": 1.1476, "step": 819 }, { "epoch": 0.3675619140114303, "learning_rate": 4.8885749766492995e-05, "lm_loss": 0.9072, "loss": 0.9072, "step": 820 }, { "epoch": 0.36801016024802957, "learning_rate": 4.8882173789685704e-05, "lm_loss": 1.3459, "loss": 1.3459, "step": 821 }, { "epoch": 0.3684584064846289, "learning_rate": 4.887859221506984e-05, "lm_loss": 0.9827, "loss": 0.9827, "step": 822 }, { "epoch": 0.3689066527212282, "learning_rate": 4.88750050434849e-05, "lm_loss": 0.9673, "loss": 0.9673, "step": 823 }, { "epoch": 0.3693548989578275, "learning_rate": 4.887141227577169e-05, "lm_loss": 1.0931, "loss": 1.0931, "step": 824 }, { "epoch": 0.3698031451944268, "learning_rate": 4.886781391277233e-05, "lm_loss": 1.0277, "loss": 1.0277, "step": 825 }, { "epoch": 0.3702513914310261, "learning_rate": 4.886420995533024e-05, "lm_loss": 0.9373, "loss": 0.9373, "step": 826 }, { "epoch": 0.3706996376676254, "learning_rate": 4.886060040429018e-05, "lm_loss": 1.3021, "loss": 1.3021, "step": 827 }, { "epoch": 0.3711478839042247, "learning_rate": 4.885698526049818e-05, "lm_loss": 0.9815, "loss": 0.9815, "step": 828 }, { "epoch": 0.37159613014082404, "learning_rate": 4.885336452480163e-05, "lm_loss": 0.9124, "loss": 0.9124, "step": 829 }, { "epoch": 0.3720443763774233, "learning_rate": 4.8849738198049176e-05, "lm_loss": 0.9845, "loss": 0.9845, "step": 830 }, { "epoch": 0.37249262261402266, "learning_rate": 4.884610628109082e-05, "lm_loss": 3.2771, "loss": 3.2771, "step": 831 }, { "epoch": 0.37294086885062194, "learning_rate": 4.884246877477785e-05, "lm_loss": 1.692, "loss": 1.692, "step": 832 }, { "epoch": 0.3733891150872212, "learning_rate": 4.883882567996289e-05, "lm_loss": 2.4416, "loss": 2.4416, "step": 833 }, { "epoch": 0.37383736132382056, "learning_rate": 4.883517699749982e-05, "lm_loss": 1.0562, "loss": 1.0562, "step": 834 }, { "epoch": 0.37428560756041984, "learning_rate": 4.8831522728243897e-05, "lm_loss": 0.9977, "loss": 0.9977, "step": 835 }, { "epoch": 0.3747338537970192, "learning_rate": 4.8827862873051634e-05, "lm_loss": 0.9259, "loss": 0.9259, "step": 836 }, { "epoch": 0.37518210003361846, "learning_rate": 4.8824197432780884e-05, "lm_loss": 1.192, "loss": 1.192, "step": 837 }, { "epoch": 0.3756303462702178, "learning_rate": 4.88205264082908e-05, "lm_loss": 0.9734, "loss": 0.9734, "step": 838 }, { "epoch": 0.3760785925068171, "learning_rate": 4.881684980044184e-05, "lm_loss": 0.9026, "loss": 0.9026, "step": 839 }, { "epoch": 0.37652683874341636, "learning_rate": 4.881316761009579e-05, "lm_loss": 1.3331, "loss": 1.3331, "step": 840 }, { "epoch": 0.3769750849800157, "learning_rate": 4.88094798381157e-05, "lm_loss": 0.9296, "loss": 0.9296, "step": 841 }, { "epoch": 0.377423331216615, "learning_rate": 4.880578648536598e-05, "lm_loss": 1.0748, "loss": 1.0748, "step": 842 }, { "epoch": 0.3778715774532143, "learning_rate": 4.880208755271232e-05, "lm_loss": 1.2352, "loss": 1.2352, "step": 843 }, { "epoch": 0.3783198236898136, "learning_rate": 4.8798383041021715e-05, "lm_loss": 1.1741, "loss": 1.1741, "step": 844 }, { "epoch": 0.37876806992641293, "learning_rate": 4.879467295116248e-05, "lm_loss": 1.0192, "loss": 1.0192, "step": 845 }, { "epoch": 0.3792163161630122, "learning_rate": 4.8790957284004235e-05, "lm_loss": 0.9447, "loss": 0.9447, "step": 846 }, { "epoch": 0.3796645623996115, "learning_rate": 4.87872360404179e-05, "lm_loss": 0.9869, "loss": 0.9869, "step": 847 }, { "epoch": 0.38011280863621083, "learning_rate": 4.8783509221275725e-05, "lm_loss": 1.2146, "loss": 1.2146, "step": 848 }, { "epoch": 0.3805610548728101, "learning_rate": 4.877977682745123e-05, "lm_loss": 3.0923, "loss": 3.0923, "step": 849 }, { "epoch": 0.38100930110940945, "learning_rate": 4.8776038859819275e-05, "lm_loss": 1.7941, "loss": 1.7941, "step": 850 }, { "epoch": 0.38145754734600873, "learning_rate": 4.877229531925599e-05, "lm_loss": 2.5511, "loss": 2.5511, "step": 851 }, { "epoch": 0.38190579358260807, "learning_rate": 4.876854620663886e-05, "lm_loss": 0.9985, "loss": 0.9985, "step": 852 }, { "epoch": 0.38235403981920735, "learning_rate": 4.876479152284664e-05, "lm_loss": 1.2052, "loss": 1.2052, "step": 853 }, { "epoch": 0.38280228605580663, "learning_rate": 4.8761031268759395e-05, "lm_loss": 2.3876, "loss": 2.3876, "step": 854 }, { "epoch": 0.38325053229240597, "learning_rate": 4.875726544525851e-05, "lm_loss": 1.7347, "loss": 1.7347, "step": 855 }, { "epoch": 0.38369877852900525, "learning_rate": 4.875349405322665e-05, "lm_loss": 0.8418, "loss": 0.8418, "step": 856 }, { "epoch": 0.3841470247656046, "learning_rate": 4.874971709354783e-05, "lm_loss": 1.3304, "loss": 1.3304, "step": 857 }, { "epoch": 0.38459527100220386, "learning_rate": 4.8745934567107325e-05, "lm_loss": 0.9062, "loss": 0.9062, "step": 858 }, { "epoch": 0.3850435172388032, "learning_rate": 4.874214647479173e-05, "lm_loss": 1.041, "loss": 1.041, "step": 859 }, { "epoch": 0.3854917634754025, "learning_rate": 4.8738352817488956e-05, "lm_loss": 1.1193, "loss": 1.1193, "step": 860 }, { "epoch": 0.3859400097120018, "learning_rate": 4.873455359608819e-05, "lm_loss": 1.0401, "loss": 1.0401, "step": 861 }, { "epoch": 0.3863882559486011, "learning_rate": 4.873074881147997e-05, "lm_loss": 1.0014, "loss": 1.0014, "step": 862 }, { "epoch": 0.3868365021852004, "learning_rate": 4.872693846455608e-05, "lm_loss": 1.1132, "loss": 1.1132, "step": 863 }, { "epoch": 0.3872847484217997, "learning_rate": 4.872312255620967e-05, "lm_loss": 0.9859, "loss": 0.9859, "step": 864 }, { "epoch": 0.387732994658399, "learning_rate": 4.871930108733512e-05, "lm_loss": 0.9497, "loss": 0.9497, "step": 865 }, { "epoch": 0.38818124089499834, "learning_rate": 4.87154740588282e-05, "lm_loss": 1.1339, "loss": 1.1339, "step": 866 }, { "epoch": 0.3886294871315976, "learning_rate": 4.871164147158591e-05, "lm_loss": 1.0275, "loss": 1.0275, "step": 867 }, { "epoch": 0.38907773336819695, "learning_rate": 4.870780332650658e-05, "lm_loss": 1.7643, "loss": 1.7643, "step": 868 }, { "epoch": 0.38952597960479624, "learning_rate": 4.870395962448986e-05, "lm_loss": 3.7983, "loss": 3.7983, "step": 869 }, { "epoch": 0.3899742258413955, "learning_rate": 4.870011036643667e-05, "lm_loss": 1.7579, "loss": 1.7579, "step": 870 }, { "epoch": 0.39042247207799485, "learning_rate": 4.8696255553249246e-05, "lm_loss": 1.0619, "loss": 1.0619, "step": 871 }, { "epoch": 0.39087071831459413, "learning_rate": 4.869239518583115e-05, "lm_loss": 0.9161, "loss": 0.9161, "step": 872 }, { "epoch": 0.39131896455119347, "learning_rate": 4.868852926508721e-05, "lm_loss": 1.2345, "loss": 1.2345, "step": 873 }, { "epoch": 0.39176721078779275, "learning_rate": 4.868465779192357e-05, "lm_loss": 0.9433, "loss": 0.9433, "step": 874 }, { "epoch": 0.3922154570243921, "learning_rate": 4.868078076724767e-05, "lm_loss": 1.0614, "loss": 1.0614, "step": 875 }, { "epoch": 0.39266370326099137, "learning_rate": 4.867689819196827e-05, "lm_loss": 1.1904, "loss": 1.1904, "step": 876 }, { "epoch": 0.39311194949759065, "learning_rate": 4.86730100669954e-05, "lm_loss": 1.0216, "loss": 1.0216, "step": 877 }, { "epoch": 0.39356019573419, "learning_rate": 4.866911639324042e-05, "lm_loss": 0.9649, "loss": 0.9649, "step": 878 }, { "epoch": 0.39400844197078927, "learning_rate": 4.8665217171615986e-05, "lm_loss": 1.2494, "loss": 1.2494, "step": 879 }, { "epoch": 0.3944566882073886, "learning_rate": 4.866131240303604e-05, "lm_loss": 1.7426, "loss": 1.7426, "step": 880 }, { "epoch": 0.3949049344439879, "learning_rate": 4.865740208841582e-05, "lm_loss": 2.4546, "loss": 2.4546, "step": 881 }, { "epoch": 0.3953531806805872, "learning_rate": 4.8653486228671885e-05, "lm_loss": 3.2299, "loss": 3.2299, "step": 882 }, { "epoch": 0.3958014269171865, "learning_rate": 4.86495648247221e-05, "lm_loss": 0.9183, "loss": 0.9183, "step": 883 }, { "epoch": 0.3962496731537858, "learning_rate": 4.864563787748558e-05, "lm_loss": 1.268, "loss": 1.268, "step": 884 }, { "epoch": 0.3966979193903851, "learning_rate": 4.86417053878828e-05, "lm_loss": 0.8309, "loss": 0.8309, "step": 885 }, { "epoch": 0.3971461656269844, "learning_rate": 4.863776735683549e-05, "lm_loss": 1.0678, "loss": 1.0678, "step": 886 }, { "epoch": 0.39759441186358374, "learning_rate": 4.863382378526672e-05, "lm_loss": 1.2339, "loss": 1.2339, "step": 887 }, { "epoch": 0.398042658100183, "learning_rate": 4.8629874674100806e-05, "lm_loss": 3.0269, "loss": 3.0269, "step": 888 }, { "epoch": 0.39849090433678236, "learning_rate": 4.862592002426341e-05, "lm_loss": 1.1744, "loss": 1.1744, "step": 889 }, { "epoch": 0.39893915057338164, "learning_rate": 4.8621959836681464e-05, "lm_loss": 1.0368, "loss": 1.0368, "step": 890 }, { "epoch": 0.399387396809981, "learning_rate": 4.861799411228321e-05, "lm_loss": 1.0613, "loss": 1.0613, "step": 891 }, { "epoch": 0.39983564304658026, "learning_rate": 4.8614022851998186e-05, "lm_loss": 1.0204, "loss": 1.0204, "step": 892 }, { "epoch": 0.40028388928317954, "learning_rate": 4.861004605675723e-05, "lm_loss": 1.3874, "loss": 1.3874, "step": 893 }, { "epoch": 0.4007321355197789, "learning_rate": 4.860606372749247e-05, "lm_loss": 0.8089, "loss": 0.8089, "step": 894 }, { "epoch": 0.40118038175637816, "learning_rate": 4.860207586513733e-05, "lm_loss": 1.0725, "loss": 1.0725, "step": 895 }, { "epoch": 0.4016286279929775, "learning_rate": 4.859808247062655e-05, "lm_loss": 1.2996, "loss": 1.2996, "step": 896 }, { "epoch": 0.4020768742295768, "learning_rate": 4.8594083544896135e-05, "lm_loss": 0.8843, "loss": 0.8843, "step": 897 }, { "epoch": 0.4025251204661761, "learning_rate": 4.859007908888342e-05, "lm_loss": 1.005, "loss": 1.005, "step": 898 }, { "epoch": 0.4029733667027754, "learning_rate": 4.858606910352701e-05, "lm_loss": 0.9371, "loss": 0.9371, "step": 899 }, { "epoch": 0.4034216129393747, "learning_rate": 4.858205358976682e-05, "lm_loss": 1.1642, "loss": 1.1642, "step": 900 }, { "epoch": 0.403869859175974, "learning_rate": 4.857803254854406e-05, "lm_loss": 0.9705, "loss": 0.9705, "step": 901 }, { "epoch": 0.4043181054125733, "learning_rate": 4.857400598080123e-05, "lm_loss": 1.2036, "loss": 1.2036, "step": 902 }, { "epoch": 0.40476635164917263, "learning_rate": 4.8569973887482135e-05, "lm_loss": 0.9136, "loss": 0.9136, "step": 903 }, { "epoch": 0.4052145978857719, "learning_rate": 4.8565936269531854e-05, "lm_loss": 1.0842, "loss": 1.0842, "step": 904 }, { "epoch": 0.40566284412237125, "learning_rate": 4.8561893127896785e-05, "lm_loss": 1.2499, "loss": 1.2499, "step": 905 }, { "epoch": 0.40611109035897053, "learning_rate": 4.8557844463524616e-05, "lm_loss": 0.8332, "loss": 0.8332, "step": 906 }, { "epoch": 0.4065593365955698, "learning_rate": 4.8553790277364315e-05, "lm_loss": 0.9442, "loss": 0.9442, "step": 907 }, { "epoch": 0.40700758283216915, "learning_rate": 4.854973057036616e-05, "lm_loss": 2.5105, "loss": 2.5105, "step": 908 }, { "epoch": 0.4074558290687684, "learning_rate": 4.854566534348171e-05, "lm_loss": 1.7501, "loss": 1.7501, "step": 909 }, { "epoch": 0.40790407530536776, "learning_rate": 4.854159459766383e-05, "lm_loss": 0.9908, "loss": 0.9908, "step": 910 }, { "epoch": 0.40835232154196704, "learning_rate": 4.853751833386667e-05, "lm_loss": 1.2217, "loss": 1.2217, "step": 911 }, { "epoch": 0.4088005677785664, "learning_rate": 4.8533436553045684e-05, "lm_loss": 0.9042, "loss": 0.9042, "step": 912 }, { "epoch": 0.40924881401516566, "learning_rate": 4.852934925615761e-05, "lm_loss": 0.8567, "loss": 0.8567, "step": 913 }, { "epoch": 0.40969706025176494, "learning_rate": 4.852525644416047e-05, "lm_loss": 1.0883, "loss": 1.0883, "step": 914 }, { "epoch": 0.4101453064883643, "learning_rate": 4.85211581180136e-05, "lm_loss": 1.2283, "loss": 1.2283, "step": 915 }, { "epoch": 0.41059355272496356, "learning_rate": 4.851705427867762e-05, "lm_loss": 0.8768, "loss": 0.8768, "step": 916 }, { "epoch": 0.4110417989615629, "learning_rate": 4.851294492711443e-05, "lm_loss": 1.3719, "loss": 1.3719, "step": 917 }, { "epoch": 0.4114900451981622, "learning_rate": 4.850883006428723e-05, "lm_loss": 0.9451, "loss": 0.9451, "step": 918 }, { "epoch": 0.4119382914347615, "learning_rate": 4.850470969116054e-05, "lm_loss": 0.8719, "loss": 0.8719, "step": 919 }, { "epoch": 0.4123865376713608, "learning_rate": 4.850058380870012e-05, "lm_loss": 1.4517, "loss": 1.4517, "step": 920 }, { "epoch": 0.4128347839079601, "learning_rate": 4.849645241787305e-05, "lm_loss": 0.9366, "loss": 0.9366, "step": 921 }, { "epoch": 0.4132830301445594, "learning_rate": 4.849231551964771e-05, "lm_loss": 1.6226, "loss": 1.6226, "step": 922 }, { "epoch": 0.4137312763811587, "learning_rate": 4.848817311499375e-05, "lm_loss": 2.4391, "loss": 2.4391, "step": 923 }, { "epoch": 0.41417952261775803, "learning_rate": 4.848402520488212e-05, "lm_loss": 0.9535, "loss": 0.9535, "step": 924 }, { "epoch": 0.4146277688543573, "learning_rate": 4.847987179028506e-05, "lm_loss": 1.3251, "loss": 1.3251, "step": 925 }, { "epoch": 0.41507601509095665, "learning_rate": 4.8475712872176104e-05, "lm_loss": 0.7422, "loss": 0.7422, "step": 926 }, { "epoch": 0.41552426132755593, "learning_rate": 4.847154845153007e-05, "lm_loss": 1.2535, "loss": 1.2535, "step": 927 }, { "epoch": 0.41597250756415527, "learning_rate": 4.846737852932306e-05, "lm_loss": 1.0092, "loss": 1.0092, "step": 928 }, { "epoch": 0.41642075380075455, "learning_rate": 4.846320310653248e-05, "lm_loss": 1.2717, "loss": 1.2717, "step": 929 }, { "epoch": 0.41686900003735383, "learning_rate": 4.845902218413704e-05, "lm_loss": 0.8737, "loss": 0.8737, "step": 930 }, { "epoch": 0.41731724627395317, "learning_rate": 4.845483576311668e-05, "lm_loss": 1.0041, "loss": 1.0041, "step": 931 }, { "epoch": 0.41776549251055245, "learning_rate": 4.845064384445269e-05, "lm_loss": 1.1048, "loss": 1.1048, "step": 932 }, { "epoch": 0.4182137387471518, "learning_rate": 4.844644642912762e-05, "lm_loss": 3.0442, "loss": 3.0442, "step": 933 }, { "epoch": 0.41866198498375107, "learning_rate": 4.844224351812532e-05, "lm_loss": 1.6692, "loss": 1.6692, "step": 934 }, { "epoch": 0.4191102312203504, "learning_rate": 4.843803511243091e-05, "lm_loss": 2.4215, "loss": 2.4215, "step": 935 }, { "epoch": 0.4195584774569497, "learning_rate": 4.8433821213030813e-05, "lm_loss": 3.0116, "loss": 3.0116, "step": 936 }, { "epoch": 0.42000672369354897, "learning_rate": 4.842960182091275e-05, "lm_loss": 1.2036, "loss": 1.2036, "step": 937 }, { "epoch": 0.4204549699301483, "learning_rate": 4.84253769370657e-05, "lm_loss": 0.9593, "loss": 0.9593, "step": 938 }, { "epoch": 0.4209032161667476, "learning_rate": 4.842114656247995e-05, "lm_loss": 2.8621, "loss": 2.8621, "step": 939 }, { "epoch": 0.4213514624033469, "learning_rate": 4.841691069814707e-05, "lm_loss": 1.0758, "loss": 1.0758, "step": 940 }, { "epoch": 0.4217997086399462, "learning_rate": 4.841266934505991e-05, "lm_loss": 1.1239, "loss": 1.1239, "step": 941 }, { "epoch": 0.42224795487654554, "learning_rate": 4.840842250421263e-05, "lm_loss": 0.9905, "loss": 0.9905, "step": 942 }, { "epoch": 0.4226962011131448, "learning_rate": 4.8404170176600635e-05, "lm_loss": 1.1352, "loss": 1.1352, "step": 943 }, { "epoch": 0.4231444473497441, "learning_rate": 4.8399912363220654e-05, "lm_loss": 1.1569, "loss": 1.1569, "step": 944 }, { "epoch": 0.42359269358634344, "learning_rate": 4.8395649065070686e-05, "lm_loss": 0.8399, "loss": 0.8399, "step": 945 }, { "epoch": 0.4240409398229427, "learning_rate": 4.839138028315002e-05, "lm_loss": 0.7665, "loss": 0.7665, "step": 946 }, { "epoch": 0.42448918605954206, "learning_rate": 4.838710601845922e-05, "lm_loss": 1.3435, "loss": 1.3435, "step": 947 }, { "epoch": 0.42493743229614134, "learning_rate": 4.8382826272000146e-05, "lm_loss": 0.8631, "loss": 0.8631, "step": 948 }, { "epoch": 0.4253856785327407, "learning_rate": 4.837854104477595e-05, "lm_loss": 0.9357, "loss": 0.9357, "step": 949 }, { "epoch": 0.42583392476933996, "learning_rate": 4.837425033779104e-05, "lm_loss": 1.1971, "loss": 1.1971, "step": 950 }, { "epoch": 0.42628217100593924, "learning_rate": 4.8369954152051135e-05, "lm_loss": 0.9815, "loss": 0.9815, "step": 951 }, { "epoch": 0.4267304172425386, "learning_rate": 4.836565248856323e-05, "lm_loss": 1.1899, "loss": 1.1899, "step": 952 }, { "epoch": 0.42717866347913785, "learning_rate": 4.836134534833561e-05, "lm_loss": 1.0404, "loss": 1.0404, "step": 953 }, { "epoch": 0.4276269097157372, "learning_rate": 4.8357032732377814e-05, "lm_loss": 1.0643, "loss": 1.0643, "step": 954 }, { "epoch": 0.4280751559523365, "learning_rate": 4.835271464170072e-05, "lm_loss": 0.887, "loss": 0.887, "step": 955 }, { "epoch": 0.4285234021889358, "learning_rate": 4.8348391077316444e-05, "lm_loss": 2.5522, "loss": 2.5522, "step": 956 }, { "epoch": 0.4289716484255351, "learning_rate": 4.8344062040238395e-05, "lm_loss": 1.6985, "loss": 1.6985, "step": 957 }, { "epoch": 0.42941989466213437, "learning_rate": 4.833972753148126e-05, "lm_loss": 1.2076, "loss": 1.2076, "step": 958 }, { "epoch": 0.4298681408987337, "learning_rate": 4.833538755206103e-05, "lm_loss": 0.6831, "loss": 0.6831, "step": 959 }, { "epoch": 0.430316387135333, "learning_rate": 4.833104210299496e-05, "lm_loss": 1.1473, "loss": 1.1473, "step": 960 }, { "epoch": 0.4307646333719323, "learning_rate": 4.83266911853016e-05, "lm_loss": 1.0802, "loss": 1.0802, "step": 961 }, { "epoch": 0.4312128796085316, "learning_rate": 4.8322334800000755e-05, "lm_loss": 0.914, "loss": 0.914, "step": 962 }, { "epoch": 0.43166112584513094, "learning_rate": 4.8317972948113553e-05, "lm_loss": 1.271, "loss": 1.271, "step": 963 }, { "epoch": 0.4321093720817302, "learning_rate": 4.831360563066236e-05, "lm_loss": 0.9754, "loss": 0.9754, "step": 964 }, { "epoch": 0.43255761831832956, "learning_rate": 4.830923284867086e-05, "lm_loss": 0.9625, "loss": 0.9625, "step": 965 }, { "epoch": 0.43300586455492884, "learning_rate": 4.830485460316398e-05, "lm_loss": 1.2345, "loss": 1.2345, "step": 966 }, { "epoch": 0.4334541107915281, "learning_rate": 4.8300470895167974e-05, "lm_loss": 0.9856, "loss": 0.9856, "step": 967 }, { "epoch": 0.43390235702812746, "learning_rate": 4.829608172571034e-05, "lm_loss": 1.1534, "loss": 1.1534, "step": 968 }, { "epoch": 0.43435060326472674, "learning_rate": 4.8291687095819865e-05, "lm_loss": 0.9229, "loss": 0.9229, "step": 969 }, { "epoch": 0.4347988495013261, "learning_rate": 4.8287287006526625e-05, "lm_loss": 1.3609, "loss": 1.3609, "step": 970 }, { "epoch": 0.43524709573792536, "learning_rate": 4.828288145886196e-05, "lm_loss": 0.5738, "loss": 0.5738, "step": 971 }, { "epoch": 0.4356953419745247, "learning_rate": 4.82784704538585e-05, "lm_loss": 1.1642, "loss": 1.1642, "step": 972 }, { "epoch": 0.436143588211124, "learning_rate": 4.827405399255016e-05, "lm_loss": 1.0547, "loss": 1.0547, "step": 973 }, { "epoch": 0.43659183444772326, "learning_rate": 4.8269632075972123e-05, "lm_loss": 1.308, "loss": 1.308, "step": 974 }, { "epoch": 0.4370400806843226, "learning_rate": 4.826520470516086e-05, "lm_loss": 0.7842, "loss": 0.7842, "step": 975 }, { "epoch": 0.4374883269209219, "learning_rate": 4.82607718811541e-05, "lm_loss": 1.029, "loss": 1.029, "step": 976 }, { "epoch": 0.4379365731575212, "learning_rate": 4.8256333604990875e-05, "lm_loss": 1.2613, "loss": 1.2613, "step": 977 }, { "epoch": 0.4383848193941205, "learning_rate": 4.8251889877711486e-05, "lm_loss": 0.9036, "loss": 0.9036, "step": 978 }, { "epoch": 0.43883306563071983, "learning_rate": 4.824744070035751e-05, "lm_loss": 1.2445, "loss": 1.2445, "step": 979 }, { "epoch": 0.4392813118673191, "learning_rate": 4.8242986073971794e-05, "lm_loss": 1.0114, "loss": 1.0114, "step": 980 }, { "epoch": 0.4397295581039184, "learning_rate": 4.8238525999598486e-05, "lm_loss": 0.9698, "loss": 0.9698, "step": 981 }, { "epoch": 0.44017780434051773, "learning_rate": 4.823406047828297e-05, "lm_loss": 1.0377, "loss": 1.0377, "step": 982 }, { "epoch": 0.440626050577117, "learning_rate": 4.822958951107196e-05, "lm_loss": 1.1586, "loss": 1.1586, "step": 983 }, { "epoch": 0.44107429681371635, "learning_rate": 4.82251130990134e-05, "lm_loss": 1.0649, "loss": 1.0649, "step": 984 }, { "epoch": 0.44152254305031563, "learning_rate": 4.822063124315653e-05, "lm_loss": 1.0828, "loss": 1.0828, "step": 985 }, { "epoch": 0.44197078928691497, "learning_rate": 4.8216143944551876e-05, "lm_loss": 1.1359, "loss": 1.1359, "step": 986 }, { "epoch": 0.44241903552351425, "learning_rate": 4.821165120425121e-05, "lm_loss": 1.0249, "loss": 1.0249, "step": 987 }, { "epoch": 0.44286728176011353, "learning_rate": 4.8207153023307615e-05, "lm_loss": 1.2676, "loss": 1.2676, "step": 988 }, { "epoch": 0.44331552799671287, "learning_rate": 4.820264940277543e-05, "lm_loss": 0.9966, "loss": 0.9966, "step": 989 }, { "epoch": 0.44376377423331215, "learning_rate": 4.819814034371026e-05, "lm_loss": 1.0265, "loss": 1.0265, "step": 990 }, { "epoch": 0.4442120204699115, "learning_rate": 4.8193625847169e-05, "lm_loss": 0.835, "loss": 0.835, "step": 991 }, { "epoch": 0.44466026670651077, "learning_rate": 4.8189105914209825e-05, "lm_loss": 1.182, "loss": 1.182, "step": 992 }, { "epoch": 0.4451085129431101, "learning_rate": 4.8184580545892167e-05, "lm_loss": 0.9328, "loss": 0.9328, "step": 993 }, { "epoch": 0.4455567591797094, "learning_rate": 4.8180049743276734e-05, "lm_loss": 1.2262, "loss": 1.2262, "step": 994 }, { "epoch": 0.4460050054163087, "learning_rate": 4.817551350742552e-05, "lm_loss": 0.9365, "loss": 0.9365, "step": 995 }, { "epoch": 0.446453251652908, "learning_rate": 4.817097183940179e-05, "lm_loss": 0.9804, "loss": 0.9804, "step": 996 }, { "epoch": 0.4469014978895073, "learning_rate": 4.816642474027007e-05, "lm_loss": 1.2214, "loss": 1.2214, "step": 997 }, { "epoch": 0.4473497441261066, "learning_rate": 4.816187221109616e-05, "lm_loss": 0.8125, "loss": 0.8125, "step": 998 }, { "epoch": 0.4477979903627059, "learning_rate": 4.8157314252947164e-05, "lm_loss": 1.257, "loss": 1.257, "step": 999 }, { "epoch": 0.44824623659930524, "learning_rate": 4.815275086689142e-05, "lm_loss": 1.0278, "loss": 1.0278, "step": 1000 }, { "epoch": 0.4486944828359045, "learning_rate": 4.814818205399854e-05, "lm_loss": 1.1455, "loss": 1.1455, "step": 1001 }, { "epoch": 0.44914272907250385, "learning_rate": 4.8143607815339444e-05, "lm_loss": 1.0364, "loss": 1.0364, "step": 1002 }, { "epoch": 0.44959097530910314, "learning_rate": 4.813902815198629e-05, "lm_loss": 1.1203, "loss": 1.1203, "step": 1003 }, { "epoch": 0.4500392215457024, "learning_rate": 4.813444306501251e-05, "lm_loss": 1.1209, "loss": 1.1209, "step": 1004 }, { "epoch": 0.45048746778230175, "learning_rate": 4.812985255549283e-05, "lm_loss": 0.9538, "loss": 0.9538, "step": 1005 }, { "epoch": 0.45093571401890103, "learning_rate": 4.812525662450322e-05, "lm_loss": 1.3382, "loss": 1.3382, "step": 1006 }, { "epoch": 0.45138396025550037, "learning_rate": 4.812065527312094e-05, "lm_loss": 2.1803, "loss": 2.1803, "step": 1007 }, { "epoch": 0.45183220649209965, "learning_rate": 4.8116048502424514e-05, "lm_loss": 1.7553, "loss": 1.7553, "step": 1008 }, { "epoch": 0.452280452728699, "learning_rate": 4.811143631349374e-05, "lm_loss": 2.948, "loss": 2.948, "step": 1009 }, { "epoch": 0.45272869896529827, "learning_rate": 4.810681870740966e-05, "lm_loss": 1.1208, "loss": 1.1208, "step": 1010 }, { "epoch": 0.45317694520189755, "learning_rate": 4.8102195685254636e-05, "lm_loss": 0.9865, "loss": 0.9865, "step": 1011 }, { "epoch": 0.4536251914384969, "learning_rate": 4.809756724811225e-05, "lm_loss": 3.8227, "loss": 3.8227, "step": 1012 }, { "epoch": 0.45407343767509617, "learning_rate": 4.809293339706739e-05, "lm_loss": 2.5099, "loss": 2.5099, "step": 1013 }, { "epoch": 0.4545216839116955, "learning_rate": 4.808829413320617e-05, "lm_loss": 0.9449, "loss": 0.9449, "step": 1014 }, { "epoch": 0.4549699301482948, "learning_rate": 4.808364945761603e-05, "lm_loss": 0.9791, "loss": 0.9791, "step": 1015 }, { "epoch": 0.4554181763848941, "learning_rate": 4.8078999371385643e-05, "lm_loss": 1.3102, "loss": 1.3102, "step": 1016 }, { "epoch": 0.4558664226214934, "learning_rate": 4.8074343875604947e-05, "lm_loss": 0.949, "loss": 0.949, "step": 1017 }, { "epoch": 0.4563146688580927, "learning_rate": 4.806968297136515e-05, "lm_loss": 1.2263, "loss": 1.2263, "step": 1018 }, { "epoch": 0.456762915094692, "learning_rate": 4.8065016659758745e-05, "lm_loss": 0.9628, "loss": 0.9628, "step": 1019 }, { "epoch": 0.4572111613312913, "learning_rate": 4.806034494187949e-05, "lm_loss": 0.819, "loss": 0.819, "step": 1020 }, { "epoch": 0.45765940756789064, "learning_rate": 4.8055667818822384e-05, "lm_loss": 1.3325, "loss": 1.3325, "step": 1021 }, { "epoch": 0.4581076538044899, "learning_rate": 4.8050985291683714e-05, "lm_loss": 0.6041, "loss": 0.6041, "step": 1022 }, { "epoch": 0.45855590004108926, "learning_rate": 4.804629736156104e-05, "lm_loss": 1.1847, "loss": 1.1847, "step": 1023 }, { "epoch": 0.45900414627768854, "learning_rate": 4.8041604029553164e-05, "lm_loss": 1.0166, "loss": 1.0166, "step": 1024 }, { "epoch": 0.4594523925142878, "learning_rate": 4.803690529676019e-05, "lm_loss": 0.8358, "loss": 0.8358, "step": 1025 }, { "epoch": 0.45990063875088716, "learning_rate": 4.803220116428345e-05, "lm_loss": 1.2025, "loss": 1.2025, "step": 1026 }, { "epoch": 0.46034888498748644, "learning_rate": 4.802749163322557e-05, "lm_loss": 1.1534, "loss": 1.1534, "step": 1027 }, { "epoch": 0.4607971312240858, "learning_rate": 4.802277670469042e-05, "lm_loss": 1.1447, "loss": 1.1447, "step": 1028 }, { "epoch": 0.46124537746068506, "learning_rate": 4.801805637978315e-05, "lm_loss": 0.8292, "loss": 0.8292, "step": 1029 }, { "epoch": 0.4616936236972844, "learning_rate": 4.8013330659610176e-05, "lm_loss": 1.0793, "loss": 1.0793, "step": 1030 }, { "epoch": 0.4621418699338837, "learning_rate": 4.800859954527915e-05, "lm_loss": 1.0705, "loss": 1.0705, "step": 1031 }, { "epoch": 0.462590116170483, "learning_rate": 4.800386303789904e-05, "lm_loss": 1.0061, "loss": 1.0061, "step": 1032 }, { "epoch": 0.4630383624070823, "learning_rate": 4.7999121138580035e-05, "lm_loss": 0.7439, "loss": 0.7439, "step": 1033 }, { "epoch": 0.4634866086436816, "learning_rate": 4.7994373848433596e-05, "lm_loss": 2.7903, "loss": 2.7903, "step": 1034 }, { "epoch": 0.4639348548802809, "learning_rate": 4.798962116857246e-05, "lm_loss": 1.6958, "loss": 1.6958, "step": 1035 }, { "epoch": 0.4643831011168802, "learning_rate": 4.7984863100110635e-05, "lm_loss": 1.0338, "loss": 1.0338, "step": 1036 }, { "epoch": 0.46483134735347953, "learning_rate": 4.798009964416335e-05, "lm_loss": 0.9585, "loss": 0.9585, "step": 1037 }, { "epoch": 0.4652795935900788, "learning_rate": 4.797533080184714e-05, "lm_loss": 1.1274, "loss": 1.1274, "step": 1038 }, { "epoch": 0.46572783982667815, "learning_rate": 4.797055657427978e-05, "lm_loss": 1.0026, "loss": 1.0026, "step": 1039 }, { "epoch": 0.46617608606327743, "learning_rate": 4.7965776962580325e-05, "lm_loss": 2.5113, "loss": 2.5113, "step": 1040 }, { "epoch": 0.4666243322998767, "learning_rate": 4.796099196786908e-05, "lm_loss": 1.7462, "loss": 1.7462, "step": 1041 }, { "epoch": 0.46707257853647605, "learning_rate": 4.7956201591267604e-05, "lm_loss": 0.9294, "loss": 0.9294, "step": 1042 }, { "epoch": 0.4675208247730753, "learning_rate": 4.7951405833898736e-05, "lm_loss": 1.3402, "loss": 1.3402, "step": 1043 }, { "epoch": 0.46796907100967466, "learning_rate": 4.7946604696886556e-05, "lm_loss": 2.8886, "loss": 2.8886, "step": 1044 }, { "epoch": 0.46841731724627395, "learning_rate": 4.7941798181356425e-05, "lm_loss": 0.879, "loss": 0.879, "step": 1045 }, { "epoch": 0.4688655634828733, "learning_rate": 4.793698628843495e-05, "lm_loss": 1.2631, "loss": 1.2631, "step": 1046 }, { "epoch": 0.46931380971947256, "learning_rate": 4.793216901925001e-05, "lm_loss": 0.9726, "loss": 0.9726, "step": 1047 }, { "epoch": 0.46976205595607184, "learning_rate": 4.7927346374930735e-05, "lm_loss": 0.8294, "loss": 0.8294, "step": 1048 }, { "epoch": 0.4702103021926712, "learning_rate": 4.7922518356607515e-05, "lm_loss": 1.2264, "loss": 1.2264, "step": 1049 }, { "epoch": 0.47065854842927046, "learning_rate": 4.7917684965412015e-05, "lm_loss": 0.8976, "loss": 0.8976, "step": 1050 }, { "epoch": 0.4711067946658698, "learning_rate": 4.7912846202477126e-05, "lm_loss": 1.0912, "loss": 1.0912, "step": 1051 }, { "epoch": 0.4715550409024691, "learning_rate": 4.790800206893703e-05, "lm_loss": 1.1463, "loss": 1.1463, "step": 1052 }, { "epoch": 0.4720032871390684, "learning_rate": 4.7903152565927155e-05, "lm_loss": 0.9323, "loss": 0.9323, "step": 1053 }, { "epoch": 0.4724515333756677, "learning_rate": 4.7898297694584195e-05, "lm_loss": 1.1088, "loss": 1.1088, "step": 1054 }, { "epoch": 0.472899779612267, "learning_rate": 4.789343745604609e-05, "lm_loss": 1.0736, "loss": 1.0736, "step": 1055 }, { "epoch": 0.4733480258488663, "learning_rate": 4.7888571851452055e-05, "lm_loss": 0.9469, "loss": 0.9469, "step": 1056 }, { "epoch": 0.4737962720854656, "learning_rate": 4.788370088194254e-05, "lm_loss": 1.0249, "loss": 1.0249, "step": 1057 }, { "epoch": 0.47424451832206493, "learning_rate": 4.787882454865926e-05, "lm_loss": 1.0751, "loss": 1.0751, "step": 1058 }, { "epoch": 0.4746927645586642, "learning_rate": 4.787394285274521e-05, "lm_loss": 1.0266, "loss": 1.0266, "step": 1059 }, { "epoch": 0.47514101079526355, "learning_rate": 4.786905579534462e-05, "lm_loss": 1.0688, "loss": 1.0688, "step": 1060 }, { "epoch": 0.47558925703186283, "learning_rate": 4.786416337760298e-05, "lm_loss": 1.1982, "loss": 1.1982, "step": 1061 }, { "epoch": 0.4760375032684621, "learning_rate": 4.7859265600667026e-05, "lm_loss": 0.9393, "loss": 0.9393, "step": 1062 }, { "epoch": 0.47648574950506145, "learning_rate": 4.785436246568477e-05, "lm_loss": 1.022, "loss": 1.022, "step": 1063 }, { "epoch": 0.47693399574166073, "learning_rate": 4.7849453973805476e-05, "lm_loss": 1.3732, "loss": 1.3732, "step": 1064 }, { "epoch": 0.47738224197826007, "learning_rate": 4.784454012617965e-05, "lm_loss": 0.7963, "loss": 0.7963, "step": 1065 }, { "epoch": 0.47783048821485935, "learning_rate": 4.7839620923959067e-05, "lm_loss": 1.3136, "loss": 1.3136, "step": 1066 }, { "epoch": 0.4782787344514587, "learning_rate": 4.783469636829675e-05, "lm_loss": 0.6365, "loss": 0.6365, "step": 1067 }, { "epoch": 0.47872698068805797, "learning_rate": 4.7829766460346985e-05, "lm_loss": 1.193, "loss": 1.193, "step": 1068 }, { "epoch": 0.4791752269246573, "learning_rate": 4.78248312012653e-05, "lm_loss": 3.05, "loss": 3.05, "step": 1069 }, { "epoch": 0.4796234731612566, "learning_rate": 4.781989059220849e-05, "lm_loss": 1.309, "loss": 1.309, "step": 1070 }, { "epoch": 0.48007171939785587, "learning_rate": 4.781494463433459e-05, "lm_loss": 0.9108, "loss": 0.9108, "step": 1071 }, { "epoch": 0.4805199656344552, "learning_rate": 4.78099933288029e-05, "lm_loss": 1.031, "loss": 1.031, "step": 1072 }, { "epoch": 0.4809682118710545, "learning_rate": 4.7805036676773984e-05, "lm_loss": 1.2356, "loss": 1.2356, "step": 1073 }, { "epoch": 0.4814164581076538, "learning_rate": 4.7800074679409625e-05, "lm_loss": 0.9072, "loss": 0.9072, "step": 1074 }, { "epoch": 0.4818647043442531, "learning_rate": 4.779510733787288e-05, "lm_loss": 1.2035, "loss": 1.2035, "step": 1075 }, { "epoch": 0.48231295058085244, "learning_rate": 4.7790134653328074e-05, "lm_loss": 0.9345, "loss": 0.9345, "step": 1076 }, { "epoch": 0.4827611968174517, "learning_rate": 4.778515662694075e-05, "lm_loss": 0.9491, "loss": 0.9491, "step": 1077 }, { "epoch": 0.483209443054051, "learning_rate": 4.7780173259877745e-05, "lm_loss": 0.8672, "loss": 0.8672, "step": 1078 }, { "epoch": 0.48365768929065034, "learning_rate": 4.7775184553307094e-05, "lm_loss": 1.4092, "loss": 1.4092, "step": 1079 }, { "epoch": 0.4841059355272496, "learning_rate": 4.777019050839814e-05, "lm_loss": 0.8522, "loss": 0.8522, "step": 1080 }, { "epoch": 0.48455418176384896, "learning_rate": 4.776519112632144e-05, "lm_loss": 1.2169, "loss": 1.2169, "step": 1081 }, { "epoch": 0.48500242800044824, "learning_rate": 4.7760186408248805e-05, "lm_loss": 0.9489, "loss": 0.9489, "step": 1082 }, { "epoch": 0.4854506742370476, "learning_rate": 4.775517635535332e-05, "lm_loss": 0.9694, "loss": 0.9694, "step": 1083 }, { "epoch": 0.48589892047364686, "learning_rate": 4.77501609688093e-05, "lm_loss": 1.6351, "loss": 1.6351, "step": 1084 }, { "epoch": 0.48634716671024614, "learning_rate": 4.774514024979231e-05, "lm_loss": 2.303, "loss": 2.303, "step": 1085 }, { "epoch": 0.4867954129468455, "learning_rate": 4.774011419947917e-05, "lm_loss": 1.2403, "loss": 1.2403, "step": 1086 }, { "epoch": 0.48724365918344476, "learning_rate": 4.7735082819047955e-05, "lm_loss": 1.0509, "loss": 1.0509, "step": 1087 }, { "epoch": 0.4876919054200441, "learning_rate": 4.7730046109677985e-05, "lm_loss": 0.8372, "loss": 0.8372, "step": 1088 }, { "epoch": 0.4881401516566434, "learning_rate": 4.772500407254982e-05, "lm_loss": 1.43, "loss": 1.43, "step": 1089 }, { "epoch": 0.4885883978932427, "learning_rate": 4.7719956708845285e-05, "lm_loss": 1.1825, "loss": 1.1825, "step": 1090 }, { "epoch": 0.489036644129842, "learning_rate": 4.7714904019747445e-05, "lm_loss": 0.7439, "loss": 0.7439, "step": 1091 }, { "epoch": 0.48948489036644127, "learning_rate": 4.7709846006440616e-05, "lm_loss": 1.2257, "loss": 1.2257, "step": 1092 }, { "epoch": 0.4899331366030406, "learning_rate": 4.770478267011035e-05, "lm_loss": 3.1278, "loss": 3.1278, "step": 1093 }, { "epoch": 0.4903813828396399, "learning_rate": 4.769971401194346e-05, "lm_loss": 0.626, "loss": 0.626, "step": 1094 }, { "epoch": 0.4908296290762392, "learning_rate": 4.769464003312801e-05, "lm_loss": 1.1808, "loss": 1.1808, "step": 1095 }, { "epoch": 0.4912778753128385, "learning_rate": 4.76895607348533e-05, "lm_loss": 1.688, "loss": 1.688, "step": 1096 }, { "epoch": 0.49172612154943784, "learning_rate": 4.768447611830987e-05, "lm_loss": 2.4385, "loss": 2.4385, "step": 1097 }, { "epoch": 0.4921743677860371, "learning_rate": 4.7679386184689535e-05, "lm_loss": 0.8563, "loss": 0.8563, "step": 1098 }, { "epoch": 0.49262261402263646, "learning_rate": 4.767429093518533e-05, "lm_loss": 1.3147, "loss": 1.3147, "step": 1099 }, { "epoch": 0.49307086025923574, "learning_rate": 4.7669190370991534e-05, "lm_loss": 0.6312, "loss": 0.6312, "step": 1100 }, { "epoch": 0.493519106495835, "learning_rate": 4.766408449330371e-05, "lm_loss": 1.1399, "loss": 1.1399, "step": 1101 }, { "epoch": 0.49396735273243436, "learning_rate": 4.765897330331861e-05, "lm_loss": 1.058, "loss": 1.058, "step": 1102 }, { "epoch": 0.49441559896903364, "learning_rate": 4.765385680223428e-05, "lm_loss": 1.1188, "loss": 1.1188, "step": 1103 }, { "epoch": 0.494863845205633, "learning_rate": 4.7648734991249966e-05, "lm_loss": 0.9964, "loss": 0.9964, "step": 1104 }, { "epoch": 0.49531209144223226, "learning_rate": 4.764360787156621e-05, "lm_loss": 0.8652, "loss": 0.8652, "step": 1105 }, { "epoch": 0.4957603376788316, "learning_rate": 4.763847544438477e-05, "lm_loss": 1.3934, "loss": 1.3934, "step": 1106 }, { "epoch": 0.4962085839154309, "learning_rate": 4.763333771090863e-05, "lm_loss": 0.9016, "loss": 0.9016, "step": 1107 }, { "epoch": 0.49665683015203016, "learning_rate": 4.7628194672342054e-05, "lm_loss": 0.9757, "loss": 0.9757, "step": 1108 }, { "epoch": 0.4971050763886295, "learning_rate": 4.762304632989053e-05, "lm_loss": 1.2308, "loss": 1.2308, "step": 1109 }, { "epoch": 0.4975533226252288, "learning_rate": 4.7617892684760776e-05, "lm_loss": 0.8514, "loss": 0.8514, "step": 1110 }, { "epoch": 0.4980015688618281, "learning_rate": 4.7612733738160797e-05, "lm_loss": 1.2026, "loss": 1.2026, "step": 1111 }, { "epoch": 0.4984498150984274, "learning_rate": 4.7607569491299794e-05, "lm_loss": 0.9058, "loss": 0.9058, "step": 1112 }, { "epoch": 0.49889806133502673, "learning_rate": 4.760239994538824e-05, "lm_loss": 0.9958, "loss": 0.9958, "step": 1113 }, { "epoch": 0.499346307571626, "learning_rate": 4.759722510163783e-05, "lm_loss": 0.9082, "loss": 0.9082, "step": 1114 }, { "epoch": 0.4997945538082253, "learning_rate": 4.759204496126151e-05, "lm_loss": 1.3129, "loss": 1.3129, "step": 1115 }, { "epoch": 0.5002428000448246, "learning_rate": 4.758685952547347e-05, "lm_loss": 0.8648, "loss": 0.8648, "step": 1116 }, { "epoch": 0.5006910462814239, "learning_rate": 4.758166879548914e-05, "lm_loss": 0.9246, "loss": 0.9246, "step": 1117 }, { "epoch": 0.5011392925180232, "learning_rate": 4.7576472772525196e-05, "lm_loss": 2.4528, "loss": 2.4528, "step": 1118 }, { "epoch": 0.5015875387546226, "learning_rate": 4.7571271457799526e-05, "lm_loss": 1.8062, "loss": 1.8062, "step": 1119 }, { "epoch": 0.5020357849912218, "learning_rate": 4.75660648525313e-05, "lm_loss": 1.0271, "loss": 1.0271, "step": 1120 }, { "epoch": 0.5024840312278211, "learning_rate": 4.7560852957940917e-05, "lm_loss": 1.1986, "loss": 1.1986, "step": 1121 }, { "epoch": 0.5029322774644205, "learning_rate": 4.755563577524998e-05, "lm_loss": 0.8742, "loss": 0.8742, "step": 1122 }, { "epoch": 0.5033805237010197, "learning_rate": 4.755041330568138e-05, "lm_loss": 1.6665, "loss": 1.6665, "step": 1123 }, { "epoch": 0.503828769937619, "learning_rate": 4.754518555045921e-05, "lm_loss": 2.4941, "loss": 2.4941, "step": 1124 }, { "epoch": 0.5042770161742184, "learning_rate": 4.753995251080884e-05, "lm_loss": 2.7779, "loss": 2.7779, "step": 1125 }, { "epoch": 0.5047252624108177, "learning_rate": 4.753471418795683e-05, "lm_loss": 0.904, "loss": 0.904, "step": 1126 }, { "epoch": 0.505173508647417, "learning_rate": 4.752947058313103e-05, "lm_loss": 1.0813, "loss": 1.0813, "step": 1127 }, { "epoch": 0.5056217548840163, "learning_rate": 4.752422169756048e-05, "lm_loss": 1.1217, "loss": 1.1217, "step": 1128 }, { "epoch": 0.5060700011206156, "learning_rate": 4.75189675324755e-05, "lm_loss": 1.7307, "loss": 1.7307, "step": 1129 }, { "epoch": 0.5065182473572148, "learning_rate": 4.7513708089107616e-05, "lm_loss": 2.4248, "loss": 2.4248, "step": 1130 }, { "epoch": 0.5069664935938142, "learning_rate": 4.7508443368689615e-05, "lm_loss": 1.0556, "loss": 1.0556, "step": 1131 }, { "epoch": 0.5074147398304135, "learning_rate": 4.750317337245549e-05, "lm_loss": 0.7793, "loss": 0.7793, "step": 1132 }, { "epoch": 0.5078629860670129, "learning_rate": 4.749789810164052e-05, "lm_loss": 1.0876, "loss": 1.0876, "step": 1133 }, { "epoch": 0.5083112323036121, "learning_rate": 4.749261755748116e-05, "lm_loss": 1.0912, "loss": 1.0912, "step": 1134 }, { "epoch": 0.5087594785402114, "learning_rate": 4.748733174121514e-05, "lm_loss": 3.3633, "loss": 3.3633, "step": 1135 }, { "epoch": 0.5092077247768108, "learning_rate": 4.748204065408143e-05, "lm_loss": 0.8205, "loss": 0.8205, "step": 1136 }, { "epoch": 0.50965597101341, "learning_rate": 4.7476744297320205e-05, "lm_loss": 0.9928, "loss": 0.9928, "step": 1137 }, { "epoch": 0.5101042172500093, "learning_rate": 4.7471442672172905e-05, "lm_loss": 0.9652, "loss": 0.9652, "step": 1138 }, { "epoch": 0.5105524634866087, "learning_rate": 4.746613577988219e-05, "lm_loss": 0.9624, "loss": 0.9624, "step": 1139 }, { "epoch": 0.511000709723208, "learning_rate": 4.7460823621691955e-05, "lm_loss": 1.1337, "loss": 1.1337, "step": 1140 }, { "epoch": 0.5114489559598072, "learning_rate": 4.745550619884733e-05, "lm_loss": 0.8378, "loss": 0.8378, "step": 1141 }, { "epoch": 0.5118972021964066, "learning_rate": 4.745018351259468e-05, "lm_loss": 1.3806, "loss": 1.3806, "step": 1142 }, { "epoch": 0.5123454484330059, "learning_rate": 4.7444855564181616e-05, "lm_loss": 1.0089, "loss": 1.0089, "step": 1143 }, { "epoch": 0.5127936946696051, "learning_rate": 4.743952235485696e-05, "lm_loss": 0.9166, "loss": 0.9166, "step": 1144 }, { "epoch": 0.5132419409062045, "learning_rate": 4.743418388587077e-05, "lm_loss": 1.2068, "loss": 1.2068, "step": 1145 }, { "epoch": 0.5136901871428038, "learning_rate": 4.742884015847436e-05, "lm_loss": 0.9096, "loss": 0.9096, "step": 1146 }, { "epoch": 0.5141384333794031, "learning_rate": 4.742349117392025e-05, "lm_loss": 0.7846, "loss": 0.7846, "step": 1147 }, { "epoch": 0.5145866796160024, "learning_rate": 4.741813693346221e-05, "lm_loss": 2.5449, "loss": 2.5449, "step": 1148 }, { "epoch": 0.5150349258526017, "learning_rate": 4.741277743835523e-05, "lm_loss": 1.6402, "loss": 1.6402, "step": 1149 }, { "epoch": 0.515483172089201, "learning_rate": 4.740741268985554e-05, "lm_loss": 0.9242, "loss": 0.9242, "step": 1150 }, { "epoch": 0.5159314183258004, "learning_rate": 4.74020426892206e-05, "lm_loss": 1.1072, "loss": 1.1072, "step": 1151 }, { "epoch": 0.5163796645623996, "learning_rate": 4.7396667437709096e-05, "lm_loss": 0.9556, "loss": 0.9556, "step": 1152 }, { "epoch": 0.5168279107989989, "learning_rate": 4.739128693658095e-05, "lm_loss": 1.025, "loss": 1.025, "step": 1153 }, { "epoch": 0.5172761570355983, "learning_rate": 4.7385901187097305e-05, "lm_loss": 1.3366, "loss": 1.3366, "step": 1154 }, { "epoch": 0.5177244032721975, "learning_rate": 4.738051019052056e-05, "lm_loss": 0.8893, "loss": 0.8893, "step": 1155 }, { "epoch": 0.5181726495087968, "learning_rate": 4.7375113948114304e-05, "lm_loss": 0.94, "loss": 0.94, "step": 1156 }, { "epoch": 0.5186208957453962, "learning_rate": 4.7369712461143386e-05, "lm_loss": 1.1575, "loss": 1.1575, "step": 1157 }, { "epoch": 0.5190691419819955, "learning_rate": 4.736430573087388e-05, "lm_loss": 0.8127, "loss": 0.8127, "step": 1158 }, { "epoch": 0.5195173882185947, "learning_rate": 4.7358893758573074e-05, "lm_loss": 1.2719, "loss": 1.2719, "step": 1159 }, { "epoch": 0.5199656344551941, "learning_rate": 4.7353476545509514e-05, "lm_loss": 0.5746, "loss": 0.5746, "step": 1160 }, { "epoch": 0.5204138806917934, "learning_rate": 4.734805409295293e-05, "lm_loss": 1.2509, "loss": 1.2509, "step": 1161 }, { "epoch": 0.5208621269283926, "learning_rate": 4.734262640217433e-05, "lm_loss": 1.0747, "loss": 1.0747, "step": 1162 }, { "epoch": 0.521310373164992, "learning_rate": 4.733719347444591e-05, "lm_loss": 1.0599, "loss": 1.0599, "step": 1163 }, { "epoch": 0.5217586194015913, "learning_rate": 4.733175531104111e-05, "lm_loss": 1.0372, "loss": 1.0372, "step": 1164 }, { "epoch": 0.5222068656381906, "learning_rate": 4.732631191323461e-05, "lm_loss": 0.9395, "loss": 0.9395, "step": 1165 }, { "epoch": 0.5226551118747899, "learning_rate": 4.732086328230228e-05, "lm_loss": 1.1701, "loss": 1.1701, "step": 1166 }, { "epoch": 0.5231033581113892, "learning_rate": 4.731540941952126e-05, "lm_loss": 0.9703, "loss": 0.9703, "step": 1167 }, { "epoch": 0.5235516043479885, "learning_rate": 4.730995032616988e-05, "lm_loss": 1.6865, "loss": 1.6865, "step": 1168 }, { "epoch": 0.5239998505845878, "learning_rate": 4.7304486003527725e-05, "lm_loss": 2.225, "loss": 2.225, "step": 1169 }, { "epoch": 0.5244480968211871, "learning_rate": 4.729901645287559e-05, "lm_loss": 1.2505, "loss": 1.2505, "step": 1170 }, { "epoch": 0.5248963430577864, "learning_rate": 4.729354167549549e-05, "lm_loss": 1.1744, "loss": 1.1744, "step": 1171 }, { "epoch": 0.5253445892943858, "learning_rate": 4.728806167267068e-05, "lm_loss": 0.8241, "loss": 0.8241, "step": 1172 }, { "epoch": 0.525792835530985, "learning_rate": 4.728257644568563e-05, "lm_loss": 0.9675, "loss": 0.9675, "step": 1173 }, { "epoch": 0.5262410817675843, "learning_rate": 4.727708599582604e-05, "lm_loss": 0.8675, "loss": 0.8675, "step": 1174 }, { "epoch": 0.5266893280041837, "learning_rate": 4.727159032437883e-05, "lm_loss": 0.9691, "loss": 0.9691, "step": 1175 }, { "epoch": 0.5271375742407829, "learning_rate": 4.7266089432632155e-05, "lm_loss": 1.2396, "loss": 1.2396, "step": 1176 }, { "epoch": 0.5275858204773822, "learning_rate": 4.7260583321875366e-05, "lm_loss": 1.0134, "loss": 1.0134, "step": 1177 }, { "epoch": 0.5280340667139816, "learning_rate": 4.725507199339907e-05, "lm_loss": 0.8909, "loss": 0.8909, "step": 1178 }, { "epoch": 0.5284823129505809, "learning_rate": 4.7249555448495075e-05, "lm_loss": 0.8961, "loss": 0.8961, "step": 1179 }, { "epoch": 0.5289305591871801, "learning_rate": 4.724403368845643e-05, "lm_loss": 0.8095, "loss": 0.8095, "step": 1180 }, { "epoch": 0.5293788054237795, "learning_rate": 4.7238506714577365e-05, "lm_loss": 2.3624, "loss": 2.3624, "step": 1181 }, { "epoch": 0.5298270516603788, "learning_rate": 4.7232974528153403e-05, "lm_loss": 1.6502, "loss": 1.6502, "step": 1182 }, { "epoch": 0.530275297896978, "learning_rate": 4.7227437130481233e-05, "lm_loss": 1.0058, "loss": 1.0058, "step": 1183 }, { "epoch": 0.5307235441335774, "learning_rate": 4.722189452285877e-05, "lm_loss": 0.9349, "loss": 0.9349, "step": 1184 }, { "epoch": 0.5311717903701767, "learning_rate": 4.721634670658517e-05, "lm_loss": 1.0852, "loss": 1.0852, "step": 1185 }, { "epoch": 0.531620036606776, "learning_rate": 4.721079368296081e-05, "lm_loss": 1.1269, "loss": 1.1269, "step": 1186 }, { "epoch": 0.5320682828433753, "learning_rate": 4.720523545328726e-05, "lm_loss": 1.0784, "loss": 1.0784, "step": 1187 }, { "epoch": 0.5325165290799746, "learning_rate": 4.719967201886734e-05, "lm_loss": 2.831, "loss": 2.831, "step": 1188 }, { "epoch": 0.5329647753165739, "learning_rate": 4.7194103381005067e-05, "lm_loss": 0.9729, "loss": 0.9729, "step": 1189 }, { "epoch": 0.5334130215531732, "learning_rate": 4.718852954100571e-05, "lm_loss": 0.938, "loss": 0.938, "step": 1190 }, { "epoch": 0.5338612677897725, "learning_rate": 4.718295050017572e-05, "lm_loss": 1.1351, "loss": 1.1351, "step": 1191 }, { "epoch": 0.5343095140263718, "learning_rate": 4.717736625982279e-05, "lm_loss": 0.9134, "loss": 0.9134, "step": 1192 }, { "epoch": 0.5347577602629712, "learning_rate": 4.717177682125583e-05, "lm_loss": 0.8162, "loss": 0.8162, "step": 1193 }, { "epoch": 0.5352060064995704, "learning_rate": 4.7166182185784954e-05, "lm_loss": 1.2979, "loss": 1.2979, "step": 1194 }, { "epoch": 0.5356542527361697, "learning_rate": 4.71605823547215e-05, "lm_loss": 1.0362, "loss": 1.0362, "step": 1195 }, { "epoch": 0.5361024989727691, "learning_rate": 4.7154977329378054e-05, "lm_loss": 0.7825, "loss": 0.7825, "step": 1196 }, { "epoch": 0.5365507452093683, "learning_rate": 4.7149367111068363e-05, "lm_loss": 1.2432, "loss": 1.2432, "step": 1197 }, { "epoch": 0.5369989914459676, "learning_rate": 4.714375170110744e-05, "lm_loss": 0.9705, "loss": 0.9705, "step": 1198 }, { "epoch": 0.537447237682567, "learning_rate": 4.713813110081149e-05, "lm_loss": 1.2319, "loss": 1.2319, "step": 1199 }, { "epoch": 0.5378954839191663, "learning_rate": 4.713250531149794e-05, "lm_loss": 0.8366, "loss": 0.8366, "step": 1200 }, { "epoch": 0.5383437301557655, "learning_rate": 4.712687433448544e-05, "lm_loss": 0.8421, "loss": 0.8421, "step": 1201 }, { "epoch": 0.5387919763923649, "learning_rate": 4.712123817109385e-05, "lm_loss": 1.201, "loss": 1.201, "step": 1202 }, { "epoch": 0.5392402226289642, "learning_rate": 4.7115596822644235e-05, "lm_loss": 0.9213, "loss": 0.9213, "step": 1203 }, { "epoch": 0.5396884688655634, "learning_rate": 4.710995029045889e-05, "lm_loss": 0.9125, "loss": 0.9125, "step": 1204 }, { "epoch": 0.5401367151021628, "learning_rate": 4.7104298575861336e-05, "lm_loss": 1.2276, "loss": 1.2276, "step": 1205 }, { "epoch": 0.5405849613387621, "learning_rate": 4.709864168017627e-05, "lm_loss": 2.7189, "loss": 2.7189, "step": 1206 }, { "epoch": 0.5410332075753614, "learning_rate": 4.7092979604729646e-05, "lm_loss": 1.1623, "loss": 1.1623, "step": 1207 }, { "epoch": 0.5414814538119607, "learning_rate": 4.7087312350848614e-05, "lm_loss": 0.8707, "loss": 0.8707, "step": 1208 }, { "epoch": 0.54192970004856, "learning_rate": 4.708163991986152e-05, "lm_loss": 1.0313, "loss": 1.0313, "step": 1209 }, { "epoch": 0.5423779462851593, "learning_rate": 4.7075962313097954e-05, "lm_loss": 0.8995, "loss": 0.8995, "step": 1210 }, { "epoch": 0.5428261925217586, "learning_rate": 4.707027953188871e-05, "lm_loss": 0.8529, "loss": 0.8529, "step": 1211 }, { "epoch": 0.5432744387583579, "learning_rate": 4.706459157756578e-05, "lm_loss": 3.1563, "loss": 3.1563, "step": 1212 }, { "epoch": 0.5437226849949572, "learning_rate": 4.7058898451462387e-05, "lm_loss": 1.0158, "loss": 1.0158, "step": 1213 }, { "epoch": 0.5441709312315566, "learning_rate": 4.7053200154912946e-05, "lm_loss": 1.1151, "loss": 1.1151, "step": 1214 }, { "epoch": 0.5446191774681558, "learning_rate": 4.7047496689253114e-05, "lm_loss": 0.8632, "loss": 0.8632, "step": 1215 }, { "epoch": 0.5450674237047551, "learning_rate": 4.704178805581972e-05, "lm_loss": 1.1266, "loss": 1.1266, "step": 1216 }, { "epoch": 0.5455156699413545, "learning_rate": 4.703607425595086e-05, "lm_loss": 0.8941, "loss": 0.8941, "step": 1217 }, { "epoch": 0.5459639161779537, "learning_rate": 4.7030355290985775e-05, "lm_loss": 0.8671, "loss": 0.8671, "step": 1218 }, { "epoch": 0.546412162414553, "learning_rate": 4.7024631162264965e-05, "lm_loss": 1.0195, "loss": 1.0195, "step": 1219 }, { "epoch": 0.5468604086511524, "learning_rate": 4.7018901871130114e-05, "lm_loss": 0.8989, "loss": 0.8989, "step": 1220 }, { "epoch": 0.5473086548877517, "learning_rate": 4.701316741892414e-05, "lm_loss": 1.1035, "loss": 1.1035, "step": 1221 }, { "epoch": 0.5477569011243509, "learning_rate": 4.700742780699115e-05, "lm_loss": 0.8243, "loss": 0.8243, "step": 1222 }, { "epoch": 0.5482051473609503, "learning_rate": 4.700168303667647e-05, "lm_loss": 1.2048, "loss": 1.2048, "step": 1223 }, { "epoch": 0.5486533935975496, "learning_rate": 4.6995933109326616e-05, "lm_loss": 0.9478, "loss": 0.9478, "step": 1224 }, { "epoch": 0.549101639834149, "learning_rate": 4.699017802628936e-05, "lm_loss": 1.0486, "loss": 1.0486, "step": 1225 }, { "epoch": 0.5495498860707482, "learning_rate": 4.698441778891363e-05, "lm_loss": 2.212, "loss": 2.212, "step": 1226 }, { "epoch": 0.5499981323073475, "learning_rate": 4.6978652398549574e-05, "lm_loss": 1.5688, "loss": 1.5688, "step": 1227 }, { "epoch": 0.5504463785439468, "learning_rate": 4.697288185654859e-05, "lm_loss": 0.9507, "loss": 0.9507, "step": 1228 }, { "epoch": 0.5508946247805461, "learning_rate": 4.696710616426323e-05, "lm_loss": 1.146, "loss": 1.146, "step": 1229 }, { "epoch": 0.5513428710171454, "learning_rate": 4.6961325323047276e-05, "lm_loss": 2.5819, "loss": 2.5819, "step": 1230 }, { "epoch": 0.5517911172537447, "learning_rate": 4.6955539334255716e-05, "lm_loss": 0.7948, "loss": 0.7948, "step": 1231 }, { "epoch": 0.5522393634903441, "learning_rate": 4.694974819924475e-05, "lm_loss": 2.3687, "loss": 2.3687, "step": 1232 }, { "epoch": 0.5526876097269433, "learning_rate": 4.694395191937177e-05, "lm_loss": 1.5147, "loss": 1.5147, "step": 1233 }, { "epoch": 0.5531358559635426, "learning_rate": 4.693815049599539e-05, "lm_loss": 1.1941, "loss": 1.1941, "step": 1234 }, { "epoch": 0.553584102200142, "learning_rate": 4.693234393047541e-05, "lm_loss": 0.6815, "loss": 0.6815, "step": 1235 }, { "epoch": 0.5540323484367412, "learning_rate": 4.692653222417286e-05, "lm_loss": 2.7737, "loss": 2.7737, "step": 1236 }, { "epoch": 0.5544805946733405, "learning_rate": 4.692071537844995e-05, "lm_loss": 0.9471, "loss": 0.9471, "step": 1237 }, { "epoch": 0.5549288409099399, "learning_rate": 4.6914893394670124e-05, "lm_loss": 0.7986, "loss": 0.7986, "step": 1238 }, { "epoch": 0.5553770871465392, "learning_rate": 4.690906627419799e-05, "lm_loss": 1.056, "loss": 1.056, "step": 1239 }, { "epoch": 0.5558253333831384, "learning_rate": 4.69032340183994e-05, "lm_loss": 0.9896, "loss": 0.9896, "step": 1240 }, { "epoch": 0.5562735796197378, "learning_rate": 4.689739662864139e-05, "lm_loss": 0.8906, "loss": 0.8906, "step": 1241 }, { "epoch": 0.5567218258563371, "learning_rate": 4.689155410629219e-05, "lm_loss": 1.13, "loss": 1.13, "step": 1242 }, { "epoch": 0.5571700720929363, "learning_rate": 4.6885706452721255e-05, "lm_loss": 2.5784, "loss": 2.5784, "step": 1243 }, { "epoch": 0.5576183183295357, "learning_rate": 4.687985366929924e-05, "lm_loss": 1.2583, "loss": 1.2583, "step": 1244 }, { "epoch": 0.558066564566135, "learning_rate": 4.687399575739798e-05, "lm_loss": 0.8271, "loss": 0.8271, "step": 1245 }, { "epoch": 0.5585148108027344, "learning_rate": 4.686813271839054e-05, "lm_loss": 0.9313, "loss": 0.9313, "step": 1246 }, { "epoch": 0.5589630570393336, "learning_rate": 4.6862264553651155e-05, "lm_loss": 0.9817, "loss": 0.9817, "step": 1247 }, { "epoch": 0.5594113032759329, "learning_rate": 4.68563912645553e-05, "lm_loss": 0.866, "loss": 0.866, "step": 1248 }, { "epoch": 0.5598595495125323, "learning_rate": 4.685051285247962e-05, "lm_loss": 0.9767, "loss": 0.9767, "step": 1249 }, { "epoch": 0.5603077957491315, "learning_rate": 4.684462931880198e-05, "lm_loss": 1.037, "loss": 1.037, "step": 1250 }, { "epoch": 0.5607560419857308, "learning_rate": 4.6838740664901435e-05, "lm_loss": 0.8824, "loss": 0.8824, "step": 1251 }, { "epoch": 0.5612042882223301, "learning_rate": 4.683284689215823e-05, "lm_loss": 2.6664, "loss": 2.6664, "step": 1252 }, { "epoch": 0.5616525344589295, "learning_rate": 4.682694800195385e-05, "lm_loss": 0.9942, "loss": 0.9942, "step": 1253 }, { "epoch": 0.5621007806955287, "learning_rate": 4.6821043995670935e-05, "lm_loss": 0.8903, "loss": 0.8903, "step": 1254 }, { "epoch": 0.562549026932128, "learning_rate": 4.6815134874693335e-05, "lm_loss": 0.7809, "loss": 0.7809, "step": 1255 }, { "epoch": 0.5629972731687274, "learning_rate": 4.680922064040612e-05, "lm_loss": 1.267, "loss": 1.267, "step": 1256 }, { "epoch": 0.5634455194053266, "learning_rate": 4.680330129419553e-05, "lm_loss": 0.7551, "loss": 0.7551, "step": 1257 }, { "epoch": 0.563893765641926, "learning_rate": 4.679737683744903e-05, "lm_loss": 1.1736, "loss": 1.1736, "step": 1258 }, { "epoch": 0.5643420118785253, "learning_rate": 4.6791447271555255e-05, "lm_loss": 0.7862, "loss": 0.7862, "step": 1259 }, { "epoch": 0.5647902581151246, "learning_rate": 4.678551259790407e-05, "lm_loss": 0.8809, "loss": 0.8809, "step": 1260 }, { "epoch": 0.5652385043517238, "learning_rate": 4.6779572817886495e-05, "lm_loss": 1.0982, "loss": 1.0982, "step": 1261 }, { "epoch": 0.5656867505883232, "learning_rate": 4.67736279328948e-05, "lm_loss": 0.6691, "loss": 0.6691, "step": 1262 }, { "epoch": 0.5661349968249225, "learning_rate": 4.6767677944322406e-05, "lm_loss": 0.9966, "loss": 0.9966, "step": 1263 }, { "epoch": 0.5665832430615217, "learning_rate": 4.6761722853563954e-05, "lm_loss": 1.0231, "loss": 1.0231, "step": 1264 }, { "epoch": 0.5670314892981211, "learning_rate": 4.675576266201527e-05, "lm_loss": 1.985, "loss": 1.985, "step": 1265 }, { "epoch": 0.5674797355347204, "learning_rate": 4.674979737107338e-05, "lm_loss": 1.3945, "loss": 1.3945, "step": 1266 }, { "epoch": 0.5679279817713198, "learning_rate": 4.67438269821365e-05, "lm_loss": 2.5416, "loss": 2.5416, "step": 1267 }, { "epoch": 0.568376228007919, "learning_rate": 4.673785149660406e-05, "lm_loss": 1.0426, "loss": 1.0426, "step": 1268 }, { "epoch": 0.5688244742445183, "learning_rate": 4.673187091587666e-05, "lm_loss": 0.7598, "loss": 0.7598, "step": 1269 }, { "epoch": 0.5692727204811177, "learning_rate": 4.672588524135611e-05, "lm_loss": 0.9942, "loss": 0.9942, "step": 1270 }, { "epoch": 0.5697209667177169, "learning_rate": 4.671989447444539e-05, "lm_loss": 0.9841, "loss": 0.9841, "step": 1271 }, { "epoch": 0.5701692129543162, "learning_rate": 4.6713898616548724e-05, "lm_loss": 0.7819, "loss": 0.7819, "step": 1272 }, { "epoch": 0.5706174591909156, "learning_rate": 4.670789766907148e-05, "lm_loss": 1.0074, "loss": 1.0074, "step": 1273 }, { "epoch": 0.5710657054275149, "learning_rate": 4.670189163342023e-05, "lm_loss": 0.6946, "loss": 0.6946, "step": 1274 }, { "epoch": 0.5715139516641141, "learning_rate": 4.669588051100276e-05, "lm_loss": 1.0025, "loss": 1.0025, "step": 1275 }, { "epoch": 0.5719621979007135, "learning_rate": 4.668986430322803e-05, "lm_loss": 1.4072, "loss": 1.4072, "step": 1276 }, { "epoch": 0.5724104441373128, "learning_rate": 4.668384301150618e-05, "lm_loss": 2.0115, "loss": 2.0115, "step": 1277 }, { "epoch": 0.572858690373912, "learning_rate": 4.667781663724858e-05, "lm_loss": 0.8179, "loss": 0.8179, "step": 1278 }, { "epoch": 0.5733069366105114, "learning_rate": 4.6671785181867754e-05, "lm_loss": 1.1094, "loss": 1.1094, "step": 1279 }, { "epoch": 0.5737551828471107, "learning_rate": 4.666574864677744e-05, "lm_loss": 0.8525, "loss": 0.8525, "step": 1280 }, { "epoch": 0.57420342908371, "learning_rate": 4.6659707033392555e-05, "lm_loss": 0.9634, "loss": 0.9634, "step": 1281 }, { "epoch": 0.5746516753203093, "learning_rate": 4.6653660343129206e-05, "lm_loss": 1.11, "loss": 1.11, "step": 1282 }, { "epoch": 0.5750999215569086, "learning_rate": 4.664760857740469e-05, "lm_loss": 1.8325, "loss": 1.8325, "step": 1283 }, { "epoch": 0.5755481677935079, "learning_rate": 4.6641551737637514e-05, "lm_loss": 2.712, "loss": 2.712, "step": 1284 }, { "epoch": 0.5759964140301072, "learning_rate": 4.663548982524734e-05, "lm_loss": 0.9665, "loss": 0.9665, "step": 1285 }, { "epoch": 0.5764446602667065, "learning_rate": 4.6629422841655045e-05, "lm_loss": 1.071, "loss": 1.071, "step": 1286 }, { "epoch": 0.5768929065033058, "learning_rate": 4.662335078828268e-05, "lm_loss": 0.8176, "loss": 0.8176, "step": 1287 }, { "epoch": 0.5773411527399052, "learning_rate": 4.6617273666553494e-05, "lm_loss": 0.8243, "loss": 0.8243, "step": 1288 }, { "epoch": 0.5777893989765044, "learning_rate": 4.6611191477891924e-05, "lm_loss": 0.9498, "loss": 0.9498, "step": 1289 }, { "epoch": 0.5782376452131037, "learning_rate": 4.660510422372358e-05, "lm_loss": 2.4796, "loss": 2.4796, "step": 1290 }, { "epoch": 0.5786858914497031, "learning_rate": 4.659901190547529e-05, "lm_loss": 1.4058, "loss": 1.4058, "step": 1291 }, { "epoch": 0.5791341376863024, "learning_rate": 4.659291452457502e-05, "lm_loss": 1.9683, "loss": 1.9683, "step": 1292 }, { "epoch": 0.5795823839229016, "learning_rate": 4.658681208245198e-05, "lm_loss": 0.8497, "loss": 0.8497, "step": 1293 }, { "epoch": 0.580030630159501, "learning_rate": 4.6580704580536525e-05, "lm_loss": 1.0365, "loss": 1.0365, "step": 1294 }, { "epoch": 0.5804788763961003, "learning_rate": 4.657459202026021e-05, "lm_loss": 0.9384, "loss": 0.9384, "step": 1295 }, { "epoch": 0.5809271226326995, "learning_rate": 4.6568474403055785e-05, "lm_loss": 0.8582, "loss": 0.8582, "step": 1296 }, { "epoch": 0.5813753688692989, "learning_rate": 4.656235173035716e-05, "lm_loss": 0.9974, "loss": 0.9974, "step": 1297 }, { "epoch": 0.5818236151058982, "learning_rate": 4.655622400359945e-05, "lm_loss": 0.9012, "loss": 0.9012, "step": 1298 }, { "epoch": 0.5822718613424975, "learning_rate": 4.655009122421895e-05, "lm_loss": 0.844, "loss": 0.844, "step": 1299 }, { "epoch": 0.5827201075790968, "learning_rate": 4.654395339365313e-05, "lm_loss": 1.0374, "loss": 1.0374, "step": 1300 }, { "epoch": 0.5831683538156961, "learning_rate": 4.653781051334067e-05, "lm_loss": 0.8645, "loss": 0.8645, "step": 1301 }, { "epoch": 0.5836166000522954, "learning_rate": 4.653166258472142e-05, "lm_loss": 0.8732, "loss": 0.8732, "step": 1302 }, { "epoch": 0.5840648462888947, "learning_rate": 4.6525509609236394e-05, "lm_loss": 0.8014, "loss": 0.8014, "step": 1303 }, { "epoch": 0.584513092525494, "learning_rate": 4.651935158832781e-05, "lm_loss": 1.0571, "loss": 1.0571, "step": 1304 }, { "epoch": 0.5849613387620933, "learning_rate": 4.651318852343907e-05, "lm_loss": 0.8103, "loss": 0.8103, "step": 1305 }, { "epoch": 0.5854095849986927, "learning_rate": 4.650702041601473e-05, "lm_loss": 1.4046, "loss": 1.4046, "step": 1306 }, { "epoch": 0.5858578312352919, "learning_rate": 4.650084726750058e-05, "lm_loss": 1.8096, "loss": 1.8096, "step": 1307 }, { "epoch": 0.5863060774718912, "learning_rate": 4.6494669079343544e-05, "lm_loss": 0.9004, "loss": 0.9004, "step": 1308 }, { "epoch": 0.5867543237084906, "learning_rate": 4.648848585299175e-05, "lm_loss": 2.3863, "loss": 2.3863, "step": 1309 }, { "epoch": 0.5872025699450898, "learning_rate": 4.648229758989449e-05, "lm_loss": 0.8154, "loss": 0.8154, "step": 1310 }, { "epoch": 0.5876508161816891, "learning_rate": 4.647610429150227e-05, "lm_loss": 0.8787, "loss": 0.8787, "step": 1311 }, { "epoch": 0.5880990624182885, "learning_rate": 4.6469905959266724e-05, "lm_loss": 2.3055, "loss": 2.3055, "step": 1312 }, { "epoch": 0.5885473086548878, "learning_rate": 4.646370259464072e-05, "lm_loss": 0.5946, "loss": 0.5946, "step": 1313 }, { "epoch": 0.588995554891487, "learning_rate": 4.6457494199078286e-05, "lm_loss": 1.0291, "loss": 1.0291, "step": 1314 }, { "epoch": 0.5894438011280864, "learning_rate": 4.64512807740346e-05, "lm_loss": 2.4622, "loss": 2.4622, "step": 1315 }, { "epoch": 0.5898920473646857, "learning_rate": 4.644506232096607e-05, "lm_loss": 0.6705, "loss": 0.6705, "step": 1316 }, { "epoch": 0.5903402936012849, "learning_rate": 4.643883884133024e-05, "lm_loss": 1.1295, "loss": 1.1295, "step": 1317 }, { "epoch": 0.5907885398378843, "learning_rate": 4.643261033658585e-05, "lm_loss": 0.9203, "loss": 0.9203, "step": 1318 }, { "epoch": 0.5912367860744836, "learning_rate": 4.6426376808192815e-05, "lm_loss": 0.8477, "loss": 0.8477, "step": 1319 }, { "epoch": 0.5916850323110829, "learning_rate": 4.642013825761225e-05, "lm_loss": 0.9247, "loss": 0.9247, "step": 1320 }, { "epoch": 0.5921332785476822, "learning_rate": 4.641389468630639e-05, "lm_loss": 0.9814, "loss": 0.9814, "step": 1321 }, { "epoch": 0.5925815247842815, "learning_rate": 4.640764609573871e-05, "lm_loss": 1.158, "loss": 1.158, "step": 1322 }, { "epoch": 0.5930297710208808, "learning_rate": 4.640139248737383e-05, "lm_loss": 0.7674, "loss": 0.7674, "step": 1323 }, { "epoch": 0.5934780172574801, "learning_rate": 4.639513386267754e-05, "lm_loss": 1.0829, "loss": 1.0829, "step": 1324 }, { "epoch": 0.5939262634940794, "learning_rate": 4.638887022311683e-05, "lm_loss": 0.7726, "loss": 0.7726, "step": 1325 }, { "epoch": 0.5943745097306787, "learning_rate": 4.638260157015985e-05, "lm_loss": 0.8265, "loss": 0.8265, "step": 1326 }, { "epoch": 0.5948227559672781, "learning_rate": 4.6376327905275914e-05, "lm_loss": 1.06, "loss": 1.06, "step": 1327 }, { "epoch": 0.5952710022038773, "learning_rate": 4.637004922993553e-05, "lm_loss": 0.6077, "loss": 0.6077, "step": 1328 }, { "epoch": 0.5957192484404766, "learning_rate": 4.636376554561038e-05, "lm_loss": 0.9387, "loss": 0.9387, "step": 1329 }, { "epoch": 0.596167494677076, "learning_rate": 4.635747685377331e-05, "lm_loss": 1.0741, "loss": 1.0741, "step": 1330 }, { "epoch": 0.5966157409136752, "learning_rate": 4.6351183155898336e-05, "lm_loss": 0.8822, "loss": 0.8822, "step": 1331 }, { "epoch": 0.5970639871502745, "learning_rate": 4.6344884453460674e-05, "lm_loss": 0.8183, "loss": 0.8183, "step": 1332 }, { "epoch": 0.5975122333868739, "learning_rate": 4.6338580747936675e-05, "lm_loss": 0.7475, "loss": 0.7475, "step": 1333 }, { "epoch": 0.5979604796234732, "learning_rate": 4.6332272040803895e-05, "lm_loss": 1.1712, "loss": 1.1712, "step": 1334 }, { "epoch": 0.5984087258600724, "learning_rate": 4.6325958333541044e-05, "lm_loss": 0.7714, "loss": 0.7714, "step": 1335 }, { "epoch": 0.5988569720966718, "learning_rate": 4.631963962762801e-05, "lm_loss": 1.0127, "loss": 1.0127, "step": 1336 }, { "epoch": 0.5993052183332711, "learning_rate": 4.6313315924545844e-05, "lm_loss": 1.6432, "loss": 1.6432, "step": 1337 }, { "epoch": 0.5997534645698703, "learning_rate": 4.630698722577679e-05, "lm_loss": 1.3562, "loss": 1.3562, "step": 1338 }, { "epoch": 0.6002017108064697, "learning_rate": 4.6300653532804236e-05, "lm_loss": 2.1481, "loss": 2.1481, "step": 1339 }, { "epoch": 0.600649957043069, "learning_rate": 4.629431484711277e-05, "lm_loss": 0.7027, "loss": 0.7027, "step": 1340 }, { "epoch": 0.6010982032796683, "learning_rate": 4.628797117018812e-05, "lm_loss": 0.9983, "loss": 0.9983, "step": 1341 }, { "epoch": 0.6015464495162676, "learning_rate": 4.62816225035172e-05, "lm_loss": 1.0004, "loss": 1.0004, "step": 1342 }, { "epoch": 0.6019946957528669, "learning_rate": 4.627526884858809e-05, "lm_loss": 0.7277, "loss": 0.7277, "step": 1343 }, { "epoch": 0.6024429419894662, "learning_rate": 4.626891020689006e-05, "lm_loss": 0.8348, "loss": 0.8348, "step": 1344 }, { "epoch": 0.6028911882260655, "learning_rate": 4.62625465799135e-05, "lm_loss": 0.8222, "loss": 0.8222, "step": 1345 }, { "epoch": 0.6033394344626648, "learning_rate": 4.6256177969150016e-05, "lm_loss": 0.7828, "loss": 0.7828, "step": 1346 }, { "epoch": 0.6037876806992641, "learning_rate": 4.624980437609236e-05, "lm_loss": 2.4327, "loss": 2.4327, "step": 1347 }, { "epoch": 0.6042359269358635, "learning_rate": 4.6243425802234466e-05, "lm_loss": 0.7104, "loss": 0.7104, "step": 1348 }, { "epoch": 0.6046841731724627, "learning_rate": 4.6237042249071406e-05, "lm_loss": 1.0723, "loss": 1.0723, "step": 1349 }, { "epoch": 0.605132419409062, "learning_rate": 4.623065371809945e-05, "lm_loss": 0.7789, "loss": 0.7789, "step": 1350 }, { "epoch": 0.6055806656456614, "learning_rate": 4.6224260210816026e-05, "lm_loss": 0.8098, "loss": 0.8098, "step": 1351 }, { "epoch": 0.6060289118822606, "learning_rate": 4.621786172871972e-05, "lm_loss": 1.1075, "loss": 1.1075, "step": 1352 }, { "epoch": 0.6064771581188599, "learning_rate": 4.621145827331029e-05, "lm_loss": 0.8724, "loss": 0.8724, "step": 1353 }, { "epoch": 0.6069254043554593, "learning_rate": 4.6205049846088664e-05, "lm_loss": 1.1021, "loss": 1.1021, "step": 1354 }, { "epoch": 0.6073736505920586, "learning_rate": 4.619863644855692e-05, "lm_loss": 0.7691, "loss": 0.7691, "step": 1355 }, { "epoch": 0.6078218968286578, "learning_rate": 4.619221808221833e-05, "lm_loss": 0.8547, "loss": 0.8547, "step": 1356 }, { "epoch": 0.6082701430652572, "learning_rate": 4.6185794748577305e-05, "lm_loss": 0.9846, "loss": 0.9846, "step": 1357 }, { "epoch": 0.6087183893018565, "learning_rate": 4.617936644913942e-05, "lm_loss": 0.8184, "loss": 0.8184, "step": 1358 }, { "epoch": 0.6091666355384557, "learning_rate": 4.617293318541143e-05, "lm_loss": 0.9012, "loss": 0.9012, "step": 1359 }, { "epoch": 0.6096148817750551, "learning_rate": 4.6166494958901235e-05, "lm_loss": 0.9923, "loss": 0.9923, "step": 1360 }, { "epoch": 0.6100631280116544, "learning_rate": 4.616005177111792e-05, "lm_loss": 0.6902, "loss": 0.6902, "step": 1361 }, { "epoch": 0.6105113742482537, "learning_rate": 4.615360362357172e-05, "lm_loss": 0.9637, "loss": 0.9637, "step": 1362 }, { "epoch": 0.610959620484853, "learning_rate": 4.614715051777404e-05, "lm_loss": 0.8768, "loss": 0.8768, "step": 1363 }, { "epoch": 0.6114078667214523, "learning_rate": 4.614069245523742e-05, "lm_loss": 0.9098, "loss": 0.9098, "step": 1364 }, { "epoch": 0.6118561129580516, "learning_rate": 4.6134229437475606e-05, "lm_loss": 0.817, "loss": 0.817, "step": 1365 }, { "epoch": 0.612304359194651, "learning_rate": 4.612776146600347e-05, "lm_loss": 0.8853, "loss": 0.8853, "step": 1366 }, { "epoch": 0.6127526054312502, "learning_rate": 4.612128854233706e-05, "lm_loss": 0.7542, "loss": 0.7542, "step": 1367 }, { "epoch": 0.6132008516678495, "learning_rate": 4.611481066799358e-05, "lm_loss": 0.9891, "loss": 0.9891, "step": 1368 }, { "epoch": 0.6136490979044489, "learning_rate": 4.610832784449141e-05, "lm_loss": 0.8831, "loss": 0.8831, "step": 1369 }, { "epoch": 0.6140973441410481, "learning_rate": 4.6101840073350054e-05, "lm_loss": 1.0617, "loss": 1.0617, "step": 1370 }, { "epoch": 0.6145455903776474, "learning_rate": 4.6095347356090226e-05, "lm_loss": 0.6837, "loss": 0.6837, "step": 1371 }, { "epoch": 0.6149938366142468, "learning_rate": 4.6088849694233746e-05, "lm_loss": 0.7285, "loss": 0.7285, "step": 1372 }, { "epoch": 0.6154420828508461, "learning_rate": 4.608234708930363e-05, "lm_loss": 1.0609, "loss": 1.0609, "step": 1373 }, { "epoch": 0.6158903290874453, "learning_rate": 4.607583954282405e-05, "lm_loss": 0.6989, "loss": 0.6989, "step": 1374 }, { "epoch": 0.6163385753240447, "learning_rate": 4.6069327056320315e-05, "lm_loss": 0.9103, "loss": 0.9103, "step": 1375 }, { "epoch": 0.616786821560644, "learning_rate": 4.606280963131891e-05, "lm_loss": 0.7846, "loss": 0.7846, "step": 1376 }, { "epoch": 0.6172350677972432, "learning_rate": 4.605628726934747e-05, "lm_loss": 0.8311, "loss": 0.8311, "step": 1377 }, { "epoch": 0.6176833140338426, "learning_rate": 4.6049759971934795e-05, "lm_loss": 0.7786, "loss": 0.7786, "step": 1378 }, { "epoch": 0.6181315602704419, "learning_rate": 4.604322774061083e-05, "lm_loss": 0.7821, "loss": 0.7821, "step": 1379 }, { "epoch": 0.6185798065070413, "learning_rate": 4.6036690576906685e-05, "lm_loss": 0.9348, "loss": 0.9348, "step": 1380 }, { "epoch": 0.6190280527436405, "learning_rate": 4.603014848235462e-05, "lm_loss": 1.4175, "loss": 1.4175, "step": 1381 }, { "epoch": 0.6194762989802398, "learning_rate": 4.6023601458488066e-05, "lm_loss": 1.6415, "loss": 1.6415, "step": 1382 }, { "epoch": 0.6199245452168392, "learning_rate": 4.601704950684158e-05, "lm_loss": 0.9344, "loss": 0.9344, "step": 1383 }, { "epoch": 0.6203727914534384, "learning_rate": 4.6010492628950906e-05, "lm_loss": 0.8886, "loss": 0.8886, "step": 1384 }, { "epoch": 0.6208210376900377, "learning_rate": 4.6003930826352926e-05, "lm_loss": 0.7578, "loss": 0.7578, "step": 1385 }, { "epoch": 0.621269283926637, "learning_rate": 4.599736410058568e-05, "lm_loss": 0.8663, "loss": 0.8663, "step": 1386 }, { "epoch": 0.6217175301632364, "learning_rate": 4.599079245318836e-05, "lm_loss": 0.7572, "loss": 0.7572, "step": 1387 }, { "epoch": 0.6221657763998356, "learning_rate": 4.59842158857013e-05, "lm_loss": 0.9778, "loss": 0.9778, "step": 1388 }, { "epoch": 0.622614022636435, "learning_rate": 4.597763439966602e-05, "lm_loss": 0.9911, "loss": 0.9911, "step": 1389 }, { "epoch": 0.6230622688730343, "learning_rate": 4.597104799662517e-05, "lm_loss": 1.2999, "loss": 1.2999, "step": 1390 }, { "epoch": 0.6235105151096335, "learning_rate": 4.596445667812254e-05, "lm_loss": 1.8836, "loss": 1.8836, "step": 1391 }, { "epoch": 0.6239587613462328, "learning_rate": 4.59578604457031e-05, "lm_loss": 0.7364, "loss": 0.7364, "step": 1392 }, { "epoch": 0.6244070075828322, "learning_rate": 4.595125930091295e-05, "lm_loss": 1.0658, "loss": 1.0658, "step": 1393 }, { "epoch": 0.6248552538194315, "learning_rate": 4.5944653245299365e-05, "lm_loss": 1.59, "loss": 1.59, "step": 1394 }, { "epoch": 0.6253035000560307, "learning_rate": 4.593804228041074e-05, "lm_loss": 1.3516, "loss": 1.3516, "step": 1395 }, { "epoch": 0.6257517462926301, "learning_rate": 4.593142640779665e-05, "lm_loss": 1.952, "loss": 1.952, "step": 1396 }, { "epoch": 0.6261999925292294, "learning_rate": 4.5924805629007796e-05, "lm_loss": 0.9288, "loss": 0.9288, "step": 1397 }, { "epoch": 0.6266482387658286, "learning_rate": 4.591817994559605e-05, "lm_loss": 0.8778, "loss": 0.8778, "step": 1398 }, { "epoch": 0.627096485002428, "learning_rate": 4.591154935911443e-05, "lm_loss": 0.6606, "loss": 0.6606, "step": 1399 }, { "epoch": 0.6275447312390273, "learning_rate": 4.590491387111708e-05, "lm_loss": 0.8335, "loss": 0.8335, "step": 1400 }, { "epoch": 0.6279929774756267, "learning_rate": 4.589827348315932e-05, "lm_loss": 0.9196, "loss": 0.9196, "step": 1401 }, { "epoch": 0.6284412237122259, "learning_rate": 4.589162819679761e-05, "lm_loss": 0.7929, "loss": 0.7929, "step": 1402 }, { "epoch": 0.6288894699488252, "learning_rate": 4.588497801358955e-05, "lm_loss": 1.712, "loss": 1.712, "step": 1403 }, { "epoch": 0.6293377161854246, "learning_rate": 4.5878322935093905e-05, "lm_loss": 1.2634, "loss": 1.2634, "step": 1404 }, { "epoch": 0.6297859624220238, "learning_rate": 4.587166296287057e-05, "lm_loss": 0.9358, "loss": 0.9358, "step": 1405 }, { "epoch": 0.6302342086586231, "learning_rate": 4.5864998098480594e-05, "lm_loss": 0.8245, "loss": 0.8245, "step": 1406 }, { "epoch": 0.6306824548952225, "learning_rate": 4.5858328343486176e-05, "lm_loss": 0.903, "loss": 0.903, "step": 1407 }, { "epoch": 0.6311307011318218, "learning_rate": 4.585165369945066e-05, "lm_loss": 1.0567, "loss": 1.0567, "step": 1408 }, { "epoch": 0.631578947368421, "learning_rate": 4.584497416793854e-05, "lm_loss": 1.5183, "loss": 1.5183, "step": 1409 }, { "epoch": 0.6320271936050204, "learning_rate": 4.583828975051543e-05, "lm_loss": 1.3446, "loss": 1.3446, "step": 1410 }, { "epoch": 0.6324754398416197, "learning_rate": 4.583160044874811e-05, "lm_loss": 0.8469, "loss": 0.8469, "step": 1411 }, { "epoch": 0.6329236860782189, "learning_rate": 4.582490626420454e-05, "lm_loss": 1.0417, "loss": 1.0417, "step": 1412 }, { "epoch": 0.6333719323148183, "learning_rate": 4.581820719845374e-05, "lm_loss": 1.795, "loss": 1.795, "step": 1413 }, { "epoch": 0.6338201785514176, "learning_rate": 4.5811503253065955e-05, "lm_loss": 1.2355, "loss": 1.2355, "step": 1414 }, { "epoch": 0.6342684247880169, "learning_rate": 4.580479442961254e-05, "lm_loss": 1.5412, "loss": 1.5412, "step": 1415 }, { "epoch": 0.6347166710246162, "learning_rate": 4.579808072966597e-05, "lm_loss": 0.9296, "loss": 0.9296, "step": 1416 }, { "epoch": 0.6351649172612155, "learning_rate": 4.579136215479991e-05, "lm_loss": 0.8359, "loss": 0.8359, "step": 1417 }, { "epoch": 0.6356131634978148, "learning_rate": 4.5784638706589144e-05, "lm_loss": 0.9362, "loss": 0.9362, "step": 1418 }, { "epoch": 0.636061409734414, "learning_rate": 4.577791038660959e-05, "lm_loss": 0.7617, "loss": 0.7617, "step": 1419 }, { "epoch": 0.6365096559710134, "learning_rate": 4.577117719643832e-05, "lm_loss": 1.2257, "loss": 1.2257, "step": 1420 }, { "epoch": 0.6369579022076127, "learning_rate": 4.576443913765355e-05, "lm_loss": 1.7008, "loss": 1.7008, "step": 1421 }, { "epoch": 0.6374061484442121, "learning_rate": 4.575769621183463e-05, "lm_loss": 1.7932, "loss": 1.7932, "step": 1422 }, { "epoch": 0.6378543946808113, "learning_rate": 4.575094842056206e-05, "lm_loss": 2.0837, "loss": 2.0837, "step": 1423 }, { "epoch": 0.6383026409174106, "learning_rate": 4.5744195765417455e-05, "lm_loss": 0.715, "loss": 0.715, "step": 1424 }, { "epoch": 0.63875088715401, "learning_rate": 4.57374382479836e-05, "lm_loss": 0.7991, "loss": 0.7991, "step": 1425 }, { "epoch": 0.6391991333906092, "learning_rate": 4.573067586984441e-05, "lm_loss": 0.8766, "loss": 0.8766, "step": 1426 }, { "epoch": 0.6396473796272085, "learning_rate": 4.5723908632584926e-05, "lm_loss": 1.2216, "loss": 1.2216, "step": 1427 }, { "epoch": 0.6400956258638079, "learning_rate": 4.5717136537791364e-05, "lm_loss": 0.7248, "loss": 0.7248, "step": 1428 }, { "epoch": 0.6405438721004072, "learning_rate": 4.571035958705102e-05, "lm_loss": 0.8235, "loss": 0.8235, "step": 1429 }, { "epoch": 0.6409921183370064, "learning_rate": 4.570357778195239e-05, "lm_loss": 0.7226, "loss": 0.7226, "step": 1430 }, { "epoch": 0.6414403645736058, "learning_rate": 4.569679112408507e-05, "lm_loss": 1.0834, "loss": 1.0834, "step": 1431 }, { "epoch": 0.6418886108102051, "learning_rate": 4.56899996150398e-05, "lm_loss": 2.5277, "loss": 2.5277, "step": 1432 }, { "epoch": 0.6423368570468044, "learning_rate": 4.568320325640847e-05, "lm_loss": 1.5682, "loss": 1.5682, "step": 1433 }, { "epoch": 0.6427851032834037, "learning_rate": 4.56764020497841e-05, "lm_loss": 0.8472, "loss": 0.8472, "step": 1434 }, { "epoch": 0.643233349520003, "learning_rate": 4.5669595996760814e-05, "lm_loss": 2.1074, "loss": 2.1074, "step": 1435 }, { "epoch": 0.6436815957566023, "learning_rate": 4.5662785098933934e-05, "lm_loss": 0.7835, "loss": 0.7835, "step": 1436 }, { "epoch": 0.6441298419932016, "learning_rate": 4.5655969357899874e-05, "lm_loss": 0.8214, "loss": 0.8214, "step": 1437 }, { "epoch": 0.6445780882298009, "learning_rate": 4.564914877525619e-05, "lm_loss": 1.1071, "loss": 1.1071, "step": 1438 }, { "epoch": 0.6450263344664002, "learning_rate": 4.564232335260159e-05, "lm_loss": 1.4868, "loss": 1.4868, "step": 1439 }, { "epoch": 0.6454745807029996, "learning_rate": 4.5635493091535884e-05, "lm_loss": 1.235, "loss": 1.235, "step": 1440 }, { "epoch": 0.6459228269395988, "learning_rate": 4.562865799366005e-05, "lm_loss": 0.9041, "loss": 0.9041, "step": 1441 }, { "epoch": 0.6463710731761981, "learning_rate": 4.5621818060576185e-05, "lm_loss": 0.8886, "loss": 0.8886, "step": 1442 }, { "epoch": 0.6468193194127975, "learning_rate": 4.561497329388752e-05, "lm_loss": 0.9793, "loss": 0.9793, "step": 1443 }, { "epoch": 0.6472675656493967, "learning_rate": 4.5608123695198406e-05, "lm_loss": 2.0181, "loss": 2.0181, "step": 1444 }, { "epoch": 0.647715811885996, "learning_rate": 4.5601269266114355e-05, "lm_loss": 1.0757, "loss": 1.0757, "step": 1445 }, { "epoch": 0.6481640581225954, "learning_rate": 4.559441000824199e-05, "lm_loss": 0.9161, "loss": 0.9161, "step": 1446 }, { "epoch": 0.6486123043591947, "learning_rate": 4.558754592318907e-05, "lm_loss": 2.0376, "loss": 2.0376, "step": 1447 }, { "epoch": 0.6490605505957939, "learning_rate": 4.5580677012564486e-05, "lm_loss": 0.7041, "loss": 0.7041, "step": 1448 }, { "epoch": 0.6495087968323933, "learning_rate": 4.557380327797826e-05, "lm_loss": 0.9366, "loss": 0.9366, "step": 1449 }, { "epoch": 0.6499570430689926, "learning_rate": 4.556692472104155e-05, "lm_loss": 0.6959, "loss": 0.6959, "step": 1450 }, { "epoch": 0.6504052893055918, "learning_rate": 4.556004134336664e-05, "lm_loss": 0.7989, "loss": 0.7989, "step": 1451 }, { "epoch": 0.6508535355421912, "learning_rate": 4.555315314656694e-05, "lm_loss": 0.9986, "loss": 0.9986, "step": 1452 }, { "epoch": 0.6513017817787905, "learning_rate": 4.5546260132257004e-05, "lm_loss": 0.8074, "loss": 0.8074, "step": 1453 }, { "epoch": 0.6517500280153898, "learning_rate": 4.553936230205248e-05, "lm_loss": 0.9881, "loss": 0.9881, "step": 1454 }, { "epoch": 0.6521982742519891, "learning_rate": 4.5532459657570196e-05, "lm_loss": 1.9597, "loss": 1.9597, "step": 1455 }, { "epoch": 0.6526465204885884, "learning_rate": 4.5525552200428065e-05, "lm_loss": 0.9845, "loss": 0.9845, "step": 1456 }, { "epoch": 0.6530947667251877, "learning_rate": 4.551863993224517e-05, "lm_loss": 0.7346, "loss": 0.7346, "step": 1457 }, { "epoch": 0.653543012961787, "learning_rate": 4.551172285464166e-05, "lm_loss": 0.8848, "loss": 0.8848, "step": 1458 }, { "epoch": 0.6539912591983863, "learning_rate": 4.550480096923886e-05, "lm_loss": 1.2472, "loss": 1.2472, "step": 1459 }, { "epoch": 0.6544395054349856, "learning_rate": 4.549787427765923e-05, "lm_loss": 1.8374, "loss": 1.8374, "step": 1460 }, { "epoch": 0.654887751671585, "learning_rate": 4.5490942781526316e-05, "lm_loss": 0.6284, "loss": 0.6284, "step": 1461 }, { "epoch": 0.6553359979081842, "learning_rate": 4.548400648246481e-05, "lm_loss": 0.9253, "loss": 0.9253, "step": 1462 }, { "epoch": 0.6557842441447835, "learning_rate": 4.547706538210055e-05, "lm_loss": 0.7683, "loss": 0.7683, "step": 1463 }, { "epoch": 0.6562324903813829, "learning_rate": 4.547011948206046e-05, "lm_loss": 0.9344, "loss": 0.9344, "step": 1464 }, { "epoch": 0.6566807366179821, "learning_rate": 4.546316878397261e-05, "lm_loss": 0.7845, "loss": 0.7845, "step": 1465 }, { "epoch": 0.6571289828545814, "learning_rate": 4.545621328946621e-05, "lm_loss": 0.9714, "loss": 0.9714, "step": 1466 }, { "epoch": 0.6575772290911808, "learning_rate": 4.544925300017155e-05, "lm_loss": 0.7883, "loss": 0.7883, "step": 1467 }, { "epoch": 0.6580254753277801, "learning_rate": 4.5442287917720105e-05, "lm_loss": 0.8695, "loss": 0.8695, "step": 1468 }, { "epoch": 0.6584737215643793, "learning_rate": 4.543531804374441e-05, "lm_loss": 0.7053, "loss": 0.7053, "step": 1469 }, { "epoch": 0.6589219678009787, "learning_rate": 4.542834337987817e-05, "lm_loss": 0.7525, "loss": 0.7525, "step": 1470 }, { "epoch": 0.659370214037578, "learning_rate": 4.542136392775619e-05, "lm_loss": 0.7855, "loss": 0.7855, "step": 1471 }, { "epoch": 0.6598184602741772, "learning_rate": 4.54143796890144e-05, "lm_loss": 0.9799, "loss": 0.9799, "step": 1472 }, { "epoch": 0.6602667065107766, "learning_rate": 4.540739066528986e-05, "lm_loss": 0.8095, "loss": 0.8095, "step": 1473 }, { "epoch": 0.6607149527473759, "learning_rate": 4.540039685822075e-05, "lm_loss": 0.8847, "loss": 0.8847, "step": 1474 }, { "epoch": 0.6611631989839752, "learning_rate": 4.5393398269446364e-05, "lm_loss": 0.9406, "loss": 0.9406, "step": 1475 }, { "epoch": 0.6616114452205745, "learning_rate": 4.538639490060711e-05, "lm_loss": 0.7763, "loss": 0.7763, "step": 1476 }, { "epoch": 0.6620596914571738, "learning_rate": 4.537938675334455e-05, "lm_loss": 0.7444, "loss": 0.7444, "step": 1477 }, { "epoch": 0.6625079376937731, "learning_rate": 4.537237382930132e-05, "lm_loss": 0.8143, "loss": 0.8143, "step": 1478 }, { "epoch": 0.6629561839303724, "learning_rate": 4.5365356130121205e-05, "lm_loss": 0.841, "loss": 0.841, "step": 1479 }, { "epoch": 0.6634044301669717, "learning_rate": 4.535833365744912e-05, "lm_loss": 1.2094, "loss": 1.2094, "step": 1480 }, { "epoch": 0.663852676403571, "learning_rate": 4.535130641293105e-05, "lm_loss": 1.3311, "loss": 1.3311, "step": 1481 }, { "epoch": 0.6643009226401704, "learning_rate": 4.534427439821416e-05, "lm_loss": 1.0082, "loss": 1.0082, "step": 1482 }, { "epoch": 0.6647491688767696, "learning_rate": 4.5337237614946683e-05, "lm_loss": 0.7542, "loss": 0.7542, "step": 1483 }, { "epoch": 0.6651974151133689, "learning_rate": 4.533019606477801e-05, "lm_loss": 0.7954, "loss": 0.7954, "step": 1484 }, { "epoch": 0.6656456613499683, "learning_rate": 4.5323149749358604e-05, "lm_loss": 0.8963, "loss": 0.8963, "step": 1485 }, { "epoch": 0.6660939075865675, "learning_rate": 4.531609867034009e-05, "lm_loss": 0.8609, "loss": 0.8609, "step": 1486 }, { "epoch": 0.6665421538231668, "learning_rate": 4.530904282937518e-05, "lm_loss": 0.8596, "loss": 0.8596, "step": 1487 }, { "epoch": 0.6669904000597662, "learning_rate": 4.530198222811772e-05, "lm_loss": 0.9749, "loss": 0.9749, "step": 1488 }, { "epoch": 0.6674386462963655, "learning_rate": 4.529491686822266e-05, "lm_loss": 0.7941, "loss": 0.7941, "step": 1489 }, { "epoch": 0.6678868925329647, "learning_rate": 4.528784675134608e-05, "lm_loss": 0.9071, "loss": 0.9071, "step": 1490 }, { "epoch": 0.6683351387695641, "learning_rate": 4.5280771879145134e-05, "lm_loss": 0.7847, "loss": 0.7847, "step": 1491 }, { "epoch": 0.6687833850061634, "learning_rate": 4.527369225327814e-05, "lm_loss": 0.7774, "loss": 0.7774, "step": 1492 }, { "epoch": 0.6692316312427626, "learning_rate": 4.526660787540452e-05, "lm_loss": 0.8979, "loss": 0.8979, "step": 1493 }, { "epoch": 0.669679877479362, "learning_rate": 4.525951874718479e-05, "lm_loss": 0.8555, "loss": 0.8555, "step": 1494 }, { "epoch": 0.6701281237159613, "learning_rate": 4.52524248702806e-05, "lm_loss": 1.2707, "loss": 1.2707, "step": 1495 }, { "epoch": 0.6705763699525606, "learning_rate": 4.524532624635468e-05, "lm_loss": 1.6823, "loss": 1.6823, "step": 1496 }, { "epoch": 0.6710246161891599, "learning_rate": 4.523822287707092e-05, "lm_loss": 0.8409, "loss": 0.8409, "step": 1497 }, { "epoch": 0.6714728624257592, "learning_rate": 4.5231114764094286e-05, "lm_loss": 0.8395, "loss": 0.8395, "step": 1498 }, { "epoch": 0.6719211086623585, "learning_rate": 4.522400190909087e-05, "lm_loss": 0.8425, "loss": 0.8425, "step": 1499 }, { "epoch": 0.6723693548989579, "learning_rate": 4.521688431372788e-05, "lm_loss": 0.7327, "loss": 0.7327, "step": 1500 }, { "epoch": 0.6728176011355571, "learning_rate": 4.5209761979673626e-05, "lm_loss": 0.766, "loss": 0.766, "step": 1501 }, { "epoch": 0.6732658473721564, "learning_rate": 4.520263490859753e-05, "lm_loss": 0.6822, "loss": 0.6822, "step": 1502 }, { "epoch": 0.6737140936087558, "learning_rate": 4.519550310217012e-05, "lm_loss": 0.9088, "loss": 0.9088, "step": 1503 }, { "epoch": 0.674162339845355, "learning_rate": 4.5188366562063054e-05, "lm_loss": 0.7687, "loss": 0.7687, "step": 1504 }, { "epoch": 0.6746105860819543, "learning_rate": 4.518122528994907e-05, "lm_loss": 0.6676, "loss": 0.6676, "step": 1505 }, { "epoch": 0.6750588323185537, "learning_rate": 4.517407928750204e-05, "lm_loss": 0.9459, "loss": 0.9459, "step": 1506 }, { "epoch": 0.675507078555153, "learning_rate": 4.516692855639693e-05, "lm_loss": 0.7642, "loss": 0.7642, "step": 1507 }, { "epoch": 0.6759553247917522, "learning_rate": 4.515977309830982e-05, "lm_loss": 0.9951, "loss": 0.9951, "step": 1508 }, { "epoch": 0.6764035710283516, "learning_rate": 4.51526129149179e-05, "lm_loss": 1.6841, "loss": 1.6841, "step": 1509 }, { "epoch": 0.6768518172649509, "learning_rate": 4.514544800789947e-05, "lm_loss": 0.9337, "loss": 0.9337, "step": 1510 }, { "epoch": 0.6773000635015501, "learning_rate": 4.513827837893393e-05, "lm_loss": 0.6877, "loss": 0.6877, "step": 1511 }, { "epoch": 0.6777483097381495, "learning_rate": 4.513110402970178e-05, "lm_loss": 0.8923, "loss": 0.8923, "step": 1512 }, { "epoch": 0.6781965559747488, "learning_rate": 4.512392496188465e-05, "lm_loss": 1.773, "loss": 1.773, "step": 1513 }, { "epoch": 0.6786448022113482, "learning_rate": 4.511674117716524e-05, "lm_loss": 0.8491, "loss": 0.8491, "step": 1514 }, { "epoch": 0.6790930484479474, "learning_rate": 4.510955267722739e-05, "lm_loss": 0.7959, "loss": 0.7959, "step": 1515 }, { "epoch": 0.6795412946845467, "learning_rate": 4.510235946375605e-05, "lm_loss": 0.7439, "loss": 0.7439, "step": 1516 }, { "epoch": 0.679989540921146, "learning_rate": 4.509516153843722e-05, "lm_loss": 0.9453, "loss": 0.9453, "step": 1517 }, { "epoch": 0.6804377871577453, "learning_rate": 4.508795890295808e-05, "lm_loss": 0.7263, "loss": 0.7263, "step": 1518 }, { "epoch": 0.6808860333943446, "learning_rate": 4.508075155900685e-05, "lm_loss": 0.8121, "loss": 0.8121, "step": 1519 }, { "epoch": 0.681334279630944, "learning_rate": 4.5073539508272875e-05, "lm_loss": 0.8529, "loss": 0.8529, "step": 1520 }, { "epoch": 0.6817825258675433, "learning_rate": 4.5066322752446634e-05, "lm_loss": 0.8331, "loss": 0.8331, "step": 1521 }, { "epoch": 0.6822307721041425, "learning_rate": 4.5059101293219665e-05, "lm_loss": 0.8739, "loss": 0.8739, "step": 1522 }, { "epoch": 0.6826790183407418, "learning_rate": 4.505187513228462e-05, "lm_loss": 0.7427, "loss": 0.7427, "step": 1523 }, { "epoch": 0.6831272645773412, "learning_rate": 4.504464427133527e-05, "lm_loss": 0.8907, "loss": 0.8907, "step": 1524 }, { "epoch": 0.6835755108139404, "learning_rate": 4.503740871206647e-05, "lm_loss": 1.0845, "loss": 1.0845, "step": 1525 }, { "epoch": 0.6840237570505397, "learning_rate": 4.5030168456174195e-05, "lm_loss": 0.5218, "loss": 0.5218, "step": 1526 }, { "epoch": 0.6844720032871391, "learning_rate": 4.5022923505355484e-05, "lm_loss": 0.8772, "loss": 0.8772, "step": 1527 }, { "epoch": 0.6849202495237384, "learning_rate": 4.5015673861308525e-05, "lm_loss": 0.8661, "loss": 0.8661, "step": 1528 }, { "epoch": 0.6853684957603376, "learning_rate": 4.500841952573258e-05, "lm_loss": 0.7398, "loss": 0.7398, "step": 1529 }, { "epoch": 0.685816741996937, "learning_rate": 4.500116050032799e-05, "lm_loss": 0.8309, "loss": 0.8309, "step": 1530 }, { "epoch": 0.6862649882335363, "learning_rate": 4.499389678679625e-05, "lm_loss": 0.9179, "loss": 0.9179, "step": 1531 }, { "epoch": 0.6867132344701355, "learning_rate": 4.49866283868399e-05, "lm_loss": 0.7096, "loss": 0.7096, "step": 1532 }, { "epoch": 0.6871614807067349, "learning_rate": 4.49793553021626e-05, "lm_loss": 0.7968, "loss": 0.7968, "step": 1533 }, { "epoch": 0.6876097269433342, "learning_rate": 4.497207753446912e-05, "lm_loss": 0.8667, "loss": 0.8667, "step": 1534 }, { "epoch": 0.6880579731799336, "learning_rate": 4.496479508546531e-05, "lm_loss": 0.9123, "loss": 0.9123, "step": 1535 }, { "epoch": 0.6885062194165328, "learning_rate": 4.4957507956858125e-05, "lm_loss": 0.7314, "loss": 0.7314, "step": 1536 }, { "epoch": 0.6889544656531321, "learning_rate": 4.495021615035561e-05, "lm_loss": 0.7368, "loss": 0.7368, "step": 1537 }, { "epoch": 0.6894027118897315, "learning_rate": 4.4942919667666916e-05, "lm_loss": 0.6673, "loss": 0.6673, "step": 1538 }, { "epoch": 0.6898509581263307, "learning_rate": 4.493561851050229e-05, "lm_loss": 0.8869, "loss": 0.8869, "step": 1539 }, { "epoch": 0.69029920436293, "learning_rate": 4.4928312680573064e-05, "lm_loss": 0.7755, "loss": 0.7755, "step": 1540 }, { "epoch": 0.6907474505995294, "learning_rate": 4.492100217959168e-05, "lm_loss": 1.0157, "loss": 1.0157, "step": 1541 }, { "epoch": 0.6911956968361287, "learning_rate": 4.4913687009271644e-05, "lm_loss": 0.6972, "loss": 0.6972, "step": 1542 }, { "epoch": 0.6916439430727279, "learning_rate": 4.49063671713276e-05, "lm_loss": 0.8601, "loss": 0.8601, "step": 1543 }, { "epoch": 0.6920921893093273, "learning_rate": 4.4899042667475264e-05, "lm_loss": 0.8347, "loss": 0.8347, "step": 1544 }, { "epoch": 0.6925404355459266, "learning_rate": 4.489171349943144e-05, "lm_loss": 0.8451, "loss": 0.8451, "step": 1545 }, { "epoch": 0.6929886817825258, "learning_rate": 4.488437966891404e-05, "lm_loss": 1.204, "loss": 1.204, "step": 1546 }, { "epoch": 0.6934369280191252, "learning_rate": 4.487704117764205e-05, "lm_loss": 1.3071, "loss": 1.3071, "step": 1547 }, { "epoch": 0.6938851742557245, "learning_rate": 4.486969802733556e-05, "lm_loss": 0.9654, "loss": 0.9654, "step": 1548 }, { "epoch": 0.6943334204923238, "learning_rate": 4.486235021971577e-05, "lm_loss": 0.9895, "loss": 0.9895, "step": 1549 }, { "epoch": 0.694781666728923, "learning_rate": 4.485499775650493e-05, "lm_loss": 0.7347, "loss": 0.7347, "step": 1550 }, { "epoch": 0.6952299129655224, "learning_rate": 4.48476406394264e-05, "lm_loss": 0.9532, "loss": 0.9532, "step": 1551 }, { "epoch": 0.6956781592021217, "learning_rate": 4.484027887020466e-05, "lm_loss": 0.8386, "loss": 0.8386, "step": 1552 }, { "epoch": 0.696126405438721, "learning_rate": 4.4832912450565246e-05, "lm_loss": 0.8411, "loss": 0.8411, "step": 1553 }, { "epoch": 0.6965746516753203, "learning_rate": 4.482554138223478e-05, "lm_loss": 0.8516, "loss": 0.8516, "step": 1554 }, { "epoch": 0.6970228979119196, "learning_rate": 4.4818165666941005e-05, "lm_loss": 0.7976, "loss": 0.7976, "step": 1555 }, { "epoch": 0.697471144148519, "learning_rate": 4.4810785306412726e-05, "lm_loss": 0.7286, "loss": 0.7286, "step": 1556 }, { "epoch": 0.6979193903851182, "learning_rate": 4.480340030237985e-05, "lm_loss": 1.7997, "loss": 1.7997, "step": 1557 }, { "epoch": 0.6983676366217175, "learning_rate": 4.4796010656573364e-05, "lm_loss": 0.7895, "loss": 0.7895, "step": 1558 }, { "epoch": 0.6988158828583169, "learning_rate": 4.4788616370725346e-05, "lm_loss": 1.5207, "loss": 1.5207, "step": 1559 }, { "epoch": 0.6992641290949161, "learning_rate": 4.478121744656897e-05, "lm_loss": 1.1397, "loss": 1.1397, "step": 1560 }, { "epoch": 0.6997123753315154, "learning_rate": 4.4773813885838485e-05, "lm_loss": 0.7819, "loss": 0.7819, "step": 1561 }, { "epoch": 0.7001606215681148, "learning_rate": 4.476640569026924e-05, "lm_loss": 0.8056, "loss": 0.8056, "step": 1562 }, { "epoch": 0.7006088678047141, "learning_rate": 4.4758992861597646e-05, "lm_loss": 1.938, "loss": 1.938, "step": 1563 }, { "epoch": 0.7010571140413133, "learning_rate": 4.475157540156123e-05, "lm_loss": 1.1725, "loss": 1.1725, "step": 1564 }, { "epoch": 0.7015053602779127, "learning_rate": 4.474415331189859e-05, "lm_loss": 1.4472, "loss": 1.4472, "step": 1565 }, { "epoch": 0.701953606514512, "learning_rate": 4.473672659434941e-05, "lm_loss": 0.7968, "loss": 0.7968, "step": 1566 }, { "epoch": 0.7024018527511112, "learning_rate": 4.472929525065445e-05, "lm_loss": 1.8748, "loss": 1.8748, "step": 1567 }, { "epoch": 0.7028500989877106, "learning_rate": 4.472185928255558e-05, "lm_loss": 0.7161, "loss": 0.7161, "step": 1568 }, { "epoch": 0.7032983452243099, "learning_rate": 4.471441869179571e-05, "lm_loss": 0.8588, "loss": 0.8588, "step": 1569 }, { "epoch": 0.7037465914609092, "learning_rate": 4.470697348011889e-05, "lm_loss": 0.7644, "loss": 0.7644, "step": 1570 }, { "epoch": 0.7041948376975085, "learning_rate": 4.4699523649270206e-05, "lm_loss": 0.7313, "loss": 0.7313, "step": 1571 }, { "epoch": 0.7046430839341078, "learning_rate": 4.469206920099585e-05, "lm_loss": 0.8672, "loss": 0.8672, "step": 1572 }, { "epoch": 0.7050913301707071, "learning_rate": 4.468461013704309e-05, "lm_loss": 0.9125, "loss": 0.9125, "step": 1573 }, { "epoch": 0.7055395764073065, "learning_rate": 4.467714645916028e-05, "lm_loss": 0.766, "loss": 0.766, "step": 1574 }, { "epoch": 0.7059878226439057, "learning_rate": 4.466967816909684e-05, "lm_loss": 0.7623, "loss": 0.7623, "step": 1575 }, { "epoch": 0.706436068880505, "learning_rate": 4.4662205268603303e-05, "lm_loss": 0.7777, "loss": 0.7777, "step": 1576 }, { "epoch": 0.7068843151171044, "learning_rate": 4.4654727759431246e-05, "lm_loss": 0.9169, "loss": 0.9169, "step": 1577 }, { "epoch": 0.7073325613537036, "learning_rate": 4.4647245643333346e-05, "lm_loss": 0.8017, "loss": 0.8017, "step": 1578 }, { "epoch": 0.7077808075903029, "learning_rate": 4.463975892206336e-05, "lm_loss": 0.7117, "loss": 0.7117, "step": 1579 }, { "epoch": 0.7082290538269023, "learning_rate": 4.4632267597376125e-05, "lm_loss": 0.9058, "loss": 0.9058, "step": 1580 }, { "epoch": 0.7086773000635016, "learning_rate": 4.4624771671027545e-05, "lm_loss": 1.6884, "loss": 1.6884, "step": 1581 }, { "epoch": 0.7091255463001008, "learning_rate": 4.461727114477462e-05, "lm_loss": 1.5613, "loss": 1.5613, "step": 1582 }, { "epoch": 0.7095737925367002, "learning_rate": 4.4609766020375406e-05, "lm_loss": 0.8003, "loss": 0.8003, "step": 1583 }, { "epoch": 0.7100220387732995, "learning_rate": 4.460225629958906e-05, "lm_loss": 1.7011, "loss": 1.7011, "step": 1584 }, { "epoch": 0.7104702850098987, "learning_rate": 4.45947419841758e-05, "lm_loss": 0.7395, "loss": 0.7395, "step": 1585 }, { "epoch": 0.7109185312464981, "learning_rate": 4.458722307589693e-05, "lm_loss": 1.4579, "loss": 1.4579, "step": 1586 }, { "epoch": 0.7113667774830974, "learning_rate": 4.457969957651484e-05, "lm_loss": 1.0909, "loss": 1.0909, "step": 1587 }, { "epoch": 0.7118150237196967, "learning_rate": 4.4572171487792966e-05, "lm_loss": 0.7696, "loss": 0.7696, "step": 1588 }, { "epoch": 0.712263269956296, "learning_rate": 4.4564638811495843e-05, "lm_loss": 0.7764, "loss": 0.7764, "step": 1589 }, { "epoch": 0.7127115161928953, "learning_rate": 4.4557101549389076e-05, "lm_loss": 0.8786, "loss": 0.8786, "step": 1590 }, { "epoch": 0.7131597624294946, "learning_rate": 4.454955970323934e-05, "lm_loss": 0.8549, "loss": 0.8549, "step": 1591 }, { "epoch": 0.7136080086660939, "learning_rate": 4.4542013274814395e-05, "lm_loss": 0.7659, "loss": 0.7659, "step": 1592 }, { "epoch": 0.7140562549026932, "learning_rate": 4.453446226588308e-05, "lm_loss": 0.7133, "loss": 0.7133, "step": 1593 }, { "epoch": 0.7145045011392925, "learning_rate": 4.4526906678215275e-05, "lm_loss": 0.684, "loss": 0.684, "step": 1594 }, { "epoch": 0.7149527473758919, "learning_rate": 4.451934651358197e-05, "lm_loss": 1.1245, "loss": 1.1245, "step": 1595 }, { "epoch": 0.7154009936124911, "learning_rate": 4.451178177375521e-05, "lm_loss": 1.6009, "loss": 1.6009, "step": 1596 }, { "epoch": 0.7158492398490904, "learning_rate": 4.45042124605081e-05, "lm_loss": 1.2193, "loss": 1.2193, "step": 1597 }, { "epoch": 0.7162974860856898, "learning_rate": 4.449663857561486e-05, "lm_loss": 1.4298, "loss": 1.4298, "step": 1598 }, { "epoch": 0.716745732322289, "learning_rate": 4.4489060120850735e-05, "lm_loss": 1.7469, "loss": 1.7469, "step": 1599 }, { "epoch": 0.7171939785588883, "learning_rate": 4.448147709799206e-05, "lm_loss": 0.7063, "loss": 0.7063, "step": 1600 }, { "epoch": 0.7176422247954877, "learning_rate": 4.447388950881625e-05, "lm_loss": 0.8454, "loss": 0.8454, "step": 1601 }, { "epoch": 0.718090471032087, "learning_rate": 4.446629735510177e-05, "lm_loss": 0.7878, "loss": 0.7878, "step": 1602 }, { "epoch": 0.7185387172686862, "learning_rate": 4.445870063862817e-05, "lm_loss": 0.8754, "loss": 0.8754, "step": 1603 }, { "epoch": 0.7189869635052856, "learning_rate": 4.445109936117607e-05, "lm_loss": 0.884, "loss": 0.884, "step": 1604 }, { "epoch": 0.7194352097418849, "learning_rate": 4.4443493524527155e-05, "lm_loss": 0.6999, "loss": 0.6999, "step": 1605 }, { "epoch": 0.7198834559784841, "learning_rate": 4.4435883130464175e-05, "lm_loss": 0.7265, "loss": 0.7265, "step": 1606 }, { "epoch": 0.7203317022150835, "learning_rate": 4.442826818077095e-05, "lm_loss": 0.9459, "loss": 0.9459, "step": 1607 }, { "epoch": 0.7207799484516828, "learning_rate": 4.4420648677232356e-05, "lm_loss": 0.7497, "loss": 0.7497, "step": 1608 }, { "epoch": 0.7212281946882821, "learning_rate": 4.441302462163438e-05, "lm_loss": 1.6559, "loss": 1.6559, "step": 1609 }, { "epoch": 0.7216764409248814, "learning_rate": 4.4405396015764024e-05, "lm_loss": 1.3391, "loss": 1.3391, "step": 1610 }, { "epoch": 0.7221246871614807, "learning_rate": 4.439776286140937e-05, "lm_loss": 1.0748, "loss": 1.0748, "step": 1611 }, { "epoch": 0.72257293339808, "learning_rate": 4.439012516035961e-05, "lm_loss": 0.9124, "loss": 0.9124, "step": 1612 }, { "epoch": 0.7230211796346793, "learning_rate": 4.438248291440492e-05, "lm_loss": 0.5872, "loss": 0.5872, "step": 1613 }, { "epoch": 0.7234694258712786, "learning_rate": 4.437483612533662e-05, "lm_loss": 0.9075, "loss": 0.9075, "step": 1614 }, { "epoch": 0.7239176721078779, "learning_rate": 4.436718479494705e-05, "lm_loss": 0.8783, "loss": 0.8783, "step": 1615 }, { "epoch": 0.7243659183444773, "learning_rate": 4.435952892502962e-05, "lm_loss": 0.8992, "loss": 0.8992, "step": 1616 }, { "epoch": 0.7248141645810765, "learning_rate": 4.4351868517378834e-05, "lm_loss": 1.744, "loss": 1.744, "step": 1617 }, { "epoch": 0.7252624108176758, "learning_rate": 4.434420357379021e-05, "lm_loss": 0.7657, "loss": 0.7657, "step": 1618 }, { "epoch": 0.7257106570542752, "learning_rate": 4.4336534096060364e-05, "lm_loss": 0.7998, "loss": 0.7998, "step": 1619 }, { "epoch": 0.7261589032908744, "learning_rate": 4.4328860085986976e-05, "lm_loss": 0.9085, "loss": 0.9085, "step": 1620 }, { "epoch": 0.7266071495274737, "learning_rate": 4.432118154536877e-05, "lm_loss": 0.7152, "loss": 0.7152, "step": 1621 }, { "epoch": 0.7270553957640731, "learning_rate": 4.431349847600553e-05, "lm_loss": 0.9912, "loss": 0.9912, "step": 1622 }, { "epoch": 0.7275036420006724, "learning_rate": 4.430581087969813e-05, "lm_loss": 0.7658, "loss": 0.7658, "step": 1623 }, { "epoch": 0.7279518882372716, "learning_rate": 4.429811875824848e-05, "lm_loss": 0.7765, "loss": 0.7765, "step": 1624 }, { "epoch": 0.728400134473871, "learning_rate": 4.4290422113459554e-05, "lm_loss": 0.7276, "loss": 0.7276, "step": 1625 }, { "epoch": 0.7288483807104703, "learning_rate": 4.428272094713539e-05, "lm_loss": 0.9565, "loss": 0.9565, "step": 1626 }, { "epoch": 0.7292966269470695, "learning_rate": 4.4275015261081094e-05, "lm_loss": 0.833, "loss": 0.833, "step": 1627 }, { "epoch": 0.7297448731836689, "learning_rate": 4.426730505710282e-05, "lm_loss": 0.9258, "loss": 0.9258, "step": 1628 }, { "epoch": 0.7301931194202682, "learning_rate": 4.4259590337007764e-05, "lm_loss": 0.7904, "loss": 0.7904, "step": 1629 }, { "epoch": 0.7306413656568675, "learning_rate": 4.425187110260423e-05, "lm_loss": 1.176, "loss": 1.176, "step": 1630 }, { "epoch": 0.7310896118934668, "learning_rate": 4.424414735570154e-05, "lm_loss": 1.3184, "loss": 1.3184, "step": 1631 }, { "epoch": 0.7315378581300661, "learning_rate": 4.423641909811007e-05, "lm_loss": 0.9552, "loss": 0.9552, "step": 1632 }, { "epoch": 0.7319861043666654, "learning_rate": 4.422868633164129e-05, "lm_loss": 1.7334, "loss": 1.7334, "step": 1633 }, { "epoch": 0.7324343506032647, "learning_rate": 4.4220949058107696e-05, "lm_loss": 0.7545, "loss": 0.7545, "step": 1634 }, { "epoch": 0.732882596839864, "learning_rate": 4.421320727932284e-05, "lm_loss": 0.7632, "loss": 0.7632, "step": 1635 }, { "epoch": 0.7333308430764633, "learning_rate": 4.4205460997101354e-05, "lm_loss": 0.7145, "loss": 0.7145, "step": 1636 }, { "epoch": 0.7337790893130627, "learning_rate": 4.4197710213258894e-05, "lm_loss": 0.8484, "loss": 0.8484, "step": 1637 }, { "epoch": 0.7342273355496619, "learning_rate": 4.418995492961221e-05, "lm_loss": 0.7404, "loss": 0.7404, "step": 1638 }, { "epoch": 0.7346755817862612, "learning_rate": 4.418219514797905e-05, "lm_loss": 0.8482, "loss": 0.8482, "step": 1639 }, { "epoch": 0.7351238280228606, "learning_rate": 4.4174430870178296e-05, "lm_loss": 0.7098, "loss": 0.7098, "step": 1640 }, { "epoch": 0.7355720742594599, "learning_rate": 4.41666620980298e-05, "lm_loss": 0.8725, "loss": 0.8725, "step": 1641 }, { "epoch": 0.7360203204960591, "learning_rate": 4.415888883335452e-05, "lm_loss": 0.8291, "loss": 0.8291, "step": 1642 }, { "epoch": 0.7364685667326585, "learning_rate": 4.415111107797445e-05, "lm_loss": 0.9489, "loss": 0.9489, "step": 1643 }, { "epoch": 0.7369168129692578, "learning_rate": 4.414332883371265e-05, "lm_loss": 0.7223, "loss": 0.7223, "step": 1644 }, { "epoch": 0.737365059205857, "learning_rate": 4.413554210239321e-05, "lm_loss": 1.0181, "loss": 1.0181, "step": 1645 }, { "epoch": 0.7378133054424564, "learning_rate": 4.412775088584129e-05, "lm_loss": 0.6765, "loss": 0.6765, "step": 1646 }, { "epoch": 0.7382615516790557, "learning_rate": 4.411995518588309e-05, "lm_loss": 0.8227, "loss": 0.8227, "step": 1647 }, { "epoch": 0.738709797915655, "learning_rate": 4.411215500434586e-05, "lm_loss": 0.7533, "loss": 0.7533, "step": 1648 }, { "epoch": 0.7391580441522543, "learning_rate": 4.410435034305792e-05, "lm_loss": 0.6627, "loss": 0.6627, "step": 1649 }, { "epoch": 0.7396062903888536, "learning_rate": 4.409654120384862e-05, "lm_loss": 1.8634, "loss": 1.8634, "step": 1650 }, { "epoch": 0.740054536625453, "learning_rate": 4.408872758854837e-05, "lm_loss": 1.9282, "loss": 1.9282, "step": 1651 }, { "epoch": 0.7405027828620522, "learning_rate": 4.408090949898862e-05, "lm_loss": 1.3752, "loss": 1.3752, "step": 1652 }, { "epoch": 0.7409510290986515, "learning_rate": 4.4073086937001865e-05, "lm_loss": 0.8503, "loss": 0.8503, "step": 1653 }, { "epoch": 0.7413992753352509, "learning_rate": 4.406525990442167e-05, "lm_loss": 0.8201, "loss": 0.8201, "step": 1654 }, { "epoch": 0.7418475215718502, "learning_rate": 4.4057428403082637e-05, "lm_loss": 0.7715, "loss": 0.7715, "step": 1655 }, { "epoch": 0.7422957678084494, "learning_rate": 4.40495924348204e-05, "lm_loss": 0.6529, "loss": 0.6529, "step": 1656 }, { "epoch": 0.7427440140450488, "learning_rate": 4.404175200147166e-05, "lm_loss": 0.8434, "loss": 0.8434, "step": 1657 }, { "epoch": 0.7431922602816481, "learning_rate": 4.403390710487416e-05, "lm_loss": 0.8541, "loss": 0.8541, "step": 1658 }, { "epoch": 0.7436405065182473, "learning_rate": 4.402605774686668e-05, "lm_loss": 0.766, "loss": 0.766, "step": 1659 }, { "epoch": 0.7440887527548466, "learning_rate": 4.401820392928906e-05, "lm_loss": 0.7119, "loss": 0.7119, "step": 1660 }, { "epoch": 0.744536998991446, "learning_rate": 4.4010345653982176e-05, "lm_loss": 0.8055, "loss": 0.8055, "step": 1661 }, { "epoch": 0.7449852452280453, "learning_rate": 4.4002482922787944e-05, "lm_loss": 1.0095, "loss": 1.0095, "step": 1662 }, { "epoch": 0.7454334914646445, "learning_rate": 4.3994615737549346e-05, "lm_loss": 1.0238, "loss": 1.0238, "step": 1663 }, { "epoch": 0.7458817377012439, "learning_rate": 4.398674410011039e-05, "lm_loss": 0.6389, "loss": 0.6389, "step": 1664 }, { "epoch": 0.7463299839378432, "learning_rate": 4.3978868012316116e-05, "lm_loss": 0.8524, "loss": 0.8524, "step": 1665 }, { "epoch": 0.7467782301744424, "learning_rate": 4.397098747601263e-05, "lm_loss": 0.8102, "loss": 0.8102, "step": 1666 }, { "epoch": 0.7472264764110418, "learning_rate": 4.396310249304708e-05, "lm_loss": 0.7258, "loss": 0.7258, "step": 1667 }, { "epoch": 0.7476747226476411, "learning_rate": 4.3955213065267634e-05, "lm_loss": 0.8093, "loss": 0.8093, "step": 1668 }, { "epoch": 0.7481229688842405, "learning_rate": 4.394731919452353e-05, "lm_loss": 0.8427, "loss": 0.8427, "step": 1669 }, { "epoch": 0.7485712151208397, "learning_rate": 4.393942088266503e-05, "lm_loss": 0.7928, "loss": 0.7928, "step": 1670 }, { "epoch": 0.749019461357439, "learning_rate": 4.3931518131543445e-05, "lm_loss": 0.7917, "loss": 0.7917, "step": 1671 }, { "epoch": 0.7494677075940384, "learning_rate": 4.392361094301112e-05, "lm_loss": 0.8549, "loss": 0.8549, "step": 1672 }, { "epoch": 0.7499159538306376, "learning_rate": 4.391569931892143e-05, "lm_loss": 0.7796, "loss": 0.7796, "step": 1673 }, { "epoch": 0.7503642000672369, "learning_rate": 4.390778326112882e-05, "lm_loss": 0.7078, "loss": 0.7078, "step": 1674 }, { "epoch": 0.7508124463038363, "learning_rate": 4.3899862771488754e-05, "lm_loss": 0.7455, "loss": 0.7455, "step": 1675 }, { "epoch": 0.7512606925404356, "learning_rate": 4.389193785185772e-05, "lm_loss": 0.8044, "loss": 0.8044, "step": 1676 }, { "epoch": 0.7517089387770348, "learning_rate": 4.3884008504093285e-05, "lm_loss": 0.8532, "loss": 0.8532, "step": 1677 }, { "epoch": 0.7521571850136342, "learning_rate": 4.387607473005403e-05, "lm_loss": 1.0225, "loss": 1.0225, "step": 1678 }, { "epoch": 0.7526054312502335, "learning_rate": 4.386813653159956e-05, "lm_loss": 1.4183, "loss": 1.4183, "step": 1679 }, { "epoch": 0.7530536774868327, "learning_rate": 4.386019391059053e-05, "lm_loss": 0.7198, "loss": 0.7198, "step": 1680 }, { "epoch": 0.753501923723432, "learning_rate": 4.385224686888864e-05, "lm_loss": 0.8236, "loss": 0.8236, "step": 1681 }, { "epoch": 0.7539501699600314, "learning_rate": 4.384429540835662e-05, "lm_loss": 0.7237, "loss": 0.7237, "step": 1682 }, { "epoch": 0.7543984161966307, "learning_rate": 4.3836339530858234e-05, "lm_loss": 0.67, "loss": 0.67, "step": 1683 }, { "epoch": 0.75484666243323, "learning_rate": 4.382837923825828e-05, "lm_loss": 0.7925, "loss": 0.7925, "step": 1684 }, { "epoch": 0.7552949086698293, "learning_rate": 4.382041453242259e-05, "lm_loss": 0.7141, "loss": 0.7141, "step": 1685 }, { "epoch": 0.7557431549064286, "learning_rate": 4.381244541521805e-05, "lm_loss": 0.9361, "loss": 0.9361, "step": 1686 }, { "epoch": 0.7561914011430279, "learning_rate": 4.380447188851254e-05, "lm_loss": 0.6459, "loss": 0.6459, "step": 1687 }, { "epoch": 0.7566396473796272, "learning_rate": 4.379649395417501e-05, "lm_loss": 0.854, "loss": 0.854, "step": 1688 }, { "epoch": 0.7570878936162265, "learning_rate": 4.378851161407543e-05, "lm_loss": 0.7524, "loss": 0.7524, "step": 1689 }, { "epoch": 0.7575361398528259, "learning_rate": 4.378052487008479e-05, "lm_loss": 0.8427, "loss": 0.8427, "step": 1690 }, { "epoch": 0.7579843860894251, "learning_rate": 4.377253372407515e-05, "lm_loss": 0.7306, "loss": 0.7306, "step": 1691 }, { "epoch": 0.7584326323260244, "learning_rate": 4.376453817791956e-05, "lm_loss": 0.7879, "loss": 0.7879, "step": 1692 }, { "epoch": 0.7588808785626238, "learning_rate": 4.375653823349212e-05, "lm_loss": 0.7317, "loss": 0.7317, "step": 1693 }, { "epoch": 0.759329124799223, "learning_rate": 4.3748533892667945e-05, "lm_loss": 0.7241, "loss": 0.7241, "step": 1694 }, { "epoch": 0.7597773710358223, "learning_rate": 4.374052515732322e-05, "lm_loss": 0.8572, "loss": 0.8572, "step": 1695 }, { "epoch": 0.7602256172724217, "learning_rate": 4.3732512029335124e-05, "lm_loss": 1.0602, "loss": 1.0602, "step": 1696 }, { "epoch": 0.760673863509021, "learning_rate": 4.3724494510581874e-05, "lm_loss": 1.2524, "loss": 1.2524, "step": 1697 }, { "epoch": 0.7611221097456202, "learning_rate": 4.371647260294273e-05, "lm_loss": 0.7388, "loss": 0.7388, "step": 1698 }, { "epoch": 0.7615703559822196, "learning_rate": 4.3708446308297945e-05, "lm_loss": 0.9066, "loss": 0.9066, "step": 1699 }, { "epoch": 0.7620186022188189, "learning_rate": 4.3700415628528845e-05, "lm_loss": 0.7488, "loss": 0.7488, "step": 1700 }, { "epoch": 0.7624668484554181, "learning_rate": 4.369238056551775e-05, "lm_loss": 0.7488, "loss": 0.7488, "step": 1701 }, { "epoch": 0.7629150946920175, "learning_rate": 4.368434112114803e-05, "lm_loss": 0.74, "loss": 0.74, "step": 1702 }, { "epoch": 0.7633633409286168, "learning_rate": 4.367629729730407e-05, "lm_loss": 0.8383, "loss": 0.8383, "step": 1703 }, { "epoch": 0.7638115871652161, "learning_rate": 4.3668249095871286e-05, "lm_loss": 0.8071, "loss": 0.8071, "step": 1704 }, { "epoch": 0.7642598334018154, "learning_rate": 4.366019651873612e-05, "lm_loss": 1.1135, "loss": 1.1135, "step": 1705 }, { "epoch": 0.7647080796384147, "learning_rate": 4.365213956778602e-05, "lm_loss": 1.3584, "loss": 1.3584, "step": 1706 }, { "epoch": 0.765156325875014, "learning_rate": 4.3644078244909505e-05, "lm_loss": 0.7284, "loss": 0.7284, "step": 1707 }, { "epoch": 0.7656045721116133, "learning_rate": 4.3636012551996064e-05, "lm_loss": 1.7698, "loss": 1.7698, "step": 1708 }, { "epoch": 0.7660528183482126, "learning_rate": 4.3627942490936255e-05, "lm_loss": 0.8887, "loss": 0.8887, "step": 1709 }, { "epoch": 0.7665010645848119, "learning_rate": 4.361986806362164e-05, "lm_loss": 0.7272, "loss": 0.7272, "step": 1710 }, { "epoch": 0.7669493108214113, "learning_rate": 4.361178927194479e-05, "lm_loss": 0.7577, "loss": 0.7577, "step": 1711 }, { "epoch": 0.7673975570580105, "learning_rate": 4.360370611779934e-05, "lm_loss": 0.6677, "loss": 0.6677, "step": 1712 }, { "epoch": 0.7678458032946098, "learning_rate": 4.35956186030799e-05, "lm_loss": 0.9568, "loss": 0.9568, "step": 1713 }, { "epoch": 0.7682940495312092, "learning_rate": 4.358752672968215e-05, "lm_loss": 1.0976, "loss": 1.0976, "step": 1714 }, { "epoch": 0.7687422957678085, "learning_rate": 4.3579430499502736e-05, "lm_loss": 1.4366, "loss": 1.4366, "step": 1715 }, { "epoch": 0.7691905420044077, "learning_rate": 4.357132991443938e-05, "lm_loss": 0.7175, "loss": 0.7175, "step": 1716 }, { "epoch": 0.7696387882410071, "learning_rate": 4.3563224976390784e-05, "lm_loss": 0.7606, "loss": 0.7606, "step": 1717 }, { "epoch": 0.7700870344776064, "learning_rate": 4.355511568725671e-05, "lm_loss": 0.7772, "loss": 0.7772, "step": 1718 }, { "epoch": 0.7705352807142056, "learning_rate": 4.354700204893789e-05, "lm_loss": 1.6602, "loss": 1.6602, "step": 1719 }, { "epoch": 0.770983526950805, "learning_rate": 4.3538884063336117e-05, "lm_loss": 1.0585, "loss": 1.0585, "step": 1720 }, { "epoch": 0.7714317731874043, "learning_rate": 4.353076173235418e-05, "lm_loss": 1.4861, "loss": 1.4861, "step": 1721 }, { "epoch": 0.7718800194240036, "learning_rate": 4.35226350578959e-05, "lm_loss": 0.8204, "loss": 0.8204, "step": 1722 }, { "epoch": 0.7723282656606029, "learning_rate": 4.351450404186611e-05, "lm_loss": 0.8399, "loss": 0.8399, "step": 1723 }, { "epoch": 0.7727765118972022, "learning_rate": 4.3506368686170664e-05, "lm_loss": 0.7369, "loss": 0.7369, "step": 1724 }, { "epoch": 0.7732247581338015, "learning_rate": 4.349822899271643e-05, "lm_loss": 0.8248, "loss": 0.8248, "step": 1725 }, { "epoch": 0.7736730043704008, "learning_rate": 4.3490084963411285e-05, "lm_loss": 0.6599, "loss": 0.6599, "step": 1726 }, { "epoch": 0.7741212506070001, "learning_rate": 4.348193660016414e-05, "lm_loss": 0.8295, "loss": 0.8295, "step": 1727 }, { "epoch": 0.7745694968435994, "learning_rate": 4.34737839048849e-05, "lm_loss": 0.8382, "loss": 0.8382, "step": 1728 }, { "epoch": 0.7750177430801988, "learning_rate": 4.346562687948451e-05, "lm_loss": 1.6404, "loss": 1.6404, "step": 1729 }, { "epoch": 0.775465989316798, "learning_rate": 4.3457465525874916e-05, "lm_loss": 0.8368, "loss": 0.8368, "step": 1730 }, { "epoch": 0.7759142355533973, "learning_rate": 4.344929984596908e-05, "lm_loss": 0.7073, "loss": 0.7073, "step": 1731 }, { "epoch": 0.7763624817899967, "learning_rate": 4.344112984168098e-05, "lm_loss": 0.6528, "loss": 0.6528, "step": 1732 }, { "epoch": 0.7768107280265959, "learning_rate": 4.3432955514925585e-05, "lm_loss": 0.8615, "loss": 0.8615, "step": 1733 }, { "epoch": 0.7772589742631952, "learning_rate": 4.342477686761893e-05, "lm_loss": 0.7653, "loss": 0.7653, "step": 1734 }, { "epoch": 0.7777072204997946, "learning_rate": 4.341659390167802e-05, "lm_loss": 0.7209, "loss": 0.7209, "step": 1735 }, { "epoch": 0.7781554667363939, "learning_rate": 4.340840661902087e-05, "lm_loss": 0.8846, "loss": 0.8846, "step": 1736 }, { "epoch": 0.7786037129729931, "learning_rate": 4.340021502156653e-05, "lm_loss": 0.7393, "loss": 0.7393, "step": 1737 }, { "epoch": 0.7790519592095925, "learning_rate": 4.339201911123504e-05, "lm_loss": 0.9678, "loss": 0.9678, "step": 1738 }, { "epoch": 0.7795002054461918, "learning_rate": 4.338381888994748e-05, "lm_loss": 0.6735, "loss": 0.6735, "step": 1739 }, { "epoch": 0.779948451682791, "learning_rate": 4.3375614359625914e-05, "lm_loss": 0.8919, "loss": 0.8919, "step": 1740 }, { "epoch": 0.7803966979193904, "learning_rate": 4.336740552219343e-05, "lm_loss": 0.8053, "loss": 0.8053, "step": 1741 }, { "epoch": 0.7808449441559897, "learning_rate": 4.3359192379574106e-05, "lm_loss": 0.7942, "loss": 0.7942, "step": 1742 }, { "epoch": 0.781293190392589, "learning_rate": 4.335097493369306e-05, "lm_loss": 0.7391, "loss": 0.7391, "step": 1743 }, { "epoch": 0.7817414366291883, "learning_rate": 4.334275318647638e-05, "lm_loss": 0.7492, "loss": 0.7492, "step": 1744 }, { "epoch": 0.7821896828657876, "learning_rate": 4.333452713985121e-05, "lm_loss": 0.9262, "loss": 0.9262, "step": 1745 }, { "epoch": 0.7826379291023869, "learning_rate": 4.332629679574566e-05, "lm_loss": 0.6967, "loss": 0.6967, "step": 1746 }, { "epoch": 0.7830861753389862, "learning_rate": 4.331806215608887e-05, "lm_loss": 0.7599, "loss": 0.7599, "step": 1747 }, { "epoch": 0.7835344215755855, "learning_rate": 4.3309823222810966e-05, "lm_loss": 0.8408, "loss": 0.8408, "step": 1748 }, { "epoch": 0.7839826678121848, "learning_rate": 4.330157999784311e-05, "lm_loss": 0.7773, "loss": 0.7773, "step": 1749 }, { "epoch": 0.7844309140487842, "learning_rate": 4.3293332483117454e-05, "lm_loss": 0.7848, "loss": 0.7848, "step": 1750 }, { "epoch": 0.7848791602853834, "learning_rate": 4.328508068056715e-05, "lm_loss": 0.6688, "loss": 0.6688, "step": 1751 }, { "epoch": 0.7853274065219827, "learning_rate": 4.3276824592126355e-05, "lm_loss": 0.875, "loss": 0.875, "step": 1752 }, { "epoch": 0.7857756527585821, "learning_rate": 4.326856421973024e-05, "lm_loss": 0.6987, "loss": 0.6987, "step": 1753 }, { "epoch": 0.7862238989951813, "learning_rate": 4.326029956531499e-05, "lm_loss": 0.8488, "loss": 0.8488, "step": 1754 }, { "epoch": 0.7866721452317806, "learning_rate": 4.325203063081776e-05, "lm_loss": 0.7311, "loss": 0.7311, "step": 1755 }, { "epoch": 0.78712039146838, "learning_rate": 4.324375741817674e-05, "lm_loss": 0.6844, "loss": 0.6844, "step": 1756 }, { "epoch": 0.7875686377049793, "learning_rate": 4.323547992933111e-05, "lm_loss": 0.9307, "loss": 0.9307, "step": 1757 }, { "epoch": 0.7880168839415785, "learning_rate": 4.322719816622105e-05, "lm_loss": 0.7849, "loss": 0.7849, "step": 1758 }, { "epoch": 0.7884651301781779, "learning_rate": 4.321891213078775e-05, "lm_loss": 0.7893, "loss": 0.7893, "step": 1759 }, { "epoch": 0.7889133764147772, "learning_rate": 4.3210621824973384e-05, "lm_loss": 0.8511, "loss": 0.8511, "step": 1760 }, { "epoch": 0.7893616226513764, "learning_rate": 4.320232725072116e-05, "lm_loss": 0.7761, "loss": 0.7761, "step": 1761 }, { "epoch": 0.7898098688879758, "learning_rate": 4.319402840997526e-05, "lm_loss": 0.6899, "loss": 0.6899, "step": 1762 }, { "epoch": 0.7902581151245751, "learning_rate": 4.3185725304680865e-05, "lm_loss": 0.6849, "loss": 0.6849, "step": 1763 }, { "epoch": 0.7907063613611744, "learning_rate": 4.3177417936784156e-05, "lm_loss": 0.8445, "loss": 0.8445, "step": 1764 }, { "epoch": 0.7911546075977737, "learning_rate": 4.3169106308232345e-05, "lm_loss": 0.8516, "loss": 0.8516, "step": 1765 }, { "epoch": 0.791602853834373, "learning_rate": 4.316079042097359e-05, "lm_loss": 0.6672, "loss": 0.6672, "step": 1766 }, { "epoch": 0.7920511000709723, "learning_rate": 4.315247027695709e-05, "lm_loss": 0.7367, "loss": 0.7367, "step": 1767 }, { "epoch": 0.7924993463075716, "learning_rate": 4.3144145878133025e-05, "lm_loss": 1.6845, "loss": 1.6845, "step": 1768 }, { "epoch": 0.7929475925441709, "learning_rate": 4.3135817226452565e-05, "lm_loss": 0.7026, "loss": 0.7026, "step": 1769 }, { "epoch": 0.7933958387807702, "learning_rate": 4.3127484323867906e-05, "lm_loss": 0.8421, "loss": 0.8421, "step": 1770 }, { "epoch": 0.7938440850173696, "learning_rate": 4.3119147172332194e-05, "lm_loss": 0.9149, "loss": 0.9149, "step": 1771 }, { "epoch": 0.7942923312539688, "learning_rate": 4.3110805773799625e-05, "lm_loss": 0.6169, "loss": 0.6169, "step": 1772 }, { "epoch": 0.7947405774905681, "learning_rate": 4.310246013022534e-05, "lm_loss": 0.7347, "loss": 0.7347, "step": 1773 }, { "epoch": 0.7951888237271675, "learning_rate": 4.30941102435655e-05, "lm_loss": 0.8532, "loss": 0.8532, "step": 1774 }, { "epoch": 0.7956370699637667, "learning_rate": 4.308575611577727e-05, "lm_loss": 0.6003, "loss": 0.6003, "step": 1775 }, { "epoch": 0.796085316200366, "learning_rate": 4.307739774881878e-05, "lm_loss": 0.8105, "loss": 0.8105, "step": 1776 }, { "epoch": 0.7965335624369654, "learning_rate": 4.306903514464919e-05, "lm_loss": 1.1362, "loss": 1.1362, "step": 1777 }, { "epoch": 0.7969818086735647, "learning_rate": 4.306066830522862e-05, "lm_loss": 1.3739, "loss": 1.3739, "step": 1778 }, { "epoch": 0.7974300549101639, "learning_rate": 4.30522972325182e-05, "lm_loss": 0.831, "loss": 0.831, "step": 1779 }, { "epoch": 0.7978783011467633, "learning_rate": 4.3043921928480044e-05, "lm_loss": 1.0525, "loss": 1.0525, "step": 1780 }, { "epoch": 0.7983265473833626, "learning_rate": 4.3035542395077274e-05, "lm_loss": 0.58, "loss": 0.58, "step": 1781 }, { "epoch": 0.798774793619962, "learning_rate": 4.302715863427399e-05, "lm_loss": 1.8336, "loss": 1.8336, "step": 1782 }, { "epoch": 0.7992230398565612, "learning_rate": 4.3018770648035275e-05, "lm_loss": 0.7118, "loss": 0.7118, "step": 1783 }, { "epoch": 0.7996712860931605, "learning_rate": 4.301037843832723e-05, "lm_loss": 0.7553, "loss": 0.7553, "step": 1784 }, { "epoch": 0.8001195323297599, "learning_rate": 4.30019820071169e-05, "lm_loss": 0.8723, "loss": 0.8723, "step": 1785 }, { "epoch": 0.8005677785663591, "learning_rate": 4.299358135637238e-05, "lm_loss": 1.6436, "loss": 1.6436, "step": 1786 }, { "epoch": 0.8010160248029584, "learning_rate": 4.298517648806269e-05, "lm_loss": 0.6482, "loss": 0.6482, "step": 1787 }, { "epoch": 0.8014642710395578, "learning_rate": 4.297676740415789e-05, "lm_loss": 0.9196, "loss": 0.9196, "step": 1788 }, { "epoch": 0.8019125172761571, "learning_rate": 4.296835410662901e-05, "lm_loss": 0.7485, "loss": 0.7485, "step": 1789 }, { "epoch": 0.8023607635127563, "learning_rate": 4.2959936597448056e-05, "lm_loss": 0.7171, "loss": 0.7171, "step": 1790 }, { "epoch": 0.8028090097493557, "learning_rate": 4.295151487858804e-05, "lm_loss": 0.9993, "loss": 0.9993, "step": 1791 }, { "epoch": 0.803257255985955, "learning_rate": 4.2943088952022945e-05, "lm_loss": 0.8097, "loss": 0.8097, "step": 1792 }, { "epoch": 0.8037055022225542, "learning_rate": 4.293465881972775e-05, "lm_loss": 0.8763, "loss": 0.8763, "step": 1793 }, { "epoch": 0.8041537484591536, "learning_rate": 4.292622448367841e-05, "lm_loss": 1.4702, "loss": 1.4702, "step": 1794 }, { "epoch": 0.8046019946957529, "learning_rate": 4.291778594585189e-05, "lm_loss": 1.1159, "loss": 1.1159, "step": 1795 }, { "epoch": 0.8050502409323522, "learning_rate": 4.2909343208226106e-05, "lm_loss": 1.2302, "loss": 1.2302, "step": 1796 }, { "epoch": 0.8054984871689514, "learning_rate": 4.290089627277998e-05, "lm_loss": 0.8204, "loss": 0.8204, "step": 1797 }, { "epoch": 0.8059467334055508, "learning_rate": 4.289244514149341e-05, "lm_loss": 1.0866, "loss": 1.0866, "step": 1798 }, { "epoch": 0.8063949796421501, "learning_rate": 4.288398981634728e-05, "lm_loss": 1.3232, "loss": 1.3232, "step": 1799 }, { "epoch": 0.8068432258787493, "learning_rate": 4.287553029932346e-05, "lm_loss": 0.8022, "loss": 0.8022, "step": 1800 }, { "epoch": 0.8072914721153487, "learning_rate": 4.28670665924048e-05, "lm_loss": 0.7571, "loss": 0.7571, "step": 1801 }, { "epoch": 0.807739718351948, "learning_rate": 4.2858598697575133e-05, "lm_loss": 0.7235, "loss": 0.7235, "step": 1802 }, { "epoch": 0.8081879645885474, "learning_rate": 4.285012661681926e-05, "lm_loss": 0.7831, "loss": 0.7831, "step": 1803 }, { "epoch": 0.8086362108251466, "learning_rate": 4.2841650352123e-05, "lm_loss": 1.0766, "loss": 1.0766, "step": 1804 }, { "epoch": 0.8090844570617459, "learning_rate": 4.28331699054731e-05, "lm_loss": 1.3176, "loss": 1.3176, "step": 1805 }, { "epoch": 0.8095327032983453, "learning_rate": 4.2824685278857337e-05, "lm_loss": 0.8113, "loss": 0.8113, "step": 1806 }, { "epoch": 0.8099809495349445, "learning_rate": 4.281619647426443e-05, "lm_loss": 0.6661, "loss": 0.6661, "step": 1807 }, { "epoch": 0.8104291957715438, "learning_rate": 4.28077034936841e-05, "lm_loss": 0.9261, "loss": 0.9261, "step": 1808 }, { "epoch": 0.8108774420081432, "learning_rate": 4.279920633910704e-05, "lm_loss": 0.7814, "loss": 0.7814, "step": 1809 }, { "epoch": 0.8113256882447425, "learning_rate": 4.279070501252493e-05, "lm_loss": 0.7511, "loss": 0.7511, "step": 1810 }, { "epoch": 0.8117739344813417, "learning_rate": 4.278219951593041e-05, "lm_loss": 0.8293, "loss": 0.8293, "step": 1811 }, { "epoch": 0.8122221807179411, "learning_rate": 4.27736898513171e-05, "lm_loss": 0.7076, "loss": 0.7076, "step": 1812 }, { "epoch": 0.8126704269545404, "learning_rate": 4.276517602067962e-05, "lm_loss": 1.1099, "loss": 1.1099, "step": 1813 }, { "epoch": 0.8131186731911396, "learning_rate": 4.275665802601354e-05, "lm_loss": 1.279, "loss": 1.279, "step": 1814 }, { "epoch": 0.813566919427739, "learning_rate": 4.274813586931542e-05, "lm_loss": 0.6962, "loss": 0.6962, "step": 1815 }, { "epoch": 0.8140151656643383, "learning_rate": 4.2739609552582785e-05, "lm_loss": 0.7595, "loss": 0.7595, "step": 1816 }, { "epoch": 0.8144634119009376, "learning_rate": 4.273107907781415e-05, "lm_loss": 0.6777, "loss": 0.6777, "step": 1817 }, { "epoch": 0.8149116581375369, "learning_rate": 4.2722544447009e-05, "lm_loss": 0.9139, "loss": 0.9139, "step": 1818 }, { "epoch": 0.8153599043741362, "learning_rate": 4.2714005662167774e-05, "lm_loss": 0.7338, "loss": 0.7338, "step": 1819 }, { "epoch": 0.8158081506107355, "learning_rate": 4.270546272529191e-05, "lm_loss": 0.6831, "loss": 0.6831, "step": 1820 }, { "epoch": 0.8162563968473348, "learning_rate": 4.2696915638383825e-05, "lm_loss": 0.7919, "loss": 0.7919, "step": 1821 }, { "epoch": 0.8167046430839341, "learning_rate": 4.2688364403446866e-05, "lm_loss": 0.7057, "loss": 0.7057, "step": 1822 }, { "epoch": 0.8171528893205334, "learning_rate": 4.2679809022485407e-05, "lm_loss": 0.8598, "loss": 0.8598, "step": 1823 }, { "epoch": 0.8176011355571328, "learning_rate": 4.2671249497504754e-05, "lm_loss": 0.7299, "loss": 0.7299, "step": 1824 }, { "epoch": 0.818049381793732, "learning_rate": 4.26626858305112e-05, "lm_loss": 0.8147, "loss": 0.8147, "step": 1825 }, { "epoch": 0.8184976280303313, "learning_rate": 4.2654118023512006e-05, "lm_loss": 0.7131, "loss": 0.7131, "step": 1826 }, { "epoch": 0.8189458742669307, "learning_rate": 4.264554607851541e-05, "lm_loss": 0.6906, "loss": 0.6906, "step": 1827 }, { "epoch": 0.8193941205035299, "learning_rate": 4.26369699975306e-05, "lm_loss": 0.9996, "loss": 0.9996, "step": 1828 }, { "epoch": 0.8198423667401292, "learning_rate": 4.262838978256776e-05, "lm_loss": 1.7806, "loss": 1.7806, "step": 1829 }, { "epoch": 0.8202906129767286, "learning_rate": 4.261980543563802e-05, "lm_loss": 1.0061, "loss": 1.0061, "step": 1830 }, { "epoch": 0.8207388592133279, "learning_rate": 4.2611216958753496e-05, "lm_loss": 0.967, "loss": 0.967, "step": 1831 }, { "epoch": 0.8211871054499271, "learning_rate": 4.2602624353927265e-05, "lm_loss": 0.798, "loss": 0.798, "step": 1832 }, { "epoch": 0.8216353516865265, "learning_rate": 4.2594027623173374e-05, "lm_loss": 1.5832, "loss": 1.5832, "step": 1833 }, { "epoch": 0.8220835979231258, "learning_rate": 4.2585426768506825e-05, "lm_loss": 1.5247, "loss": 1.5247, "step": 1834 }, { "epoch": 0.822531844159725, "learning_rate": 4.2576821791943604e-05, "lm_loss": 0.7023, "loss": 0.7023, "step": 1835 }, { "epoch": 0.8229800903963244, "learning_rate": 4.256821269550065e-05, "lm_loss": 0.847, "loss": 0.847, "step": 1836 }, { "epoch": 0.8234283366329237, "learning_rate": 4.2559599481195876e-05, "lm_loss": 0.834, "loss": 0.834, "step": 1837 }, { "epoch": 0.823876582869523, "learning_rate": 4.255098215104816e-05, "lm_loss": 1.1463, "loss": 1.1463, "step": 1838 }, { "epoch": 0.8243248291061223, "learning_rate": 4.254236070707733e-05, "lm_loss": 1.0693, "loss": 1.0693, "step": 1839 }, { "epoch": 0.8247730753427216, "learning_rate": 4.2533735151304206e-05, "lm_loss": 0.7576, "loss": 0.7576, "step": 1840 }, { "epoch": 0.8252213215793209, "learning_rate": 4.252510548575054e-05, "lm_loss": 0.6795, "loss": 0.6795, "step": 1841 }, { "epoch": 0.8256695678159202, "learning_rate": 4.251647171243908e-05, "lm_loss": 0.7498, "loss": 0.7498, "step": 1842 }, { "epoch": 0.8261178140525195, "learning_rate": 4.2507833833393504e-05, "lm_loss": 1.0209, "loss": 1.0209, "step": 1843 }, { "epoch": 0.8265660602891188, "learning_rate": 4.249919185063848e-05, "lm_loss": 1.3057, "loss": 1.3057, "step": 1844 }, { "epoch": 0.8270143065257182, "learning_rate": 4.2490545766199616e-05, "lm_loss": 0.7511, "loss": 0.7511, "step": 1845 }, { "epoch": 0.8274625527623174, "learning_rate": 4.2481895582103495e-05, "lm_loss": 1.0047, "loss": 1.0047, "step": 1846 }, { "epoch": 0.8279107989989167, "learning_rate": 4.247324130037767e-05, "lm_loss": 1.2506, "loss": 1.2506, "step": 1847 }, { "epoch": 0.8283590452355161, "learning_rate": 4.246458292305062e-05, "lm_loss": 1.3477, "loss": 1.3477, "step": 1848 }, { "epoch": 0.8288072914721154, "learning_rate": 4.245592045215182e-05, "lm_loss": 0.7106, "loss": 0.7106, "step": 1849 }, { "epoch": 0.8292555377087146, "learning_rate": 4.244725388971169e-05, "lm_loss": 0.9452, "loss": 0.9452, "step": 1850 }, { "epoch": 0.829703783945314, "learning_rate": 4.243858323776161e-05, "lm_loss": 0.7273, "loss": 0.7273, "step": 1851 }, { "epoch": 0.8301520301819133, "learning_rate": 4.242990849833391e-05, "lm_loss": 0.6081, "loss": 0.6081, "step": 1852 }, { "epoch": 0.8306002764185125, "learning_rate": 4.2421229673461896e-05, "lm_loss": 0.6901, "loss": 0.6901, "step": 1853 }, { "epoch": 0.8310485226551119, "learning_rate": 4.241254676517982e-05, "lm_loss": 0.9582, "loss": 0.9582, "step": 1854 }, { "epoch": 0.8314967688917112, "learning_rate": 4.240385977552288e-05, "lm_loss": 0.9775, "loss": 0.9775, "step": 1855 }, { "epoch": 0.8319450151283105, "learning_rate": 4.2395168706527253e-05, "lm_loss": 1.2368, "loss": 1.2368, "step": 1856 }, { "epoch": 0.8323932613649098, "learning_rate": 4.238647356023007e-05, "lm_loss": 0.6972, "loss": 0.6972, "step": 1857 }, { "epoch": 0.8328415076015091, "learning_rate": 4.23777743386694e-05, "lm_loss": 0.8005, "loss": 0.8005, "step": 1858 }, { "epoch": 0.8332897538381084, "learning_rate": 4.236907104388429e-05, "lm_loss": 0.7816, "loss": 0.7816, "step": 1859 }, { "epoch": 0.8337380000747077, "learning_rate": 4.23603636779147e-05, "lm_loss": 0.714, "loss": 0.714, "step": 1860 }, { "epoch": 0.834186246311307, "learning_rate": 4.2351652242801607e-05, "lm_loss": 0.9063, "loss": 0.9063, "step": 1861 }, { "epoch": 0.8346344925479063, "learning_rate": 4.23429367405869e-05, "lm_loss": 0.5549, "loss": 0.5549, "step": 1862 }, { "epoch": 0.8350827387845057, "learning_rate": 4.2334217173313416e-05, "lm_loss": 0.894, "loss": 0.894, "step": 1863 }, { "epoch": 0.8355309850211049, "learning_rate": 4.232549354302496e-05, "lm_loss": 1.0793, "loss": 1.0793, "step": 1864 }, { "epoch": 0.8359792312577042, "learning_rate": 4.231676585176631e-05, "lm_loss": 1.3295, "loss": 1.3295, "step": 1865 }, { "epoch": 0.8364274774943036, "learning_rate": 4.230803410158315e-05, "lm_loss": 0.7549, "loss": 0.7549, "step": 1866 }, { "epoch": 0.8368757237309028, "learning_rate": 4.229929829452215e-05, "lm_loss": 0.6509, "loss": 0.6509, "step": 1867 }, { "epoch": 0.8373239699675021, "learning_rate": 4.229055843263092e-05, "lm_loss": 0.9506, "loss": 0.9506, "step": 1868 }, { "epoch": 0.8377722162041015, "learning_rate": 4.228181451795801e-05, "lm_loss": 0.6351, "loss": 0.6351, "step": 1869 }, { "epoch": 0.8382204624407008, "learning_rate": 4.2273066552552945e-05, "lm_loss": 0.8072, "loss": 0.8072, "step": 1870 }, { "epoch": 0.8386687086773, "learning_rate": 4.226431453846616e-05, "lm_loss": 0.7322, "loss": 0.7322, "step": 1871 }, { "epoch": 0.8391169549138994, "learning_rate": 4.22555584777491e-05, "lm_loss": 0.7429, "loss": 0.7429, "step": 1872 }, { "epoch": 0.8395652011504987, "learning_rate": 4.22467983724541e-05, "lm_loss": 0.8589, "loss": 0.8589, "step": 1873 }, { "epoch": 0.8400134473870979, "learning_rate": 4.223803422463447e-05, "lm_loss": 0.5699, "loss": 0.5699, "step": 1874 }, { "epoch": 0.8404616936236973, "learning_rate": 4.2229266036344455e-05, "lm_loss": 0.8821, "loss": 0.8821, "step": 1875 }, { "epoch": 0.8409099398602966, "learning_rate": 4.222049380963927e-05, "lm_loss": 0.9849, "loss": 0.9849, "step": 1876 }, { "epoch": 0.8413581860968959, "learning_rate": 4.221171754657505e-05, "lm_loss": 1.0507, "loss": 1.0507, "step": 1877 }, { "epoch": 0.8418064323334952, "learning_rate": 4.220293724920889e-05, "lm_loss": 1.0839, "loss": 1.0839, "step": 1878 }, { "epoch": 0.8422546785700945, "learning_rate": 4.2194152919598814e-05, "lm_loss": 0.8373, "loss": 0.8373, "step": 1879 }, { "epoch": 0.8427029248066938, "learning_rate": 4.218536455980384e-05, "lm_loss": 0.7227, "loss": 0.7227, "step": 1880 }, { "epoch": 0.8431511710432931, "learning_rate": 4.2176572171883865e-05, "lm_loss": 0.7557, "loss": 0.7557, "step": 1881 }, { "epoch": 0.8435994172798924, "learning_rate": 4.216777575789977e-05, "lm_loss": 0.6632, "loss": 0.6632, "step": 1882 }, { "epoch": 0.8440476635164917, "learning_rate": 4.215897531991337e-05, "lm_loss": 0.8433, "loss": 0.8433, "step": 1883 }, { "epoch": 0.8444959097530911, "learning_rate": 4.2150170859987435e-05, "lm_loss": 0.6836, "loss": 0.6836, "step": 1884 }, { "epoch": 0.8449441559896903, "learning_rate": 4.2141362380185655e-05, "lm_loss": 0.8897, "loss": 0.8897, "step": 1885 }, { "epoch": 0.8453924022262896, "learning_rate": 4.213254988257267e-05, "lm_loss": 0.7302, "loss": 0.7302, "step": 1886 }, { "epoch": 0.845840648462889, "learning_rate": 4.212373336921409e-05, "lm_loss": 0.7616, "loss": 0.7616, "step": 1887 }, { "epoch": 0.8462888946994882, "learning_rate": 4.2114912842176404e-05, "lm_loss": 0.7596, "loss": 0.7596, "step": 1888 }, { "epoch": 0.8467371409360875, "learning_rate": 4.2106088303527115e-05, "lm_loss": 0.7531, "loss": 0.7531, "step": 1889 }, { "epoch": 0.8471853871726869, "learning_rate": 4.2097259755334606e-05, "lm_loss": 0.8239, "loss": 0.8239, "step": 1890 }, { "epoch": 0.8476336334092862, "learning_rate": 4.208842719966823e-05, "lm_loss": 1.4544, "loss": 1.4544, "step": 1891 }, { "epoch": 0.8480818796458854, "learning_rate": 4.2079590638598294e-05, "lm_loss": 0.7379, "loss": 0.7379, "step": 1892 }, { "epoch": 0.8485301258824848, "learning_rate": 4.2070750074195994e-05, "lm_loss": 0.832, "loss": 0.832, "step": 1893 }, { "epoch": 0.8489783721190841, "learning_rate": 4.206190550853353e-05, "lm_loss": 0.747, "loss": 0.747, "step": 1894 }, { "epoch": 0.8494266183556833, "learning_rate": 4.205305694368396e-05, "lm_loss": 1.0231, "loss": 1.0231, "step": 1895 }, { "epoch": 0.8498748645922827, "learning_rate": 4.204420438172135e-05, "lm_loss": 0.6822, "loss": 0.6822, "step": 1896 }, { "epoch": 0.850323110828882, "learning_rate": 4.2035347824720686e-05, "lm_loss": 1.6483, "loss": 1.6483, "step": 1897 }, { "epoch": 0.8507713570654813, "learning_rate": 4.2026487274757857e-05, "lm_loss": 0.7166, "loss": 0.7166, "step": 1898 }, { "epoch": 0.8512196033020806, "learning_rate": 4.201762273390972e-05, "lm_loss": 0.7428, "loss": 0.7428, "step": 1899 }, { "epoch": 0.8516678495386799, "learning_rate": 4.200875420425407e-05, "lm_loss": 0.7643, "loss": 0.7643, "step": 1900 }, { "epoch": 0.8521160957752792, "learning_rate": 4.1999881687869614e-05, "lm_loss": 0.8395, "loss": 0.8395, "step": 1901 }, { "epoch": 0.8525643420118785, "learning_rate": 4.1991005186836005e-05, "lm_loss": 1.5045, "loss": 1.5045, "step": 1902 }, { "epoch": 0.8530125882484778, "learning_rate": 4.198212470323385e-05, "lm_loss": 1.032, "loss": 1.032, "step": 1903 }, { "epoch": 0.8534608344850771, "learning_rate": 4.197324023914464e-05, "lm_loss": 1.1327, "loss": 1.1327, "step": 1904 }, { "epoch": 0.8539090807216765, "learning_rate": 4.1964351796650845e-05, "lm_loss": 0.8464, "loss": 0.8464, "step": 1905 }, { "epoch": 0.8543573269582757, "learning_rate": 4.195545937783586e-05, "lm_loss": 0.9664, "loss": 0.9664, "step": 1906 }, { "epoch": 0.854805573194875, "learning_rate": 4.194656298478399e-05, "lm_loss": 1.3734, "loss": 1.3734, "step": 1907 }, { "epoch": 0.8552538194314744, "learning_rate": 4.1937662619580484e-05, "lm_loss": 0.5982, "loss": 0.5982, "step": 1908 }, { "epoch": 0.8557020656680736, "learning_rate": 4.1928758284311524e-05, "lm_loss": 0.8272, "loss": 0.8272, "step": 1909 }, { "epoch": 0.856150311904673, "learning_rate": 4.1919849981064234e-05, "lm_loss": 0.6555, "loss": 0.6555, "step": 1910 }, { "epoch": 0.8565985581412723, "learning_rate": 4.1910937711926645e-05, "lm_loss": 0.7676, "loss": 0.7676, "step": 1911 }, { "epoch": 0.8570468043778716, "learning_rate": 4.1902021478987744e-05, "lm_loss": 0.9308, "loss": 0.9308, "step": 1912 }, { "epoch": 0.8574950506144708, "learning_rate": 4.1893101284337397e-05, "lm_loss": 0.7484, "loss": 0.7484, "step": 1913 }, { "epoch": 0.8579432968510702, "learning_rate": 4.188417713006647e-05, "lm_loss": 0.6502, "loss": 0.6502, "step": 1914 }, { "epoch": 0.8583915430876695, "learning_rate": 4.1875249018266695e-05, "lm_loss": 0.7671, "loss": 0.7671, "step": 1915 }, { "epoch": 0.8588397893242687, "learning_rate": 4.186631695103077e-05, "lm_loss": 1.2683, "loss": 1.2683, "step": 1916 }, { "epoch": 0.8592880355608681, "learning_rate": 4.1857380930452305e-05, "lm_loss": 1.0581, "loss": 1.0581, "step": 1917 }, { "epoch": 0.8597362817974674, "learning_rate": 4.184844095862583e-05, "lm_loss": 0.7108, "loss": 0.7108, "step": 1918 }, { "epoch": 0.8601845280340668, "learning_rate": 4.183949703764683e-05, "lm_loss": 1.2842, "loss": 1.2842, "step": 1919 }, { "epoch": 0.860632774270666, "learning_rate": 4.183054916961168e-05, "lm_loss": 0.9971, "loss": 0.9971, "step": 1920 }, { "epoch": 0.8610810205072653, "learning_rate": 4.182159735661769e-05, "lm_loss": 0.9659, "loss": 0.9659, "step": 1921 }, { "epoch": 0.8615292667438647, "learning_rate": 4.1812641600763104e-05, "lm_loss": 1.1694, "loss": 1.1694, "step": 1922 }, { "epoch": 0.861977512980464, "learning_rate": 4.1803681904147094e-05, "lm_loss": 0.8854, "loss": 0.8854, "step": 1923 }, { "epoch": 0.8624257592170632, "learning_rate": 4.179471826886975e-05, "lm_loss": 0.6665, "loss": 0.6665, "step": 1924 }, { "epoch": 0.8628740054536626, "learning_rate": 4.1785750697032064e-05, "lm_loss": 0.8361, "loss": 0.8361, "step": 1925 }, { "epoch": 0.8633222516902619, "learning_rate": 4.1776779190735996e-05, "lm_loss": 0.7135, "loss": 0.7135, "step": 1926 }, { "epoch": 0.8637704979268611, "learning_rate": 4.176780375208438e-05, "lm_loss": 0.7215, "loss": 0.7215, "step": 1927 }, { "epoch": 0.8642187441634605, "learning_rate": 4.1758824383181005e-05, "lm_loss": 0.7524, "loss": 0.7524, "step": 1928 }, { "epoch": 0.8646669904000598, "learning_rate": 4.174984108613057e-05, "lm_loss": 0.6486, "loss": 0.6486, "step": 1929 }, { "epoch": 0.8651152366366591, "learning_rate": 4.174085386303869e-05, "lm_loss": 0.7799, "loss": 0.7799, "step": 1930 }, { "epoch": 0.8655634828732583, "learning_rate": 4.173186271601191e-05, "lm_loss": 0.6324, "loss": 0.6324, "step": 1931 }, { "epoch": 0.8660117291098577, "learning_rate": 4.1722867647157684e-05, "lm_loss": 1.5005, "loss": 1.5005, "step": 1932 }, { "epoch": 0.866459975346457, "learning_rate": 4.1713868658584386e-05, "lm_loss": 0.8931, "loss": 0.8931, "step": 1933 }, { "epoch": 0.8669082215830562, "learning_rate": 4.1704865752401334e-05, "lm_loss": 0.7362, "loss": 0.7362, "step": 1934 }, { "epoch": 0.8673564678196556, "learning_rate": 4.169585893071873e-05, "lm_loss": 0.6654, "loss": 0.6654, "step": 1935 }, { "epoch": 0.8678047140562549, "learning_rate": 4.168684819564771e-05, "lm_loss": 0.7018, "loss": 0.7018, "step": 1936 }, { "epoch": 0.8682529602928543, "learning_rate": 4.1677833549300324e-05, "lm_loss": 0.7384, "loss": 0.7384, "step": 1937 }, { "epoch": 0.8687012065294535, "learning_rate": 4.166881499378954e-05, "lm_loss": 0.7496, "loss": 0.7496, "step": 1938 }, { "epoch": 0.8691494527660528, "learning_rate": 4.1659792531229244e-05, "lm_loss": 0.9738, "loss": 0.9738, "step": 1939 }, { "epoch": 0.8695976990026522, "learning_rate": 4.165076616373423e-05, "lm_loss": 1.2719, "loss": 1.2719, "step": 1940 }, { "epoch": 0.8700459452392514, "learning_rate": 4.164173589342022e-05, "lm_loss": 0.7581, "loss": 0.7581, "step": 1941 }, { "epoch": 0.8704941914758507, "learning_rate": 4.163270172240384e-05, "lm_loss": 0.682, "loss": 0.682, "step": 1942 }, { "epoch": 0.8709424377124501, "learning_rate": 4.1623663652802646e-05, "lm_loss": 0.672, "loss": 0.672, "step": 1943 }, { "epoch": 0.8713906839490494, "learning_rate": 4.1614621686735076e-05, "lm_loss": 0.6606, "loss": 0.6606, "step": 1944 }, { "epoch": 0.8718389301856486, "learning_rate": 4.160557582632051e-05, "lm_loss": 0.7417, "loss": 0.7417, "step": 1945 }, { "epoch": 0.872287176422248, "learning_rate": 4.159652607367924e-05, "lm_loss": 0.582, "loss": 0.582, "step": 1946 }, { "epoch": 0.8727354226588473, "learning_rate": 4.158747243093245e-05, "lm_loss": 0.6877, "loss": 0.6877, "step": 1947 }, { "epoch": 0.8731836688954465, "learning_rate": 4.157841490020227e-05, "lm_loss": 0.6202, "loss": 0.6202, "step": 1948 }, { "epoch": 0.8736319151320459, "learning_rate": 4.1569353483611686e-05, "lm_loss": 0.6395, "loss": 0.6395, "step": 1949 }, { "epoch": 0.8740801613686452, "learning_rate": 4.1560288183284645e-05, "lm_loss": 0.8894, "loss": 0.8894, "step": 1950 }, { "epoch": 0.8745284076052445, "learning_rate": 4.1551219001346e-05, "lm_loss": 0.7488, "loss": 0.7488, "step": 1951 }, { "epoch": 0.8749766538418438, "learning_rate": 4.154214593992149e-05, "lm_loss": 0.7282, "loss": 0.7282, "step": 1952 }, { "epoch": 0.8754249000784431, "learning_rate": 4.153306900113777e-05, "lm_loss": 0.71, "loss": 0.71, "step": 1953 }, { "epoch": 0.8758731463150424, "learning_rate": 4.152398818712242e-05, "lm_loss": 0.6659, "loss": 0.6659, "step": 1954 }, { "epoch": 0.8763213925516417, "learning_rate": 4.151490350000391e-05, "lm_loss": 0.8376, "loss": 0.8376, "step": 1955 }, { "epoch": 0.876769638788241, "learning_rate": 4.150581494191162e-05, "lm_loss": 0.5273, "loss": 0.5273, "step": 1956 }, { "epoch": 0.8772178850248403, "learning_rate": 4.149672251497585e-05, "lm_loss": 0.775, "loss": 0.775, "step": 1957 }, { "epoch": 0.8776661312614397, "learning_rate": 4.14876262213278e-05, "lm_loss": 0.7083, "loss": 0.7083, "step": 1958 }, { "epoch": 0.8781143774980389, "learning_rate": 4.1478526063099574e-05, "lm_loss": 0.7464, "loss": 0.7464, "step": 1959 }, { "epoch": 0.8785626237346382, "learning_rate": 4.146942204242418e-05, "lm_loss": 0.7882, "loss": 0.7882, "step": 1960 }, { "epoch": 0.8790108699712376, "learning_rate": 4.146031416143554e-05, "lm_loss": 0.5157, "loss": 0.5157, "step": 1961 }, { "epoch": 0.8794591162078368, "learning_rate": 4.145120242226847e-05, "lm_loss": 0.6549, "loss": 0.6549, "step": 1962 }, { "epoch": 0.8799073624444361, "learning_rate": 4.144208682705869e-05, "lm_loss": 0.7219, "loss": 0.7219, "step": 1963 }, { "epoch": 0.8803556086810355, "learning_rate": 4.1432967377942846e-05, "lm_loss": 0.7022, "loss": 0.7022, "step": 1964 }, { "epoch": 0.8808038549176348, "learning_rate": 4.142384407705846e-05, "lm_loss": 0.732, "loss": 0.732, "step": 1965 }, { "epoch": 0.881252101154234, "learning_rate": 4.141471692654397e-05, "lm_loss": 0.6617, "loss": 0.6617, "step": 1966 }, { "epoch": 0.8817003473908334, "learning_rate": 4.14055859285387e-05, "lm_loss": 0.7507, "loss": 0.7507, "step": 1967 }, { "epoch": 0.8821485936274327, "learning_rate": 4.139645108518292e-05, "lm_loss": 0.6987, "loss": 0.6987, "step": 1968 }, { "epoch": 0.8825968398640319, "learning_rate": 4.138731239861775e-05, "lm_loss": 0.6462, "loss": 0.6462, "step": 1969 }, { "epoch": 0.8830450861006313, "learning_rate": 4.137816987098524e-05, "lm_loss": 0.8705, "loss": 0.8705, "step": 1970 }, { "epoch": 0.8834933323372306, "learning_rate": 4.136902350442832e-05, "lm_loss": 0.5274, "loss": 0.5274, "step": 1971 }, { "epoch": 0.8839415785738299, "learning_rate": 4.135987330109085e-05, "lm_loss": 0.7808, "loss": 0.7808, "step": 1972 }, { "epoch": 0.8843898248104292, "learning_rate": 4.135071926311755e-05, "lm_loss": 0.6244, "loss": 0.6244, "step": 1973 }, { "epoch": 0.8848380710470285, "learning_rate": 4.134156139265408e-05, "lm_loss": 0.7022, "loss": 0.7022, "step": 1974 }, { "epoch": 0.8852863172836278, "learning_rate": 4.1332399691846965e-05, "lm_loss": 0.8168, "loss": 0.8168, "step": 1975 }, { "epoch": 0.8857345635202271, "learning_rate": 4.132323416284365e-05, "lm_loss": 0.4762, "loss": 0.4762, "step": 1976 }, { "epoch": 0.8861828097568264, "learning_rate": 4.131406480779247e-05, "lm_loss": 0.7814, "loss": 0.7814, "step": 1977 }, { "epoch": 0.8866310559934257, "learning_rate": 4.130489162884265e-05, "lm_loss": 0.6845, "loss": 0.6845, "step": 1978 }, { "epoch": 0.8870793022300251, "learning_rate": 4.129571462814431e-05, "lm_loss": 0.722, "loss": 0.722, "step": 1979 }, { "epoch": 0.8875275484666243, "learning_rate": 4.128653380784849e-05, "lm_loss": 0.6303, "loss": 0.6303, "step": 1980 }, { "epoch": 0.8879757947032236, "learning_rate": 4.127734917010709e-05, "lm_loss": 0.813, "loss": 0.813, "step": 1981 }, { "epoch": 0.888424040939823, "learning_rate": 4.1268160717072936e-05, "lm_loss": 0.5078, "loss": 0.5078, "step": 1982 }, { "epoch": 0.8888722871764222, "learning_rate": 4.125896845089973e-05, "lm_loss": 0.7707, "loss": 0.7707, "step": 1983 }, { "epoch": 0.8893205334130215, "learning_rate": 4.124977237374207e-05, "lm_loss": 0.6311, "loss": 0.6311, "step": 1984 }, { "epoch": 0.8897687796496209, "learning_rate": 4.1240572487755455e-05, "lm_loss": 0.7627, "loss": 0.7627, "step": 1985 }, { "epoch": 0.8902170258862202, "learning_rate": 4.123136879509626e-05, "lm_loss": 0.689, "loss": 0.689, "step": 1986 }, { "epoch": 0.8906652721228194, "learning_rate": 4.122216129792178e-05, "lm_loss": 0.6074, "loss": 0.6074, "step": 1987 }, { "epoch": 0.8911135183594188, "learning_rate": 4.121294999839018e-05, "lm_loss": 0.7766, "loss": 0.7766, "step": 1988 }, { "epoch": 0.8915617645960181, "learning_rate": 4.120373489866052e-05, "lm_loss": 0.85, "loss": 0.85, "step": 1989 }, { "epoch": 0.8920100108326174, "learning_rate": 4.119451600089275e-05, "lm_loss": 0.7671, "loss": 0.7671, "step": 1990 }, { "epoch": 0.8924582570692167, "learning_rate": 4.1185293307247704e-05, "lm_loss": 0.7237, "loss": 0.7237, "step": 1991 }, { "epoch": 0.892906503305816, "learning_rate": 4.117606681988714e-05, "lm_loss": 0.6846, "loss": 0.6846, "step": 1992 }, { "epoch": 0.8933547495424153, "learning_rate": 4.116683654097367e-05, "lm_loss": 0.7249, "loss": 0.7249, "step": 1993 }, { "epoch": 0.8938029957790146, "learning_rate": 4.1157602472670784e-05, "lm_loss": 0.6698, "loss": 0.6698, "step": 1994 }, { "epoch": 0.8942512420156139, "learning_rate": 4.114836461714291e-05, "lm_loss": 0.6673, "loss": 0.6673, "step": 1995 }, { "epoch": 0.8946994882522132, "learning_rate": 4.1139122976555324e-05, "lm_loss": 0.9317, "loss": 0.9317, "step": 1996 }, { "epoch": 0.8951477344888126, "learning_rate": 4.112987755307419e-05, "lm_loss": 1.2286, "loss": 1.2286, "step": 1997 }, { "epoch": 0.8955959807254118, "learning_rate": 4.1120628348866576e-05, "lm_loss": 1.3588, "loss": 1.3588, "step": 1998 }, { "epoch": 0.8960442269620111, "learning_rate": 4.111137536610043e-05, "lm_loss": 1.5363, "loss": 1.5363, "step": 1999 }, { "epoch": 0.8964924731986105, "learning_rate": 4.1102118606944586e-05, "lm_loss": 1.1685, "loss": 1.1685, "step": 2000 }, { "epoch": 0.8969407194352097, "learning_rate": 4.1092858073568755e-05, "lm_loss": 1.0205, "loss": 1.0205, "step": 2001 }, { "epoch": 0.897388965671809, "learning_rate": 4.108359376814353e-05, "lm_loss": 0.692, "loss": 0.692, "step": 2002 }, { "epoch": 0.8978372119084084, "learning_rate": 4.107432569284042e-05, "lm_loss": 0.7337, "loss": 0.7337, "step": 2003 }, { "epoch": 0.8982854581450077, "learning_rate": 4.106505384983178e-05, "lm_loss": 0.6582, "loss": 0.6582, "step": 2004 }, { "epoch": 0.8987337043816069, "learning_rate": 4.105577824129087e-05, "lm_loss": 0.7961, "loss": 0.7961, "step": 2005 }, { "epoch": 0.8991819506182063, "learning_rate": 4.104649886939183e-05, "lm_loss": 1.1072, "loss": 1.1072, "step": 2006 }, { "epoch": 0.8996301968548056, "learning_rate": 4.103721573630965e-05, "lm_loss": 1.6893, "loss": 1.6893, "step": 2007 }, { "epoch": 0.9000784430914048, "learning_rate": 4.102792884422025e-05, "lm_loss": 0.6741, "loss": 0.6741, "step": 2008 }, { "epoch": 0.9005266893280042, "learning_rate": 4.1018638195300425e-05, "lm_loss": 0.6073, "loss": 0.6073, "step": 2009 }, { "epoch": 0.9009749355646035, "learning_rate": 4.1009343791727806e-05, "lm_loss": 0.8391, "loss": 0.8391, "step": 2010 }, { "epoch": 0.9014231818012028, "learning_rate": 4.100004563568096e-05, "lm_loss": 0.9431, "loss": 0.9431, "step": 2011 }, { "epoch": 0.9018714280378021, "learning_rate": 4.0990743729339276e-05, "lm_loss": 0.4169, "loss": 0.4169, "step": 2012 }, { "epoch": 0.9023196742744014, "learning_rate": 4.0981438074883084e-05, "lm_loss": 0.7925, "loss": 0.7925, "step": 2013 }, { "epoch": 0.9027679205110007, "learning_rate": 4.0972128674493546e-05, "lm_loss": 0.609, "loss": 0.609, "step": 2014 }, { "epoch": 0.9032161667476, "learning_rate": 4.0962815530352734e-05, "lm_loss": 0.9488, "loss": 0.9488, "step": 2015 }, { "epoch": 0.9036644129841993, "learning_rate": 4.0953498644643554e-05, "lm_loss": 0.5661, "loss": 0.5661, "step": 2016 }, { "epoch": 0.9041126592207986, "learning_rate": 4.094417801954984e-05, "lm_loss": 0.7167, "loss": 0.7167, "step": 2017 }, { "epoch": 0.904560905457398, "learning_rate": 4.093485365725628e-05, "lm_loss": 0.7834, "loss": 0.7834, "step": 2018 }, { "epoch": 0.9050091516939972, "learning_rate": 4.092552555994841e-05, "lm_loss": 0.7186, "loss": 0.7186, "step": 2019 }, { "epoch": 0.9054573979305965, "learning_rate": 4.09161937298127e-05, "lm_loss": 0.6409, "loss": 0.6409, "step": 2020 }, { "epoch": 0.9059056441671959, "learning_rate": 4.090685816903644e-05, "lm_loss": 0.714, "loss": 0.714, "step": 2021 }, { "epoch": 0.9063538904037951, "learning_rate": 4.089751887980784e-05, "lm_loss": 0.7487, "loss": 0.7487, "step": 2022 }, { "epoch": 0.9068021366403944, "learning_rate": 4.088817586431595e-05, "lm_loss": 0.8473, "loss": 0.8473, "step": 2023 }, { "epoch": 0.9072503828769938, "learning_rate": 4.087882912475069e-05, "lm_loss": 0.6302, "loss": 0.6302, "step": 2024 }, { "epoch": 0.9076986291135931, "learning_rate": 4.0869478663302894e-05, "lm_loss": 0.6877, "loss": 0.6877, "step": 2025 }, { "epoch": 0.9081468753501923, "learning_rate": 4.086012448216423e-05, "lm_loss": 0.6351, "loss": 0.6351, "step": 2026 }, { "epoch": 0.9085951215867917, "learning_rate": 4.085076658352725e-05, "lm_loss": 1.3009, "loss": 1.3009, "step": 2027 }, { "epoch": 0.909043367823391, "learning_rate": 4.084140496958538e-05, "lm_loss": 1.0521, "loss": 1.0521, "step": 2028 }, { "epoch": 0.9094916140599902, "learning_rate": 4.0832039642532913e-05, "lm_loss": 0.7285, "loss": 0.7285, "step": 2029 }, { "epoch": 0.9099398602965896, "learning_rate": 4.082267060456502e-05, "lm_loss": 0.7506, "loss": 0.7506, "step": 2030 }, { "epoch": 0.9103881065331889, "learning_rate": 4.081329785787773e-05, "lm_loss": 1.2925, "loss": 1.2925, "step": 2031 }, { "epoch": 0.9108363527697882, "learning_rate": 4.0803921404667946e-05, "lm_loss": 0.7848, "loss": 0.7848, "step": 2032 }, { "epoch": 0.9112845990063875, "learning_rate": 4.079454124713343e-05, "lm_loss": 0.4687, "loss": 0.4687, "step": 2033 }, { "epoch": 0.9117328452429868, "learning_rate": 4.078515738747285e-05, "lm_loss": 0.7599, "loss": 0.7599, "step": 2034 }, { "epoch": 0.9121810914795861, "learning_rate": 4.07757698278857e-05, "lm_loss": 0.7339, "loss": 0.7339, "step": 2035 }, { "epoch": 0.9126293377161854, "learning_rate": 4.076637857057235e-05, "lm_loss": 0.5183, "loss": 0.5183, "step": 2036 }, { "epoch": 0.9130775839527847, "learning_rate": 4.0756983617734044e-05, "lm_loss": 0.7759, "loss": 0.7759, "step": 2037 }, { "epoch": 0.913525830189384, "learning_rate": 4.0747584971572905e-05, "lm_loss": 0.4768, "loss": 0.4768, "step": 2038 }, { "epoch": 0.9139740764259834, "learning_rate": 4.0738182634291895e-05, "lm_loss": 0.8342, "loss": 0.8342, "step": 2039 }, { "epoch": 0.9144223226625826, "learning_rate": 4.0728776608094856e-05, "lm_loss": 1.3966, "loss": 1.3966, "step": 2040 }, { "epoch": 0.914870568899182, "learning_rate": 4.0719366895186495e-05, "lm_loss": 0.7133, "loss": 0.7133, "step": 2041 }, { "epoch": 0.9153188151357813, "learning_rate": 4.070995349777238e-05, "lm_loss": 0.8083, "loss": 0.8083, "step": 2042 }, { "epoch": 0.9157670613723805, "learning_rate": 4.070053641805893e-05, "lm_loss": 0.6424, "loss": 0.6424, "step": 2043 }, { "epoch": 0.9162153076089798, "learning_rate": 4.069111565825346e-05, "lm_loss": 0.7834, "loss": 0.7834, "step": 2044 }, { "epoch": 0.9166635538455792, "learning_rate": 4.068169122056413e-05, "lm_loss": 0.7171, "loss": 0.7171, "step": 2045 }, { "epoch": 0.9171118000821785, "learning_rate": 4.067226310719994e-05, "lm_loss": 0.6333, "loss": 0.6333, "step": 2046 }, { "epoch": 0.9175600463187777, "learning_rate": 4.066283132037079e-05, "lm_loss": 0.7467, "loss": 0.7467, "step": 2047 }, { "epoch": 0.9180082925553771, "learning_rate": 4.06533958622874e-05, "lm_loss": 0.5343, "loss": 0.5343, "step": 2048 }, { "epoch": 0.9184565387919764, "learning_rate": 4.0643956735161395e-05, "lm_loss": 0.752, "loss": 0.752, "step": 2049 }, { "epoch": 0.9189047850285756, "learning_rate": 4.0634513941205224e-05, "lm_loss": 1.5593, "loss": 1.5593, "step": 2050 }, { "epoch": 0.919353031265175, "learning_rate": 4.0625067482632226e-05, "lm_loss": 0.6955, "loss": 0.6955, "step": 2051 }, { "epoch": 0.9198012775017743, "learning_rate": 4.0615617361656564e-05, "lm_loss": 1.3552, "loss": 1.3552, "step": 2052 }, { "epoch": 0.9202495237383737, "learning_rate": 4.060616358049329e-05, "lm_loss": 0.5793, "loss": 0.5793, "step": 2053 }, { "epoch": 0.9206977699749729, "learning_rate": 4.05967061413583e-05, "lm_loss": 0.7958, "loss": 0.7958, "step": 2054 }, { "epoch": 0.9211460162115722, "learning_rate": 4.058724504646834e-05, "lm_loss": 1.3537, "loss": 1.3537, "step": 2055 }, { "epoch": 0.9215942624481716, "learning_rate": 4.057778029804103e-05, "lm_loss": 0.7278, "loss": 0.7278, "step": 2056 }, { "epoch": 0.9220425086847709, "learning_rate": 4.0568311898294845e-05, "lm_loss": 0.5578, "loss": 0.5578, "step": 2057 }, { "epoch": 0.9224907549213701, "learning_rate": 4.05588398494491e-05, "lm_loss": 1.5802, "loss": 1.5802, "step": 2058 }, { "epoch": 0.9229390011579695, "learning_rate": 4.0549364153723974e-05, "lm_loss": 0.8609, "loss": 0.8609, "step": 2059 }, { "epoch": 0.9233872473945688, "learning_rate": 4.053988481334051e-05, "lm_loss": 0.5044, "loss": 0.5044, "step": 2060 }, { "epoch": 0.923835493631168, "learning_rate": 4.053040183052058e-05, "lm_loss": 0.7682, "loss": 0.7682, "step": 2061 }, { "epoch": 0.9242837398677674, "learning_rate": 4.052091520748694e-05, "lm_loss": 1.4284, "loss": 1.4284, "step": 2062 }, { "epoch": 0.9247319861043667, "learning_rate": 4.051142494646319e-05, "lm_loss": 0.7952, "loss": 0.7952, "step": 2063 }, { "epoch": 0.925180232340966, "learning_rate": 4.050193104967377e-05, "lm_loss": 0.6233, "loss": 0.6233, "step": 2064 }, { "epoch": 0.9256284785775653, "learning_rate": 4.0492433519343976e-05, "lm_loss": 0.9361, "loss": 0.9361, "step": 2065 }, { "epoch": 0.9260767248141646, "learning_rate": 4.048293235769997e-05, "lm_loss": 1.1057, "loss": 1.1057, "step": 2066 }, { "epoch": 0.9265249710507639, "learning_rate": 4.047342756696876e-05, "lm_loss": 0.7343, "loss": 0.7343, "step": 2067 }, { "epoch": 0.9269732172873631, "learning_rate": 4.046391914937818e-05, "lm_loss": 0.6222, "loss": 0.6222, "step": 2068 }, { "epoch": 0.9274214635239625, "learning_rate": 4.0454407107156955e-05, "lm_loss": 1.1889, "loss": 1.1889, "step": 2069 }, { "epoch": 0.9278697097605618, "learning_rate": 4.044489144253462e-05, "lm_loss": 0.9335, "loss": 0.9335, "step": 2070 }, { "epoch": 0.9283179559971612, "learning_rate": 4.043537215774159e-05, "lm_loss": 1.333, "loss": 1.333, "step": 2071 }, { "epoch": 0.9287662022337604, "learning_rate": 4.0425849255009124e-05, "lm_loss": 0.7848, "loss": 0.7848, "step": 2072 }, { "epoch": 0.9292144484703597, "learning_rate": 4.0416322736569315e-05, "lm_loss": 0.7466, "loss": 0.7466, "step": 2073 }, { "epoch": 0.9296626947069591, "learning_rate": 4.0406792604655085e-05, "lm_loss": 0.6458, "loss": 0.6458, "step": 2074 }, { "epoch": 0.9301109409435583, "learning_rate": 4.039725886150026e-05, "lm_loss": 1.2097, "loss": 1.2097, "step": 2075 }, { "epoch": 0.9305591871801576, "learning_rate": 4.038772150933948e-05, "lm_loss": 1.6706, "loss": 1.6706, "step": 2076 }, { "epoch": 0.931007433416757, "learning_rate": 4.037818055040822e-05, "lm_loss": 0.6731, "loss": 0.6731, "step": 2077 }, { "epoch": 0.9314556796533563, "learning_rate": 4.03686359869428e-05, "lm_loss": 0.7445, "loss": 0.7445, "step": 2078 }, { "epoch": 0.9319039258899555, "learning_rate": 4.035908782118042e-05, "lm_loss": 0.6656, "loss": 0.6656, "step": 2079 }, { "epoch": 0.9323521721265549, "learning_rate": 4.034953605535908e-05, "lm_loss": 0.7663, "loss": 0.7663, "step": 2080 }, { "epoch": 0.9328004183631542, "learning_rate": 4.0339980691717674e-05, "lm_loss": 0.6095, "loss": 0.6095, "step": 2081 }, { "epoch": 0.9332486645997534, "learning_rate": 4.033042173249588e-05, "lm_loss": 0.7939, "loss": 0.7939, "step": 2082 }, { "epoch": 0.9336969108363528, "learning_rate": 4.032085917993426e-05, "lm_loss": 0.7974, "loss": 0.7974, "step": 2083 }, { "epoch": 0.9341451570729521, "learning_rate": 4.03112930362742e-05, "lm_loss": 0.6933, "loss": 0.6933, "step": 2084 }, { "epoch": 0.9345934033095514, "learning_rate": 4.030172330375795e-05, "lm_loss": 0.616, "loss": 0.616, "step": 2085 }, { "epoch": 0.9350416495461507, "learning_rate": 4.029214998462858e-05, "lm_loss": 0.5977, "loss": 0.5977, "step": 2086 }, { "epoch": 0.93548989578275, "learning_rate": 4.028257308113001e-05, "lm_loss": 0.7585, "loss": 0.7585, "step": 2087 }, { "epoch": 0.9359381420193493, "learning_rate": 4.027299259550698e-05, "lm_loss": 0.6082, "loss": 0.6082, "step": 2088 }, { "epoch": 0.9363863882559486, "learning_rate": 4.026340853000509e-05, "lm_loss": 0.6105, "loss": 0.6105, "step": 2089 }, { "epoch": 0.9368346344925479, "learning_rate": 4.0253820886870796e-05, "lm_loss": 0.789, "loss": 0.789, "step": 2090 }, { "epoch": 0.9372828807291472, "learning_rate": 4.024422966835136e-05, "lm_loss": 0.6124, "loss": 0.6124, "step": 2091 }, { "epoch": 0.9377311269657466, "learning_rate": 4.0234634876694885e-05, "lm_loss": 0.6631, "loss": 0.6631, "step": 2092 }, { "epoch": 0.9381793732023458, "learning_rate": 4.022503651415034e-05, "lm_loss": 0.7968, "loss": 0.7968, "step": 2093 }, { "epoch": 0.9386276194389451, "learning_rate": 4.021543458296749e-05, "lm_loss": 1.2767, "loss": 1.2767, "step": 2094 }, { "epoch": 0.9390758656755445, "learning_rate": 4.020582908539699e-05, "lm_loss": 0.6602, "loss": 0.6602, "step": 2095 }, { "epoch": 0.9395241119121437, "learning_rate": 4.019622002369026e-05, "lm_loss": 0.7012, "loss": 0.7012, "step": 2096 }, { "epoch": 0.939972358148743, "learning_rate": 4.018660740009961e-05, "lm_loss": 0.8162, "loss": 0.8162, "step": 2097 }, { "epoch": 0.9404206043853424, "learning_rate": 4.017699121687818e-05, "lm_loss": 0.9263, "loss": 0.9263, "step": 2098 }, { "epoch": 0.9408688506219417, "learning_rate": 4.016737147627992e-05, "lm_loss": 1.2904, "loss": 1.2904, "step": 2099 }, { "epoch": 0.9413170968585409, "learning_rate": 4.015774818055964e-05, "lm_loss": 0.5671, "loss": 0.5671, "step": 2100 }, { "epoch": 0.9417653430951403, "learning_rate": 4.014812133197296e-05, "lm_loss": 0.7113, "loss": 0.7113, "step": 2101 }, { "epoch": 0.9422135893317396, "learning_rate": 4.013849093277635e-05, "lm_loss": 0.7682, "loss": 0.7682, "step": 2102 }, { "epoch": 0.9426618355683388, "learning_rate": 4.0128856985227105e-05, "lm_loss": 0.6107, "loss": 0.6107, "step": 2103 }, { "epoch": 0.9431100818049382, "learning_rate": 4.011921949158335e-05, "lm_loss": 0.8534, "loss": 0.8534, "step": 2104 }, { "epoch": 0.9435583280415375, "learning_rate": 4.010957845410404e-05, "lm_loss": 0.5561, "loss": 0.5561, "step": 2105 }, { "epoch": 0.9440065742781368, "learning_rate": 4.0099933875048964e-05, "lm_loss": 0.6694, "loss": 0.6694, "step": 2106 }, { "epoch": 0.9444548205147361, "learning_rate": 4.009028575667876e-05, "lm_loss": 0.5961, "loss": 0.5961, "step": 2107 }, { "epoch": 0.9449030667513354, "learning_rate": 4.008063410125486e-05, "lm_loss": 0.785, "loss": 0.785, "step": 2108 }, { "epoch": 0.9453513129879347, "learning_rate": 4.007097891103955e-05, "lm_loss": 0.6244, "loss": 0.6244, "step": 2109 }, { "epoch": 0.945799559224534, "learning_rate": 4.0061320188295916e-05, "lm_loss": 0.6435, "loss": 0.6435, "step": 2110 }, { "epoch": 0.9462478054611333, "learning_rate": 4.005165793528792e-05, "lm_loss": 0.5321, "loss": 0.5321, "step": 2111 }, { "epoch": 0.9466960516977326, "learning_rate": 4.0041992154280314e-05, "lm_loss": 0.7725, "loss": 0.7725, "step": 2112 }, { "epoch": 0.947144297934332, "learning_rate": 4.003232284753869e-05, "lm_loss": 0.8441, "loss": 0.8441, "step": 2113 }, { "epoch": 0.9475925441709312, "learning_rate": 4.0022650017329456e-05, "lm_loss": 0.4034, "loss": 0.4034, "step": 2114 }, { "epoch": 0.9480407904075305, "learning_rate": 4.001297366591985e-05, "lm_loss": 1.5815, "loss": 1.5815, "step": 2115 }, { "epoch": 0.9484890366441299, "learning_rate": 4.000329379557795e-05, "lm_loss": 0.7907, "loss": 0.7907, "step": 2116 }, { "epoch": 0.9489372828807291, "learning_rate": 3.999361040857265e-05, "lm_loss": 0.6468, "loss": 0.6468, "step": 2117 }, { "epoch": 0.9493855291173284, "learning_rate": 3.998392350717365e-05, "lm_loss": 0.7588, "loss": 0.7588, "step": 2118 }, { "epoch": 0.9498337753539278, "learning_rate": 3.9974233093651504e-05, "lm_loss": 0.7294, "loss": 0.7294, "step": 2119 }, { "epoch": 0.9502820215905271, "learning_rate": 3.996453917027756e-05, "lm_loss": 0.6263, "loss": 0.6263, "step": 2120 }, { "epoch": 0.9507302678271263, "learning_rate": 3.9954841739324025e-05, "lm_loss": 0.8397, "loss": 0.8397, "step": 2121 }, { "epoch": 0.9511785140637257, "learning_rate": 3.994514080306389e-05, "lm_loss": 0.5831, "loss": 0.5831, "step": 2122 }, { "epoch": 0.951626760300325, "learning_rate": 3.993543636377099e-05, "lm_loss": 1.2011, "loss": 1.2011, "step": 2123 }, { "epoch": 0.9520750065369242, "learning_rate": 3.9925728423719964e-05, "lm_loss": 1.0163, "loss": 1.0163, "step": 2124 }, { "epoch": 0.9525232527735236, "learning_rate": 3.9916016985186304e-05, "lm_loss": 0.7265, "loss": 0.7265, "step": 2125 }, { "epoch": 0.9529714990101229, "learning_rate": 3.990630205044629e-05, "lm_loss": 0.6984, "loss": 0.6984, "step": 2126 }, { "epoch": 0.9534197452467222, "learning_rate": 3.989658362177703e-05, "lm_loss": 0.6495, "loss": 0.6495, "step": 2127 }, { "epoch": 0.9538679914833215, "learning_rate": 3.988686170145644e-05, "lm_loss": 0.608, "loss": 0.608, "step": 2128 }, { "epoch": 0.9543162377199208, "learning_rate": 3.987713629176331e-05, "lm_loss": 0.9032, "loss": 0.9032, "step": 2129 }, { "epoch": 0.9547644839565201, "learning_rate": 3.9867407394977165e-05, "lm_loss": 0.7364, "loss": 0.7364, "step": 2130 }, { "epoch": 0.9552127301931195, "learning_rate": 3.98576750133784e-05, "lm_loss": 0.6618, "loss": 0.6618, "step": 2131 }, { "epoch": 0.9556609764297187, "learning_rate": 3.984793914924822e-05, "lm_loss": 0.6754, "loss": 0.6754, "step": 2132 }, { "epoch": 0.956109222666318, "learning_rate": 3.983819980486863e-05, "lm_loss": 0.6408, "loss": 0.6408, "step": 2133 }, { "epoch": 0.9565574689029174, "learning_rate": 3.982845698252248e-05, "lm_loss": 0.7506, "loss": 0.7506, "step": 2134 }, { "epoch": 0.9570057151395166, "learning_rate": 3.9818710684493396e-05, "lm_loss": 0.7971, "loss": 0.7971, "step": 2135 }, { "epoch": 0.9574539613761159, "learning_rate": 3.980896091306585e-05, "lm_loss": 0.5906, "loss": 0.5906, "step": 2136 }, { "epoch": 0.9579022076127153, "learning_rate": 3.979920767052512e-05, "lm_loss": 0.6449, "loss": 0.6449, "step": 2137 }, { "epoch": 0.9583504538493146, "learning_rate": 3.978945095915729e-05, "lm_loss": 0.8422, "loss": 0.8422, "step": 2138 }, { "epoch": 0.9587987000859138, "learning_rate": 3.977969078124928e-05, "lm_loss": 0.7161, "loss": 0.7161, "step": 2139 }, { "epoch": 0.9592469463225132, "learning_rate": 3.976992713908878e-05, "lm_loss": 0.5647, "loss": 0.5647, "step": 2140 }, { "epoch": 0.9596951925591125, "learning_rate": 3.9760160034964324e-05, "lm_loss": 0.8102, "loss": 0.8102, "step": 2141 }, { "epoch": 0.9601434387957117, "learning_rate": 3.9750389471165256e-05, "lm_loss": 0.7225, "loss": 0.7225, "step": 2142 }, { "epoch": 0.9605916850323111, "learning_rate": 3.9740615449981725e-05, "lm_loss": 0.678, "loss": 0.678, "step": 2143 }, { "epoch": 0.9610399312689104, "learning_rate": 3.973083797370469e-05, "lm_loss": 0.8279, "loss": 0.8279, "step": 2144 }, { "epoch": 0.9614881775055097, "learning_rate": 3.972105704462592e-05, "lm_loss": 0.6155, "loss": 0.6155, "step": 2145 }, { "epoch": 0.961936423742109, "learning_rate": 3.971127266503799e-05, "lm_loss": 0.7031, "loss": 0.7031, "step": 2146 }, { "epoch": 0.9623846699787083, "learning_rate": 3.9701484837234296e-05, "lm_loss": 0.6058, "loss": 0.6058, "step": 2147 }, { "epoch": 0.9628329162153076, "learning_rate": 3.969169356350902e-05, "lm_loss": 0.7646, "loss": 0.7646, "step": 2148 }, { "epoch": 0.9632811624519069, "learning_rate": 3.968189884615718e-05, "lm_loss": 0.7744, "loss": 0.7744, "step": 2149 }, { "epoch": 0.9637294086885062, "learning_rate": 3.967210068747458e-05, "lm_loss": 1.0279, "loss": 1.0279, "step": 2150 }, { "epoch": 0.9641776549251055, "learning_rate": 3.966229908975785e-05, "lm_loss": 0.9306, "loss": 0.9306, "step": 2151 }, { "epoch": 0.9646259011617049, "learning_rate": 3.965249405530439e-05, "lm_loss": 0.526, "loss": 0.526, "step": 2152 }, { "epoch": 0.9650741473983041, "learning_rate": 3.9642685586412454e-05, "lm_loss": 0.9275, "loss": 0.9275, "step": 2153 }, { "epoch": 0.9655223936349034, "learning_rate": 3.963287368538106e-05, "lm_loss": 0.5657, "loss": 0.5657, "step": 2154 }, { "epoch": 0.9659706398715028, "learning_rate": 3.962305835451004e-05, "lm_loss": 0.7362, "loss": 0.7362, "step": 2155 }, { "epoch": 0.966418886108102, "learning_rate": 3.961323959610005e-05, "lm_loss": 1.2492, "loss": 1.2492, "step": 2156 }, { "epoch": 0.9668671323447013, "learning_rate": 3.9603417412452544e-05, "lm_loss": 0.941, "loss": 0.941, "step": 2157 }, { "epoch": 0.9673153785813007, "learning_rate": 3.959359180586975e-05, "lm_loss": 0.6914, "loss": 0.6914, "step": 2158 }, { "epoch": 0.9677636248179, "learning_rate": 3.958376277865473e-05, "lm_loss": 0.6793, "loss": 0.6793, "step": 2159 }, { "epoch": 0.9682118710544992, "learning_rate": 3.957393033311134e-05, "lm_loss": 0.6802, "loss": 0.6802, "step": 2160 }, { "epoch": 0.9686601172910986, "learning_rate": 3.9564094471544224e-05, "lm_loss": 0.7409, "loss": 0.7409, "step": 2161 }, { "epoch": 0.9691083635276979, "learning_rate": 3.955425519625884e-05, "lm_loss": 0.6497, "loss": 0.6497, "step": 2162 }, { "epoch": 0.9695566097642971, "learning_rate": 3.954441250956145e-05, "lm_loss": 0.7367, "loss": 0.7367, "step": 2163 }, { "epoch": 0.9700048560008965, "learning_rate": 3.95345664137591e-05, "lm_loss": 0.531, "loss": 0.531, "step": 2164 }, { "epoch": 0.9704531022374958, "learning_rate": 3.9524716911159645e-05, "lm_loss": 0.8483, "loss": 0.8483, "step": 2165 }, { "epoch": 0.9709013484740951, "learning_rate": 3.9514864004071747e-05, "lm_loss": 0.715, "loss": 0.715, "step": 2166 }, { "epoch": 0.9713495947106944, "learning_rate": 3.950500769480484e-05, "lm_loss": 0.6521, "loss": 0.6521, "step": 2167 }, { "epoch": 0.9717978409472937, "learning_rate": 3.949514798566918e-05, "lm_loss": 0.6509, "loss": 0.6509, "step": 2168 }, { "epoch": 0.972246087183893, "learning_rate": 3.9485284878975816e-05, "lm_loss": 1.3805, "loss": 1.3805, "step": 2169 }, { "epoch": 0.9726943334204923, "learning_rate": 3.9475418377036585e-05, "lm_loss": 1.4113, "loss": 1.4113, "step": 2170 }, { "epoch": 0.9731425796570916, "learning_rate": 3.9465548482164115e-05, "lm_loss": 0.6614, "loss": 0.6614, "step": 2171 }, { "epoch": 0.973590825893691, "learning_rate": 3.945567519667185e-05, "lm_loss": 0.7624, "loss": 0.7624, "step": 2172 }, { "epoch": 0.9740390721302903, "learning_rate": 3.944579852287401e-05, "lm_loss": 0.9202, "loss": 0.9202, "step": 2173 }, { "epoch": 0.9744873183668895, "learning_rate": 3.943591846308562e-05, "lm_loss": 1.7821, "loss": 1.7821, "step": 2174 }, { "epoch": 0.9749355646034888, "learning_rate": 3.942603501962249e-05, "lm_loss": 0.9496, "loss": 0.9496, "step": 2175 }, { "epoch": 0.9753838108400882, "learning_rate": 3.941614819480123e-05, "lm_loss": 0.712, "loss": 0.712, "step": 2176 }, { "epoch": 0.9758320570766874, "learning_rate": 3.9406257990939235e-05, "lm_loss": 0.6155, "loss": 0.6155, "step": 2177 }, { "epoch": 0.9762803033132867, "learning_rate": 3.9396364410354716e-05, "lm_loss": 0.7062, "loss": 0.7062, "step": 2178 }, { "epoch": 0.9767285495498861, "learning_rate": 3.938646745536663e-05, "lm_loss": 0.6393, "loss": 0.6393, "step": 2179 }, { "epoch": 0.9771767957864854, "learning_rate": 3.937656712829477e-05, "lm_loss": 0.8538, "loss": 0.8538, "step": 2180 }, { "epoch": 0.9776250420230846, "learning_rate": 3.9366663431459694e-05, "lm_loss": 0.5849, "loss": 0.5849, "step": 2181 }, { "epoch": 0.978073288259684, "learning_rate": 3.9356756367182754e-05, "lm_loss": 0.6407, "loss": 0.6407, "step": 2182 }, { "epoch": 0.9785215344962833, "learning_rate": 3.93468459377861e-05, "lm_loss": 0.8504, "loss": 0.8504, "step": 2183 }, { "epoch": 0.9789697807328825, "learning_rate": 3.9336932145592665e-05, "lm_loss": 0.5659, "loss": 0.5659, "step": 2184 }, { "epoch": 0.9794180269694819, "learning_rate": 3.932701499292616e-05, "lm_loss": 1.482, "loss": 1.482, "step": 2185 }, { "epoch": 0.9798662732060812, "learning_rate": 3.931709448211111e-05, "lm_loss": 0.5778, "loss": 0.5778, "step": 2186 }, { "epoch": 0.9803145194426806, "learning_rate": 3.93071706154728e-05, "lm_loss": 0.7162, "loss": 0.7162, "step": 2187 }, { "epoch": 0.9807627656792798, "learning_rate": 3.9297243395337325e-05, "lm_loss": 0.6864, "loss": 0.6864, "step": 2188 }, { "epoch": 0.9812110119158791, "learning_rate": 3.928731282403153e-05, "lm_loss": 0.7867, "loss": 0.7867, "step": 2189 }, { "epoch": 0.9816592581524785, "learning_rate": 3.9277378903883086e-05, "lm_loss": 0.7056, "loss": 0.7056, "step": 2190 }, { "epoch": 0.9821075043890777, "learning_rate": 3.926744163722043e-05, "lm_loss": 0.7397, "loss": 0.7397, "step": 2191 }, { "epoch": 0.982555750625677, "learning_rate": 3.925750102637278e-05, "lm_loss": 0.4722, "loss": 0.4722, "step": 2192 }, { "epoch": 0.9830039968622764, "learning_rate": 3.924755707367016e-05, "lm_loss": 0.9004, "loss": 0.9004, "step": 2193 }, { "epoch": 0.9834522430988757, "learning_rate": 3.9237609781443334e-05, "lm_loss": 0.7286, "loss": 0.7286, "step": 2194 }, { "epoch": 0.9839004893354749, "learning_rate": 3.922765915202389e-05, "lm_loss": 0.5667, "loss": 0.5667, "step": 2195 }, { "epoch": 0.9843487355720743, "learning_rate": 3.921770518774419e-05, "lm_loss": 0.817, "loss": 0.817, "step": 2196 }, { "epoch": 0.9847969818086736, "learning_rate": 3.920774789093736e-05, "lm_loss": 0.6718, "loss": 0.6718, "step": 2197 }, { "epoch": 0.9852452280452729, "learning_rate": 3.919778726393733e-05, "lm_loss": 0.5792, "loss": 0.5792, "step": 2198 }, { "epoch": 0.9856934742818722, "learning_rate": 3.918782330907877e-05, "lm_loss": 0.7093, "loss": 0.7093, "step": 2199 }, { "epoch": 0.9861417205184715, "learning_rate": 3.917785602869719e-05, "lm_loss": 0.7224, "loss": 0.7224, "step": 2200 }, { "epoch": 0.9865899667550708, "learning_rate": 3.916788542512884e-05, "lm_loss": 0.7268, "loss": 0.7268, "step": 2201 }, { "epoch": 0.98703821299167, "learning_rate": 3.915791150071075e-05, "lm_loss": 1.3334, "loss": 1.3334, "step": 2202 }, { "epoch": 0.9874864592282694, "learning_rate": 3.914793425778075e-05, "lm_loss": 0.6942, "loss": 0.6942, "step": 2203 }, { "epoch": 0.9879347054648687, "learning_rate": 3.913795369867742e-05, "lm_loss": 0.6928, "loss": 0.6928, "step": 2204 }, { "epoch": 0.9883829517014681, "learning_rate": 3.912796982574013e-05, "lm_loss": 0.5612, "loss": 0.5612, "step": 2205 }, { "epoch": 0.9888311979380673, "learning_rate": 3.911798264130904e-05, "lm_loss": 1.3032, "loss": 1.3032, "step": 2206 }, { "epoch": 0.9892794441746666, "learning_rate": 3.910799214772506e-05, "lm_loss": 0.7367, "loss": 0.7367, "step": 2207 }, { "epoch": 0.989727690411266, "learning_rate": 3.90979983473299e-05, "lm_loss": 0.5806, "loss": 0.5806, "step": 2208 }, { "epoch": 0.9901759366478652, "learning_rate": 3.908800124246602e-05, "lm_loss": 0.7529, "loss": 0.7529, "step": 2209 }, { "epoch": 0.9906241828844645, "learning_rate": 3.907800083547669e-05, "lm_loss": 0.6911, "loss": 0.6911, "step": 2210 }, { "epoch": 0.9910724291210639, "learning_rate": 3.9067997128705915e-05, "lm_loss": 0.6943, "loss": 0.6943, "step": 2211 }, { "epoch": 0.9915206753576632, "learning_rate": 3.90579901244985e-05, "lm_loss": 0.6799, "loss": 0.6799, "step": 2212 }, { "epoch": 0.9919689215942624, "learning_rate": 3.904797982520001e-05, "lm_loss": 0.6556, "loss": 0.6556, "step": 2213 }, { "epoch": 0.9924171678308618, "learning_rate": 3.90379662331568e-05, "lm_loss": 0.7745, "loss": 0.7745, "step": 2214 }, { "epoch": 0.9928654140674611, "learning_rate": 3.9027949350715964e-05, "lm_loss": 0.9576, "loss": 0.9576, "step": 2215 }, { "epoch": 0.9933136603040603, "learning_rate": 3.90179291802254e-05, "lm_loss": 1.3075, "loss": 1.3075, "step": 2216 }, { "epoch": 0.9937619065406597, "learning_rate": 3.900790572403376e-05, "lm_loss": 0.6328, "loss": 0.6328, "step": 2217 }, { "epoch": 0.994210152777259, "learning_rate": 3.899787898449045e-05, "lm_loss": 0.6317, "loss": 0.6317, "step": 2218 }, { "epoch": 0.9946583990138583, "learning_rate": 3.8987848963945704e-05, "lm_loss": 0.739, "loss": 0.739, "step": 2219 }, { "epoch": 0.9951066452504576, "learning_rate": 3.897781566475046e-05, "lm_loss": 1.3962, "loss": 1.3962, "step": 2220 }, { "epoch": 0.9955548914870569, "learning_rate": 3.896777908925645e-05, "lm_loss": 0.7145, "loss": 0.7145, "step": 2221 }, { "epoch": 0.9960031377236562, "learning_rate": 3.8957739239816184e-05, "lm_loss": 1.1753, "loss": 1.1753, "step": 2222 }, { "epoch": 0.9964513839602555, "learning_rate": 3.8947696118782936e-05, "lm_loss": 0.8867, "loss": 0.8867, "step": 2223 }, { "epoch": 0.9968996301968548, "learning_rate": 3.893764972851072e-05, "lm_loss": 0.5943, "loss": 0.5943, "step": 2224 }, { "epoch": 0.9973478764334541, "learning_rate": 3.892760007135436e-05, "lm_loss": 0.6863, "loss": 0.6863, "step": 2225 }, { "epoch": 0.9977961226700535, "learning_rate": 3.8917547149669404e-05, "lm_loss": 0.822, "loss": 0.822, "step": 2226 }, { "epoch": 0.9982443689066527, "learning_rate": 3.890749096581219e-05, "lm_loss": 0.6975, "loss": 0.6975, "step": 2227 }, { "epoch": 0.998692615143252, "learning_rate": 3.8897431522139816e-05, "lm_loss": 0.7803, "loss": 0.7803, "step": 2228 }, { "epoch": 0.9991408613798514, "learning_rate": 3.888736882101015e-05, "lm_loss": 0.5898, "loss": 0.5898, "step": 2229 }, { "epoch": 0.9995891076164506, "learning_rate": 3.8877302864781804e-05, "lm_loss": 0.7607, "loss": 0.7607, "step": 2230 }, { "epoch": 1.00003735385305, "learning_rate": 3.886723365581417e-05, "lm_loss": 0.9801, "loss": 0.9801, "step": 2231 }, { "epoch": 1.0004856000896492, "learning_rate": 3.8857161196467406e-05, "lm_loss": 0.6997, "loss": 0.6997, "step": 2232 }, { "epoch": 1.0009338463262485, "learning_rate": 3.8847085489102406e-05, "lm_loss": 0.64, "loss": 0.64, "step": 2233 }, { "epoch": 1.0013820925628478, "learning_rate": 3.883700653608086e-05, "lm_loss": 0.597, "loss": 0.597, "step": 2234 }, { "epoch": 1.0018303387994472, "learning_rate": 3.8826924339765195e-05, "lm_loss": 0.9412, "loss": 0.9412, "step": 2235 }, { "epoch": 1.0022785850360465, "learning_rate": 3.8816838902518594e-05, "lm_loss": 0.6778, "loss": 0.6778, "step": 2236 }, { "epoch": 1.0027268312726458, "learning_rate": 3.8806750226705033e-05, "lm_loss": 0.6253, "loss": 0.6253, "step": 2237 }, { "epoch": 1.0031750775092452, "learning_rate": 3.8796658314689205e-05, "lm_loss": 0.5635, "loss": 0.5635, "step": 2238 }, { "epoch": 1.0036233237458443, "learning_rate": 3.878656316883659e-05, "lm_loss": 0.6287, "loss": 0.6287, "step": 2239 }, { "epoch": 1.0040715699824436, "learning_rate": 3.877646479151341e-05, "lm_loss": 0.6427, "loss": 0.6427, "step": 2240 }, { "epoch": 1.004519816219043, "learning_rate": 3.876636318508666e-05, "lm_loss": 0.7226, "loss": 0.7226, "step": 2241 }, { "epoch": 1.0049680624556423, "learning_rate": 3.875625835192408e-05, "lm_loss": 0.6165, "loss": 0.6165, "step": 2242 }, { "epoch": 1.0054163086922416, "learning_rate": 3.8746150294394177e-05, "lm_loss": 0.5668, "loss": 0.5668, "step": 2243 }, { "epoch": 1.005864554928841, "learning_rate": 3.873603901486619e-05, "lm_loss": 0.6222, "loss": 0.6222, "step": 2244 }, { "epoch": 1.0063128011654403, "learning_rate": 3.872592451571014e-05, "lm_loss": 0.6378, "loss": 0.6378, "step": 2245 }, { "epoch": 1.0067610474020394, "learning_rate": 3.87158067992968e-05, "lm_loss": 0.5573, "loss": 0.5573, "step": 2246 }, { "epoch": 1.0072092936386388, "learning_rate": 3.8705685867997673e-05, "lm_loss": 0.7, "loss": 0.7, "step": 2247 }, { "epoch": 1.007657539875238, "learning_rate": 3.8695561724185044e-05, "lm_loss": 0.7355, "loss": 0.7355, "step": 2248 }, { "epoch": 1.0081057861118374, "learning_rate": 3.8685434370231935e-05, "lm_loss": 0.5585, "loss": 0.5585, "step": 2249 }, { "epoch": 1.0085540323484368, "learning_rate": 3.867530380851212e-05, "lm_loss": 0.6278, "loss": 0.6278, "step": 2250 }, { "epoch": 1.009002278585036, "learning_rate": 3.866517004140013e-05, "lm_loss": 0.6396, "loss": 0.6396, "step": 2251 }, { "epoch": 1.0094505248216354, "learning_rate": 3.8655033071271255e-05, "lm_loss": 0.5508, "loss": 0.5508, "step": 2252 }, { "epoch": 1.0098987710582346, "learning_rate": 3.8644892900501514e-05, "lm_loss": 0.6295, "loss": 0.6295, "step": 2253 }, { "epoch": 1.010347017294834, "learning_rate": 3.863474953146772e-05, "lm_loss": 0.5161, "loss": 0.5161, "step": 2254 }, { "epoch": 1.0107952635314332, "learning_rate": 3.862460296654736e-05, "lm_loss": 0.7231, "loss": 0.7231, "step": 2255 }, { "epoch": 1.0112435097680326, "learning_rate": 3.861445320811876e-05, "lm_loss": 0.6377, "loss": 0.6377, "step": 2256 }, { "epoch": 1.011691756004632, "learning_rate": 3.860430025856091e-05, "lm_loss": 0.364, "loss": 0.364, "step": 2257 }, { "epoch": 1.0121400022412312, "learning_rate": 3.859414412025361e-05, "lm_loss": 0.7667, "loss": 0.7667, "step": 2258 }, { "epoch": 1.0125882484778306, "learning_rate": 3.858398479557739e-05, "lm_loss": 0.7235, "loss": 0.7235, "step": 2259 }, { "epoch": 1.0130364947144297, "learning_rate": 3.8573822286913505e-05, "lm_loss": 0.3516, "loss": 0.3516, "step": 2260 }, { "epoch": 1.013484740951029, "learning_rate": 3.856365659664399e-05, "lm_loss": 0.8441, "loss": 0.8441, "step": 2261 }, { "epoch": 1.0139329871876284, "learning_rate": 3.8553487727151596e-05, "lm_loss": 0.5072, "loss": 0.5072, "step": 2262 }, { "epoch": 1.0143812334242277, "learning_rate": 3.8543315680819836e-05, "lm_loss": 0.7594, "loss": 0.7594, "step": 2263 }, { "epoch": 1.014829479660827, "learning_rate": 3.853314046003297e-05, "lm_loss": 0.4856, "loss": 0.4856, "step": 2264 }, { "epoch": 1.0152777258974264, "learning_rate": 3.8522962067175994e-05, "lm_loss": 0.5592, "loss": 0.5592, "step": 2265 }, { "epoch": 1.0157259721340257, "learning_rate": 3.8512780504634644e-05, "lm_loss": 0.7189, "loss": 0.7189, "step": 2266 }, { "epoch": 1.0161742183706248, "learning_rate": 3.85025957747954e-05, "lm_loss": 0.6158, "loss": 0.6158, "step": 2267 }, { "epoch": 1.0166224646072242, "learning_rate": 3.8492407880045504e-05, "lm_loss": 0.6051, "loss": 0.6051, "step": 2268 }, { "epoch": 1.0170707108438235, "learning_rate": 3.848221682277292e-05, "lm_loss": 0.666, "loss": 0.666, "step": 2269 }, { "epoch": 1.0175189570804228, "learning_rate": 3.847202260536636e-05, "lm_loss": 0.6646, "loss": 0.6646, "step": 2270 }, { "epoch": 1.0179672033170222, "learning_rate": 3.846182523021526e-05, "lm_loss": 0.6423, "loss": 0.6423, "step": 2271 }, { "epoch": 1.0184154495536215, "learning_rate": 3.845162469970982e-05, "lm_loss": 0.539, "loss": 0.539, "step": 2272 }, { "epoch": 1.0188636957902208, "learning_rate": 3.844142101624098e-05, "lm_loss": 0.6239, "loss": 0.6239, "step": 2273 }, { "epoch": 1.01931194202682, "learning_rate": 3.84312141822004e-05, "lm_loss": 0.6976, "loss": 0.6976, "step": 2274 }, { "epoch": 1.0197601882634193, "learning_rate": 3.8421004199980496e-05, "lm_loss": 0.717, "loss": 0.717, "step": 2275 }, { "epoch": 1.0202084345000186, "learning_rate": 3.841079107197439e-05, "lm_loss": 0.59, "loss": 0.59, "step": 2276 }, { "epoch": 1.020656680736618, "learning_rate": 3.8400574800575984e-05, "lm_loss": 0.6375, "loss": 0.6375, "step": 2277 }, { "epoch": 1.0211049269732173, "learning_rate": 3.839035538817991e-05, "lm_loss": 0.5499, "loss": 0.5499, "step": 2278 }, { "epoch": 1.0215531732098166, "learning_rate": 3.83801328371815e-05, "lm_loss": 0.6679, "loss": 0.6679, "step": 2279 }, { "epoch": 1.022001419446416, "learning_rate": 3.836990714997686e-05, "lm_loss": 0.5663, "loss": 0.5663, "step": 2280 }, { "epoch": 1.022449665683015, "learning_rate": 3.8359678328962814e-05, "lm_loss": 0.5836, "loss": 0.5836, "step": 2281 }, { "epoch": 1.0228979119196144, "learning_rate": 3.834944637653692e-05, "lm_loss": 1.1315, "loss": 1.1315, "step": 2282 }, { "epoch": 1.0233461581562138, "learning_rate": 3.8339211295097474e-05, "lm_loss": 0.6177, "loss": 0.6177, "step": 2283 }, { "epoch": 1.023794404392813, "learning_rate": 3.8328973087043516e-05, "lm_loss": 0.6662, "loss": 0.6662, "step": 2284 }, { "epoch": 1.0242426506294124, "learning_rate": 3.831873175477479e-05, "lm_loss": 0.6337, "loss": 0.6337, "step": 2285 }, { "epoch": 1.0246908968660118, "learning_rate": 3.830848730069181e-05, "lm_loss": 0.406, "loss": 0.406, "step": 2286 }, { "epoch": 1.0251391431026111, "learning_rate": 3.829823972719578e-05, "lm_loss": 0.8404, "loss": 0.8404, "step": 2287 }, { "epoch": 1.0255873893392102, "learning_rate": 3.8287989036688676e-05, "lm_loss": 0.5471, "loss": 0.5471, "step": 2288 }, { "epoch": 1.0260356355758096, "learning_rate": 3.8277735231573174e-05, "lm_loss": 0.5816, "loss": 0.5816, "step": 2289 }, { "epoch": 1.026483881812409, "learning_rate": 3.8267478314252705e-05, "lm_loss": 0.6787, "loss": 0.6787, "step": 2290 }, { "epoch": 1.0269321280490082, "learning_rate": 3.8257218287131395e-05, "lm_loss": 0.602, "loss": 0.602, "step": 2291 }, { "epoch": 1.0273803742856076, "learning_rate": 3.824695515261414e-05, "lm_loss": 0.6363, "loss": 0.6363, "step": 2292 }, { "epoch": 1.027828620522207, "learning_rate": 3.823668891310653e-05, "lm_loss": 0.5759, "loss": 0.5759, "step": 2293 }, { "epoch": 1.0282768667588063, "learning_rate": 3.8226419571014904e-05, "lm_loss": 0.6988, "loss": 0.6988, "step": 2294 }, { "epoch": 1.0287251129954056, "learning_rate": 3.821614712874633e-05, "lm_loss": 0.4691, "loss": 0.4691, "step": 2295 }, { "epoch": 1.0291733592320047, "learning_rate": 3.820587158870857e-05, "lm_loss": 1.1155, "loss": 1.1155, "step": 2296 }, { "epoch": 1.029621605468604, "learning_rate": 3.819559295331016e-05, "lm_loss": 0.7595, "loss": 0.7595, "step": 2297 }, { "epoch": 1.0300698517052034, "learning_rate": 3.818531122496032e-05, "lm_loss": 0.4794, "loss": 0.4794, "step": 2298 }, { "epoch": 1.0305180979418027, "learning_rate": 3.8175026406069036e-05, "lm_loss": 1.0984, "loss": 1.0984, "step": 2299 }, { "epoch": 1.030966344178402, "learning_rate": 3.816473849904697e-05, "lm_loss": 0.712, "loss": 0.712, "step": 2300 }, { "epoch": 1.0314145904150014, "learning_rate": 3.8154447506305545e-05, "lm_loss": 0.603, "loss": 0.603, "step": 2301 }, { "epoch": 1.0318628366516007, "learning_rate": 3.81441534302569e-05, "lm_loss": 0.7384, "loss": 0.7384, "step": 2302 }, { "epoch": 1.0323110828881998, "learning_rate": 3.813385627331388e-05, "lm_loss": 0.6591, "loss": 0.6591, "step": 2303 }, { "epoch": 1.0327593291247992, "learning_rate": 3.8123556037890076e-05, "lm_loss": 0.4342, "loss": 0.4342, "step": 2304 }, { "epoch": 1.0332075753613985, "learning_rate": 3.811325272639977e-05, "lm_loss": 0.7166, "loss": 0.7166, "step": 2305 }, { "epoch": 1.0336558215979978, "learning_rate": 3.8102946341258e-05, "lm_loss": 0.6833, "loss": 0.6833, "step": 2306 }, { "epoch": 1.0341040678345972, "learning_rate": 3.809263688488051e-05, "lm_loss": 0.5377, "loss": 0.5377, "step": 2307 }, { "epoch": 1.0345523140711965, "learning_rate": 3.8082324359683744e-05, "lm_loss": 1.1774, "loss": 1.1774, "step": 2308 }, { "epoch": 1.0350005603077959, "learning_rate": 3.80720087680849e-05, "lm_loss": 1.2798, "loss": 1.2798, "step": 2309 }, { "epoch": 1.035448806544395, "learning_rate": 3.8061690112501874e-05, "lm_loss": 0.6224, "loss": 0.6224, "step": 2310 }, { "epoch": 1.0358970527809943, "learning_rate": 3.8051368395353284e-05, "lm_loss": 0.6954, "loss": 0.6954, "step": 2311 }, { "epoch": 1.0363452990175936, "learning_rate": 3.804104361905846e-05, "lm_loss": 1.0317, "loss": 1.0317, "step": 2312 }, { "epoch": 1.036793545254193, "learning_rate": 3.8030715786037464e-05, "lm_loss": 0.6353, "loss": 0.6353, "step": 2313 }, { "epoch": 1.0372417914907923, "learning_rate": 3.802038489871106e-05, "lm_loss": 0.44, "loss": 0.44, "step": 2314 }, { "epoch": 1.0376900377273917, "learning_rate": 3.801005095950073e-05, "lm_loss": 1.1736, "loss": 1.1736, "step": 2315 }, { "epoch": 1.038138283963991, "learning_rate": 3.7999713970828686e-05, "lm_loss": 0.5727, "loss": 0.5727, "step": 2316 }, { "epoch": 1.03858653020059, "learning_rate": 3.798937393511782e-05, "lm_loss": 0.8037, "loss": 0.8037, "step": 2317 }, { "epoch": 1.0390347764371894, "learning_rate": 3.7979030854791795e-05, "lm_loss": 0.5109, "loss": 0.5109, "step": 2318 }, { "epoch": 1.0394830226737888, "learning_rate": 3.796868473227493e-05, "lm_loss": 0.6412, "loss": 0.6412, "step": 2319 }, { "epoch": 1.0399312689103881, "learning_rate": 3.795833556999229e-05, "lm_loss": 0.5586, "loss": 0.5586, "step": 2320 }, { "epoch": 1.0403795151469875, "learning_rate": 3.794798337036963e-05, "lm_loss": 0.5537, "loss": 0.5537, "step": 2321 }, { "epoch": 1.0408277613835868, "learning_rate": 3.793762813583345e-05, "lm_loss": 0.7072, "loss": 0.7072, "step": 2322 }, { "epoch": 1.0412760076201861, "learning_rate": 3.792726986881094e-05, "lm_loss": 0.6709, "loss": 0.6709, "step": 2323 }, { "epoch": 1.0417242538567852, "learning_rate": 3.791690857172999e-05, "lm_loss": 0.4817, "loss": 0.4817, "step": 2324 }, { "epoch": 1.0421725000933846, "learning_rate": 3.7906544247019216e-05, "lm_loss": 0.5891, "loss": 0.5891, "step": 2325 }, { "epoch": 1.042620746329984, "learning_rate": 3.789617689710794e-05, "lm_loss": 0.6077, "loss": 0.6077, "step": 2326 }, { "epoch": 1.0430689925665833, "learning_rate": 3.788580652442621e-05, "lm_loss": 1.1359, "loss": 1.1359, "step": 2327 }, { "epoch": 1.0435172388031826, "learning_rate": 3.787543313140475e-05, "lm_loss": 0.6256, "loss": 0.6256, "step": 2328 }, { "epoch": 1.043965485039782, "learning_rate": 3.7865056720475014e-05, "lm_loss": 0.4093, "loss": 0.4093, "step": 2329 }, { "epoch": 1.0444137312763813, "learning_rate": 3.7854677294069144e-05, "lm_loss": 0.742, "loss": 0.742, "step": 2330 }, { "epoch": 1.0448619775129804, "learning_rate": 3.784429485462003e-05, "lm_loss": 0.7748, "loss": 0.7748, "step": 2331 }, { "epoch": 1.0453102237495797, "learning_rate": 3.783390940456122e-05, "lm_loss": 0.9535, "loss": 0.9535, "step": 2332 }, { "epoch": 1.045758469986179, "learning_rate": 3.7823520946326984e-05, "lm_loss": 0.6872, "loss": 0.6872, "step": 2333 }, { "epoch": 1.0462067162227784, "learning_rate": 3.7813129482352315e-05, "lm_loss": 0.5679, "loss": 0.5679, "step": 2334 }, { "epoch": 1.0466549624593777, "learning_rate": 3.7802735015072896e-05, "lm_loss": 0.6356, "loss": 0.6356, "step": 2335 }, { "epoch": 1.047103208695977, "learning_rate": 3.779233754692512e-05, "lm_loss": 0.6688, "loss": 0.6688, "step": 2336 }, { "epoch": 1.0475514549325764, "learning_rate": 3.7781937080346055e-05, "lm_loss": 0.4454, "loss": 0.4454, "step": 2337 }, { "epoch": 1.0479997011691755, "learning_rate": 3.7771533617773505e-05, "lm_loss": 0.7799, "loss": 0.7799, "step": 2338 }, { "epoch": 1.0484479474057748, "learning_rate": 3.776112716164598e-05, "lm_loss": 0.5087, "loss": 0.5087, "step": 2339 }, { "epoch": 1.0488961936423742, "learning_rate": 3.7750717714402676e-05, "lm_loss": 0.5479, "loss": 0.5479, "step": 2340 }, { "epoch": 1.0493444398789735, "learning_rate": 3.7740305278483476e-05, "lm_loss": 0.7105, "loss": 0.7105, "step": 2341 }, { "epoch": 1.0497926861155729, "learning_rate": 3.7729889856328994e-05, "lm_loss": 0.4237, "loss": 0.4237, "step": 2342 }, { "epoch": 1.0502409323521722, "learning_rate": 3.7719471450380514e-05, "lm_loss": 0.8054, "loss": 0.8054, "step": 2343 }, { "epoch": 1.0506891785887715, "learning_rate": 3.770905006308005e-05, "lm_loss": 0.4231, "loss": 0.4231, "step": 2344 }, { "epoch": 1.0511374248253706, "learning_rate": 3.7698625696870296e-05, "lm_loss": 0.6342, "loss": 0.6342, "step": 2345 }, { "epoch": 1.05158567106197, "learning_rate": 3.768819835419465e-05, "lm_loss": 0.7274, "loss": 0.7274, "step": 2346 }, { "epoch": 1.0520339172985693, "learning_rate": 3.767776803749719e-05, "lm_loss": 0.683, "loss": 0.683, "step": 2347 }, { "epoch": 1.0524821635351687, "learning_rate": 3.766733474922273e-05, "lm_loss": 0.5704, "loss": 0.5704, "step": 2348 }, { "epoch": 1.052930409771768, "learning_rate": 3.765689849181674e-05, "lm_loss": 0.6677, "loss": 0.6677, "step": 2349 }, { "epoch": 1.0533786560083673, "learning_rate": 3.764645926772541e-05, "lm_loss": 0.5169, "loss": 0.5169, "step": 2350 }, { "epoch": 1.0538269022449667, "learning_rate": 3.763601707939563e-05, "lm_loss": 0.6823, "loss": 0.6823, "step": 2351 }, { "epoch": 1.0542751484815658, "learning_rate": 3.7625571929274935e-05, "lm_loss": 0.7261, "loss": 0.7261, "step": 2352 }, { "epoch": 1.0547233947181651, "learning_rate": 3.761512381981164e-05, "lm_loss": 0.5765, "loss": 0.5765, "step": 2353 }, { "epoch": 1.0551716409547645, "learning_rate": 3.760467275345467e-05, "lm_loss": 0.6933, "loss": 0.6933, "step": 2354 }, { "epoch": 1.0556198871913638, "learning_rate": 3.759421873265371e-05, "lm_loss": 0.5725, "loss": 0.5725, "step": 2355 }, { "epoch": 1.0560681334279631, "learning_rate": 3.758376175985908e-05, "lm_loss": 0.5751, "loss": 0.5751, "step": 2356 }, { "epoch": 1.0565163796645625, "learning_rate": 3.7573301837521835e-05, "lm_loss": 0.5241, "loss": 0.5241, "step": 2357 }, { "epoch": 1.0569646259011618, "learning_rate": 3.756283896809369e-05, "lm_loss": 0.5649, "loss": 0.5649, "step": 2358 }, { "epoch": 1.057412872137761, "learning_rate": 3.755237315402708e-05, "lm_loss": 0.707, "loss": 0.707, "step": 2359 }, { "epoch": 1.0578611183743603, "learning_rate": 3.754190439777512e-05, "lm_loss": 0.6993, "loss": 0.6993, "step": 2360 }, { "epoch": 1.0583093646109596, "learning_rate": 3.753143270179159e-05, "lm_loss": 0.59, "loss": 0.59, "step": 2361 }, { "epoch": 1.058757610847559, "learning_rate": 3.7520958068531e-05, "lm_loss": 0.5658, "loss": 0.5658, "step": 2362 }, { "epoch": 1.0592058570841583, "learning_rate": 3.751048050044851e-05, "lm_loss": 1.3073, "loss": 1.3073, "step": 2363 }, { "epoch": 1.0596541033207576, "learning_rate": 3.7500000000000003e-05, "lm_loss": 1.2552, "loss": 1.2552, "step": 2364 }, { "epoch": 1.060102349557357, "learning_rate": 3.748951656964202e-05, "lm_loss": 0.9163, "loss": 0.9163, "step": 2365 }, { "epoch": 1.060550595793956, "learning_rate": 3.7479030211831814e-05, "lm_loss": 0.6545, "loss": 0.6545, "step": 2366 }, { "epoch": 1.0609988420305554, "learning_rate": 3.746854092902729e-05, "lm_loss": 0.9551, "loss": 0.9551, "step": 2367 }, { "epoch": 1.0614470882671547, "learning_rate": 3.745804872368709e-05, "lm_loss": 0.7066, "loss": 0.7066, "step": 2368 }, { "epoch": 1.061895334503754, "learning_rate": 3.74475535982705e-05, "lm_loss": 0.7081, "loss": 0.7081, "step": 2369 }, { "epoch": 1.0623435807403534, "learning_rate": 3.7437055555237474e-05, "lm_loss": 1.1019, "loss": 1.1019, "step": 2370 }, { "epoch": 1.0627918269769527, "learning_rate": 3.742655459704871e-05, "lm_loss": 0.5902, "loss": 0.5902, "step": 2371 }, { "epoch": 1.063240073213552, "learning_rate": 3.7416050726165546e-05, "lm_loss": 0.5417, "loss": 0.5417, "step": 2372 }, { "epoch": 1.0636883194501512, "learning_rate": 3.740554394505001e-05, "lm_loss": 0.6627, "loss": 0.6627, "step": 2373 }, { "epoch": 1.0641365656867505, "learning_rate": 3.739503425616482e-05, "lm_loss": 0.5482, "loss": 0.5482, "step": 2374 }, { "epoch": 1.0645848119233499, "learning_rate": 3.738452166197337e-05, "lm_loss": 0.7073, "loss": 0.7073, "step": 2375 }, { "epoch": 1.0650330581599492, "learning_rate": 3.737400616493974e-05, "lm_loss": 0.6844, "loss": 0.6844, "step": 2376 }, { "epoch": 1.0654813043965485, "learning_rate": 3.736348776752868e-05, "lm_loss": 0.5056, "loss": 0.5056, "step": 2377 }, { "epoch": 1.0659295506331479, "learning_rate": 3.735296647220562e-05, "lm_loss": 0.7046, "loss": 0.7046, "step": 2378 }, { "epoch": 1.0663777968697472, "learning_rate": 3.734244228143668e-05, "lm_loss": 0.8916, "loss": 0.8916, "step": 2379 }, { "epoch": 1.0668260431063463, "learning_rate": 3.733191519768867e-05, "lm_loss": 0.9125, "loss": 0.9125, "step": 2380 }, { "epoch": 1.0672742893429457, "learning_rate": 3.7321385223429054e-05, "lm_loss": 0.5121, "loss": 0.5121, "step": 2381 }, { "epoch": 1.067722535579545, "learning_rate": 3.731085236112597e-05, "lm_loss": 0.7016, "loss": 0.7016, "step": 2382 }, { "epoch": 1.0681707818161443, "learning_rate": 3.7300316613248245e-05, "lm_loss": 0.4093, "loss": 0.4093, "step": 2383 }, { "epoch": 1.0686190280527437, "learning_rate": 3.7289777982265397e-05, "lm_loss": 0.7246, "loss": 0.7246, "step": 2384 }, { "epoch": 1.069067274289343, "learning_rate": 3.7279236470647595e-05, "lm_loss": 0.6229, "loss": 0.6229, "step": 2385 }, { "epoch": 1.0695155205259423, "learning_rate": 3.7268692080865696e-05, "lm_loss": 0.6904, "loss": 0.6904, "step": 2386 }, { "epoch": 1.0699637667625415, "learning_rate": 3.725814481539122e-05, "lm_loss": 0.5289, "loss": 0.5289, "step": 2387 }, { "epoch": 1.0704120129991408, "learning_rate": 3.724759467669638e-05, "lm_loss": 0.514, "loss": 0.514, "step": 2388 }, { "epoch": 1.0708602592357401, "learning_rate": 3.7237041667254055e-05, "lm_loss": 0.6602, "loss": 0.6602, "step": 2389 }, { "epoch": 1.0713085054723395, "learning_rate": 3.7226485789537786e-05, "lm_loss": 0.4457, "loss": 0.4457, "step": 2390 }, { "epoch": 1.0717567517089388, "learning_rate": 3.721592704602179e-05, "lm_loss": 0.8676, "loss": 0.8676, "step": 2391 }, { "epoch": 1.0722049979455381, "learning_rate": 3.7205365439180964e-05, "lm_loss": 0.9142, "loss": 0.9142, "step": 2392 }, { "epoch": 1.0726532441821375, "learning_rate": 3.719480097149089e-05, "lm_loss": 0.6843, "loss": 0.6843, "step": 2393 }, { "epoch": 1.0731014904187366, "learning_rate": 3.718423364542778e-05, "lm_loss": 0.4836, "loss": 0.4836, "step": 2394 }, { "epoch": 1.073549736655336, "learning_rate": 3.7173663463468536e-05, "lm_loss": 0.5853, "loss": 0.5853, "step": 2395 }, { "epoch": 1.0739979828919353, "learning_rate": 3.716309042809076e-05, "lm_loss": 1.1392, "loss": 1.1392, "step": 2396 }, { "epoch": 1.0744462291285346, "learning_rate": 3.715251454177266e-05, "lm_loss": 0.5015, "loss": 0.5015, "step": 2397 }, { "epoch": 1.074894475365134, "learning_rate": 3.714193580699317e-05, "lm_loss": 0.5571, "loss": 0.5571, "step": 2398 }, { "epoch": 1.0753427216017333, "learning_rate": 3.7131354226231864e-05, "lm_loss": 0.5594, "loss": 0.5594, "step": 2399 }, { "epoch": 1.0757909678383326, "learning_rate": 3.712076980196899e-05, "lm_loss": 0.9023, "loss": 0.9023, "step": 2400 }, { "epoch": 1.0762392140749317, "learning_rate": 3.711018253668545e-05, "lm_loss": 0.8463, "loss": 0.8463, "step": 2401 }, { "epoch": 1.076687460311531, "learning_rate": 3.709959243286284e-05, "lm_loss": 0.6363, "loss": 0.6363, "step": 2402 }, { "epoch": 1.0771357065481304, "learning_rate": 3.708899949298338e-05, "lm_loss": 0.6091, "loss": 0.6091, "step": 2403 }, { "epoch": 1.0775839527847297, "learning_rate": 3.707840371953e-05, "lm_loss": 0.7072, "loss": 0.7072, "step": 2404 }, { "epoch": 1.078032199021329, "learning_rate": 3.7067805114986266e-05, "lm_loss": 0.4866, "loss": 0.4866, "step": 2405 }, { "epoch": 1.0784804452579284, "learning_rate": 3.7057203681836406e-05, "lm_loss": 0.71, "loss": 0.71, "step": 2406 }, { "epoch": 1.0789286914945277, "learning_rate": 3.704659942256533e-05, "lm_loss": 0.5321, "loss": 0.5321, "step": 2407 }, { "epoch": 1.0793769377311269, "learning_rate": 3.7035992339658586e-05, "lm_loss": 0.6402, "loss": 0.6402, "step": 2408 }, { "epoch": 1.0798251839677262, "learning_rate": 3.702538243560242e-05, "lm_loss": 0.5957, "loss": 0.5957, "step": 2409 }, { "epoch": 1.0802734302043255, "learning_rate": 3.701476971288369e-05, "lm_loss": 0.5477, "loss": 0.5477, "step": 2410 }, { "epoch": 1.0807216764409249, "learning_rate": 3.700415417398996e-05, "lm_loss": 0.7357, "loss": 0.7357, "step": 2411 }, { "epoch": 1.0811699226775242, "learning_rate": 3.699353582140943e-05, "lm_loss": 0.6386, "loss": 0.6386, "step": 2412 }, { "epoch": 1.0816181689141235, "learning_rate": 3.6982914657630966e-05, "lm_loss": 0.5961, "loss": 0.5961, "step": 2413 }, { "epoch": 1.0820664151507229, "learning_rate": 3.697229068514409e-05, "lm_loss": 0.6387, "loss": 0.6387, "step": 2414 }, { "epoch": 1.082514661387322, "learning_rate": 3.696166390643899e-05, "lm_loss": 0.6102, "loss": 0.6102, "step": 2415 }, { "epoch": 1.0829629076239213, "learning_rate": 3.6951034324006486e-05, "lm_loss": 0.4368, "loss": 0.4368, "step": 2416 }, { "epoch": 1.0834111538605207, "learning_rate": 3.6940401940338104e-05, "lm_loss": 0.7822, "loss": 0.7822, "step": 2417 }, { "epoch": 1.08385940009712, "learning_rate": 3.6929766757925976e-05, "lm_loss": 0.611, "loss": 0.611, "step": 2418 }, { "epoch": 1.0843076463337193, "learning_rate": 3.6919128779262926e-05, "lm_loss": 0.4817, "loss": 0.4817, "step": 2419 }, { "epoch": 1.0847558925703187, "learning_rate": 3.69084880068424e-05, "lm_loss": 1.1991, "loss": 1.1991, "step": 2420 }, { "epoch": 1.085204138806918, "learning_rate": 3.689784444315855e-05, "lm_loss": 0.5905, "loss": 0.5905, "step": 2421 }, { "epoch": 1.0856523850435171, "learning_rate": 3.688719809070612e-05, "lm_loss": 0.6413, "loss": 0.6413, "step": 2422 }, { "epoch": 1.0861006312801165, "learning_rate": 3.6876548951980536e-05, "lm_loss": 0.6027, "loss": 0.6027, "step": 2423 }, { "epoch": 1.0865488775167158, "learning_rate": 3.6865897029477906e-05, "lm_loss": 0.6428, "loss": 0.6428, "step": 2424 }, { "epoch": 1.0869971237533151, "learning_rate": 3.685524232569495e-05, "lm_loss": 0.6011, "loss": 0.6011, "step": 2425 }, { "epoch": 1.0874453699899145, "learning_rate": 3.6844584843129046e-05, "lm_loss": 0.5041, "loss": 0.5041, "step": 2426 }, { "epoch": 1.0878936162265138, "learning_rate": 3.683392458427825e-05, "lm_loss": 0.8493, "loss": 0.8493, "step": 2427 }, { "epoch": 1.0883418624631132, "learning_rate": 3.682326155164122e-05, "lm_loss": 0.8768, "loss": 0.8768, "step": 2428 }, { "epoch": 1.0887901086997123, "learning_rate": 3.681259574771732e-05, "lm_loss": 0.5065, "loss": 0.5065, "step": 2429 }, { "epoch": 1.0892383549363116, "learning_rate": 3.6801927175006526e-05, "lm_loss": 0.6468, "loss": 0.6468, "step": 2430 }, { "epoch": 1.089686601172911, "learning_rate": 3.679125583600948e-05, "lm_loss": 0.4446, "loss": 0.4446, "step": 2431 }, { "epoch": 1.0901348474095103, "learning_rate": 3.678058173322746e-05, "lm_loss": 1.2037, "loss": 1.2037, "step": 2432 }, { "epoch": 1.0905830936461096, "learning_rate": 3.6769904869162394e-05, "lm_loss": 0.6108, "loss": 0.6108, "step": 2433 }, { "epoch": 1.091031339882709, "learning_rate": 3.675922524631687e-05, "lm_loss": 0.6924, "loss": 0.6924, "step": 2434 }, { "epoch": 1.0914795861193083, "learning_rate": 3.674854286719412e-05, "lm_loss": 0.4443, "loss": 0.4443, "step": 2435 }, { "epoch": 1.0919278323559074, "learning_rate": 3.6737857734298e-05, "lm_loss": 0.6854, "loss": 0.6854, "step": 2436 }, { "epoch": 1.0923760785925067, "learning_rate": 3.672716985013304e-05, "lm_loss": 0.5212, "loss": 0.5212, "step": 2437 }, { "epoch": 1.092824324829106, "learning_rate": 3.6716479217204404e-05, "lm_loss": 0.6487, "loss": 0.6487, "step": 2438 }, { "epoch": 1.0932725710657054, "learning_rate": 3.670578583801789e-05, "lm_loss": 1.2649, "loss": 1.2649, "step": 2439 }, { "epoch": 1.0937208173023047, "learning_rate": 3.669508971507995e-05, "lm_loss": 0.857, "loss": 0.857, "step": 2440 }, { "epoch": 1.094169063538904, "learning_rate": 3.6684390850897684e-05, "lm_loss": 0.5955, "loss": 0.5955, "step": 2441 }, { "epoch": 1.0946173097755034, "learning_rate": 3.667368924797883e-05, "lm_loss": 0.5559, "loss": 0.5559, "step": 2442 }, { "epoch": 1.0950655560121025, "learning_rate": 3.6662984908831754e-05, "lm_loss": 0.742, "loss": 0.742, "step": 2443 }, { "epoch": 1.0955138022487019, "learning_rate": 3.665227783596548e-05, "lm_loss": 0.562, "loss": 0.562, "step": 2444 }, { "epoch": 1.0959620484853012, "learning_rate": 3.6641568031889677e-05, "lm_loss": 0.8458, "loss": 0.8458, "step": 2445 }, { "epoch": 1.0964102947219005, "learning_rate": 3.663085549911464e-05, "lm_loss": 0.8411, "loss": 0.8411, "step": 2446 }, { "epoch": 1.0968585409584999, "learning_rate": 3.6620140240151304e-05, "lm_loss": 0.5418, "loss": 0.5418, "step": 2447 }, { "epoch": 1.0973067871950992, "learning_rate": 3.660942225751126e-05, "lm_loss": 0.6449, "loss": 0.6449, "step": 2448 }, { "epoch": 1.0977550334316986, "learning_rate": 3.6598701553706713e-05, "lm_loss": 0.672, "loss": 0.672, "step": 2449 }, { "epoch": 1.0982032796682977, "learning_rate": 3.6587978131250516e-05, "lm_loss": 0.7221, "loss": 0.7221, "step": 2450 }, { "epoch": 1.098651525904897, "learning_rate": 3.657725199265618e-05, "lm_loss": 0.8557, "loss": 0.8557, "step": 2451 }, { "epoch": 1.0990997721414963, "learning_rate": 3.6566523140437825e-05, "lm_loss": 0.9483, "loss": 0.9483, "step": 2452 }, { "epoch": 1.0995480183780957, "learning_rate": 3.655579157711021e-05, "lm_loss": 0.6048, "loss": 0.6048, "step": 2453 }, { "epoch": 1.099996264614695, "learning_rate": 3.654505730518874e-05, "lm_loss": 0.5902, "loss": 0.5902, "step": 2454 }, { "epoch": 1.1004445108512944, "learning_rate": 3.653432032718945e-05, "lm_loss": 0.5247, "loss": 0.5247, "step": 2455 }, { "epoch": 1.1008927570878937, "learning_rate": 3.652358064562901e-05, "lm_loss": 0.6215, "loss": 0.6215, "step": 2456 }, { "epoch": 1.1013410033244928, "learning_rate": 3.651283826302472e-05, "lm_loss": 0.4741, "loss": 0.4741, "step": 2457 }, { "epoch": 1.1017892495610921, "learning_rate": 3.650209318189454e-05, "lm_loss": 0.8822, "loss": 0.8822, "step": 2458 }, { "epoch": 1.1022374957976915, "learning_rate": 3.6491345404757e-05, "lm_loss": 0.6308, "loss": 0.6308, "step": 2459 }, { "epoch": 1.1026857420342908, "learning_rate": 3.648059493413133e-05, "lm_loss": 0.6753, "loss": 0.6753, "step": 2460 }, { "epoch": 1.1031339882708902, "learning_rate": 3.646984177253735e-05, "lm_loss": 0.6825, "loss": 0.6825, "step": 2461 }, { "epoch": 1.1035822345074895, "learning_rate": 3.6459085922495536e-05, "lm_loss": 0.564, "loss": 0.564, "step": 2462 }, { "epoch": 1.1040304807440888, "learning_rate": 3.644832738652697e-05, "lm_loss": 1.1006, "loss": 1.1006, "step": 2463 }, { "epoch": 1.104478726980688, "learning_rate": 3.643756616715337e-05, "lm_loss": 0.8033, "loss": 0.8033, "step": 2464 }, { "epoch": 1.1049269732172873, "learning_rate": 3.6426802266897094e-05, "lm_loss": 0.6222, "loss": 0.6222, "step": 2465 }, { "epoch": 1.1053752194538866, "learning_rate": 3.641603568828113e-05, "lm_loss": 1.149, "loss": 1.149, "step": 2466 }, { "epoch": 1.105823465690486, "learning_rate": 3.6405266433829075e-05, "lm_loss": 0.658, "loss": 0.658, "step": 2467 }, { "epoch": 1.1062717119270853, "learning_rate": 3.6394494506065176e-05, "lm_loss": 0.5605, "loss": 0.5605, "step": 2468 }, { "epoch": 1.1067199581636846, "learning_rate": 3.638371990751428e-05, "lm_loss": 0.5555, "loss": 0.5555, "step": 2469 }, { "epoch": 1.107168204400284, "learning_rate": 3.637294264070189e-05, "lm_loss": 0.6122, "loss": 0.6122, "step": 2470 }, { "epoch": 1.1076164506368833, "learning_rate": 3.63621627081541e-05, "lm_loss": 0.6744, "loss": 0.6744, "step": 2471 }, { "epoch": 1.1080646968734824, "learning_rate": 3.635138011239767e-05, "lm_loss": 0.7016, "loss": 0.7016, "step": 2472 }, { "epoch": 1.1085129431100817, "learning_rate": 3.634059485595995e-05, "lm_loss": 0.3721, "loss": 0.3721, "step": 2473 }, { "epoch": 1.108961189346681, "learning_rate": 3.6329806941368926e-05, "lm_loss": 0.63, "loss": 0.63, "step": 2474 }, { "epoch": 1.1094094355832804, "learning_rate": 3.6319016371153215e-05, "lm_loss": 0.7311, "loss": 0.7311, "step": 2475 }, { "epoch": 1.1098576818198798, "learning_rate": 3.6308223147842034e-05, "lm_loss": 0.4529, "loss": 0.4529, "step": 2476 }, { "epoch": 1.110305928056479, "learning_rate": 3.6297427273965254e-05, "lm_loss": 0.671, "loss": 0.671, "step": 2477 }, { "epoch": 1.1107541742930784, "learning_rate": 3.628662875205333e-05, "lm_loss": 0.8049, "loss": 0.8049, "step": 2478 }, { "epoch": 1.1112024205296775, "learning_rate": 3.6275827584637376e-05, "lm_loss": 0.8655, "loss": 0.8655, "step": 2479 }, { "epoch": 1.1116506667662769, "learning_rate": 3.62650237742491e-05, "lm_loss": 0.69, "loss": 0.69, "step": 2480 }, { "epoch": 1.1120989130028762, "learning_rate": 3.625421732342083e-05, "lm_loss": 0.5391, "loss": 0.5391, "step": 2481 }, { "epoch": 1.1125471592394756, "learning_rate": 3.624340823468553e-05, "lm_loss": 0.5168, "loss": 0.5168, "step": 2482 }, { "epoch": 1.112995405476075, "learning_rate": 3.6232596510576775e-05, "lm_loss": 1.099, "loss": 1.099, "step": 2483 }, { "epoch": 1.1134436517126742, "learning_rate": 3.622178215362874e-05, "lm_loss": 0.6822, "loss": 0.6822, "step": 2484 }, { "epoch": 1.1138918979492736, "learning_rate": 3.6210965166376245e-05, "lm_loss": 0.5074, "loss": 0.5074, "step": 2485 }, { "epoch": 1.1143401441858727, "learning_rate": 3.62001455513547e-05, "lm_loss": 0.6758, "loss": 0.6758, "step": 2486 }, { "epoch": 1.114788390422472, "learning_rate": 3.618932331110016e-05, "lm_loss": 0.6188, "loss": 0.6188, "step": 2487 }, { "epoch": 1.1152366366590714, "learning_rate": 3.617849844814927e-05, "lm_loss": 0.5599, "loss": 0.5599, "step": 2488 }, { "epoch": 1.1156848828956707, "learning_rate": 3.616767096503931e-05, "lm_loss": 0.7185, "loss": 0.7185, "step": 2489 }, { "epoch": 1.11613312913227, "learning_rate": 3.615684086430815e-05, "lm_loss": 0.4561, "loss": 0.4561, "step": 2490 }, { "epoch": 1.1165813753688694, "learning_rate": 3.6146008148494286e-05, "lm_loss": 0.7539, "loss": 0.7539, "step": 2491 }, { "epoch": 1.1170296216054687, "learning_rate": 3.613517282013685e-05, "lm_loss": 0.4797, "loss": 0.4797, "step": 2492 }, { "epoch": 1.1174778678420678, "learning_rate": 3.612433488177554e-05, "lm_loss": 0.4804, "loss": 0.4804, "step": 2493 }, { "epoch": 1.1179261140786672, "learning_rate": 3.61134943359507e-05, "lm_loss": 0.6583, "loss": 0.6583, "step": 2494 }, { "epoch": 1.1183743603152665, "learning_rate": 3.6102651185203275e-05, "lm_loss": 1.1401, "loss": 1.1401, "step": 2495 }, { "epoch": 1.1188226065518658, "learning_rate": 3.6091805432074826e-05, "lm_loss": 0.6159, "loss": 0.6159, "step": 2496 }, { "epoch": 1.1192708527884652, "learning_rate": 3.6080957079107515e-05, "lm_loss": 0.5692, "loss": 0.5692, "step": 2497 }, { "epoch": 1.1197190990250645, "learning_rate": 3.607010612884411e-05, "lm_loss": 0.6313, "loss": 0.6313, "step": 2498 }, { "epoch": 1.1201673452616638, "learning_rate": 3.605925258382801e-05, "lm_loss": 0.4522, "loss": 0.4522, "step": 2499 }, { "epoch": 1.120615591498263, "learning_rate": 3.604839644660319e-05, "lm_loss": 0.619, "loss": 0.619, "step": 2500 }, { "epoch": 1.1210638377348623, "learning_rate": 3.603753771971426e-05, "lm_loss": 0.6345, "loss": 0.6345, "step": 2501 }, { "epoch": 1.1215120839714616, "learning_rate": 3.602667640570643e-05, "lm_loss": 0.5868, "loss": 0.5868, "step": 2502 }, { "epoch": 1.121960330208061, "learning_rate": 3.601581250712551e-05, "lm_loss": 0.6292, "loss": 0.6292, "step": 2503 }, { "epoch": 1.1224085764446603, "learning_rate": 3.6004946026517915e-05, "lm_loss": 0.6378, "loss": 0.6378, "step": 2504 }, { "epoch": 1.1228568226812596, "learning_rate": 3.599407696643068e-05, "lm_loss": 0.4817, "loss": 0.4817, "step": 2505 }, { "epoch": 1.123305068917859, "learning_rate": 3.598320532941142e-05, "lm_loss": 0.5849, "loss": 0.5849, "step": 2506 }, { "epoch": 1.123753315154458, "learning_rate": 3.5972331118008386e-05, "lm_loss": 1.1663, "loss": 1.1663, "step": 2507 }, { "epoch": 1.1242015613910574, "learning_rate": 3.596145433477039e-05, "lm_loss": 0.6547, "loss": 0.6547, "step": 2508 }, { "epoch": 1.1246498076276568, "learning_rate": 3.595057498224689e-05, "lm_loss": 0.9402, "loss": 0.9402, "step": 2509 }, { "epoch": 1.125098053864256, "learning_rate": 3.5939693062987916e-05, "lm_loss": 0.7343, "loss": 0.7343, "step": 2510 }, { "epoch": 1.1255463001008554, "learning_rate": 3.5928808579544126e-05, "lm_loss": 0.5907, "loss": 0.5907, "step": 2511 }, { "epoch": 1.1259945463374548, "learning_rate": 3.591792153446676e-05, "lm_loss": 0.5634, "loss": 0.5634, "step": 2512 }, { "epoch": 1.126442792574054, "learning_rate": 3.590703193030764e-05, "lm_loss": 0.5721, "loss": 0.5721, "step": 2513 }, { "epoch": 1.1268910388106532, "learning_rate": 3.5896139769619235e-05, "lm_loss": 0.6287, "loss": 0.6287, "step": 2514 }, { "epoch": 1.1273392850472526, "learning_rate": 3.5885245054954586e-05, "lm_loss": 0.5531, "loss": 0.5531, "step": 2515 }, { "epoch": 1.127787531283852, "learning_rate": 3.5874347788867335e-05, "lm_loss": 0.5568, "loss": 0.5568, "step": 2516 }, { "epoch": 1.1282357775204512, "learning_rate": 3.586344797391171e-05, "lm_loss": 0.6923, "loss": 0.6923, "step": 2517 }, { "epoch": 1.1286840237570506, "learning_rate": 3.585254561264256e-05, "lm_loss": 0.423, "loss": 0.423, "step": 2518 }, { "epoch": 1.12913226999365, "learning_rate": 3.584164070761531e-05, "lm_loss": 1.0488, "loss": 1.0488, "step": 2519 }, { "epoch": 1.1295805162302492, "learning_rate": 3.5830733261386e-05, "lm_loss": 0.8306, "loss": 0.8306, "step": 2520 }, { "epoch": 1.1300287624668486, "learning_rate": 3.581982327651126e-05, "lm_loss": 0.8563, "loss": 0.8563, "step": 2521 }, { "epoch": 1.1304770087034477, "learning_rate": 3.5808910755548296e-05, "lm_loss": 0.5825, "loss": 0.5825, "step": 2522 }, { "epoch": 1.130925254940047, "learning_rate": 3.5797995701054935e-05, "lm_loss": 0.9095, "loss": 0.9095, "step": 2523 }, { "epoch": 1.1313735011766464, "learning_rate": 3.578707811558959e-05, "lm_loss": 0.8791, "loss": 0.8791, "step": 2524 }, { "epoch": 1.1318217474132457, "learning_rate": 3.5776158001711254e-05, "lm_loss": 1.1476, "loss": 1.1476, "step": 2525 }, { "epoch": 1.132269993649845, "learning_rate": 3.576523536197952e-05, "lm_loss": 0.743, "loss": 0.743, "step": 2526 }, { "epoch": 1.1327182398864444, "learning_rate": 3.575431019895459e-05, "lm_loss": 0.4753, "loss": 0.4753, "step": 2527 }, { "epoch": 1.1331664861230437, "learning_rate": 3.574338251519724e-05, "lm_loss": 0.774, "loss": 0.774, "step": 2528 }, { "epoch": 1.1336147323596428, "learning_rate": 3.573245231326883e-05, "lm_loss": 0.8466, "loss": 0.8466, "step": 2529 }, { "epoch": 1.1340629785962422, "learning_rate": 3.572151959573132e-05, "lm_loss": 0.8631, "loss": 0.8631, "step": 2530 }, { "epoch": 1.1345112248328415, "learning_rate": 3.571058436514728e-05, "lm_loss": 0.608, "loss": 0.608, "step": 2531 }, { "epoch": 1.1349594710694408, "learning_rate": 3.569964662407983e-05, "lm_loss": 0.4759, "loss": 0.4759, "step": 2532 }, { "epoch": 1.1354077173060402, "learning_rate": 3.56887063750927e-05, "lm_loss": 0.5616, "loss": 0.5616, "step": 2533 }, { "epoch": 1.1358559635426395, "learning_rate": 3.567776362075021e-05, "lm_loss": 0.6121, "loss": 0.6121, "step": 2534 }, { "epoch": 1.1363042097792388, "learning_rate": 3.566681836361724e-05, "lm_loss": 0.5432, "loss": 0.5432, "step": 2535 }, { "epoch": 1.136752456015838, "learning_rate": 3.5655870606259315e-05, "lm_loss": 0.6799, "loss": 0.6799, "step": 2536 }, { "epoch": 1.1372007022524373, "learning_rate": 3.5644920351242496e-05, "lm_loss": 0.5691, "loss": 0.5691, "step": 2537 }, { "epoch": 1.1376489484890366, "learning_rate": 3.5633967601133436e-05, "lm_loss": 0.5043, "loss": 0.5043, "step": 2538 }, { "epoch": 1.138097194725636, "learning_rate": 3.5623012358499384e-05, "lm_loss": 0.7236, "loss": 0.7236, "step": 2539 }, { "epoch": 1.1385454409622353, "learning_rate": 3.561205462590818e-05, "lm_loss": 0.63, "loss": 0.63, "step": 2540 }, { "epoch": 1.1389936871988346, "learning_rate": 3.560109440592822e-05, "lm_loss": 0.5167, "loss": 0.5167, "step": 2541 }, { "epoch": 1.139441933435434, "learning_rate": 3.55901317011285e-05, "lm_loss": 0.6469, "loss": 0.6469, "step": 2542 }, { "epoch": 1.139890179672033, "learning_rate": 3.5579166514078614e-05, "lm_loss": 0.5945, "loss": 0.5945, "step": 2543 }, { "epoch": 1.1403384259086324, "learning_rate": 3.556819884734872e-05, "lm_loss": 0.7651, "loss": 0.7651, "step": 2544 }, { "epoch": 1.1407866721452318, "learning_rate": 3.5557228703509546e-05, "lm_loss": 0.9396, "loss": 0.9396, "step": 2545 }, { "epoch": 1.141234918381831, "learning_rate": 3.554625608513244e-05, "lm_loss": 0.5521, "loss": 0.5521, "step": 2546 }, { "epoch": 1.1416831646184304, "learning_rate": 3.553528099478926e-05, "lm_loss": 0.6348, "loss": 0.6348, "step": 2547 }, { "epoch": 1.1421314108550298, "learning_rate": 3.552430343505254e-05, "lm_loss": 0.7007, "loss": 0.7007, "step": 2548 }, { "epoch": 1.1425796570916291, "learning_rate": 3.55133234084953e-05, "lm_loss": 0.5122, "loss": 0.5122, "step": 2549 }, { "epoch": 1.1430279033282282, "learning_rate": 3.55023409176912e-05, "lm_loss": 0.5936, "loss": 0.5936, "step": 2550 }, { "epoch": 1.1434761495648276, "learning_rate": 3.549135596521445e-05, "lm_loss": 0.6984, "loss": 0.6984, "step": 2551 }, { "epoch": 1.143924395801427, "learning_rate": 3.548036855363984e-05, "lm_loss": 0.6675, "loss": 0.6675, "step": 2552 }, { "epoch": 1.1443726420380262, "learning_rate": 3.5469378685542744e-05, "lm_loss": 0.904, "loss": 0.904, "step": 2553 }, { "epoch": 1.1448208882746256, "learning_rate": 3.5458386363499115e-05, "lm_loss": 0.8917, "loss": 0.8917, "step": 2554 }, { "epoch": 1.145269134511225, "learning_rate": 3.544739159008545e-05, "lm_loss": 0.5113, "loss": 0.5113, "step": 2555 }, { "epoch": 1.1457173807478243, "learning_rate": 3.543639436787888e-05, "lm_loss": 0.5609, "loss": 0.5609, "step": 2556 }, { "epoch": 1.1461656269844234, "learning_rate": 3.5425394699457045e-05, "lm_loss": 0.7694, "loss": 0.7694, "step": 2557 }, { "epoch": 1.1466138732210227, "learning_rate": 3.5414392587398195e-05, "lm_loss": 0.57, "loss": 0.57, "step": 2558 }, { "epoch": 1.147062119457622, "learning_rate": 3.5403388034281146e-05, "lm_loss": 1.2679, "loss": 1.2679, "step": 2559 }, { "epoch": 1.1475103656942214, "learning_rate": 3.539238104268529e-05, "lm_loss": 0.7351, "loss": 0.7351, "step": 2560 }, { "epoch": 1.1479586119308207, "learning_rate": 3.5381371615190584e-05, "lm_loss": 0.5637, "loss": 0.5637, "step": 2561 }, { "epoch": 1.14840685816742, "learning_rate": 3.537035975437755e-05, "lm_loss": 0.7411, "loss": 0.7411, "step": 2562 }, { "epoch": 1.1488551044040194, "learning_rate": 3.5359345462827295e-05, "lm_loss": 0.5522, "loss": 0.5522, "step": 2563 }, { "epoch": 1.1493033506406185, "learning_rate": 3.53483287431215e-05, "lm_loss": 0.6088, "loss": 0.6088, "step": 2564 }, { "epoch": 1.1497515968772178, "learning_rate": 3.533730959784238e-05, "lm_loss": 0.4309, "loss": 0.4309, "step": 2565 }, { "epoch": 1.1501998431138172, "learning_rate": 3.5326288029572774e-05, "lm_loss": 0.6868, "loss": 0.6868, "step": 2566 }, { "epoch": 1.1506480893504165, "learning_rate": 3.531526404089602e-05, "lm_loss": 0.5684, "loss": 0.5684, "step": 2567 }, { "epoch": 1.1510963355870159, "learning_rate": 3.5304237634396094e-05, "lm_loss": 0.6611, "loss": 0.6611, "step": 2568 }, { "epoch": 1.1515445818236152, "learning_rate": 3.5293208812657495e-05, "lm_loss": 0.5776, "loss": 0.5776, "step": 2569 }, { "epoch": 1.1519928280602145, "learning_rate": 3.5282177578265296e-05, "lm_loss": 0.4644, "loss": 0.4644, "step": 2570 }, { "epoch": 1.1524410742968136, "learning_rate": 3.5271143933805136e-05, "lm_loss": 0.641, "loss": 0.641, "step": 2571 }, { "epoch": 1.152889320533413, "learning_rate": 3.526010788186322e-05, "lm_loss": 0.6452, "loss": 0.6452, "step": 2572 }, { "epoch": 1.1533375667700123, "learning_rate": 3.524906942502633e-05, "lm_loss": 0.4459, "loss": 0.4459, "step": 2573 }, { "epoch": 1.1537858130066116, "learning_rate": 3.52380285658818e-05, "lm_loss": 0.7526, "loss": 0.7526, "step": 2574 }, { "epoch": 1.154234059243211, "learning_rate": 3.522698530701751e-05, "lm_loss": 0.5118, "loss": 0.5118, "step": 2575 }, { "epoch": 1.1546823054798103, "learning_rate": 3.521593965102194e-05, "lm_loss": 0.6679, "loss": 0.6679, "step": 2576 }, { "epoch": 1.1551305517164097, "learning_rate": 3.5204891600484094e-05, "lm_loss": 0.6742, "loss": 0.6742, "step": 2577 }, { "epoch": 1.1555787979530088, "learning_rate": 3.519384115799357e-05, "lm_loss": 0.4673, "loss": 0.4673, "step": 2578 }, { "epoch": 1.156027044189608, "learning_rate": 3.5182788326140514e-05, "lm_loss": 0.7197, "loss": 0.7197, "step": 2579 }, { "epoch": 1.1564752904262074, "learning_rate": 3.5171733107515604e-05, "lm_loss": 1.0672, "loss": 1.0672, "step": 2580 }, { "epoch": 1.1569235366628068, "learning_rate": 3.516067550471013e-05, "lm_loss": 0.4172, "loss": 0.4172, "step": 2581 }, { "epoch": 1.1573717828994061, "learning_rate": 3.514961552031591e-05, "lm_loss": 0.7264, "loss": 0.7264, "step": 2582 }, { "epoch": 1.1578200291360055, "learning_rate": 3.5138553156925316e-05, "lm_loss": 0.5266, "loss": 0.5266, "step": 2583 }, { "epoch": 1.1582682753726048, "learning_rate": 3.512748841713128e-05, "lm_loss": 0.5932, "loss": 0.5932, "step": 2584 }, { "epoch": 1.158716521609204, "learning_rate": 3.5116421303527317e-05, "lm_loss": 0.7055, "loss": 0.7055, "step": 2585 }, { "epoch": 1.1591647678458032, "learning_rate": 3.510535181870746e-05, "lm_loss": 0.4735, "loss": 0.4735, "step": 2586 }, { "epoch": 1.1596130140824026, "learning_rate": 3.509427996526633e-05, "lm_loss": 0.5084, "loss": 0.5084, "step": 2587 }, { "epoch": 1.160061260319002, "learning_rate": 3.5083205745799076e-05, "lm_loss": 0.6572, "loss": 0.6572, "step": 2588 }, { "epoch": 1.1605095065556013, "learning_rate": 3.5072129162901416e-05, "lm_loss": 0.8964, "loss": 0.8964, "step": 2589 }, { "epoch": 1.1609577527922006, "learning_rate": 3.506105021916963e-05, "lm_loss": 0.7316, "loss": 0.7316, "step": 2590 }, { "epoch": 1.1614059990288, "learning_rate": 3.504996891720055e-05, "lm_loss": 0.5417, "loss": 0.5417, "step": 2591 }, { "epoch": 1.161854245265399, "learning_rate": 3.503888525959152e-05, "lm_loss": 0.7143, "loss": 0.7143, "step": 2592 }, { "epoch": 1.1623024915019984, "learning_rate": 3.5027799248940504e-05, "lm_loss": 0.6077, "loss": 0.6077, "step": 2593 }, { "epoch": 1.1627507377385977, "learning_rate": 3.501671088784597e-05, "lm_loss": 0.5865, "loss": 0.5865, "step": 2594 }, { "epoch": 1.163198983975197, "learning_rate": 3.500562017890695e-05, "lm_loss": 0.5671, "loss": 0.5671, "step": 2595 }, { "epoch": 1.1636472302117964, "learning_rate": 3.499452712472302e-05, "lm_loss": 1.0516, "loss": 1.0516, "step": 2596 }, { "epoch": 1.1640954764483957, "learning_rate": 3.498343172789432e-05, "lm_loss": 0.7745, "loss": 0.7745, "step": 2597 }, { "epoch": 1.164543722684995, "learning_rate": 3.4972333991021515e-05, "lm_loss": 0.6037, "loss": 0.6037, "step": 2598 }, { "epoch": 1.1649919689215942, "learning_rate": 3.4961233916705856e-05, "lm_loss": 1.0234, "loss": 1.0234, "step": 2599 }, { "epoch": 1.1654402151581935, "learning_rate": 3.49501315075491e-05, "lm_loss": 0.5701, "loss": 0.5701, "step": 2600 }, { "epoch": 1.1658884613947929, "learning_rate": 3.4939026766153596e-05, "lm_loss": 0.6403, "loss": 0.6403, "step": 2601 }, { "epoch": 1.1663367076313922, "learning_rate": 3.492791969512218e-05, "lm_loss": 0.5753, "loss": 0.5753, "step": 2602 }, { "epoch": 1.1667849538679915, "learning_rate": 3.491681029705831e-05, "lm_loss": 0.4636, "loss": 0.4636, "step": 2603 }, { "epoch": 1.1672332001045909, "learning_rate": 3.4905698574565916e-05, "lm_loss": 0.7994, "loss": 0.7994, "step": 2604 }, { "epoch": 1.1676814463411902, "learning_rate": 3.4894584530249516e-05, "lm_loss": 0.9514, "loss": 0.9514, "step": 2605 }, { "epoch": 1.1681296925777893, "learning_rate": 3.488346816671416e-05, "lm_loss": 0.5351, "loss": 0.5351, "step": 2606 }, { "epoch": 1.1685779388143887, "learning_rate": 3.487234948656545e-05, "lm_loss": 0.6918, "loss": 0.6918, "step": 2607 }, { "epoch": 1.169026185050988, "learning_rate": 3.486122849240951e-05, "lm_loss": 0.6349, "loss": 0.6349, "step": 2608 }, { "epoch": 1.1694744312875873, "learning_rate": 3.485010518685304e-05, "lm_loss": 0.5594, "loss": 0.5594, "step": 2609 }, { "epoch": 1.1699226775241867, "learning_rate": 3.483897957250325e-05, "lm_loss": 0.5069, "loss": 0.5069, "step": 2610 }, { "epoch": 1.170370923760786, "learning_rate": 3.4827851651967905e-05, "lm_loss": 0.4817, "loss": 0.4817, "step": 2611 }, { "epoch": 1.1708191699973853, "learning_rate": 3.4816721427855314e-05, "lm_loss": 1.2675, "loss": 1.2675, "step": 2612 }, { "epoch": 1.1712674162339844, "learning_rate": 3.480558890277431e-05, "lm_loss": 0.5669, "loss": 0.5669, "step": 2613 }, { "epoch": 1.1717156624705838, "learning_rate": 3.479445407933428e-05, "lm_loss": 0.6091, "loss": 0.6091, "step": 2614 }, { "epoch": 1.1721639087071831, "learning_rate": 3.4783316960145165e-05, "lm_loss": 0.5089, "loss": 0.5089, "step": 2615 }, { "epoch": 1.1726121549437825, "learning_rate": 3.477217754781739e-05, "lm_loss": 0.6908, "loss": 0.6908, "step": 2616 }, { "epoch": 1.1730604011803818, "learning_rate": 3.4761035844961976e-05, "lm_loss": 0.6762, "loss": 0.6762, "step": 2617 }, { "epoch": 1.1735086474169811, "learning_rate": 3.474989185419045e-05, "lm_loss": 0.9061, "loss": 0.9061, "step": 2618 }, { "epoch": 1.1739568936535805, "learning_rate": 3.47387455781149e-05, "lm_loss": 0.6675, "loss": 0.6675, "step": 2619 }, { "epoch": 1.1744051398901796, "learning_rate": 3.47275970193479e-05, "lm_loss": 0.4663, "loss": 0.4663, "step": 2620 }, { "epoch": 1.174853386126779, "learning_rate": 3.471644618050262e-05, "lm_loss": 0.5866, "loss": 0.5866, "step": 2621 }, { "epoch": 1.1753016323633783, "learning_rate": 3.470529306419272e-05, "lm_loss": 0.711, "loss": 0.711, "step": 2622 }, { "epoch": 1.1757498785999776, "learning_rate": 3.4694137673032415e-05, "lm_loss": 1.0159, "loss": 1.0159, "step": 2623 }, { "epoch": 1.176198124836577, "learning_rate": 3.4682980009636455e-05, "lm_loss": 0.5401, "loss": 0.5401, "step": 2624 }, { "epoch": 1.1766463710731763, "learning_rate": 3.46718200766201e-05, "lm_loss": 0.8279, "loss": 0.8279, "step": 2625 }, { "epoch": 1.1770946173097756, "learning_rate": 3.466065787659917e-05, "lm_loss": 0.7976, "loss": 0.7976, "step": 2626 }, { "epoch": 1.1775428635463747, "learning_rate": 3.464949341218999e-05, "lm_loss": 0.77, "loss": 0.77, "step": 2627 }, { "epoch": 1.177991109782974, "learning_rate": 3.463832668600945e-05, "lm_loss": 1.1039, "loss": 1.1039, "step": 2628 }, { "epoch": 1.1784393560195734, "learning_rate": 3.462715770067494e-05, "lm_loss": 0.6007, "loss": 0.6007, "step": 2629 }, { "epoch": 1.1788876022561727, "learning_rate": 3.461598645880438e-05, "lm_loss": 0.542, "loss": 0.542, "step": 2630 }, { "epoch": 1.179335848492772, "learning_rate": 3.4604812963016244e-05, "lm_loss": 0.7093, "loss": 0.7093, "step": 2631 }, { "epoch": 1.1797840947293714, "learning_rate": 3.459363721592951e-05, "lm_loss": 0.9703, "loss": 0.9703, "step": 2632 }, { "epoch": 1.1802323409659707, "learning_rate": 3.458245922016368e-05, "lm_loss": 0.8086, "loss": 0.8086, "step": 2633 }, { "epoch": 1.1806805872025699, "learning_rate": 3.4571278978338814e-05, "lm_loss": 0.5397, "loss": 0.5397, "step": 2634 }, { "epoch": 1.1811288334391692, "learning_rate": 3.456009649307548e-05, "lm_loss": 0.4964, "loss": 0.4964, "step": 2635 }, { "epoch": 1.1815770796757685, "learning_rate": 3.454891176699477e-05, "lm_loss": 0.5964, "loss": 0.5964, "step": 2636 }, { "epoch": 1.1820253259123679, "learning_rate": 3.453772480271829e-05, "lm_loss": 0.5498, "loss": 0.5498, "step": 2637 }, { "epoch": 1.1824735721489672, "learning_rate": 3.452653560286819e-05, "lm_loss": 0.5963, "loss": 0.5963, "step": 2638 }, { "epoch": 1.1829218183855665, "learning_rate": 3.4515344170067145e-05, "lm_loss": 0.5219, "loss": 0.5219, "step": 2639 }, { "epoch": 1.1833700646221659, "learning_rate": 3.450415050693834e-05, "lm_loss": 0.9556, "loss": 0.9556, "step": 2640 }, { "epoch": 1.183818310858765, "learning_rate": 3.4492954616105485e-05, "lm_loss": 0.893, "loss": 0.893, "step": 2641 }, { "epoch": 1.1842665570953643, "learning_rate": 3.448175650019282e-05, "lm_loss": 0.5327, "loss": 0.5327, "step": 2642 }, { "epoch": 1.1847148033319637, "learning_rate": 3.4470556161825104e-05, "lm_loss": 0.5427, "loss": 0.5427, "step": 2643 }, { "epoch": 1.185163049568563, "learning_rate": 3.445935360362761e-05, "lm_loss": 0.6611, "loss": 0.6611, "step": 2644 }, { "epoch": 1.1856112958051623, "learning_rate": 3.444814882822614e-05, "lm_loss": 0.6831, "loss": 0.6831, "step": 2645 }, { "epoch": 1.1860595420417617, "learning_rate": 3.443694183824701e-05, "lm_loss": 0.4644, "loss": 0.4644, "step": 2646 }, { "epoch": 1.186507788278361, "learning_rate": 3.4425732636317055e-05, "lm_loss": 0.9987, "loss": 0.9987, "step": 2647 }, { "epoch": 1.1869560345149601, "learning_rate": 3.441452122506364e-05, "lm_loss": 0.638, "loss": 0.638, "step": 2648 }, { "epoch": 1.1874042807515595, "learning_rate": 3.4403307607114635e-05, "lm_loss": 0.7008, "loss": 0.7008, "step": 2649 }, { "epoch": 1.1878525269881588, "learning_rate": 3.439209178509842e-05, "lm_loss": 0.4339, "loss": 0.4339, "step": 2650 }, { "epoch": 1.1883007732247581, "learning_rate": 3.438087376164392e-05, "lm_loss": 0.5957, "loss": 0.5957, "step": 2651 }, { "epoch": 1.1887490194613575, "learning_rate": 3.436965353938055e-05, "lm_loss": 0.5515, "loss": 0.5515, "step": 2652 }, { "epoch": 1.1891972656979568, "learning_rate": 3.4358431120938244e-05, "lm_loss": 0.5624, "loss": 0.5624, "step": 2653 }, { "epoch": 1.1896455119345561, "learning_rate": 3.434720650894746e-05, "lm_loss": 0.5962, "loss": 0.5962, "step": 2654 }, { "epoch": 1.1900937581711553, "learning_rate": 3.433597970603917e-05, "lm_loss": 0.582, "loss": 0.582, "step": 2655 }, { "epoch": 1.1905420044077546, "learning_rate": 3.432475071484485e-05, "lm_loss": 0.4815, "loss": 0.4815, "step": 2656 }, { "epoch": 1.190990250644354, "learning_rate": 3.4313519537996495e-05, "lm_loss": 0.6267, "loss": 0.6267, "step": 2657 }, { "epoch": 1.1914384968809533, "learning_rate": 3.430228617812661e-05, "lm_loss": 0.6158, "loss": 0.6158, "step": 2658 }, { "epoch": 1.1918867431175526, "learning_rate": 3.4291050637868214e-05, "lm_loss": 0.5224, "loss": 0.5224, "step": 2659 }, { "epoch": 1.192334989354152, "learning_rate": 3.427981291985484e-05, "lm_loss": 1.109, "loss": 1.109, "step": 2660 }, { "epoch": 1.1927832355907513, "learning_rate": 3.426857302672053e-05, "lm_loss": 0.6312, "loss": 0.6312, "step": 2661 }, { "epoch": 1.1932314818273504, "learning_rate": 3.4257330961099815e-05, "lm_loss": 0.6554, "loss": 0.6554, "step": 2662 }, { "epoch": 1.1936797280639497, "learning_rate": 3.424608672562778e-05, "lm_loss": 0.4792, "loss": 0.4792, "step": 2663 }, { "epoch": 1.194127974300549, "learning_rate": 3.4234840322939974e-05, "lm_loss": 0.6443, "loss": 0.6443, "step": 2664 }, { "epoch": 1.1945762205371484, "learning_rate": 3.422359175567248e-05, "lm_loss": 0.5697, "loss": 0.5697, "step": 2665 }, { "epoch": 1.1950244667737477, "learning_rate": 3.4212341026461875e-05, "lm_loss": 0.6276, "loss": 0.6276, "step": 2666 }, { "epoch": 1.195472713010347, "learning_rate": 3.420108813794526e-05, "lm_loss": 0.8001, "loss": 0.8001, "step": 2667 }, { "epoch": 1.1959209592469464, "learning_rate": 3.418983309276022e-05, "lm_loss": 0.9114, "loss": 0.9114, "step": 2668 }, { "epoch": 1.1963692054835455, "learning_rate": 3.4178575893544865e-05, "lm_loss": 0.5543, "loss": 0.5543, "step": 2669 }, { "epoch": 1.1968174517201449, "learning_rate": 3.416731654293778e-05, "lm_loss": 0.8206, "loss": 0.8206, "step": 2670 }, { "epoch": 1.1972656979567442, "learning_rate": 3.41560550435781e-05, "lm_loss": 0.9187, "loss": 0.9187, "step": 2671 }, { "epoch": 1.1977139441933435, "learning_rate": 3.4144791398105425e-05, "lm_loss": 0.5857, "loss": 0.5857, "step": 2672 }, { "epoch": 1.1981621904299429, "learning_rate": 3.413352560915988e-05, "lm_loss": 0.7643, "loss": 0.7643, "step": 2673 }, { "epoch": 1.1986104366665422, "learning_rate": 3.4122257679382073e-05, "lm_loss": 0.4985, "loss": 0.4985, "step": 2674 }, { "epoch": 1.1990586829031415, "learning_rate": 3.411098761141314e-05, "lm_loss": 0.6949, "loss": 0.6949, "step": 2675 }, { "epoch": 1.1995069291397407, "learning_rate": 3.4099715407894695e-05, "lm_loss": 1.0588, "loss": 1.0588, "step": 2676 }, { "epoch": 1.19995517537634, "learning_rate": 3.408844107146886e-05, "lm_loss": 0.8716, "loss": 0.8716, "step": 2677 }, { "epoch": 1.2004034216129393, "learning_rate": 3.407716460477826e-05, "lm_loss": 0.636, "loss": 0.636, "step": 2678 }, { "epoch": 1.2008516678495387, "learning_rate": 3.406588601046601e-05, "lm_loss": 0.6236, "loss": 0.6236, "step": 2679 }, { "epoch": 1.201299914086138, "learning_rate": 3.405460529117575e-05, "lm_loss": 0.7367, "loss": 0.7367, "step": 2680 }, { "epoch": 1.2017481603227373, "learning_rate": 3.404332244955158e-05, "lm_loss": 0.5172, "loss": 0.5172, "step": 2681 }, { "epoch": 1.2021964065593367, "learning_rate": 3.403203748823812e-05, "lm_loss": 1.0041, "loss": 1.0041, "step": 2682 }, { "epoch": 1.2026446527959358, "learning_rate": 3.4020750409880484e-05, "lm_loss": 0.4945, "loss": 0.4945, "step": 2683 }, { "epoch": 1.2030928990325351, "learning_rate": 3.400946121712429e-05, "lm_loss": 0.4733, "loss": 0.4733, "step": 2684 }, { "epoch": 1.2035411452691345, "learning_rate": 3.399816991261564e-05, "lm_loss": 0.5766, "loss": 0.5766, "step": 2685 }, { "epoch": 1.2039893915057338, "learning_rate": 3.398687649900112e-05, "lm_loss": 0.7422, "loss": 0.7422, "step": 2686 }, { "epoch": 1.2044376377423331, "learning_rate": 3.3975580978927835e-05, "lm_loss": 0.4112, "loss": 0.4112, "step": 2687 }, { "epoch": 1.2048858839789325, "learning_rate": 3.3964283355043383e-05, "lm_loss": 0.7223, "loss": 0.7223, "step": 2688 }, { "epoch": 1.2053341302155318, "learning_rate": 3.395298362999583e-05, "lm_loss": 0.5669, "loss": 0.5669, "step": 2689 }, { "epoch": 1.205782376452131, "learning_rate": 3.394168180643376e-05, "lm_loss": 0.583, "loss": 0.583, "step": 2690 }, { "epoch": 1.2062306226887303, "learning_rate": 3.3930377887006214e-05, "lm_loss": 0.6169, "loss": 0.6169, "step": 2691 }, { "epoch": 1.2066788689253296, "learning_rate": 3.391907187436278e-05, "lm_loss": 0.68, "loss": 0.68, "step": 2692 }, { "epoch": 1.207127115161929, "learning_rate": 3.39077637711535e-05, "lm_loss": 0.4822, "loss": 0.4822, "step": 2693 }, { "epoch": 1.2075753613985283, "learning_rate": 3.38964535800289e-05, "lm_loss": 0.6871, "loss": 0.6871, "step": 2694 }, { "epoch": 1.2080236076351276, "learning_rate": 3.388514130364001e-05, "lm_loss": 0.5069, "loss": 0.5069, "step": 2695 }, { "epoch": 1.208471853871727, "learning_rate": 3.387382694463834e-05, "lm_loss": 0.6452, "loss": 0.6452, "step": 2696 }, { "epoch": 1.208920100108326, "learning_rate": 3.3862510505675915e-05, "lm_loss": 0.6224, "loss": 0.6224, "step": 2697 }, { "epoch": 1.2093683463449254, "learning_rate": 3.3851191989405214e-05, "lm_loss": 0.4538, "loss": 0.4538, "step": 2698 }, { "epoch": 1.2098165925815247, "learning_rate": 3.3839871398479205e-05, "lm_loss": 0.5178, "loss": 0.5178, "step": 2699 }, { "epoch": 1.210264838818124, "learning_rate": 3.382854873555137e-05, "lm_loss": 0.8538, "loss": 0.8538, "step": 2700 }, { "epoch": 1.2107130850547234, "learning_rate": 3.381722400327564e-05, "lm_loss": 0.8202, "loss": 0.8202, "step": 2701 }, { "epoch": 1.2111613312913228, "learning_rate": 3.380589720430647e-05, "lm_loss": 0.5819, "loss": 0.5819, "step": 2702 }, { "epoch": 1.211609577527922, "learning_rate": 3.379456834129876e-05, "lm_loss": 0.7819, "loss": 0.7819, "step": 2703 }, { "epoch": 1.2120578237645212, "learning_rate": 3.3783237416907936e-05, "lm_loss": 1.2961, "loss": 1.2961, "step": 2704 }, { "epoch": 1.2125060700011205, "learning_rate": 3.3771904433789864e-05, "lm_loss": 0.6698, "loss": 0.6698, "step": 2705 }, { "epoch": 1.2129543162377199, "learning_rate": 3.376056939460093e-05, "lm_loss": 0.5277, "loss": 0.5277, "step": 2706 }, { "epoch": 1.2134025624743192, "learning_rate": 3.374923230199797e-05, "lm_loss": 0.6028, "loss": 0.6028, "step": 2707 }, { "epoch": 1.2138508087109185, "learning_rate": 3.373789315863833e-05, "lm_loss": 1.0444, "loss": 1.0444, "step": 2708 }, { "epoch": 1.2142990549475179, "learning_rate": 3.372655196717981e-05, "lm_loss": 0.5309, "loss": 0.5309, "step": 2709 }, { "epoch": 1.2147473011841172, "learning_rate": 3.371520873028071e-05, "lm_loss": 1.0756, "loss": 1.0756, "step": 2710 }, { "epoch": 1.2151955474207163, "learning_rate": 3.37038634505998e-05, "lm_loss": 1.2104, "loss": 1.2104, "step": 2711 }, { "epoch": 1.2156437936573157, "learning_rate": 3.369251613079634e-05, "lm_loss": 0.4934, "loss": 0.4934, "step": 2712 }, { "epoch": 1.216092039893915, "learning_rate": 3.3681166773530056e-05, "lm_loss": 0.6954, "loss": 0.6954, "step": 2713 }, { "epoch": 1.2165402861305143, "learning_rate": 3.366981538146115e-05, "lm_loss": 0.6405, "loss": 0.6405, "step": 2714 }, { "epoch": 1.2169885323671137, "learning_rate": 3.36584619572503e-05, "lm_loss": 0.5282, "loss": 0.5282, "step": 2715 }, { "epoch": 1.217436778603713, "learning_rate": 3.3647106503558676e-05, "lm_loss": 0.7547, "loss": 0.7547, "step": 2716 }, { "epoch": 1.2178850248403124, "learning_rate": 3.3635749023047924e-05, "lm_loss": 0.6722, "loss": 0.6722, "step": 2717 }, { "epoch": 1.2183332710769115, "learning_rate": 3.362438951838014e-05, "lm_loss": 0.552, "loss": 0.552, "step": 2718 }, { "epoch": 1.2187815173135108, "learning_rate": 3.36130279922179e-05, "lm_loss": 0.6042, "loss": 0.6042, "step": 2719 }, { "epoch": 1.2192297635501101, "learning_rate": 3.360166444722429e-05, "lm_loss": 0.5446, "loss": 0.5446, "step": 2720 }, { "epoch": 1.2196780097867095, "learning_rate": 3.3590298886062833e-05, "lm_loss": 0.7378, "loss": 0.7378, "step": 2721 }, { "epoch": 1.2201262560233088, "learning_rate": 3.3578931311397524e-05, "lm_loss": 0.4789, "loss": 0.4789, "step": 2722 }, { "epoch": 1.2205745022599082, "learning_rate": 3.356756172589285e-05, "lm_loss": 0.5792, "loss": 0.5792, "step": 2723 }, { "epoch": 1.2210227484965075, "learning_rate": 3.355619013221375e-05, "lm_loss": 0.5131, "loss": 0.5131, "step": 2724 }, { "epoch": 1.2214709947331066, "learning_rate": 3.354481653302566e-05, "lm_loss": 0.5486, "loss": 0.5486, "step": 2725 }, { "epoch": 1.221919240969706, "learning_rate": 3.3533440930994454e-05, "lm_loss": 0.6628, "loss": 0.6628, "step": 2726 }, { "epoch": 1.2223674872063053, "learning_rate": 3.352206332878649e-05, "lm_loss": 0.7838, "loss": 0.7838, "step": 2727 }, { "epoch": 1.2228157334429046, "learning_rate": 3.3510683729068604e-05, "lm_loss": 0.8633, "loss": 0.8633, "step": 2728 }, { "epoch": 1.223263979679504, "learning_rate": 3.34993021345081e-05, "lm_loss": 0.576, "loss": 0.576, "step": 2729 }, { "epoch": 1.2237122259161033, "learning_rate": 3.348791854777272e-05, "lm_loss": 0.4254, "loss": 0.4254, "step": 2730 }, { "epoch": 1.2241604721527026, "learning_rate": 3.347653297153071e-05, "lm_loss": 0.7063, "loss": 0.7063, "step": 2731 }, { "epoch": 1.2246087183893017, "learning_rate": 3.3465145408450755e-05, "lm_loss": 1.1096, "loss": 1.1096, "step": 2732 }, { "epoch": 1.225056964625901, "learning_rate": 3.345375586120203e-05, "lm_loss": 0.5754, "loss": 0.5754, "step": 2733 }, { "epoch": 1.2255052108625004, "learning_rate": 3.3442364332454155e-05, "lm_loss": 0.61, "loss": 0.61, "step": 2734 }, { "epoch": 1.2259534570990998, "learning_rate": 3.3430970824877225e-05, "lm_loss": 0.569, "loss": 0.569, "step": 2735 }, { "epoch": 1.226401703335699, "learning_rate": 3.341957534114179e-05, "lm_loss": 0.5415, "loss": 0.5415, "step": 2736 }, { "epoch": 1.2268499495722984, "learning_rate": 3.340817788391887e-05, "lm_loss": 0.6714, "loss": 0.6714, "step": 2737 }, { "epoch": 1.2272981958088978, "learning_rate": 3.3396778455879954e-05, "lm_loss": 0.5346, "loss": 0.5346, "step": 2738 }, { "epoch": 1.2277464420454969, "learning_rate": 3.3385377059696976e-05, "lm_loss": 0.7081, "loss": 0.7081, "step": 2739 }, { "epoch": 1.2281946882820962, "learning_rate": 3.337397369804235e-05, "lm_loss": 0.4823, "loss": 0.4823, "step": 2740 }, { "epoch": 1.2286429345186956, "learning_rate": 3.336256837358893e-05, "lm_loss": 0.5874, "loss": 0.5874, "step": 2741 }, { "epoch": 1.2290911807552949, "learning_rate": 3.335116108901005e-05, "lm_loss": 0.6427, "loss": 0.6427, "step": 2742 }, { "epoch": 1.2295394269918942, "learning_rate": 3.33397518469795e-05, "lm_loss": 0.5678, "loss": 0.5678, "step": 2743 }, { "epoch": 1.2299876732284936, "learning_rate": 3.3328340650171506e-05, "lm_loss": 0.6505, "loss": 0.6505, "step": 2744 }, { "epoch": 1.230435919465093, "learning_rate": 3.331692750126079e-05, "lm_loss": 0.6928, "loss": 0.6928, "step": 2745 }, { "epoch": 1.230884165701692, "learning_rate": 3.330551240292249e-05, "lm_loss": 0.8778, "loss": 0.8778, "step": 2746 }, { "epoch": 1.2313324119382913, "learning_rate": 3.329409535783224e-05, "lm_loss": 0.5137, "loss": 0.5137, "step": 2747 }, { "epoch": 1.2317806581748907, "learning_rate": 3.328267636866611e-05, "lm_loss": 0.7778, "loss": 0.7778, "step": 2748 }, { "epoch": 1.23222890441149, "learning_rate": 3.3271255438100624e-05, "lm_loss": 0.5202, "loss": 0.5202, "step": 2749 }, { "epoch": 1.2326771506480894, "learning_rate": 3.325983256881276e-05, "lm_loss": 0.488, "loss": 0.488, "step": 2750 }, { "epoch": 1.2331253968846887, "learning_rate": 3.3248407763479975e-05, "lm_loss": 0.5783, "loss": 0.5783, "step": 2751 }, { "epoch": 1.233573643121288, "learning_rate": 3.3236981024780134e-05, "lm_loss": 0.5991, "loss": 0.5991, "step": 2752 }, { "epoch": 1.2340218893578871, "learning_rate": 3.3225552355391604e-05, "lm_loss": 0.4936, "loss": 0.4936, "step": 2753 }, { "epoch": 1.2344701355944865, "learning_rate": 3.321412175799318e-05, "lm_loss": 0.6706, "loss": 0.6706, "step": 2754 }, { "epoch": 1.2349183818310858, "learning_rate": 3.320268923526409e-05, "lm_loss": 0.6286, "loss": 0.6286, "step": 2755 }, { "epoch": 1.2353666280676852, "learning_rate": 3.3191254789884064e-05, "lm_loss": 0.9815, "loss": 0.9815, "step": 2756 }, { "epoch": 1.2358148743042845, "learning_rate": 3.3179818424533224e-05, "lm_loss": 0.7988, "loss": 0.7988, "step": 2757 }, { "epoch": 1.2362631205408838, "learning_rate": 3.31683801418922e-05, "lm_loss": 0.7711, "loss": 0.7711, "step": 2758 }, { "epoch": 1.2367113667774832, "learning_rate": 3.3156939944642016e-05, "lm_loss": 0.5338, "loss": 0.5338, "step": 2759 }, { "epoch": 1.2371596130140823, "learning_rate": 3.31454978354642e-05, "lm_loss": 0.5186, "loss": 0.5186, "step": 2760 }, { "epoch": 1.2376078592506816, "learning_rate": 3.313405381704067e-05, "lm_loss": 0.7282, "loss": 0.7282, "step": 2761 }, { "epoch": 1.238056105487281, "learning_rate": 3.3122607892053846e-05, "lm_loss": 0.543, "loss": 0.543, "step": 2762 }, { "epoch": 1.2385043517238803, "learning_rate": 3.311116006318655e-05, "lm_loss": 0.7465, "loss": 0.7465, "step": 2763 }, { "epoch": 1.2389525979604796, "learning_rate": 3.3099710333122085e-05, "lm_loss": 0.4655, "loss": 0.4655, "step": 2764 }, { "epoch": 1.239400844197079, "learning_rate": 3.3088258704544175e-05, "lm_loss": 0.7195, "loss": 0.7195, "step": 2765 }, { "epoch": 1.2398490904336783, "learning_rate": 3.307680518013701e-05, "lm_loss": 1.0971, "loss": 1.0971, "step": 2766 }, { "epoch": 1.2402973366702774, "learning_rate": 3.3065349762585196e-05, "lm_loss": 0.4754, "loss": 0.4754, "step": 2767 }, { "epoch": 1.2407455829068768, "learning_rate": 3.3053892454573814e-05, "lm_loss": 0.7, "loss": 0.7, "step": 2768 }, { "epoch": 1.241193829143476, "learning_rate": 3.304243325878836e-05, "lm_loss": 0.5379, "loss": 0.5379, "step": 2769 }, { "epoch": 1.2416420753800754, "learning_rate": 3.303097217791482e-05, "lm_loss": 0.4776, "loss": 0.4776, "step": 2770 }, { "epoch": 1.2420903216166748, "learning_rate": 3.301950921463954e-05, "lm_loss": 0.6445, "loss": 0.6445, "step": 2771 }, { "epoch": 1.242538567853274, "learning_rate": 3.3008044371649394e-05, "lm_loss": 0.578, "loss": 0.578, "step": 2772 }, { "epoch": 1.2429868140898734, "learning_rate": 3.299657765163164e-05, "lm_loss": 0.5934, "loss": 0.5934, "step": 2773 }, { "epoch": 1.2434350603264726, "learning_rate": 3.2985109057274e-05, "lm_loss": 1.0371, "loss": 1.0371, "step": 2774 }, { "epoch": 1.2438833065630719, "learning_rate": 3.297363859126463e-05, "lm_loss": 1.132, "loss": 1.132, "step": 2775 }, { "epoch": 1.2443315527996712, "learning_rate": 3.2962166256292113e-05, "lm_loss": 0.8161, "loss": 0.8161, "step": 2776 }, { "epoch": 1.2447797990362706, "learning_rate": 3.2950692055045494e-05, "lm_loss": 0.6053, "loss": 0.6053, "step": 2777 }, { "epoch": 1.24522804527287, "learning_rate": 3.293921599021424e-05, "lm_loss": 0.5834, "loss": 0.5834, "step": 2778 }, { "epoch": 1.2456762915094692, "learning_rate": 3.292773806448826e-05, "lm_loss": 0.5918, "loss": 0.5918, "step": 2779 }, { "epoch": 1.2461245377460686, "learning_rate": 3.291625828055789e-05, "lm_loss": 0.6584, "loss": 0.6584, "step": 2780 }, { "epoch": 1.246572783982668, "learning_rate": 3.290477664111391e-05, "lm_loss": 0.5895, "loss": 0.5895, "step": 2781 }, { "epoch": 1.247021030219267, "learning_rate": 3.2893293148847534e-05, "lm_loss": 0.7942, "loss": 0.7942, "step": 2782 }, { "epoch": 1.2474692764558664, "learning_rate": 3.288180780645041e-05, "lm_loss": 0.6005, "loss": 0.6005, "step": 2783 }, { "epoch": 1.2479175226924657, "learning_rate": 3.2870320616614626e-05, "lm_loss": 0.7039, "loss": 0.7039, "step": 2784 }, { "epoch": 1.248365768929065, "learning_rate": 3.285883158203268e-05, "lm_loss": 0.8792, "loss": 0.8792, "step": 2785 }, { "epoch": 1.2488140151656644, "learning_rate": 3.2847340705397535e-05, "lm_loss": 0.5325, "loss": 0.5325, "step": 2786 }, { "epoch": 1.2492622614022637, "learning_rate": 3.283584798940256e-05, "lm_loss": 0.7066, "loss": 0.7066, "step": 2787 }, { "epoch": 1.249710507638863, "learning_rate": 3.282435343674156e-05, "lm_loss": 0.6047, "loss": 0.6047, "step": 2788 }, { "epoch": 1.2501587538754624, "learning_rate": 3.281285705010878e-05, "lm_loss": 0.5899, "loss": 0.5899, "step": 2789 }, { "epoch": 1.2506070001120615, "learning_rate": 3.28013588321989e-05, "lm_loss": 0.6215, "loss": 0.6215, "step": 2790 }, { "epoch": 1.2510552463486608, "learning_rate": 3.2789858785707004e-05, "lm_loss": 0.588, "loss": 0.588, "step": 2791 }, { "epoch": 1.2515034925852602, "learning_rate": 3.2778356913328635e-05, "lm_loss": 0.6248, "loss": 0.6248, "step": 2792 }, { "epoch": 1.2519517388218595, "learning_rate": 3.276685321775973e-05, "lm_loss": 0.787, "loss": 0.787, "step": 2793 }, { "epoch": 1.2523999850584588, "learning_rate": 3.275534770169668e-05, "lm_loss": 0.8074, "loss": 0.8074, "step": 2794 }, { "epoch": 1.252848231295058, "learning_rate": 3.2743840367836294e-05, "lm_loss": 0.5595, "loss": 0.5595, "step": 2795 }, { "epoch": 1.2532964775316575, "learning_rate": 3.273233121887582e-05, "lm_loss": 0.5122, "loss": 0.5122, "step": 2796 }, { "epoch": 1.2537447237682566, "learning_rate": 3.27208202575129e-05, "lm_loss": 0.6801, "loss": 0.6801, "step": 2797 }, { "epoch": 1.254192970004856, "learning_rate": 3.270930748644563e-05, "lm_loss": 0.4938, "loss": 0.4938, "step": 2798 }, { "epoch": 1.2546412162414553, "learning_rate": 3.2697792908372514e-05, "lm_loss": 0.6645, "loss": 0.6645, "step": 2799 }, { "epoch": 1.2550894624780546, "learning_rate": 3.26862765259925e-05, "lm_loss": 0.521, "loss": 0.521, "step": 2800 }, { "epoch": 1.255537708714654, "learning_rate": 3.267475834200493e-05, "lm_loss": 0.5415, "loss": 0.5415, "step": 2801 }, { "epoch": 1.255985954951253, "learning_rate": 3.2663238359109586e-05, "lm_loss": 0.5584, "loss": 0.5584, "step": 2802 }, { "epoch": 1.2564342011878527, "learning_rate": 3.2651716580006666e-05, "lm_loss": 0.7568, "loss": 0.7568, "step": 2803 }, { "epoch": 1.2568824474244518, "learning_rate": 3.26401930073968e-05, "lm_loss": 0.4517, "loss": 0.4517, "step": 2804 }, { "epoch": 1.257330693661051, "learning_rate": 3.2628667643981035e-05, "lm_loss": 0.4866, "loss": 0.4866, "step": 2805 }, { "epoch": 1.2577789398976504, "learning_rate": 3.2617140492460814e-05, "lm_loss": 0.8964, "loss": 0.8964, "step": 2806 }, { "epoch": 1.2582271861342498, "learning_rate": 3.260561155553803e-05, "lm_loss": 0.6492, "loss": 0.6492, "step": 2807 }, { "epoch": 1.2586754323708491, "learning_rate": 3.2594080835914975e-05, "lm_loss": 0.5994, "loss": 0.5994, "step": 2808 }, { "epoch": 1.2591236786074482, "learning_rate": 3.258254833629438e-05, "lm_loss": 0.6548, "loss": 0.6548, "step": 2809 }, { "epoch": 1.2595719248440478, "learning_rate": 3.257101405937937e-05, "lm_loss": 0.5181, "loss": 0.5181, "step": 2810 }, { "epoch": 1.260020171080647, "learning_rate": 3.255947800787349e-05, "lm_loss": 0.6207, "loss": 0.6207, "step": 2811 }, { "epoch": 1.2604684173172462, "learning_rate": 3.2547940184480716e-05, "lm_loss": 0.6068, "loss": 0.6068, "step": 2812 }, { "epoch": 1.2609166635538456, "learning_rate": 3.2536400591905427e-05, "lm_loss": 0.655, "loss": 0.655, "step": 2813 }, { "epoch": 1.261364909790445, "learning_rate": 3.252485923285242e-05, "lm_loss": 0.5399, "loss": 0.5399, "step": 2814 }, { "epoch": 1.2618131560270442, "learning_rate": 3.2513316110026906e-05, "lm_loss": 0.5582, "loss": 0.5582, "step": 2815 }, { "epoch": 1.2622614022636434, "learning_rate": 3.2501771226134515e-05, "lm_loss": 0.5245, "loss": 0.5245, "step": 2816 }, { "epoch": 1.262709648500243, "learning_rate": 3.249022458388127e-05, "lm_loss": 0.5435, "loss": 0.5435, "step": 2817 }, { "epoch": 1.263157894736842, "learning_rate": 3.247867618597363e-05, "lm_loss": 0.6119, "loss": 0.6119, "step": 2818 }, { "epoch": 1.2636061409734414, "learning_rate": 3.2467126035118464e-05, "lm_loss": 0.4243, "loss": 0.4243, "step": 2819 }, { "epoch": 1.2640543872100407, "learning_rate": 3.245557413402304e-05, "lm_loss": 0.6505, "loss": 0.6505, "step": 2820 }, { "epoch": 1.26450263344664, "learning_rate": 3.244402048539502e-05, "lm_loss": 0.4295, "loss": 0.4295, "step": 2821 }, { "epoch": 1.2649508796832394, "learning_rate": 3.243246509194251e-05, "lm_loss": 0.5148, "loss": 0.5148, "step": 2822 }, { "epoch": 1.2653991259198385, "learning_rate": 3.242090795637402e-05, "lm_loss": 0.7726, "loss": 0.7726, "step": 2823 }, { "epoch": 1.265847372156438, "learning_rate": 3.2409349081398445e-05, "lm_loss": 0.485, "loss": 0.485, "step": 2824 }, { "epoch": 1.2662956183930372, "learning_rate": 3.239778846972511e-05, "lm_loss": 0.5027, "loss": 0.5027, "step": 2825 }, { "epoch": 1.2667438646296365, "learning_rate": 3.2386226124063726e-05, "lm_loss": 0.938, "loss": 0.938, "step": 2826 }, { "epoch": 1.2671921108662358, "learning_rate": 3.237466204712444e-05, "lm_loss": 0.5459, "loss": 0.5459, "step": 2827 }, { "epoch": 1.2676403571028352, "learning_rate": 3.236309624161778e-05, "lm_loss": 0.5166, "loss": 0.5166, "step": 2828 }, { "epoch": 1.2680886033394345, "learning_rate": 3.2351528710254684e-05, "lm_loss": 0.6267, "loss": 0.6267, "step": 2829 }, { "epoch": 1.2685368495760336, "learning_rate": 3.2339959455746504e-05, "lm_loss": 0.5587, "loss": 0.5587, "step": 2830 }, { "epoch": 1.2689850958126332, "learning_rate": 3.2328388480804985e-05, "lm_loss": 0.6058, "loss": 0.6058, "step": 2831 }, { "epoch": 1.2694333420492323, "learning_rate": 3.231681578814228e-05, "lm_loss": 0.4778, "loss": 0.4778, "step": 2832 }, { "epoch": 1.2698815882858316, "learning_rate": 3.2305241380470946e-05, "lm_loss": 0.6572, "loss": 0.6572, "step": 2833 }, { "epoch": 1.270329834522431, "learning_rate": 3.2293665260503936e-05, "lm_loss": 0.6273, "loss": 0.6273, "step": 2834 }, { "epoch": 1.2707780807590303, "learning_rate": 3.2282087430954614e-05, "lm_loss": 0.5529, "loss": 0.5529, "step": 2835 }, { "epoch": 1.2712263269956297, "learning_rate": 3.227050789453674e-05, "lm_loss": 0.6497, "loss": 0.6497, "step": 2836 }, { "epoch": 1.2716745732322288, "learning_rate": 3.225892665396447e-05, "lm_loss": 0.6379, "loss": 0.6379, "step": 2837 }, { "epoch": 1.2721228194688283, "learning_rate": 3.224734371195236e-05, "lm_loss": 0.6042, "loss": 0.6042, "step": 2838 }, { "epoch": 1.2725710657054274, "learning_rate": 3.2235759071215374e-05, "lm_loss": 0.6536, "loss": 0.6536, "step": 2839 }, { "epoch": 1.2730193119420268, "learning_rate": 3.222417273446887e-05, "lm_loss": 0.5975, "loss": 0.5975, "step": 2840 }, { "epoch": 1.2734675581786261, "learning_rate": 3.221258470442859e-05, "lm_loss": 0.4832, "loss": 0.4832, "step": 2841 }, { "epoch": 1.2739158044152255, "learning_rate": 3.220099498381069e-05, "lm_loss": 0.6005, "loss": 0.6005, "step": 2842 }, { "epoch": 1.2743640506518248, "learning_rate": 3.218940357533173e-05, "lm_loss": 0.6658, "loss": 0.6658, "step": 2843 }, { "epoch": 1.2748122968884241, "learning_rate": 3.217781048170862e-05, "lm_loss": 0.6988, "loss": 0.6988, "step": 2844 }, { "epoch": 1.2752605431250235, "learning_rate": 3.2166215705658735e-05, "lm_loss": 0.4395, "loss": 0.4395, "step": 2845 }, { "epoch": 1.2757087893616226, "learning_rate": 3.215461924989979e-05, "lm_loss": 0.7103, "loss": 0.7103, "step": 2846 }, { "epoch": 1.276157035598222, "learning_rate": 3.21430211171499e-05, "lm_loss": 1.2293, "loss": 1.2293, "step": 2847 }, { "epoch": 1.2766052818348212, "learning_rate": 3.2131421310127594e-05, "lm_loss": 0.8639, "loss": 0.8639, "step": 2848 }, { "epoch": 1.2770535280714206, "learning_rate": 3.2119819831551776e-05, "lm_loss": 0.6061, "loss": 0.6061, "step": 2849 }, { "epoch": 1.27750177430802, "learning_rate": 3.2108216684141765e-05, "lm_loss": 0.9962, "loss": 0.9962, "step": 2850 }, { "epoch": 1.2779500205446193, "learning_rate": 3.2096611870617235e-05, "lm_loss": 0.5793, "loss": 0.5793, "step": 2851 }, { "epoch": 1.2783982667812186, "learning_rate": 3.208500539369829e-05, "lm_loss": 0.4587, "loss": 0.4587, "step": 2852 }, { "epoch": 1.2788465130178177, "learning_rate": 3.2073397256105374e-05, "lm_loss": 0.4704, "loss": 0.4704, "step": 2853 }, { "epoch": 1.279294759254417, "learning_rate": 3.206178746055938e-05, "lm_loss": 0.7038, "loss": 0.7038, "step": 2854 }, { "epoch": 1.2797430054910164, "learning_rate": 3.205017600978155e-05, "lm_loss": 0.601, "loss": 0.601, "step": 2855 }, { "epoch": 1.2801912517276157, "learning_rate": 3.2038562906493526e-05, "lm_loss": 0.7718, "loss": 0.7718, "step": 2856 }, { "epoch": 1.280639497964215, "learning_rate": 3.2026948153417326e-05, "lm_loss": 0.8632, "loss": 0.8632, "step": 2857 }, { "epoch": 1.2810877442008144, "learning_rate": 3.201533175327537e-05, "lm_loss": 0.6486, "loss": 0.6486, "step": 2858 }, { "epoch": 1.2815359904374137, "learning_rate": 3.2003713708790464e-05, "lm_loss": 0.6085, "loss": 0.6085, "step": 2859 }, { "epoch": 1.2819842366740128, "learning_rate": 3.1992094022685784e-05, "lm_loss": 0.6066, "loss": 0.6066, "step": 2860 }, { "epoch": 1.2824324829106122, "learning_rate": 3.1980472697684904e-05, "lm_loss": 0.5922, "loss": 0.5922, "step": 2861 }, { "epoch": 1.2828807291472115, "learning_rate": 3.1968849736511775e-05, "lm_loss": 0.5094, "loss": 0.5094, "step": 2862 }, { "epoch": 1.2833289753838109, "learning_rate": 3.1957225141890746e-05, "lm_loss": 0.8871, "loss": 0.8871, "step": 2863 }, { "epoch": 1.2837772216204102, "learning_rate": 3.1945598916546525e-05, "lm_loss": 0.8123, "loss": 0.8123, "step": 2864 }, { "epoch": 1.2842254678570095, "learning_rate": 3.1933971063204227e-05, "lm_loss": 0.6118, "loss": 0.6118, "step": 2865 }, { "epoch": 1.2846737140936089, "learning_rate": 3.192234158458932e-05, "lm_loss": 0.6191, "loss": 0.6191, "step": 2866 }, { "epoch": 1.285121960330208, "learning_rate": 3.191071048342769e-05, "lm_loss": 0.9662, "loss": 0.9662, "step": 2867 }, { "epoch": 1.2855702065668073, "learning_rate": 3.189907776244556e-05, "lm_loss": 0.9357, "loss": 0.9357, "step": 2868 }, { "epoch": 1.2860184528034067, "learning_rate": 3.1887443424369585e-05, "lm_loss": 0.6472, "loss": 0.6472, "step": 2869 }, { "epoch": 1.286466699040006, "learning_rate": 3.187580747192674e-05, "lm_loss": 0.5085, "loss": 0.5085, "step": 2870 }, { "epoch": 1.2869149452766053, "learning_rate": 3.186416990784442e-05, "lm_loss": 0.5207, "loss": 0.5207, "step": 2871 }, { "epoch": 1.2873631915132047, "learning_rate": 3.185253073485039e-05, "lm_loss": 0.8241, "loss": 0.8241, "step": 2872 }, { "epoch": 1.287811437749804, "learning_rate": 3.184088995567279e-05, "lm_loss": 0.4016, "loss": 0.4016, "step": 2873 }, { "epoch": 1.2882596839864031, "learning_rate": 3.182924757304012e-05, "lm_loss": 0.702, "loss": 0.702, "step": 2874 }, { "epoch": 1.2887079302230025, "learning_rate": 3.181760358968128e-05, "lm_loss": 0.7254, "loss": 0.7254, "step": 2875 }, { "epoch": 1.2891561764596018, "learning_rate": 3.180595800832553e-05, "lm_loss": 0.5693, "loss": 0.5693, "step": 2876 }, { "epoch": 1.2896044226962011, "learning_rate": 3.179431083170252e-05, "lm_loss": 0.7548, "loss": 0.7548, "step": 2877 }, { "epoch": 1.2900526689328005, "learning_rate": 3.178266206254226e-05, "lm_loss": 0.53, "loss": 0.53, "step": 2878 }, { "epoch": 1.2905009151693998, "learning_rate": 3.177101170357513e-05, "lm_loss": 1.0089, "loss": 1.0089, "step": 2879 }, { "epoch": 1.2909491614059991, "learning_rate": 3.175935975753189e-05, "lm_loss": 0.5612, "loss": 0.5612, "step": 2880 }, { "epoch": 1.2913974076425982, "learning_rate": 3.1747706227143686e-05, "lm_loss": 0.5848, "loss": 0.5848, "step": 2881 }, { "epoch": 1.2918456538791976, "learning_rate": 3.1736051115142015e-05, "lm_loss": 0.5154, "loss": 0.5154, "step": 2882 }, { "epoch": 1.292293900115797, "learning_rate": 3.172439442425873e-05, "lm_loss": 0.6938, "loss": 0.6938, "step": 2883 }, { "epoch": 1.2927421463523963, "learning_rate": 3.171273615722611e-05, "lm_loss": 0.4442, "loss": 0.4442, "step": 2884 }, { "epoch": 1.2931903925889956, "learning_rate": 3.170107631677674e-05, "lm_loss": 0.5607, "loss": 0.5607, "step": 2885 }, { "epoch": 1.293638638825595, "learning_rate": 3.168941490564362e-05, "lm_loss": 0.6869, "loss": 0.6869, "step": 2886 }, { "epoch": 1.2940868850621943, "learning_rate": 3.167775192656009e-05, "lm_loss": 0.5604, "loss": 0.5604, "step": 2887 }, { "epoch": 1.2945351312987934, "learning_rate": 3.1666087382259866e-05, "lm_loss": 0.4526, "loss": 0.4526, "step": 2888 }, { "epoch": 1.2949833775353927, "learning_rate": 3.165442127547704e-05, "lm_loss": 0.514, "loss": 0.514, "step": 2889 }, { "epoch": 1.295431623771992, "learning_rate": 3.1642753608946066e-05, "lm_loss": 0.8122, "loss": 0.8122, "step": 2890 }, { "epoch": 1.2958798700085914, "learning_rate": 3.163108438540175e-05, "lm_loss": 0.5811, "loss": 0.5811, "step": 2891 }, { "epoch": 1.2963281162451907, "learning_rate": 3.161941360757929e-05, "lm_loss": 0.5726, "loss": 0.5726, "step": 2892 }, { "epoch": 1.29677636248179, "learning_rate": 3.16077412782142e-05, "lm_loss": 0.5225, "loss": 0.5225, "step": 2893 }, { "epoch": 1.2972246087183894, "learning_rate": 3.159606740004242e-05, "lm_loss": 0.6132, "loss": 0.6132, "step": 2894 }, { "epoch": 1.2976728549549885, "learning_rate": 3.158439197580022e-05, "lm_loss": 0.6231, "loss": 0.6231, "step": 2895 }, { "epoch": 1.2981211011915879, "learning_rate": 3.157271500822422e-05, "lm_loss": 0.573, "loss": 0.573, "step": 2896 }, { "epoch": 1.2985693474281872, "learning_rate": 3.156103650005142e-05, "lm_loss": 0.6828, "loss": 0.6828, "step": 2897 }, { "epoch": 1.2990175936647865, "learning_rate": 3.154935645401919e-05, "lm_loss": 0.6184, "loss": 0.6184, "step": 2898 }, { "epoch": 1.2994658399013859, "learning_rate": 3.153767487286523e-05, "lm_loss": 0.5069, "loss": 0.5069, "step": 2899 }, { "epoch": 1.2999140861379852, "learning_rate": 3.152599175932763e-05, "lm_loss": 0.5574, "loss": 0.5574, "step": 2900 }, { "epoch": 1.3003623323745845, "learning_rate": 3.1514307116144836e-05, "lm_loss": 0.8201, "loss": 0.8201, "step": 2901 }, { "epoch": 1.3008105786111837, "learning_rate": 3.150262094605562e-05, "lm_loss": 0.9523, "loss": 0.9523, "step": 2902 }, { "epoch": 1.301258824847783, "learning_rate": 3.149093325179916e-05, "lm_loss": 0.6, "loss": 0.6, "step": 2903 }, { "epoch": 1.3017070710843823, "learning_rate": 3.1479244036114946e-05, "lm_loss": 0.4435, "loss": 0.4435, "step": 2904 }, { "epoch": 1.3021553173209817, "learning_rate": 3.146755330174286e-05, "lm_loss": 0.7501, "loss": 0.7501, "step": 2905 }, { "epoch": 1.302603563557581, "learning_rate": 3.145586105142312e-05, "lm_loss": 0.5028, "loss": 0.5028, "step": 2906 }, { "epoch": 1.3030518097941803, "learning_rate": 3.14441672878963e-05, "lm_loss": 0.6345, "loss": 0.6345, "step": 2907 }, { "epoch": 1.3035000560307797, "learning_rate": 3.143247201390334e-05, "lm_loss": 0.5207, "loss": 0.5207, "step": 2908 }, { "epoch": 1.3039483022673788, "learning_rate": 3.1420775232185535e-05, "lm_loss": 0.6045, "loss": 0.6045, "step": 2909 }, { "epoch": 1.3043965485039781, "learning_rate": 3.140907694548451e-05, "lm_loss": 0.4215, "loss": 0.4215, "step": 2910 }, { "epoch": 1.3048447947405775, "learning_rate": 3.139737715654226e-05, "lm_loss": 0.7227, "loss": 0.7227, "step": 2911 }, { "epoch": 1.3052930409771768, "learning_rate": 3.138567586810115e-05, "lm_loss": 0.5385, "loss": 0.5385, "step": 2912 }, { "epoch": 1.3057412872137761, "learning_rate": 3.137397308290385e-05, "lm_loss": 0.6928, "loss": 0.6928, "step": 2913 }, { "epoch": 1.3061895334503755, "learning_rate": 3.136226880369343e-05, "lm_loss": 0.5024, "loss": 0.5024, "step": 2914 }, { "epoch": 1.3066377796869748, "learning_rate": 3.135056303321328e-05, "lm_loss": 0.6284, "loss": 0.6284, "step": 2915 }, { "epoch": 1.307086025923574, "learning_rate": 3.1338855774207144e-05, "lm_loss": 0.4217, "loss": 0.4217, "step": 2916 }, { "epoch": 1.3075342721601733, "learning_rate": 3.1327147029419123e-05, "lm_loss": 0.5338, "loss": 0.5338, "step": 2917 }, { "epoch": 1.3079825183967726, "learning_rate": 3.131543680159367e-05, "lm_loss": 0.608, "loss": 0.608, "step": 2918 }, { "epoch": 1.308430764633372, "learning_rate": 3.130372509347556e-05, "lm_loss": 0.5916, "loss": 0.5916, "step": 2919 }, { "epoch": 1.3088790108699713, "learning_rate": 3.129201190780994e-05, "lm_loss": 0.5606, "loss": 0.5606, "step": 2920 }, { "epoch": 1.3093272571065706, "learning_rate": 3.1280297247342306e-05, "lm_loss": 0.6428, "loss": 0.6428, "step": 2921 }, { "epoch": 1.30977550334317, "learning_rate": 3.1268581114818476e-05, "lm_loss": 0.4952, "loss": 0.4952, "step": 2922 }, { "epoch": 1.310223749579769, "learning_rate": 3.1256863512984635e-05, "lm_loss": 0.6948, "loss": 0.6948, "step": 2923 }, { "epoch": 1.3106719958163684, "learning_rate": 3.1245144444587296e-05, "lm_loss": 0.5351, "loss": 0.5351, "step": 2924 }, { "epoch": 1.3111202420529677, "learning_rate": 3.123342391237333e-05, "lm_loss": 0.5324, "loss": 0.5324, "step": 2925 }, { "epoch": 1.311568488289567, "learning_rate": 3.122170191908995e-05, "lm_loss": 0.507, "loss": 0.507, "step": 2926 }, { "epoch": 1.3120167345261664, "learning_rate": 3.1209978467484694e-05, "lm_loss": 0.7427, "loss": 0.7427, "step": 2927 }, { "epoch": 1.3124649807627657, "learning_rate": 3.1198253560305465e-05, "lm_loss": 0.3996, "loss": 0.3996, "step": 2928 }, { "epoch": 1.312913226999365, "learning_rate": 3.118652720030048e-05, "lm_loss": 0.5961, "loss": 0.5961, "step": 2929 }, { "epoch": 1.3133614732359642, "learning_rate": 3.117479939021834e-05, "lm_loss": 0.5589, "loss": 0.5589, "step": 2930 }, { "epoch": 1.3138097194725635, "learning_rate": 3.1163070132807933e-05, "lm_loss": 0.5252, "loss": 0.5252, "step": 2931 }, { "epoch": 1.3142579657091629, "learning_rate": 3.115133943081852e-05, "lm_loss": 0.7617, "loss": 0.7617, "step": 2932 }, { "epoch": 1.3147062119457622, "learning_rate": 3.11396072869997e-05, "lm_loss": 0.4216, "loss": 0.4216, "step": 2933 }, { "epoch": 1.3151544581823615, "learning_rate": 3.112787370410139e-05, "lm_loss": 0.5928, "loss": 0.5928, "step": 2934 }, { "epoch": 1.3156027044189609, "learning_rate": 3.111613868487387e-05, "lm_loss": 0.5282, "loss": 0.5282, "step": 2935 }, { "epoch": 1.3160509506555602, "learning_rate": 3.110440223206774e-05, "lm_loss": 0.6423, "loss": 0.6423, "step": 2936 }, { "epoch": 1.3164991968921593, "learning_rate": 3.109266434843393e-05, "lm_loss": 0.5565, "loss": 0.5565, "step": 2937 }, { "epoch": 1.3169474431287587, "learning_rate": 3.108092503672372e-05, "lm_loss": 0.4854, "loss": 0.4854, "step": 2938 }, { "epoch": 1.317395689365358, "learning_rate": 3.106918429968873e-05, "lm_loss": 0.5142, "loss": 0.5142, "step": 2939 }, { "epoch": 1.3178439356019573, "learning_rate": 3.105744214008089e-05, "lm_loss": 0.5643, "loss": 0.5643, "step": 2940 }, { "epoch": 1.3182921818385567, "learning_rate": 3.104569856065249e-05, "lm_loss": 1.0744, "loss": 1.0744, "step": 2941 }, { "epoch": 1.318740428075156, "learning_rate": 3.103395356415613e-05, "lm_loss": 0.7317, "loss": 0.7317, "step": 2942 }, { "epoch": 1.3191886743117553, "learning_rate": 3.1022207153344746e-05, "lm_loss": 0.6969, "loss": 0.6969, "step": 2943 }, { "epoch": 1.3196369205483545, "learning_rate": 3.101045933097163e-05, "lm_loss": 1.2928, "loss": 1.2928, "step": 2944 }, { "epoch": 1.3200851667849538, "learning_rate": 3.099871009979037e-05, "lm_loss": 1.028, "loss": 1.028, "step": 2945 }, { "epoch": 1.3205334130215531, "learning_rate": 3.098695946255491e-05, "lm_loss": 0.5352, "loss": 0.5352, "step": 2946 }, { "epoch": 1.3209816592581525, "learning_rate": 3.097520742201951e-05, "lm_loss": 0.7247, "loss": 0.7247, "step": 2947 }, { "epoch": 1.3214299054947518, "learning_rate": 3.096345398093877e-05, "lm_loss": 0.5298, "loss": 0.5298, "step": 2948 }, { "epoch": 1.3218781517313511, "learning_rate": 3.09516991420676e-05, "lm_loss": 0.5006, "loss": 0.5006, "step": 2949 }, { "epoch": 1.3223263979679505, "learning_rate": 3.0939942908161255e-05, "lm_loss": 0.6747, "loss": 0.6747, "step": 2950 }, { "epoch": 1.3227746442045496, "learning_rate": 3.092818528197531e-05, "lm_loss": 0.4177, "loss": 0.4177, "step": 2951 }, { "epoch": 1.323222890441149, "learning_rate": 3.091642626626567e-05, "lm_loss": 0.5014, "loss": 0.5014, "step": 2952 }, { "epoch": 1.3236711366777483, "learning_rate": 3.0904665863788574e-05, "lm_loss": 0.8481, "loss": 0.8481, "step": 2953 }, { "epoch": 1.3241193829143476, "learning_rate": 3.0892904077300547e-05, "lm_loss": 0.5435, "loss": 0.5435, "step": 2954 }, { "epoch": 1.324567629150947, "learning_rate": 3.0881140909558494e-05, "lm_loss": 0.402, "loss": 0.402, "step": 2955 }, { "epoch": 1.3250158753875463, "learning_rate": 3.08693763633196e-05, "lm_loss": 0.6451, "loss": 0.6451, "step": 2956 }, { "epoch": 1.3254641216241456, "learning_rate": 3.0857610441341396e-05, "lm_loss": 0.6915, "loss": 0.6915, "step": 2957 }, { "epoch": 1.3259123678607447, "learning_rate": 3.084584314638172e-05, "lm_loss": 0.5658, "loss": 0.5658, "step": 2958 }, { "epoch": 1.326360614097344, "learning_rate": 3.083407448119876e-05, "lm_loss": 0.5145, "loss": 0.5145, "step": 2959 }, { "epoch": 1.3268088603339434, "learning_rate": 3.082230444855098e-05, "lm_loss": 0.6327, "loss": 0.6327, "step": 2960 }, { "epoch": 1.3272571065705427, "learning_rate": 3.081053305119722e-05, "lm_loss": 0.6015, "loss": 0.6015, "step": 2961 }, { "epoch": 1.327705352807142, "learning_rate": 3.0798760291896584e-05, "lm_loss": 0.5015, "loss": 0.5015, "step": 2962 }, { "epoch": 1.3281535990437414, "learning_rate": 3.078698617340854e-05, "lm_loss": 0.6306, "loss": 0.6306, "step": 2963 }, { "epoch": 1.3286018452803408, "learning_rate": 3.077521069849284e-05, "lm_loss": 0.8047, "loss": 0.8047, "step": 2964 }, { "epoch": 1.3290500915169399, "learning_rate": 3.076343386990958e-05, "lm_loss": 0.8107, "loss": 0.8107, "step": 2965 }, { "epoch": 1.3294983377535392, "learning_rate": 3.075165569041916e-05, "lm_loss": 0.6037, "loss": 0.6037, "step": 2966 }, { "epoch": 1.3299465839901385, "learning_rate": 3.073987616278231e-05, "lm_loss": 0.4569, "loss": 0.4569, "step": 2967 }, { "epoch": 1.3303948302267379, "learning_rate": 3.072809528976006e-05, "lm_loss": 0.6957, "loss": 0.6957, "step": 2968 }, { "epoch": 1.3308430764633372, "learning_rate": 3.071631307411375e-05, "lm_loss": 0.4658, "loss": 0.4658, "step": 2969 }, { "epoch": 1.3312913226999366, "learning_rate": 3.0704529518605046e-05, "lm_loss": 0.4096, "loss": 0.4096, "step": 2970 }, { "epoch": 1.331739568936536, "learning_rate": 3.069274462599595e-05, "lm_loss": 0.7145, "loss": 0.7145, "step": 2971 }, { "epoch": 1.332187815173135, "learning_rate": 3.068095839904874e-05, "lm_loss": 0.5283, "loss": 0.5283, "step": 2972 }, { "epoch": 1.3326360614097343, "learning_rate": 3.066917084052603e-05, "lm_loss": 0.9209, "loss": 0.9209, "step": 2973 }, { "epoch": 1.3330843076463337, "learning_rate": 3.065738195319072e-05, "lm_loss": 0.6263, "loss": 0.6263, "step": 2974 }, { "epoch": 1.333532553882933, "learning_rate": 3.064559173980607e-05, "lm_loss": 0.6172, "loss": 0.6172, "step": 2975 }, { "epoch": 1.3339808001195324, "learning_rate": 3.0633800203135595e-05, "lm_loss": 0.3874, "loss": 0.3874, "step": 2976 }, { "epoch": 1.3344290463561317, "learning_rate": 3.0622007345943155e-05, "lm_loss": 1.094, "loss": 1.094, "step": 2977 }, { "epoch": 1.334877292592731, "learning_rate": 3.06102131709929e-05, "lm_loss": 0.7192, "loss": 0.7192, "step": 2978 }, { "epoch": 1.3353255388293301, "learning_rate": 3.059841768104932e-05, "lm_loss": 0.5349, "loss": 0.5349, "step": 2979 }, { "epoch": 1.3357737850659295, "learning_rate": 3.0586620878877176e-05, "lm_loss": 0.4958, "loss": 0.4958, "step": 2980 }, { "epoch": 1.3362220313025288, "learning_rate": 3.057482276724156e-05, "lm_loss": 0.6364, "loss": 0.6364, "step": 2981 }, { "epoch": 1.3366702775391281, "learning_rate": 3.056302334890786e-05, "lm_loss": 0.5647, "loss": 0.5647, "step": 2982 }, { "epoch": 1.3371185237757275, "learning_rate": 3.055122262664178e-05, "lm_loss": 0.5024, "loss": 0.5024, "step": 2983 }, { "epoch": 1.3375667700123268, "learning_rate": 3.053942060320932e-05, "lm_loss": 0.6115, "loss": 0.6115, "step": 2984 }, { "epoch": 1.3380150162489262, "learning_rate": 3.052761728137679e-05, "lm_loss": 0.5631, "loss": 0.5631, "step": 2985 }, { "epoch": 1.3384632624855253, "learning_rate": 3.0515812663910797e-05, "lm_loss": 0.5974, "loss": 0.5974, "step": 2986 }, { "epoch": 1.3389115087221246, "learning_rate": 3.050400675357826e-05, "lm_loss": 0.5141, "loss": 0.5141, "step": 2987 }, { "epoch": 1.339359754958724, "learning_rate": 3.0492199553146413e-05, "lm_loss": 0.5019, "loss": 0.5019, "step": 2988 }, { "epoch": 1.3398080011953233, "learning_rate": 3.0480391065382764e-05, "lm_loss": 0.6432, "loss": 0.6432, "step": 2989 }, { "epoch": 1.3402562474319226, "learning_rate": 3.0468581293055137e-05, "lm_loss": 0.5915, "loss": 0.5915, "step": 2990 }, { "epoch": 1.340704493668522, "learning_rate": 3.0456770238931664e-05, "lm_loss": 0.5667, "loss": 0.5667, "step": 2991 }, { "epoch": 1.3411527399051213, "learning_rate": 3.0444957905780757e-05, "lm_loss": 0.5086, "loss": 0.5086, "step": 2992 }, { "epoch": 1.3416009861417204, "learning_rate": 3.0433144296371162e-05, "lm_loss": 0.4835, "loss": 0.4835, "step": 2993 }, { "epoch": 1.3420492323783197, "learning_rate": 3.0421329413471888e-05, "lm_loss": 0.5077, "loss": 0.5077, "step": 2994 }, { "epoch": 1.342497478614919, "learning_rate": 3.0409513259852262e-05, "lm_loss": 0.686, "loss": 0.686, "step": 2995 }, { "epoch": 1.3429457248515184, "learning_rate": 3.039769583828191e-05, "lm_loss": 0.5954, "loss": 0.5954, "step": 2996 }, { "epoch": 1.3433939710881178, "learning_rate": 3.038587715153074e-05, "lm_loss": 1.1454, "loss": 1.1454, "step": 2997 }, { "epoch": 1.343842217324717, "learning_rate": 3.037405720236898e-05, "lm_loss": 0.7565, "loss": 0.7565, "step": 2998 }, { "epoch": 1.3442904635613164, "learning_rate": 3.0362235993567124e-05, "lm_loss": 0.5312, "loss": 0.5312, "step": 2999 }, { "epoch": 1.3447387097979155, "learning_rate": 3.0350413527895987e-05, "lm_loss": 0.5093, "loss": 0.5093, "step": 3000 }, { "epoch": 1.3451869560345149, "learning_rate": 3.0338589808126672e-05, "lm_loss": 0.6643, "loss": 0.6643, "step": 3001 }, { "epoch": 1.3456352022711142, "learning_rate": 3.032676483703057e-05, "lm_loss": 0.4513, "loss": 0.4513, "step": 3002 }, { "epoch": 1.3460834485077136, "learning_rate": 3.031493861737936e-05, "lm_loss": 0.6891, "loss": 0.6891, "step": 3003 }, { "epoch": 1.346531694744313, "learning_rate": 3.030311115194504e-05, "lm_loss": 0.4961, "loss": 0.4961, "step": 3004 }, { "epoch": 1.3469799409809122, "learning_rate": 3.0291282443499868e-05, "lm_loss": 0.5398, "loss": 0.5398, "step": 3005 }, { "epoch": 1.3474281872175116, "learning_rate": 3.027945249481642e-05, "lm_loss": 0.6329, "loss": 0.6329, "step": 3006 }, { "epoch": 1.3478764334541107, "learning_rate": 3.0267621308667536e-05, "lm_loss": 0.6371, "loss": 0.6371, "step": 3007 }, { "epoch": 1.34832467969071, "learning_rate": 3.025578888782637e-05, "lm_loss": 0.5246, "loss": 0.5246, "step": 3008 }, { "epoch": 1.3487729259273094, "learning_rate": 3.0243955235066358e-05, "lm_loss": 0.5129, "loss": 0.5129, "step": 3009 }, { "epoch": 1.3492211721639087, "learning_rate": 3.02321203531612e-05, "lm_loss": 0.5746, "loss": 0.5746, "step": 3010 }, { "epoch": 1.349669418400508, "learning_rate": 3.022028424488494e-05, "lm_loss": 0.5445, "loss": 0.5445, "step": 3011 }, { "epoch": 1.3501176646371074, "learning_rate": 3.0208446913011856e-05, "lm_loss": 0.6077, "loss": 0.6077, "step": 3012 }, { "epoch": 1.3505659108737067, "learning_rate": 3.0196608360316537e-05, "lm_loss": 0.9325, "loss": 0.9325, "step": 3013 }, { "epoch": 1.3510141571103058, "learning_rate": 3.0184768589573853e-05, "lm_loss": 0.6286, "loss": 0.6286, "step": 3014 }, { "epoch": 1.3514624033469052, "learning_rate": 3.017292760355896e-05, "lm_loss": 0.5377, "loss": 0.5377, "step": 3015 }, { "epoch": 1.3519106495835045, "learning_rate": 3.01610854050473e-05, "lm_loss": 0.6351, "loss": 0.6351, "step": 3016 }, { "epoch": 1.3523588958201038, "learning_rate": 3.0149241996814605e-05, "lm_loss": 0.5015, "loss": 0.5015, "step": 3017 }, { "epoch": 1.3528071420567032, "learning_rate": 3.0137397381636877e-05, "lm_loss": 0.5212, "loss": 0.5212, "step": 3018 }, { "epoch": 1.3532553882933025, "learning_rate": 3.01255515622904e-05, "lm_loss": 0.7282, "loss": 0.7282, "step": 3019 }, { "epoch": 1.3537036345299018, "learning_rate": 3.011370454155177e-05, "lm_loss": 0.5267, "loss": 0.5267, "step": 3020 }, { "epoch": 1.354151880766501, "learning_rate": 3.010185632219783e-05, "lm_loss": 0.3941, "loss": 0.3941, "step": 3021 }, { "epoch": 1.3546001270031005, "learning_rate": 3.0090006907005714e-05, "lm_loss": 1.0488, "loss": 1.0488, "step": 3022 }, { "epoch": 1.3550483732396996, "learning_rate": 3.007815629875284e-05, "lm_loss": 0.5984, "loss": 0.5984, "step": 3023 }, { "epoch": 1.355496619476299, "learning_rate": 3.0066304500216908e-05, "lm_loss": 0.6343, "loss": 0.6343, "step": 3024 }, { "epoch": 1.3559448657128983, "learning_rate": 3.0054451514175895e-05, "lm_loss": 0.8607, "loss": 0.8607, "step": 3025 }, { "epoch": 1.3563931119494976, "learning_rate": 3.004259734340806e-05, "lm_loss": 0.789, "loss": 0.789, "step": 3026 }, { "epoch": 1.356841358186097, "learning_rate": 3.0030741990691913e-05, "lm_loss": 0.5706, "loss": 0.5706, "step": 3027 }, { "epoch": 1.357289604422696, "learning_rate": 3.0018885458806282e-05, "lm_loss": 0.8839, "loss": 0.8839, "step": 3028 }, { "epoch": 1.3577378506592956, "learning_rate": 3.000702775053026e-05, "lm_loss": 0.6664, "loss": 0.6664, "step": 3029 }, { "epoch": 1.3581860968958948, "learning_rate": 2.9995168868643187e-05, "lm_loss": 0.5647, "loss": 0.5647, "step": 3030 }, { "epoch": 1.358634343132494, "learning_rate": 2.9983308815924703e-05, "lm_loss": 0.8878, "loss": 0.8878, "step": 3031 }, { "epoch": 1.3590825893690934, "learning_rate": 2.9971447595154728e-05, "lm_loss": 0.7969, "loss": 0.7969, "step": 3032 }, { "epoch": 1.3595308356056928, "learning_rate": 2.9959585209113443e-05, "lm_loss": 0.9494, "loss": 0.9494, "step": 3033 }, { "epoch": 1.359979081842292, "learning_rate": 2.9947721660581306e-05, "lm_loss": 0.6198, "loss": 0.6198, "step": 3034 }, { "epoch": 1.3604273280788912, "learning_rate": 2.9935856952339036e-05, "lm_loss": 0.5799, "loss": 0.5799, "step": 3035 }, { "epoch": 1.3608755743154908, "learning_rate": 2.9923991087167658e-05, "lm_loss": 0.4183, "loss": 0.4183, "step": 3036 }, { "epoch": 1.36132382055209, "learning_rate": 2.9912124067848417e-05, "lm_loss": 0.6215, "loss": 0.6215, "step": 3037 }, { "epoch": 1.3617720667886892, "learning_rate": 2.9900255897162877e-05, "lm_loss": 0.5779, "loss": 0.5779, "step": 3038 }, { "epoch": 1.3622203130252886, "learning_rate": 2.9888386577892845e-05, "lm_loss": 0.7273, "loss": 0.7273, "step": 3039 }, { "epoch": 1.362668559261888, "learning_rate": 2.9876516112820396e-05, "lm_loss": 0.563, "loss": 0.563, "step": 3040 }, { "epoch": 1.3631168054984872, "learning_rate": 2.986464450472789e-05, "lm_loss": 0.5058, "loss": 0.5058, "step": 3041 }, { "epoch": 1.3635650517350864, "learning_rate": 2.9852771756397946e-05, "lm_loss": 0.8141, "loss": 0.8141, "step": 3042 }, { "epoch": 1.364013297971686, "learning_rate": 2.984089787061345e-05, "lm_loss": 1.1517, "loss": 1.1517, "step": 3043 }, { "epoch": 1.364461544208285, "learning_rate": 2.982902285015754e-05, "lm_loss": 0.7134, "loss": 0.7134, "step": 3044 }, { "epoch": 1.3649097904448844, "learning_rate": 2.9817146697813652e-05, "lm_loss": 0.5342, "loss": 0.5342, "step": 3045 }, { "epoch": 1.3653580366814837, "learning_rate": 2.980526941636548e-05, "lm_loss": 0.4644, "loss": 0.4644, "step": 3046 }, { "epoch": 1.365806282918083, "learning_rate": 2.9793391008596944e-05, "lm_loss": 0.6191, "loss": 0.6191, "step": 3047 }, { "epoch": 1.3662545291546824, "learning_rate": 2.9781511477292274e-05, "lm_loss": 0.631, "loss": 0.631, "step": 3048 }, { "epoch": 1.3667027753912815, "learning_rate": 2.9769630825235946e-05, "lm_loss": 0.4915, "loss": 0.4915, "step": 3049 }, { "epoch": 1.367151021627881, "learning_rate": 2.9757749055212693e-05, "lm_loss": 0.6525, "loss": 0.6525, "step": 3050 }, { "epoch": 1.3675992678644802, "learning_rate": 2.9745866170007518e-05, "lm_loss": 0.5471, "loss": 0.5471, "step": 3051 }, { "epoch": 1.3680475141010795, "learning_rate": 2.9733982172405683e-05, "lm_loss": 0.4582, "loss": 0.4582, "step": 3052 }, { "epoch": 1.3684957603376788, "learning_rate": 2.9722097065192717e-05, "lm_loss": 0.5247, "loss": 0.5247, "step": 3053 }, { "epoch": 1.3689440065742782, "learning_rate": 2.9710210851154392e-05, "lm_loss": 0.5478, "loss": 0.5478, "step": 3054 }, { "epoch": 1.3693922528108775, "learning_rate": 2.969832353307676e-05, "lm_loss": 0.6237, "loss": 0.6237, "step": 3055 }, { "epoch": 1.3698404990474766, "learning_rate": 2.9686435113746113e-05, "lm_loss": 1.0585, "loss": 1.0585, "step": 3056 }, { "epoch": 1.3702887452840762, "learning_rate": 2.9674545595949026e-05, "lm_loss": 1.1667, "loss": 1.1667, "step": 3057 }, { "epoch": 1.3707369915206753, "learning_rate": 2.9662654982472303e-05, "lm_loss": 0.7753, "loss": 0.7753, "step": 3058 }, { "epoch": 1.3711852377572746, "learning_rate": 2.9650763276103016e-05, "lm_loss": 0.563, "loss": 0.563, "step": 3059 }, { "epoch": 1.371633483993874, "learning_rate": 2.96388704796285e-05, "lm_loss": 0.5357, "loss": 0.5357, "step": 3060 }, { "epoch": 1.3720817302304733, "learning_rate": 2.9626976595836342e-05, "lm_loss": 0.5008, "loss": 0.5008, "step": 3061 }, { "epoch": 1.3725299764670726, "learning_rate": 2.9615081627514386e-05, "lm_loss": 0.6127, "loss": 0.6127, "step": 3062 }, { "epoch": 1.3729782227036718, "learning_rate": 2.960318557745071e-05, "lm_loss": 0.976, "loss": 0.976, "step": 3063 }, { "epoch": 1.3734264689402713, "learning_rate": 2.959128844843368e-05, "lm_loss": 0.6681, "loss": 0.6681, "step": 3064 }, { "epoch": 1.3738747151768704, "learning_rate": 2.957939024325188e-05, "lm_loss": 0.4285, "loss": 0.4285, "step": 3065 }, { "epoch": 1.3743229614134698, "learning_rate": 2.9567490964694183e-05, "lm_loss": 0.7067, "loss": 0.7067, "step": 3066 }, { "epoch": 1.374771207650069, "learning_rate": 2.9555590615549683e-05, "lm_loss": 0.545, "loss": 0.545, "step": 3067 }, { "epoch": 1.3752194538866684, "learning_rate": 2.9543689198607728e-05, "lm_loss": 0.5621, "loss": 0.5621, "step": 3068 }, { "epoch": 1.3756677001232678, "learning_rate": 2.9531786716657933e-05, "lm_loss": 0.5064, "loss": 0.5064, "step": 3069 }, { "epoch": 1.376115946359867, "learning_rate": 2.951988317249016e-05, "lm_loss": 0.4704, "loss": 0.4704, "step": 3070 }, { "epoch": 1.3765641925964665, "learning_rate": 2.95079785688945e-05, "lm_loss": 0.8198, "loss": 0.8198, "step": 3071 }, { "epoch": 1.3770124388330656, "learning_rate": 2.949607290866131e-05, "lm_loss": 0.7776, "loss": 0.7776, "step": 3072 }, { "epoch": 1.377460685069665, "learning_rate": 2.9484166194581196e-05, "lm_loss": 0.8405, "loss": 0.8405, "step": 3073 }, { "epoch": 1.3779089313062642, "learning_rate": 2.9472258429445004e-05, "lm_loss": 0.5451, "loss": 0.5451, "step": 3074 }, { "epoch": 1.3783571775428636, "learning_rate": 2.9460349616043824e-05, "lm_loss": 0.624, "loss": 0.624, "step": 3075 }, { "epoch": 1.378805423779463, "learning_rate": 2.9448439757168993e-05, "lm_loss": 0.6869, "loss": 0.6869, "step": 3076 }, { "epoch": 1.379253670016062, "learning_rate": 2.94365288556121e-05, "lm_loss": 0.5913, "loss": 0.5913, "step": 3077 }, { "epoch": 1.3797019162526616, "learning_rate": 2.942461691416498e-05, "lm_loss": 0.5075, "loss": 0.5075, "step": 3078 }, { "epoch": 1.3801501624892607, "learning_rate": 2.94127039356197e-05, "lm_loss": 0.5919, "loss": 0.5919, "step": 3079 }, { "epoch": 1.38059840872586, "learning_rate": 2.9400789922768567e-05, "lm_loss": 0.6384, "loss": 0.6384, "step": 3080 }, { "epoch": 1.3810466549624594, "learning_rate": 2.9388874878404156e-05, "lm_loss": 0.8259, "loss": 0.8259, "step": 3081 }, { "epoch": 1.3814949011990587, "learning_rate": 2.9376958805319253e-05, "lm_loss": 0.7995, "loss": 0.7995, "step": 3082 }, { "epoch": 1.381943147435658, "learning_rate": 2.9365041706306917e-05, "lm_loss": 0.5248, "loss": 0.5248, "step": 3083 }, { "epoch": 1.3823913936722572, "learning_rate": 2.9353123584160414e-05, "lm_loss": 0.6696, "loss": 0.6696, "step": 3084 }, { "epoch": 1.3828396399088567, "learning_rate": 2.9341204441673266e-05, "lm_loss": 0.4587, "loss": 0.4587, "step": 3085 }, { "epoch": 1.3832878861454558, "learning_rate": 2.932928428163923e-05, "lm_loss": 0.5556, "loss": 0.5556, "step": 3086 }, { "epoch": 1.3837361323820552, "learning_rate": 2.9317363106852326e-05, "lm_loss": 0.6214, "loss": 0.6214, "step": 3087 }, { "epoch": 1.3841843786186545, "learning_rate": 2.9305440920106776e-05, "lm_loss": 0.4534, "loss": 0.4534, "step": 3088 }, { "epoch": 1.3846326248552538, "learning_rate": 2.9293517724197046e-05, "lm_loss": 0.7504, "loss": 0.7504, "step": 3089 }, { "epoch": 1.3850808710918532, "learning_rate": 2.928159352191786e-05, "lm_loss": 0.6157, "loss": 0.6157, "step": 3090 }, { "epoch": 1.3855291173284523, "learning_rate": 2.9269668316064165e-05, "lm_loss": 0.4718, "loss": 0.4718, "step": 3091 }, { "epoch": 1.3859773635650519, "learning_rate": 2.9257742109431142e-05, "lm_loss": 0.9655, "loss": 0.9655, "step": 3092 }, { "epoch": 1.386425609801651, "learning_rate": 2.9245814904814196e-05, "lm_loss": 0.7543, "loss": 0.7543, "step": 3093 }, { "epoch": 1.3868738560382503, "learning_rate": 2.9233886705008995e-05, "lm_loss": 0.4045, "loss": 0.4045, "step": 3094 }, { "epoch": 1.3873221022748496, "learning_rate": 2.9221957512811403e-05, "lm_loss": 0.568, "loss": 0.568, "step": 3095 }, { "epoch": 1.387770348511449, "learning_rate": 2.9210027331017552e-05, "lm_loss": 0.6052, "loss": 0.6052, "step": 3096 }, { "epoch": 1.3882185947480483, "learning_rate": 2.919809616242379e-05, "lm_loss": 0.3093, "loss": 0.3093, "step": 3097 }, { "epoch": 1.3886668409846474, "learning_rate": 2.918616400982669e-05, "lm_loss": 0.6247, "loss": 0.6247, "step": 3098 }, { "epoch": 1.389115087221247, "learning_rate": 2.9174230876023058e-05, "lm_loss": 0.5568, "loss": 0.5568, "step": 3099 }, { "epoch": 1.389563333457846, "learning_rate": 2.9162296763809944e-05, "lm_loss": 0.4856, "loss": 0.4856, "step": 3100 }, { "epoch": 1.3900115796944454, "learning_rate": 2.9150361675984606e-05, "lm_loss": 0.6166, "loss": 0.6166, "step": 3101 }, { "epoch": 1.3904598259310448, "learning_rate": 2.9138425615344554e-05, "lm_loss": 0.8145, "loss": 0.8145, "step": 3102 }, { "epoch": 1.3909080721676441, "learning_rate": 2.9126488584687518e-05, "lm_loss": 0.7541, "loss": 0.7541, "step": 3103 }, { "epoch": 1.3913563184042435, "learning_rate": 2.9114550586811433e-05, "lm_loss": 0.4082, "loss": 0.4082, "step": 3104 }, { "epoch": 1.3918045646408426, "learning_rate": 2.9102611624514493e-05, "lm_loss": 0.6464, "loss": 0.6464, "step": 3105 }, { "epoch": 1.3922528108774421, "learning_rate": 2.9090671700595103e-05, "lm_loss": 0.4703, "loss": 0.4703, "step": 3106 }, { "epoch": 1.3927010571140412, "learning_rate": 2.9078730817851892e-05, "lm_loss": 0.9429, "loss": 0.9429, "step": 3107 }, { "epoch": 1.3931493033506406, "learning_rate": 2.9066788979083708e-05, "lm_loss": 0.5201, "loss": 0.5201, "step": 3108 }, { "epoch": 1.39359754958724, "learning_rate": 2.905484618708965e-05, "lm_loss": 0.7342, "loss": 0.7342, "step": 3109 }, { "epoch": 1.3940457958238393, "learning_rate": 2.9042902444669005e-05, "lm_loss": 0.5812, "loss": 0.5812, "step": 3110 }, { "epoch": 1.3944940420604386, "learning_rate": 2.903095775462132e-05, "lm_loss": 1.0846, "loss": 1.0846, "step": 3111 }, { "epoch": 1.3949422882970377, "learning_rate": 2.9019012119746326e-05, "lm_loss": 0.5211, "loss": 0.5211, "step": 3112 }, { "epoch": 1.3953905345336373, "learning_rate": 2.9007065542843985e-05, "lm_loss": 0.5523, "loss": 0.5523, "step": 3113 }, { "epoch": 1.3958387807702364, "learning_rate": 2.8995118026714513e-05, "lm_loss": 0.6281, "loss": 0.6281, "step": 3114 }, { "epoch": 1.3962870270068357, "learning_rate": 2.8983169574158308e-05, "lm_loss": 0.5413, "loss": 0.5413, "step": 3115 }, { "epoch": 1.396735273243435, "learning_rate": 2.8971220187976007e-05, "lm_loss": 0.3987, "loss": 0.3987, "step": 3116 }, { "epoch": 1.3971835194800344, "learning_rate": 2.8959269870968442e-05, "lm_loss": 0.5893, "loss": 0.5893, "step": 3117 }, { "epoch": 1.3976317657166337, "learning_rate": 2.8947318625936703e-05, "lm_loss": 0.6752, "loss": 0.6752, "step": 3118 }, { "epoch": 1.3980800119532328, "learning_rate": 2.8935366455682066e-05, "lm_loss": 0.504, "loss": 0.504, "step": 3119 }, { "epoch": 1.3985282581898324, "learning_rate": 2.8923413363006036e-05, "lm_loss": 0.6399, "loss": 0.6399, "step": 3120 }, { "epoch": 1.3989765044264315, "learning_rate": 2.891145935071032e-05, "lm_loss": 0.4692, "loss": 0.4692, "step": 3121 }, { "epoch": 1.3994247506630308, "learning_rate": 2.8899504421596857e-05, "lm_loss": 1.0278, "loss": 1.0278, "step": 3122 }, { "epoch": 1.3998729968996302, "learning_rate": 2.888754857846781e-05, "lm_loss": 0.5667, "loss": 0.5667, "step": 3123 }, { "epoch": 1.4003212431362295, "learning_rate": 2.887559182412553e-05, "lm_loss": 0.6688, "loss": 0.6688, "step": 3124 }, { "epoch": 1.4007694893728289, "learning_rate": 2.8863634161372593e-05, "lm_loss": 0.487, "loss": 0.487, "step": 3125 }, { "epoch": 1.4012177356094282, "learning_rate": 2.8851675593011783e-05, "lm_loss": 0.5345, "loss": 0.5345, "step": 3126 }, { "epoch": 1.4016659818460275, "learning_rate": 2.8839716121846118e-05, "lm_loss": 0.497, "loss": 0.497, "step": 3127 }, { "epoch": 1.4021142280826266, "learning_rate": 2.8827755750678805e-05, "lm_loss": 0.6036, "loss": 0.6036, "step": 3128 }, { "epoch": 1.402562474319226, "learning_rate": 2.8815794482313262e-05, "lm_loss": 0.6899, "loss": 0.6899, "step": 3129 }, { "epoch": 1.4030107205558253, "learning_rate": 2.880383231955312e-05, "lm_loss": 0.492, "loss": 0.492, "step": 3130 }, { "epoch": 1.4034589667924247, "learning_rate": 2.8791869265202238e-05, "lm_loss": 0.4542, "loss": 0.4542, "step": 3131 }, { "epoch": 1.403907213029024, "learning_rate": 2.8779905322064667e-05, "lm_loss": 0.5694, "loss": 0.5694, "step": 3132 }, { "epoch": 1.4043554592656233, "learning_rate": 2.8767940492944656e-05, "lm_loss": 0.5043, "loss": 0.5043, "step": 3133 }, { "epoch": 1.4048037055022227, "learning_rate": 2.8755974780646682e-05, "lm_loss": 0.6467, "loss": 0.6467, "step": 3134 }, { "epoch": 1.4052519517388218, "learning_rate": 2.8744008187975425e-05, "lm_loss": 0.4288, "loss": 0.4288, "step": 3135 }, { "epoch": 1.4057001979754211, "learning_rate": 2.8732040717735765e-05, "lm_loss": 0.6797, "loss": 0.6797, "step": 3136 }, { "epoch": 1.4061484442120205, "learning_rate": 2.8720072372732786e-05, "lm_loss": 0.4022, "loss": 0.4022, "step": 3137 }, { "epoch": 1.4065966904486198, "learning_rate": 2.8708103155771787e-05, "lm_loss": 0.5955, "loss": 0.5955, "step": 3138 }, { "epoch": 1.4070449366852191, "learning_rate": 2.869613306965826e-05, "lm_loss": 0.5978, "loss": 0.5978, "step": 3139 }, { "epoch": 1.4074931829218185, "learning_rate": 2.8684162117197916e-05, "lm_loss": 0.6052, "loss": 0.6052, "step": 3140 }, { "epoch": 1.4079414291584178, "learning_rate": 2.8672190301196655e-05, "lm_loss": 0.6228, "loss": 0.6228, "step": 3141 }, { "epoch": 1.408389675395017, "learning_rate": 2.8660217624460573e-05, "lm_loss": 0.5079, "loss": 0.5079, "step": 3142 }, { "epoch": 1.4088379216316163, "learning_rate": 2.8648244089796e-05, "lm_loss": 0.6004, "loss": 0.6004, "step": 3143 }, { "epoch": 1.4092861678682156, "learning_rate": 2.863626970000942e-05, "lm_loss": 0.6136, "loss": 0.6136, "step": 3144 }, { "epoch": 1.409734414104815, "learning_rate": 2.8624294457907573e-05, "lm_loss": 0.4701, "loss": 0.4701, "step": 3145 }, { "epoch": 1.4101826603414143, "learning_rate": 2.861231836629734e-05, "lm_loss": 0.6224, "loss": 0.6224, "step": 3146 }, { "epoch": 1.4106309065780136, "learning_rate": 2.8600341427985857e-05, "lm_loss": 0.4991, "loss": 0.4991, "step": 3147 }, { "epoch": 1.411079152814613, "learning_rate": 2.8588363645780407e-05, "lm_loss": 0.6897, "loss": 0.6897, "step": 3148 }, { "epoch": 1.411527399051212, "learning_rate": 2.857638502248851e-05, "lm_loss": 0.4638, "loss": 0.4638, "step": 3149 }, { "epoch": 1.4119756452878114, "learning_rate": 2.8564405560917873e-05, "lm_loss": 0.8148, "loss": 0.8148, "step": 3150 }, { "epoch": 1.4124238915244107, "learning_rate": 2.8552425263876377e-05, "lm_loss": 0.7246, "loss": 0.7246, "step": 3151 }, { "epoch": 1.41287213776101, "learning_rate": 2.854044413417213e-05, "lm_loss": 0.6774, "loss": 0.6774, "step": 3152 }, { "epoch": 1.4133203839976094, "learning_rate": 2.852846217461342e-05, "lm_loss": 1.0951, "loss": 1.0951, "step": 3153 }, { "epoch": 1.4137686302342087, "learning_rate": 2.8516479388008727e-05, "lm_loss": 0.5306, "loss": 0.5306, "step": 3154 }, { "epoch": 1.414216876470808, "learning_rate": 2.8504495777166733e-05, "lm_loss": 0.484, "loss": 0.484, "step": 3155 }, { "epoch": 1.4146651227074072, "learning_rate": 2.8492511344896318e-05, "lm_loss": 0.5264, "loss": 0.5264, "step": 3156 }, { "epoch": 1.4151133689440065, "learning_rate": 2.8480526094006526e-05, "lm_loss": 0.5686, "loss": 0.5686, "step": 3157 }, { "epoch": 1.4155616151806059, "learning_rate": 2.8468540027306634e-05, "lm_loss": 0.5364, "loss": 0.5364, "step": 3158 }, { "epoch": 1.4160098614172052, "learning_rate": 2.8456553147606075e-05, "lm_loss": 0.5675, "loss": 0.5675, "step": 3159 }, { "epoch": 1.4164581076538045, "learning_rate": 2.8444565457714493e-05, "lm_loss": 0.5566, "loss": 0.5566, "step": 3160 }, { "epoch": 1.4169063538904039, "learning_rate": 2.843257696044172e-05, "lm_loss": 0.9479, "loss": 0.9479, "step": 3161 }, { "epoch": 1.4173546001270032, "learning_rate": 2.8420587658597757e-05, "lm_loss": 0.4747, "loss": 0.4747, "step": 3162 }, { "epoch": 1.4178028463636023, "learning_rate": 2.8408597554992822e-05, "lm_loss": 0.6688, "loss": 0.6688, "step": 3163 }, { "epoch": 1.4182510926002017, "learning_rate": 2.8396606652437314e-05, "lm_loss": 0.6069, "loss": 0.6069, "step": 3164 }, { "epoch": 1.418699338836801, "learning_rate": 2.83846149537418e-05, "lm_loss": 0.4822, "loss": 0.4822, "step": 3165 }, { "epoch": 1.4191475850734003, "learning_rate": 2.8372622461717053e-05, "lm_loss": 0.5634, "loss": 0.5634, "step": 3166 }, { "epoch": 1.4195958313099997, "learning_rate": 2.8360629179174025e-05, "lm_loss": 0.5738, "loss": 0.5738, "step": 3167 }, { "epoch": 1.420044077546599, "learning_rate": 2.834863510892386e-05, "lm_loss": 0.3899, "loss": 0.3899, "step": 3168 }, { "epoch": 1.4204923237831983, "learning_rate": 2.833664025377788e-05, "lm_loss": 0.5943, "loss": 0.5943, "step": 3169 }, { "epoch": 1.4209405700197975, "learning_rate": 2.832464461654759e-05, "lm_loss": 0.5289, "loss": 0.5289, "step": 3170 }, { "epoch": 1.4213888162563968, "learning_rate": 2.8312648200044668e-05, "lm_loss": 0.6645, "loss": 0.6645, "step": 3171 }, { "epoch": 1.4218370624929961, "learning_rate": 2.8300651007081013e-05, "lm_loss": 0.5728, "loss": 0.5728, "step": 3172 }, { "epoch": 1.4222853087295955, "learning_rate": 2.828865304046866e-05, "lm_loss": 1.0619, "loss": 1.0619, "step": 3173 }, { "epoch": 1.4227335549661948, "learning_rate": 2.8276654303019858e-05, "lm_loss": 0.9976, "loss": 0.9976, "step": 3174 }, { "epoch": 1.4231818012027941, "learning_rate": 2.8264654797547007e-05, "lm_loss": 0.5376, "loss": 0.5376, "step": 3175 }, { "epoch": 1.4236300474393935, "learning_rate": 2.8252654526862722e-05, "lm_loss": 0.5605, "loss": 0.5605, "step": 3176 }, { "epoch": 1.4240782936759926, "learning_rate": 2.8240653493779775e-05, "lm_loss": 0.4592, "loss": 0.4592, "step": 3177 }, { "epoch": 1.424526539912592, "learning_rate": 2.822865170111112e-05, "lm_loss": 0.4714, "loss": 0.4714, "step": 3178 }, { "epoch": 1.4249747861491913, "learning_rate": 2.8216649151669883e-05, "lm_loss": 0.4856, "loss": 0.4856, "step": 3179 }, { "epoch": 1.4254230323857906, "learning_rate": 2.820464584826938e-05, "lm_loss": 0.585, "loss": 0.585, "step": 3180 }, { "epoch": 1.42587127862239, "learning_rate": 2.81926417937231e-05, "lm_loss": 0.5455, "loss": 0.5455, "step": 3181 }, { "epoch": 1.4263195248589893, "learning_rate": 2.8180636990844713e-05, "lm_loss": 0.4421, "loss": 0.4421, "step": 3182 }, { "epoch": 1.4267677710955886, "learning_rate": 2.8168631442448044e-05, "lm_loss": 0.5547, "loss": 0.5547, "step": 3183 }, { "epoch": 1.4272160173321877, "learning_rate": 2.815662515134711e-05, "lm_loss": 0.5869, "loss": 0.5869, "step": 3184 }, { "epoch": 1.427664263568787, "learning_rate": 2.8144618120356108e-05, "lm_loss": 0.6869, "loss": 0.6869, "step": 3185 }, { "epoch": 1.4281125098053864, "learning_rate": 2.8132610352289395e-05, "lm_loss": 0.4879, "loss": 0.4879, "step": 3186 }, { "epoch": 1.4285607560419857, "learning_rate": 2.81206018499615e-05, "lm_loss": 0.5586, "loss": 0.5586, "step": 3187 }, { "epoch": 1.429009002278585, "learning_rate": 2.8108592616187133e-05, "lm_loss": 1.1439, "loss": 1.1439, "step": 3188 }, { "epoch": 1.4294572485151844, "learning_rate": 2.8096582653781166e-05, "lm_loss": 0.9616, "loss": 0.9616, "step": 3189 }, { "epoch": 1.4299054947517837, "learning_rate": 2.808457196555866e-05, "lm_loss": 0.664, "loss": 0.664, "step": 3190 }, { "epoch": 1.4303537409883829, "learning_rate": 2.8072560554334814e-05, "lm_loss": 0.453, "loss": 0.453, "step": 3191 }, { "epoch": 1.4308019872249822, "learning_rate": 2.8060548422925033e-05, "lm_loss": 0.5248, "loss": 0.5248, "step": 3192 }, { "epoch": 1.4312502334615815, "learning_rate": 2.8048535574144864e-05, "lm_loss": 0.5988, "loss": 0.5988, "step": 3193 }, { "epoch": 1.4316984796981809, "learning_rate": 2.803652201081004e-05, "lm_loss": 0.6559, "loss": 0.6559, "step": 3194 }, { "epoch": 1.4321467259347802, "learning_rate": 2.8024507735736445e-05, "lm_loss": 0.7004, "loss": 0.7004, "step": 3195 }, { "epoch": 1.4325949721713795, "learning_rate": 2.801249275174014e-05, "lm_loss": 0.8496, "loss": 0.8496, "step": 3196 }, { "epoch": 1.4330432184079789, "learning_rate": 2.8000477061637352e-05, "lm_loss": 0.4217, "loss": 0.4217, "step": 3197 }, { "epoch": 1.433491464644578, "learning_rate": 2.798846066824447e-05, "lm_loss": 0.6473, "loss": 0.6473, "step": 3198 }, { "epoch": 1.4339397108811773, "learning_rate": 2.797644357437806e-05, "lm_loss": 0.8193, "loss": 0.8193, "step": 3199 }, { "epoch": 1.4343879571177767, "learning_rate": 2.796442578285482e-05, "lm_loss": 0.7174, "loss": 0.7174, "step": 3200 }, { "epoch": 1.434836203354376, "learning_rate": 2.7952407296491655e-05, "lm_loss": 0.6254, "loss": 0.6254, "step": 3201 }, { "epoch": 1.4352844495909753, "learning_rate": 2.7940388118105593e-05, "lm_loss": 0.3965, "loss": 0.3965, "step": 3202 }, { "epoch": 1.4357326958275747, "learning_rate": 2.7928368250513858e-05, "lm_loss": 0.6097, "loss": 0.6097, "step": 3203 }, { "epoch": 1.436180942064174, "learning_rate": 2.791634769653381e-05, "lm_loss": 0.4463, "loss": 0.4463, "step": 3204 }, { "epoch": 1.4366291883007731, "learning_rate": 2.7904326458982988e-05, "lm_loss": 0.4676, "loss": 0.4676, "step": 3205 }, { "epoch": 1.4370774345373725, "learning_rate": 2.7892304540679075e-05, "lm_loss": 0.6411, "loss": 0.6411, "step": 3206 }, { "epoch": 1.4375256807739718, "learning_rate": 2.7880281944439923e-05, "lm_loss": 0.9942, "loss": 0.9942, "step": 3207 }, { "epoch": 1.4379739270105711, "learning_rate": 2.7868258673083548e-05, "lm_loss": 0.9644, "loss": 0.9644, "step": 3208 }, { "epoch": 1.4384221732471705, "learning_rate": 2.785623472942811e-05, "lm_loss": 0.763, "loss": 0.763, "step": 3209 }, { "epoch": 1.4388704194837698, "learning_rate": 2.7844210116291943e-05, "lm_loss": 0.4939, "loss": 0.4939, "step": 3210 }, { "epoch": 1.4393186657203692, "learning_rate": 2.7832184836493513e-05, "lm_loss": 0.5602, "loss": 0.5602, "step": 3211 }, { "epoch": 1.4397669119569683, "learning_rate": 2.7820158892851467e-05, "lm_loss": 0.631, "loss": 0.631, "step": 3212 }, { "epoch": 1.4402151581935676, "learning_rate": 2.7808132288184613e-05, "lm_loss": 0.5435, "loss": 0.5435, "step": 3213 }, { "epoch": 1.440663404430167, "learning_rate": 2.7796105025311886e-05, "lm_loss": 0.542, "loss": 0.542, "step": 3214 }, { "epoch": 1.4411116506667663, "learning_rate": 2.7784077107052392e-05, "lm_loss": 0.9018, "loss": 0.9018, "step": 3215 }, { "epoch": 1.4415598969033656, "learning_rate": 2.7772048536225377e-05, "lm_loss": 0.523, "loss": 0.523, "step": 3216 }, { "epoch": 1.442008143139965, "learning_rate": 2.7760019315650264e-05, "lm_loss": 0.6407, "loss": 0.6407, "step": 3217 }, { "epoch": 1.4424563893765643, "learning_rate": 2.7747989448146622e-05, "lm_loss": 0.3893, "loss": 0.3893, "step": 3218 }, { "epoch": 1.4429046356131634, "learning_rate": 2.773595893653415e-05, "lm_loss": 0.7903, "loss": 0.7903, "step": 3219 }, { "epoch": 1.4433528818497627, "learning_rate": 2.772392778363272e-05, "lm_loss": 0.4446, "loss": 0.4446, "step": 3220 }, { "epoch": 1.443801128086362, "learning_rate": 2.7711895992262338e-05, "lm_loss": 0.5388, "loss": 0.5388, "step": 3221 }, { "epoch": 1.4442493743229614, "learning_rate": 2.7699863565243183e-05, "lm_loss": 0.7613, "loss": 0.7613, "step": 3222 }, { "epoch": 1.4446976205595607, "learning_rate": 2.7687830505395562e-05, "lm_loss": 0.771, "loss": 0.771, "step": 3223 }, { "epoch": 1.44514586679616, "learning_rate": 2.767579681553993e-05, "lm_loss": 0.5961, "loss": 0.5961, "step": 3224 }, { "epoch": 1.4455941130327594, "learning_rate": 2.7663762498496905e-05, "lm_loss": 1.0618, "loss": 1.0618, "step": 3225 }, { "epoch": 1.4460423592693585, "learning_rate": 2.765172755708725e-05, "lm_loss": 0.8725, "loss": 0.8725, "step": 3226 }, { "epoch": 1.4464906055059579, "learning_rate": 2.7639691994131854e-05, "lm_loss": 1.2196, "loss": 1.2196, "step": 3227 }, { "epoch": 1.4469388517425572, "learning_rate": 2.762765581245177e-05, "lm_loss": 0.4469, "loss": 0.4469, "step": 3228 }, { "epoch": 1.4473870979791565, "learning_rate": 2.7615619014868195e-05, "lm_loss": 0.6483, "loss": 0.6483, "step": 3229 }, { "epoch": 1.4478353442157559, "learning_rate": 2.760358160420247e-05, "lm_loss": 0.6323, "loss": 0.6323, "step": 3230 }, { "epoch": 1.4482835904523552, "learning_rate": 2.7591543583276075e-05, "lm_loss": 0.5537, "loss": 0.5537, "step": 3231 }, { "epoch": 1.4487318366889546, "learning_rate": 2.7579504954910623e-05, "lm_loss": 0.4811, "loss": 0.4811, "step": 3232 }, { "epoch": 1.4491800829255537, "learning_rate": 2.75674657219279e-05, "lm_loss": 0.6048, "loss": 0.6048, "step": 3233 }, { "epoch": 1.449628329162153, "learning_rate": 2.7555425887149805e-05, "lm_loss": 0.3561, "loss": 0.3561, "step": 3234 }, { "epoch": 1.4500765753987523, "learning_rate": 2.754338545339839e-05, "lm_loss": 0.653, "loss": 0.653, "step": 3235 }, { "epoch": 1.4505248216353517, "learning_rate": 2.7531344423495842e-05, "lm_loss": 0.6148, "loss": 0.6148, "step": 3236 }, { "epoch": 1.450973067871951, "learning_rate": 2.7519302800264492e-05, "lm_loss": 0.4779, "loss": 0.4779, "step": 3237 }, { "epoch": 1.4514213141085504, "learning_rate": 2.7507260586526813e-05, "lm_loss": 0.4919, "loss": 0.4919, "step": 3238 }, { "epoch": 1.4518695603451497, "learning_rate": 2.7495217785105413e-05, "lm_loss": 0.6497, "loss": 0.6497, "step": 3239 }, { "epoch": 1.4523178065817488, "learning_rate": 2.7483174398823038e-05, "lm_loss": 0.5747, "loss": 0.5747, "step": 3240 }, { "epoch": 1.4527660528183481, "learning_rate": 2.747113043050256e-05, "lm_loss": 0.5292, "loss": 0.5292, "step": 3241 }, { "epoch": 1.4532142990549475, "learning_rate": 2.7459085882967007e-05, "lm_loss": 0.4325, "loss": 0.4325, "step": 3242 }, { "epoch": 1.4536625452915468, "learning_rate": 2.744704075903954e-05, "lm_loss": 0.6587, "loss": 0.6587, "step": 3243 }, { "epoch": 1.4541107915281462, "learning_rate": 2.743499506154344e-05, "lm_loss": 0.5573, "loss": 0.5573, "step": 3244 }, { "epoch": 1.4545590377647455, "learning_rate": 2.7422948793302126e-05, "lm_loss": 0.5714, "loss": 0.5714, "step": 3245 }, { "epoch": 1.4550072840013448, "learning_rate": 2.7410901957139168e-05, "lm_loss": 0.4184, "loss": 0.4184, "step": 3246 }, { "epoch": 1.455455530237944, "learning_rate": 2.7398854555878244e-05, "lm_loss": 0.6467, "loss": 0.6467, "step": 3247 }, { "epoch": 1.4559037764745433, "learning_rate": 2.7386806592343194e-05, "lm_loss": 0.5534, "loss": 0.5534, "step": 3248 }, { "epoch": 1.4563520227111426, "learning_rate": 2.7374758069357964e-05, "lm_loss": 0.4991, "loss": 0.4991, "step": 3249 }, { "epoch": 1.456800268947742, "learning_rate": 2.7362708989746645e-05, "lm_loss": 0.908, "loss": 0.908, "step": 3250 }, { "epoch": 1.4572485151843413, "learning_rate": 2.7350659356333435e-05, "lm_loss": 0.6462, "loss": 0.6462, "step": 3251 }, { "epoch": 1.4576967614209406, "learning_rate": 2.7338609171942715e-05, "lm_loss": 0.6796, "loss": 0.6796, "step": 3252 }, { "epoch": 1.45814500765754, "learning_rate": 2.7326558439398926e-05, "lm_loss": 0.5101, "loss": 0.5101, "step": 3253 }, { "epoch": 1.458593253894139, "learning_rate": 2.73145071615267e-05, "lm_loss": 0.6746, "loss": 0.6746, "step": 3254 }, { "epoch": 1.4590415001307384, "learning_rate": 2.7302455341150758e-05, "lm_loss": 0.5601, "loss": 0.5601, "step": 3255 }, { "epoch": 1.4594897463673377, "learning_rate": 2.7290402981095946e-05, "lm_loss": 0.4296, "loss": 0.4296, "step": 3256 }, { "epoch": 1.459937992603937, "learning_rate": 2.727835008418727e-05, "lm_loss": 0.549, "loss": 0.549, "step": 3257 }, { "epoch": 1.4603862388405364, "learning_rate": 2.726629665324984e-05, "lm_loss": 0.4001, "loss": 0.4001, "step": 3258 }, { "epoch": 1.4608344850771358, "learning_rate": 2.725424269110889e-05, "lm_loss": 1.1096, "loss": 1.1096, "step": 3259 }, { "epoch": 1.461282731313735, "learning_rate": 2.7242188200589767e-05, "lm_loss": 0.57, "loss": 0.57, "step": 3260 }, { "epoch": 1.4617309775503342, "learning_rate": 2.723013318451798e-05, "lm_loss": 0.7053, "loss": 0.7053, "step": 3261 }, { "epoch": 1.4621792237869335, "learning_rate": 2.721807764571912e-05, "lm_loss": 0.7997, "loss": 0.7997, "step": 3262 }, { "epoch": 1.4626274700235329, "learning_rate": 2.7206021587018926e-05, "lm_loss": 0.5592, "loss": 0.5592, "step": 3263 }, { "epoch": 1.4630757162601322, "learning_rate": 2.719396501124325e-05, "lm_loss": 0.6057, "loss": 0.6057, "step": 3264 }, { "epoch": 1.4635239624967316, "learning_rate": 2.718190792121806e-05, "lm_loss": 0.761, "loss": 0.761, "step": 3265 }, { "epoch": 1.463972208733331, "learning_rate": 2.7169850319769458e-05, "lm_loss": 0.4538, "loss": 0.4538, "step": 3266 }, { "epoch": 1.4644204549699302, "learning_rate": 2.7157792209723654e-05, "lm_loss": 0.4457, "loss": 0.4457, "step": 3267 }, { "epoch": 1.4648687012065293, "learning_rate": 2.714573359390699e-05, "lm_loss": 0.6718, "loss": 0.6718, "step": 3268 }, { "epoch": 1.4653169474431287, "learning_rate": 2.7133674475145903e-05, "lm_loss": 0.7184, "loss": 0.7184, "step": 3269 }, { "epoch": 1.465765193679728, "learning_rate": 2.7121614856266975e-05, "lm_loss": 0.4866, "loss": 0.4866, "step": 3270 }, { "epoch": 1.4662134399163274, "learning_rate": 2.7109554740096894e-05, "lm_loss": 0.6047, "loss": 0.6047, "step": 3271 }, { "epoch": 1.4666616861529267, "learning_rate": 2.709749412946246e-05, "lm_loss": 0.4987, "loss": 0.4987, "step": 3272 }, { "epoch": 1.467109932389526, "learning_rate": 2.7085433027190588e-05, "lm_loss": 0.5172, "loss": 0.5172, "step": 3273 }, { "epoch": 1.4675581786261254, "learning_rate": 2.7073371436108314e-05, "lm_loss": 0.6576, "loss": 0.6576, "step": 3274 }, { "epoch": 1.4680064248627245, "learning_rate": 2.7061309359042798e-05, "lm_loss": 0.5757, "loss": 0.5757, "step": 3275 }, { "epoch": 1.4684546710993238, "learning_rate": 2.70492467988213e-05, "lm_loss": 1.0381, "loss": 1.0381, "step": 3276 }, { "epoch": 1.4689029173359232, "learning_rate": 2.703718375827119e-05, "lm_loss": 0.9322, "loss": 0.9322, "step": 3277 }, { "epoch": 1.4693511635725225, "learning_rate": 2.7025120240219955e-05, "lm_loss": 0.6494, "loss": 0.6494, "step": 3278 }, { "epoch": 1.4697994098091218, "learning_rate": 2.701305624749521e-05, "lm_loss": 0.5792, "loss": 0.5792, "step": 3279 }, { "epoch": 1.4702476560457212, "learning_rate": 2.7000991782924662e-05, "lm_loss": 0.5763, "loss": 0.5763, "step": 3280 }, { "epoch": 1.4706959022823205, "learning_rate": 2.698892684933613e-05, "lm_loss": 0.9701, "loss": 0.9701, "step": 3281 }, { "epoch": 1.4711441485189196, "learning_rate": 2.6976861449557545e-05, "lm_loss": 0.6807, "loss": 0.6807, "step": 3282 }, { "epoch": 1.471592394755519, "learning_rate": 2.696479558641695e-05, "lm_loss": 0.4467, "loss": 0.4467, "step": 3283 }, { "epoch": 1.4720406409921183, "learning_rate": 2.6952729262742508e-05, "lm_loss": 0.6225, "loss": 0.6225, "step": 3284 }, { "epoch": 1.4724888872287176, "learning_rate": 2.6940662481362465e-05, "lm_loss": 0.4962, "loss": 0.4962, "step": 3285 }, { "epoch": 1.472937133465317, "learning_rate": 2.6928595245105193e-05, "lm_loss": 0.6164, "loss": 0.6164, "step": 3286 }, { "epoch": 1.4733853797019163, "learning_rate": 2.691652755679916e-05, "lm_loss": 0.4986, "loss": 0.4986, "step": 3287 }, { "epoch": 1.4738336259385156, "learning_rate": 2.6904459419272955e-05, "lm_loss": 0.7404, "loss": 0.7404, "step": 3288 }, { "epoch": 1.4742818721751147, "learning_rate": 2.6892390835355246e-05, "lm_loss": 0.8551, "loss": 0.8551, "step": 3289 }, { "epoch": 1.474730118411714, "learning_rate": 2.6880321807874836e-05, "lm_loss": 0.5876, "loss": 0.5876, "step": 3290 }, { "epoch": 1.4751783646483134, "learning_rate": 2.686825233966061e-05, "lm_loss": 0.4824, "loss": 0.4824, "step": 3291 }, { "epoch": 1.4756266108849128, "learning_rate": 2.6856182433541566e-05, "lm_loss": 0.4946, "loss": 0.4946, "step": 3292 }, { "epoch": 1.476074857121512, "learning_rate": 2.6844112092346807e-05, "lm_loss": 0.6155, "loss": 0.6155, "step": 3293 }, { "epoch": 1.4765231033581114, "learning_rate": 2.683204131890552e-05, "lm_loss": 0.9148, "loss": 0.9148, "step": 3294 }, { "epoch": 1.4769713495947108, "learning_rate": 2.6819970116047022e-05, "lm_loss": 0.465, "loss": 0.465, "step": 3295 }, { "epoch": 1.4774195958313099, "learning_rate": 2.680789848660071e-05, "lm_loss": 0.63, "loss": 0.63, "step": 3296 }, { "epoch": 1.4778678420679094, "learning_rate": 2.6795826433396086e-05, "lm_loss": 0.5731, "loss": 0.5731, "step": 3297 }, { "epoch": 1.4783160883045086, "learning_rate": 2.6783753959262747e-05, "lm_loss": 0.6225, "loss": 0.6225, "step": 3298 }, { "epoch": 1.478764334541108, "learning_rate": 2.6771681067030408e-05, "lm_loss": 0.5257, "loss": 0.5257, "step": 3299 }, { "epoch": 1.4792125807777072, "learning_rate": 2.6759607759528853e-05, "lm_loss": 1.1285, "loss": 1.1285, "step": 3300 }, { "epoch": 1.4796608270143066, "learning_rate": 2.674753403958798e-05, "lm_loss": 0.4408, "loss": 0.4408, "step": 3301 }, { "epoch": 1.480109073250906, "learning_rate": 2.6735459910037785e-05, "lm_loss": 0.5466, "loss": 0.5466, "step": 3302 }, { "epoch": 1.480557319487505, "learning_rate": 2.672338537370836e-05, "lm_loss": 0.4194, "loss": 0.4194, "step": 3303 }, { "epoch": 1.4810055657241046, "learning_rate": 2.6711310433429887e-05, "lm_loss": 0.7034, "loss": 0.7034, "step": 3304 }, { "epoch": 1.4814538119607037, "learning_rate": 2.6699235092032636e-05, "lm_loss": 0.5604, "loss": 0.5604, "step": 3305 }, { "epoch": 1.481902058197303, "learning_rate": 2.6687159352346992e-05, "lm_loss": 1.0882, "loss": 1.0882, "step": 3306 }, { "epoch": 1.4823503044339024, "learning_rate": 2.6675083217203407e-05, "lm_loss": 0.5026, "loss": 0.5026, "step": 3307 }, { "epoch": 1.4827985506705017, "learning_rate": 2.666300668943245e-05, "lm_loss": 0.5119, "loss": 0.5119, "step": 3308 }, { "epoch": 1.483246796907101, "learning_rate": 2.6650929771864776e-05, "lm_loss": 0.5789, "loss": 0.5789, "step": 3309 }, { "epoch": 1.4836950431437002, "learning_rate": 2.663885246733111e-05, "lm_loss": 0.5414, "loss": 0.5414, "step": 3310 }, { "epoch": 1.4841432893802997, "learning_rate": 2.6626774778662294e-05, "lm_loss": 1.0192, "loss": 1.0192, "step": 3311 }, { "epoch": 1.4845915356168988, "learning_rate": 2.6614696708689262e-05, "lm_loss": 0.5769, "loss": 0.5769, "step": 3312 }, { "epoch": 1.4850397818534982, "learning_rate": 2.660261826024301e-05, "lm_loss": 0.6217, "loss": 0.6217, "step": 3313 }, { "epoch": 1.4854880280900975, "learning_rate": 2.6590539436154645e-05, "lm_loss": 0.4768, "loss": 0.4768, "step": 3314 }, { "epoch": 1.4859362743266968, "learning_rate": 2.657846023925535e-05, "lm_loss": 0.6161, "loss": 0.6161, "step": 3315 }, { "epoch": 1.4863845205632962, "learning_rate": 2.6566380672376413e-05, "lm_loss": 0.5203, "loss": 0.5203, "step": 3316 }, { "epoch": 1.4868327667998953, "learning_rate": 2.6554300738349192e-05, "lm_loss": 0.6128, "loss": 0.6128, "step": 3317 }, { "epoch": 1.4872810130364948, "learning_rate": 2.6542220440005135e-05, "lm_loss": 0.3617, "loss": 0.3617, "step": 3318 }, { "epoch": 1.487729259273094, "learning_rate": 2.6530139780175768e-05, "lm_loss": 0.5735, "loss": 0.5735, "step": 3319 }, { "epoch": 1.4881775055096933, "learning_rate": 2.6518058761692726e-05, "lm_loss": 0.6578, "loss": 0.6578, "step": 3320 }, { "epoch": 1.4886257517462926, "learning_rate": 2.6505977387387703e-05, "lm_loss": 0.5483, "loss": 0.5483, "step": 3321 }, { "epoch": 1.489073997982892, "learning_rate": 2.6493895660092488e-05, "lm_loss": 0.5371, "loss": 0.5371, "step": 3322 }, { "epoch": 1.4895222442194913, "learning_rate": 2.648181358263894e-05, "lm_loss": 0.5211, "loss": 0.5211, "step": 3323 }, { "epoch": 1.4899704904560904, "learning_rate": 2.646973115785903e-05, "lm_loss": 0.6739, "loss": 0.6739, "step": 3324 }, { "epoch": 1.49041873669269, "learning_rate": 2.645764838858478e-05, "lm_loss": 0.3555, "loss": 0.3555, "step": 3325 }, { "epoch": 1.490866982929289, "learning_rate": 2.644556527764831e-05, "lm_loss": 0.6427, "loss": 0.6427, "step": 3326 }, { "epoch": 1.4913152291658884, "learning_rate": 2.6433481827881788e-05, "lm_loss": 0.6116, "loss": 0.6116, "step": 3327 }, { "epoch": 1.4917634754024878, "learning_rate": 2.642139804211753e-05, "lm_loss": 0.5255, "loss": 0.5255, "step": 3328 }, { "epoch": 1.492211721639087, "learning_rate": 2.6409313923187857e-05, "lm_loss": 0.4501, "loss": 0.4501, "step": 3329 }, { "epoch": 1.4926599678756864, "learning_rate": 2.6397229473925205e-05, "lm_loss": 0.8143, "loss": 0.8143, "step": 3330 }, { "epoch": 1.4931082141122856, "learning_rate": 2.6385144697162085e-05, "lm_loss": 0.8009, "loss": 0.8009, "step": 3331 }, { "epoch": 1.4935564603488851, "learning_rate": 2.6373059595731077e-05, "lm_loss": 0.6098, "loss": 0.6098, "step": 3332 }, { "epoch": 1.4940047065854842, "learning_rate": 2.636097417246485e-05, "lm_loss": 0.686, "loss": 0.686, "step": 3333 }, { "epoch": 1.4944529528220836, "learning_rate": 2.634888843019613e-05, "lm_loss": 0.4083, "loss": 0.4083, "step": 3334 }, { "epoch": 1.494901199058683, "learning_rate": 2.633680237175773e-05, "lm_loss": 0.6314, "loss": 0.6314, "step": 3335 }, { "epoch": 1.4953494452952822, "learning_rate": 2.632471599998253e-05, "lm_loss": 0.7122, "loss": 0.7122, "step": 3336 }, { "epoch": 1.4957976915318816, "learning_rate": 2.63126293177035e-05, "lm_loss": 0.824, "loss": 0.824, "step": 3337 }, { "epoch": 1.4962459377684807, "learning_rate": 2.6300542327753668e-05, "lm_loss": 0.481, "loss": 0.481, "step": 3338 }, { "epoch": 1.4966941840050803, "learning_rate": 2.628845503296613e-05, "lm_loss": 0.6374, "loss": 0.6374, "step": 3339 }, { "epoch": 1.4971424302416794, "learning_rate": 2.6276367436174064e-05, "lm_loss": 0.4529, "loss": 0.4529, "step": 3340 }, { "epoch": 1.4975906764782787, "learning_rate": 2.6264279540210713e-05, "lm_loss": 0.542, "loss": 0.542, "step": 3341 }, { "epoch": 1.498038922714878, "learning_rate": 2.62521913479094e-05, "lm_loss": 0.803, "loss": 0.803, "step": 3342 }, { "epoch": 1.4984871689514774, "learning_rate": 2.6240102862103495e-05, "lm_loss": 0.7121, "loss": 0.7121, "step": 3343 }, { "epoch": 1.4989354151880767, "learning_rate": 2.6228014085626468e-05, "lm_loss": 0.5951, "loss": 0.5951, "step": 3344 }, { "epoch": 1.4993836614246758, "learning_rate": 2.6215925021311826e-05, "lm_loss": 1.0244, "loss": 1.0244, "step": 3345 }, { "epoch": 1.4998319076612754, "learning_rate": 2.6203835671993172e-05, "lm_loss": 0.5014, "loss": 0.5014, "step": 3346 }, { "epoch": 1.5002801538978745, "learning_rate": 2.6191746040504154e-05, "lm_loss": 0.5179, "loss": 0.5179, "step": 3347 }, { "epoch": 1.5007284001344738, "learning_rate": 2.6179656129678494e-05, "lm_loss": 0.4747, "loss": 0.4747, "step": 3348 }, { "epoch": 1.5011766463710732, "learning_rate": 2.6167565942349982e-05, "lm_loss": 0.4562, "loss": 0.4562, "step": 3349 }, { "epoch": 1.5016248926076725, "learning_rate": 2.6155475481352465e-05, "lm_loss": 0.6301, "loss": 0.6301, "step": 3350 }, { "epoch": 1.5020731388442718, "learning_rate": 2.6143384749519866e-05, "lm_loss": 0.6063, "loss": 0.6063, "step": 3351 }, { "epoch": 1.502521385080871, "learning_rate": 2.613129374968616e-05, "lm_loss": 0.4125, "loss": 0.4125, "step": 3352 }, { "epoch": 1.5029696313174705, "learning_rate": 2.6119202484685396e-05, "lm_loss": 0.5921, "loss": 0.5921, "step": 3353 }, { "epoch": 1.5034178775540696, "learning_rate": 2.610711095735167e-05, "lm_loss": 0.5517, "loss": 0.5517, "step": 3354 }, { "epoch": 1.503866123790669, "learning_rate": 2.609501917051916e-05, "lm_loss": 0.5505, "loss": 0.5505, "step": 3355 }, { "epoch": 1.5043143700272683, "learning_rate": 2.6082927127022082e-05, "lm_loss": 0.5438, "loss": 0.5438, "step": 3356 }, { "epoch": 1.5047626162638676, "learning_rate": 2.6070834829694736e-05, "lm_loss": 0.5089, "loss": 0.5089, "step": 3357 }, { "epoch": 1.505210862500467, "learning_rate": 2.6058742281371457e-05, "lm_loss": 0.9316, "loss": 0.9316, "step": 3358 }, { "epoch": 1.505659108737066, "learning_rate": 2.604664948488665e-05, "lm_loss": 1.1584, "loss": 1.1584, "step": 3359 }, { "epoch": 1.5061073549736657, "learning_rate": 2.6034556443074782e-05, "lm_loss": 0.4735, "loss": 0.4735, "step": 3360 }, { "epoch": 1.5065556012102648, "learning_rate": 2.602246315877039e-05, "lm_loss": 0.5795, "loss": 0.5795, "step": 3361 }, { "epoch": 1.507003847446864, "learning_rate": 2.601036963480803e-05, "lm_loss": 0.5303, "loss": 0.5303, "step": 3362 }, { "epoch": 1.5074520936834634, "learning_rate": 2.5998275874022344e-05, "lm_loss": 0.5357, "loss": 0.5357, "step": 3363 }, { "epoch": 1.5079003399200628, "learning_rate": 2.5986181879248016e-05, "lm_loss": 0.5071, "loss": 0.5071, "step": 3364 }, { "epoch": 1.5083485861566621, "learning_rate": 2.5974087653319813e-05, "lm_loss": 0.5293, "loss": 0.5293, "step": 3365 }, { "epoch": 1.5087968323932612, "learning_rate": 2.5961993199072516e-05, "lm_loss": 0.5765, "loss": 0.5765, "step": 3366 }, { "epoch": 1.5092450786298608, "learning_rate": 2.5949898519340977e-05, "lm_loss": 0.4178, "loss": 0.4178, "step": 3367 }, { "epoch": 1.50969332486646, "learning_rate": 2.5937803616960092e-05, "lm_loss": 0.6505, "loss": 0.6505, "step": 3368 }, { "epoch": 1.5101415711030592, "learning_rate": 2.592570849476485e-05, "lm_loss": 0.6494, "loss": 0.6494, "step": 3369 }, { "epoch": 1.5105898173396586, "learning_rate": 2.591361315559024e-05, "lm_loss": 0.5337, "loss": 0.5337, "step": 3370 }, { "epoch": 1.511038063576258, "learning_rate": 2.5901517602271314e-05, "lm_loss": 0.5844, "loss": 0.5844, "step": 3371 }, { "epoch": 1.5114863098128573, "learning_rate": 2.5889421837643184e-05, "lm_loss": 0.5189, "loss": 0.5189, "step": 3372 }, { "epoch": 1.5119345560494564, "learning_rate": 2.5877325864541036e-05, "lm_loss": 0.464, "loss": 0.464, "step": 3373 }, { "epoch": 1.512382802286056, "learning_rate": 2.5865229685800053e-05, "lm_loss": 1.0984, "loss": 1.0984, "step": 3374 }, { "epoch": 1.512831048522655, "learning_rate": 2.5853133304255493e-05, "lm_loss": 0.4481, "loss": 0.4481, "step": 3375 }, { "epoch": 1.5132792947592544, "learning_rate": 2.5841036722742667e-05, "lm_loss": 0.517, "loss": 0.517, "step": 3376 }, { "epoch": 1.5137275409958537, "learning_rate": 2.582893994409692e-05, "lm_loss": 0.6006, "loss": 0.6006, "step": 3377 }, { "epoch": 1.514175787232453, "learning_rate": 2.581684297115366e-05, "lm_loss": 1.0176, "loss": 1.0176, "step": 3378 }, { "epoch": 1.5146240334690524, "learning_rate": 2.5804745806748325e-05, "lm_loss": 0.7352, "loss": 0.7352, "step": 3379 }, { "epoch": 1.5150722797056515, "learning_rate": 2.5792648453716396e-05, "lm_loss": 0.6974, "loss": 0.6974, "step": 3380 }, { "epoch": 1.515520525942251, "learning_rate": 2.578055091489341e-05, "lm_loss": 0.4661, "loss": 0.4661, "step": 3381 }, { "epoch": 1.5159687721788502, "learning_rate": 2.5768453193114944e-05, "lm_loss": 0.5814, "loss": 0.5814, "step": 3382 }, { "epoch": 1.5164170184154495, "learning_rate": 2.5756355291216622e-05, "lm_loss": 0.6419, "loss": 0.6419, "step": 3383 }, { "epoch": 1.5168652646520489, "learning_rate": 2.5744257212034084e-05, "lm_loss": 0.4576, "loss": 0.4576, "step": 3384 }, { "epoch": 1.5173135108886482, "learning_rate": 2.5732158958403058e-05, "lm_loss": 0.9472, "loss": 0.9472, "step": 3385 }, { "epoch": 1.5177617571252475, "learning_rate": 2.5720060533159263e-05, "lm_loss": 0.6487, "loss": 0.6487, "step": 3386 }, { "epoch": 1.5182100033618466, "learning_rate": 2.5707961939138502e-05, "lm_loss": 0.5876, "loss": 0.5876, "step": 3387 }, { "epoch": 1.5186582495984462, "learning_rate": 2.5695863179176587e-05, "lm_loss": 0.3771, "loss": 0.3771, "step": 3388 }, { "epoch": 1.5191064958350453, "learning_rate": 2.5683764256109388e-05, "lm_loss": 0.9694, "loss": 0.9694, "step": 3389 }, { "epoch": 1.5195547420716446, "learning_rate": 2.567166517277279e-05, "lm_loss": 0.5454, "loss": 0.5454, "step": 3390 }, { "epoch": 1.520002988308244, "learning_rate": 2.565956593200275e-05, "lm_loss": 0.5932, "loss": 0.5932, "step": 3391 }, { "epoch": 1.5204512345448433, "learning_rate": 2.564746653663523e-05, "lm_loss": 0.5713, "loss": 0.5713, "step": 3392 }, { "epoch": 1.5208994807814427, "learning_rate": 2.563536698950624e-05, "lm_loss": 0.3833, "loss": 0.3833, "step": 3393 }, { "epoch": 1.5213477270180418, "learning_rate": 2.5623267293451826e-05, "lm_loss": 0.7305, "loss": 0.7305, "step": 3394 }, { "epoch": 1.5217959732546413, "learning_rate": 2.561116745130807e-05, "lm_loss": 0.4611, "loss": 0.4611, "step": 3395 }, { "epoch": 1.5222442194912404, "learning_rate": 2.5599067465911103e-05, "lm_loss": 0.6421, "loss": 0.6421, "step": 3396 }, { "epoch": 1.5226924657278398, "learning_rate": 2.558696734009705e-05, "lm_loss": 0.6168, "loss": 0.6168, "step": 3397 }, { "epoch": 1.5231407119644391, "learning_rate": 2.5574867076702107e-05, "lm_loss": 0.4634, "loss": 0.4634, "step": 3398 }, { "epoch": 1.5235889582010385, "learning_rate": 2.5562766678562484e-05, "lm_loss": 0.6917, "loss": 0.6917, "step": 3399 }, { "epoch": 1.5240372044376378, "learning_rate": 2.555066614851443e-05, "lm_loss": 1.1597, "loss": 1.1597, "step": 3400 }, { "epoch": 1.524485450674237, "learning_rate": 2.553856548939421e-05, "lm_loss": 0.6833, "loss": 0.6833, "step": 3401 }, { "epoch": 1.5249336969108365, "learning_rate": 2.552646470403815e-05, "lm_loss": 0.4728, "loss": 0.4728, "step": 3402 }, { "epoch": 1.5253819431474356, "learning_rate": 2.551436379528257e-05, "lm_loss": 0.4836, "loss": 0.4836, "step": 3403 }, { "epoch": 1.525830189384035, "learning_rate": 2.5502262765963848e-05, "lm_loss": 0.6629, "loss": 0.6629, "step": 3404 }, { "epoch": 1.5262784356206343, "learning_rate": 2.5490161618918364e-05, "lm_loss": 0.5025, "loss": 0.5025, "step": 3405 }, { "epoch": 1.5267266818572336, "learning_rate": 2.5478060356982547e-05, "lm_loss": 0.4983, "loss": 0.4983, "step": 3406 }, { "epoch": 1.527174928093833, "learning_rate": 2.5465958982992853e-05, "lm_loss": 0.5383, "loss": 0.5383, "step": 3407 }, { "epoch": 1.527623174330432, "learning_rate": 2.545385749978574e-05, "lm_loss": 0.5756, "loss": 0.5756, "step": 3408 }, { "epoch": 1.5280714205670316, "learning_rate": 2.5441755910197713e-05, "lm_loss": 0.5321, "loss": 0.5321, "step": 3409 }, { "epoch": 1.5285196668036307, "learning_rate": 2.5429654217065307e-05, "lm_loss": 0.5482, "loss": 0.5482, "step": 3410 }, { "epoch": 1.52896791304023, "learning_rate": 2.5417552423225065e-05, "lm_loss": 0.3996, "loss": 0.3996, "step": 3411 }, { "epoch": 1.5294161592768294, "learning_rate": 2.5405450531513553e-05, "lm_loss": 0.7049, "loss": 0.7049, "step": 3412 }, { "epoch": 1.5298644055134287, "learning_rate": 2.5393348544767374e-05, "lm_loss": 0.4952, "loss": 0.4952, "step": 3413 }, { "epoch": 1.530312651750028, "learning_rate": 2.538124646582315e-05, "lm_loss": 0.5294, "loss": 0.5294, "step": 3414 }, { "epoch": 1.5307608979866272, "learning_rate": 2.5369144297517515e-05, "lm_loss": 0.6195, "loss": 0.6195, "step": 3415 }, { "epoch": 1.5312091442232267, "learning_rate": 2.535704204268713e-05, "lm_loss": 0.5621, "loss": 0.5621, "step": 3416 }, { "epoch": 1.5316573904598259, "learning_rate": 2.5344939704168676e-05, "lm_loss": 0.6342, "loss": 0.6342, "step": 3417 }, { "epoch": 1.5321056366964252, "learning_rate": 2.5332837284798844e-05, "lm_loss": 0.6027, "loss": 0.6027, "step": 3418 }, { "epoch": 1.5325538829330245, "learning_rate": 2.5320734787414373e-05, "lm_loss": 0.4867, "loss": 0.4867, "step": 3419 }, { "epoch": 1.5330021291696239, "learning_rate": 2.5308632214851992e-05, "lm_loss": 0.5021, "loss": 0.5021, "step": 3420 }, { "epoch": 1.5334503754062232, "learning_rate": 2.5296529569948446e-05, "lm_loss": 0.5368, "loss": 0.5368, "step": 3421 }, { "epoch": 1.5338986216428223, "learning_rate": 2.528442685554052e-05, "lm_loss": 0.5283, "loss": 0.5283, "step": 3422 }, { "epoch": 1.5343468678794219, "learning_rate": 2.5272324074465005e-05, "lm_loss": 0.672, "loss": 0.672, "step": 3423 }, { "epoch": 1.534795114116021, "learning_rate": 2.5260221229558695e-05, "lm_loss": 0.5371, "loss": 0.5371, "step": 3424 }, { "epoch": 1.5352433603526203, "learning_rate": 2.524811832365841e-05, "lm_loss": 0.5843, "loss": 0.5843, "step": 3425 }, { "epoch": 1.5356916065892197, "learning_rate": 2.5236015359600985e-05, "lm_loss": 0.4382, "loss": 0.4382, "step": 3426 }, { "epoch": 1.536139852825819, "learning_rate": 2.5223912340223276e-05, "lm_loss": 0.5333, "loss": 0.5333, "step": 3427 }, { "epoch": 1.5365880990624183, "learning_rate": 2.521180926836214e-05, "lm_loss": 0.6082, "loss": 0.6082, "step": 3428 }, { "epoch": 1.5370363452990174, "learning_rate": 2.5199706146854434e-05, "lm_loss": 0.4774, "loss": 0.4774, "step": 3429 }, { "epoch": 1.537484591535617, "learning_rate": 2.5187602978537057e-05, "lm_loss": 0.5281, "loss": 0.5281, "step": 3430 }, { "epoch": 1.5379328377722161, "learning_rate": 2.5175499766246906e-05, "lm_loss": 0.5723, "loss": 0.5723, "step": 3431 }, { "epoch": 1.5383810840088155, "learning_rate": 2.516339651282088e-05, "lm_loss": 0.439, "loss": 0.439, "step": 3432 }, { "epoch": 1.5388293302454148, "learning_rate": 2.5151293221095896e-05, "lm_loss": 0.5821, "loss": 0.5821, "step": 3433 }, { "epoch": 1.5392775764820141, "learning_rate": 2.5139189893908867e-05, "lm_loss": 0.5787, "loss": 0.5787, "step": 3434 }, { "epoch": 1.5397258227186135, "learning_rate": 2.5127086534096737e-05, "lm_loss": 0.5519, "loss": 0.5519, "step": 3435 }, { "epoch": 1.5401740689552126, "learning_rate": 2.511498314449645e-05, "lm_loss": 0.4366, "loss": 0.4366, "step": 3436 }, { "epoch": 1.5406223151918121, "learning_rate": 2.510287972794495e-05, "lm_loss": 0.516, "loss": 0.516, "step": 3437 }, { "epoch": 1.5410705614284113, "learning_rate": 2.5090776287279177e-05, "lm_loss": 0.4786, "loss": 0.4786, "step": 3438 }, { "epoch": 1.5415188076650106, "learning_rate": 2.50786728253361e-05, "lm_loss": 0.671, "loss": 0.671, "step": 3439 }, { "epoch": 1.54196705390161, "learning_rate": 2.506656934495269e-05, "lm_loss": 0.4649, "loss": 0.4649, "step": 3440 }, { "epoch": 1.5424153001382093, "learning_rate": 2.5054465848965907e-05, "lm_loss": 1.0198, "loss": 1.0198, "step": 3441 }, { "epoch": 1.5428635463748086, "learning_rate": 2.5042362340212717e-05, "lm_loss": 0.5239, "loss": 0.5239, "step": 3442 }, { "epoch": 1.5433117926114077, "learning_rate": 2.5030258821530105e-05, "lm_loss": 0.589, "loss": 0.589, "step": 3443 }, { "epoch": 1.5437600388480073, "learning_rate": 2.5018155295755042e-05, "lm_loss": 0.6563, "loss": 0.6563, "step": 3444 }, { "epoch": 1.5442082850846064, "learning_rate": 2.5006051765724513e-05, "lm_loss": 0.5257, "loss": 0.5257, "step": 3445 }, { "epoch": 1.5446565313212057, "learning_rate": 2.4993948234275492e-05, "lm_loss": 0.5047, "loss": 0.5047, "step": 3446 }, { "epoch": 1.545104777557805, "learning_rate": 2.4981844704244957e-05, "lm_loss": 0.6618, "loss": 0.6618, "step": 3447 }, { "epoch": 1.5455530237944044, "learning_rate": 2.49697411784699e-05, "lm_loss": 0.8748, "loss": 0.8748, "step": 3448 }, { "epoch": 1.5460012700310037, "learning_rate": 2.495763765978729e-05, "lm_loss": 0.4089, "loss": 0.4089, "step": 3449 }, { "epoch": 1.5464495162676029, "learning_rate": 2.49455341510341e-05, "lm_loss": 0.4795, "loss": 0.4795, "step": 3450 }, { "epoch": 1.5468977625042024, "learning_rate": 2.4933430655047315e-05, "lm_loss": 0.9747, "loss": 0.9747, "step": 3451 }, { "epoch": 1.5473460087408015, "learning_rate": 2.4921327174663905e-05, "lm_loss": 0.759, "loss": 0.759, "step": 3452 }, { "epoch": 1.547794254977401, "learning_rate": 2.4909223712720832e-05, "lm_loss": 0.6311, "loss": 0.6311, "step": 3453 }, { "epoch": 1.5482425012140002, "learning_rate": 2.4897120272055064e-05, "lm_loss": 0.4004, "loss": 0.4004, "step": 3454 }, { "epoch": 1.5486907474505995, "learning_rate": 2.4885016855503548e-05, "lm_loss": 0.6618, "loss": 0.6618, "step": 3455 }, { "epoch": 1.5491389936871989, "learning_rate": 2.4872913465903262e-05, "lm_loss": 0.4124, "loss": 0.4124, "step": 3456 }, { "epoch": 1.549587239923798, "learning_rate": 2.4860810106091136e-05, "lm_loss": 0.6762, "loss": 0.6762, "step": 3457 }, { "epoch": 1.5500354861603975, "learning_rate": 2.484870677890411e-05, "lm_loss": 0.5581, "loss": 0.5581, "step": 3458 }, { "epoch": 1.5504837323969967, "learning_rate": 2.4836603487179125e-05, "lm_loss": 0.9309, "loss": 0.9309, "step": 3459 }, { "epoch": 1.5509319786335962, "learning_rate": 2.48245002337531e-05, "lm_loss": 0.9029, "loss": 0.9029, "step": 3460 }, { "epoch": 1.5513802248701953, "learning_rate": 2.481239702146295e-05, "lm_loss": 0.6747, "loss": 0.6747, "step": 3461 }, { "epoch": 1.5518284711067947, "learning_rate": 2.4800293853145572e-05, "lm_loss": 0.6194, "loss": 0.6194, "step": 3462 }, { "epoch": 1.552276717343394, "learning_rate": 2.478819073163787e-05, "lm_loss": 0.3569, "loss": 0.3569, "step": 3463 }, { "epoch": 1.5527249635799931, "learning_rate": 2.4776087659776723e-05, "lm_loss": 1.1103, "loss": 1.1103, "step": 3464 }, { "epoch": 1.5531732098165927, "learning_rate": 2.4763984640399014e-05, "lm_loss": 0.7075, "loss": 0.7075, "step": 3465 }, { "epoch": 1.5536214560531918, "learning_rate": 2.4751881676341596e-05, "lm_loss": 0.7833, "loss": 0.7833, "step": 3466 }, { "epoch": 1.5540697022897914, "learning_rate": 2.473977877044131e-05, "lm_loss": 0.5911, "loss": 0.5911, "step": 3467 }, { "epoch": 1.5545179485263905, "learning_rate": 2.4727675925535e-05, "lm_loss": 0.7008, "loss": 0.7008, "step": 3468 }, { "epoch": 1.5549661947629898, "learning_rate": 2.4715573144459488e-05, "lm_loss": 0.7531, "loss": 0.7531, "step": 3469 }, { "epoch": 1.5554144409995891, "learning_rate": 2.470347043005156e-05, "lm_loss": 0.5937, "loss": 0.5937, "step": 3470 }, { "epoch": 1.5558626872361883, "learning_rate": 2.469136778514802e-05, "lm_loss": 0.8568, "loss": 0.8568, "step": 3471 }, { "epoch": 1.5563109334727878, "learning_rate": 2.4679265212585636e-05, "lm_loss": 0.5514, "loss": 0.5514, "step": 3472 }, { "epoch": 1.556759179709387, "learning_rate": 2.4667162715201155e-05, "lm_loss": 0.6003, "loss": 0.6003, "step": 3473 }, { "epoch": 1.5572074259459865, "learning_rate": 2.4655060295831333e-05, "lm_loss": 0.4784, "loss": 0.4784, "step": 3474 }, { "epoch": 1.5576556721825856, "learning_rate": 2.4642957957312873e-05, "lm_loss": 0.5589, "loss": 0.5589, "step": 3475 }, { "epoch": 1.558103918419185, "learning_rate": 2.4630855702482487e-05, "lm_loss": 0.5616, "loss": 0.5616, "step": 3476 }, { "epoch": 1.5585521646557843, "learning_rate": 2.4618753534176856e-05, "lm_loss": 0.4962, "loss": 0.4962, "step": 3477 }, { "epoch": 1.5590004108923834, "learning_rate": 2.4606651455232628e-05, "lm_loss": 0.5633, "loss": 0.5633, "step": 3478 }, { "epoch": 1.559448657128983, "learning_rate": 2.459454946848645e-05, "lm_loss": 0.5714, "loss": 0.5714, "step": 3479 }, { "epoch": 1.559896903365582, "learning_rate": 2.4582447576774945e-05, "lm_loss": 0.6079, "loss": 0.6079, "step": 3480 }, { "epoch": 1.5603451496021816, "learning_rate": 2.4570345782934702e-05, "lm_loss": 0.3749, "loss": 0.3749, "step": 3481 }, { "epoch": 1.5607933958387807, "learning_rate": 2.4558244089802286e-05, "lm_loss": 0.5943, "loss": 0.5943, "step": 3482 }, { "epoch": 1.56124164207538, "learning_rate": 2.454614250021426e-05, "lm_loss": 0.6808, "loss": 0.6808, "step": 3483 }, { "epoch": 1.5616898883119794, "learning_rate": 2.4534041017007152e-05, "lm_loss": 1.1227, "loss": 1.1227, "step": 3484 }, { "epoch": 1.5621381345485785, "learning_rate": 2.4521939643017455e-05, "lm_loss": 1.1587, "loss": 1.1587, "step": 3485 }, { "epoch": 1.562586380785178, "learning_rate": 2.4509838381081642e-05, "lm_loss": 0.4492, "loss": 0.4492, "step": 3486 }, { "epoch": 1.5630346270217772, "learning_rate": 2.449773723403616e-05, "lm_loss": 0.5617, "loss": 0.5617, "step": 3487 }, { "epoch": 1.5634828732583768, "learning_rate": 2.4485636204717435e-05, "lm_loss": 0.3867, "loss": 0.3867, "step": 3488 }, { "epoch": 1.5639311194949759, "learning_rate": 2.447353529596186e-05, "lm_loss": 0.6948, "loss": 0.6948, "step": 3489 }, { "epoch": 1.5643793657315752, "learning_rate": 2.4461434510605795e-05, "lm_loss": 0.3566, "loss": 0.3566, "step": 3490 }, { "epoch": 1.5648276119681745, "learning_rate": 2.444933385148558e-05, "lm_loss": 0.4989, "loss": 0.4989, "step": 3491 }, { "epoch": 1.5652758582047737, "learning_rate": 2.443723332143752e-05, "lm_loss": 0.5539, "loss": 0.5539, "step": 3492 }, { "epoch": 1.5657241044413732, "learning_rate": 2.44251329232979e-05, "lm_loss": 0.5326, "loss": 0.5326, "step": 3493 }, { "epoch": 1.5661723506779723, "learning_rate": 2.4413032659902958e-05, "lm_loss": 0.5061, "loss": 0.5061, "step": 3494 }, { "epoch": 1.566620596914572, "learning_rate": 2.4400932534088906e-05, "lm_loss": 0.5719, "loss": 0.5719, "step": 3495 }, { "epoch": 1.567068843151171, "learning_rate": 2.438883254869193e-05, "lm_loss": 0.5551, "loss": 0.5551, "step": 3496 }, { "epoch": 1.5675170893877703, "learning_rate": 2.4376732706548183e-05, "lm_loss": 0.5847, "loss": 0.5847, "step": 3497 }, { "epoch": 1.5679653356243697, "learning_rate": 2.4364633010493777e-05, "lm_loss": 0.7124, "loss": 0.7124, "step": 3498 }, { "epoch": 1.5684135818609688, "learning_rate": 2.435253346336478e-05, "lm_loss": 0.4199, "loss": 0.4199, "step": 3499 }, { "epoch": 1.5688618280975684, "learning_rate": 2.434043406799725e-05, "lm_loss": 0.5636, "loss": 0.5636, "step": 3500 }, { "epoch": 1.5693100743341675, "learning_rate": 2.4328334827227208e-05, "lm_loss": 0.5461, "loss": 0.5461, "step": 3501 }, { "epoch": 1.569758320570767, "learning_rate": 2.431623574389062e-05, "lm_loss": 0.4283, "loss": 0.4283, "step": 3502 }, { "epoch": 1.5702065668073661, "learning_rate": 2.430413682082342e-05, "lm_loss": 0.5925, "loss": 0.5925, "step": 3503 }, { "epoch": 1.5706548130439655, "learning_rate": 2.42920380608615e-05, "lm_loss": 0.7822, "loss": 0.7822, "step": 3504 }, { "epoch": 1.5711030592805648, "learning_rate": 2.427993946684074e-05, "lm_loss": 0.808, "loss": 0.808, "step": 3505 }, { "epoch": 1.571551305517164, "learning_rate": 2.426784104159695e-05, "lm_loss": 0.5268, "loss": 0.5268, "step": 3506 }, { "epoch": 1.5719995517537635, "learning_rate": 2.425574278796592e-05, "lm_loss": 0.9214, "loss": 0.9214, "step": 3507 }, { "epoch": 1.5724477979903626, "learning_rate": 2.424364470878339e-05, "lm_loss": 0.906, "loss": 0.906, "step": 3508 }, { "epoch": 1.5728960442269622, "learning_rate": 2.4231546806885052e-05, "lm_loss": 0.5459, "loss": 0.5459, "step": 3509 }, { "epoch": 1.5733442904635613, "learning_rate": 2.4219449085106592e-05, "lm_loss": 0.7096, "loss": 0.7096, "step": 3510 }, { "epoch": 1.5737925367001606, "learning_rate": 2.420735154628361e-05, "lm_loss": 0.4341, "loss": 0.4341, "step": 3511 }, { "epoch": 1.57424078293676, "learning_rate": 2.4195254193251678e-05, "lm_loss": 0.5988, "loss": 0.5988, "step": 3512 }, { "epoch": 1.574689029173359, "learning_rate": 2.4183157028846342e-05, "lm_loss": 0.4209, "loss": 0.4209, "step": 3513 }, { "epoch": 1.5751372754099586, "learning_rate": 2.4171060055903084e-05, "lm_loss": 0.6506, "loss": 0.6506, "step": 3514 }, { "epoch": 1.5755855216465577, "learning_rate": 2.4158963277257342e-05, "lm_loss": 0.3964, "loss": 0.3964, "step": 3515 }, { "epoch": 1.5760337678831573, "learning_rate": 2.4146866695744516e-05, "lm_loss": 0.5543, "loss": 0.5543, "step": 3516 }, { "epoch": 1.5764820141197564, "learning_rate": 2.4134770314199963e-05, "lm_loss": 0.8644, "loss": 0.8644, "step": 3517 }, { "epoch": 1.5769302603563558, "learning_rate": 2.4122674135458967e-05, "lm_loss": 0.6249, "loss": 0.6249, "step": 3518 }, { "epoch": 1.577378506592955, "learning_rate": 2.411057816235681e-05, "lm_loss": 0.6401, "loss": 0.6401, "step": 3519 }, { "epoch": 1.5778267528295542, "learning_rate": 2.4098482397728688e-05, "lm_loss": 0.4769, "loss": 0.4769, "step": 3520 }, { "epoch": 1.5782749990661538, "learning_rate": 2.408638684440977e-05, "lm_loss": 0.573, "loss": 0.573, "step": 3521 }, { "epoch": 1.5787232453027529, "learning_rate": 2.4074291505235156e-05, "lm_loss": 0.8565, "loss": 0.8565, "step": 3522 }, { "epoch": 1.5791714915393524, "learning_rate": 2.406219638303991e-05, "lm_loss": 0.5226, "loss": 0.5226, "step": 3523 }, { "epoch": 1.5796197377759515, "learning_rate": 2.4050101480659035e-05, "lm_loss": 1.0238, "loss": 1.0238, "step": 3524 }, { "epoch": 1.5800679840125509, "learning_rate": 2.40380068009275e-05, "lm_loss": 0.4814, "loss": 0.4814, "step": 3525 }, { "epoch": 1.5805162302491502, "learning_rate": 2.40259123466802e-05, "lm_loss": 0.8666, "loss": 0.8666, "step": 3526 }, { "epoch": 1.5809644764857496, "learning_rate": 2.401381812075198e-05, "lm_loss": 1.0711, "loss": 1.0711, "step": 3527 }, { "epoch": 1.581412722722349, "learning_rate": 2.4001724125977665e-05, "lm_loss": 0.6838, "loss": 0.6838, "step": 3528 }, { "epoch": 1.581860968958948, "learning_rate": 2.3989630365191977e-05, "lm_loss": 0.5884, "loss": 0.5884, "step": 3529 }, { "epoch": 1.5823092151955476, "learning_rate": 2.397753684122962e-05, "lm_loss": 0.5175, "loss": 0.5175, "step": 3530 }, { "epoch": 1.5827574614321467, "learning_rate": 2.396544355692522e-05, "lm_loss": 0.6843, "loss": 0.6843, "step": 3531 }, { "epoch": 1.583205707668746, "learning_rate": 2.3953350515113356e-05, "lm_loss": 0.8427, "loss": 0.8427, "step": 3532 }, { "epoch": 1.5836539539053454, "learning_rate": 2.3941257718628556e-05, "lm_loss": 0.3793, "loss": 0.3793, "step": 3533 }, { "epoch": 1.5841022001419447, "learning_rate": 2.392916517030528e-05, "lm_loss": 0.6274, "loss": 0.6274, "step": 3534 }, { "epoch": 1.584550446378544, "learning_rate": 2.3917072872977924e-05, "lm_loss": 0.6796, "loss": 0.6796, "step": 3535 }, { "epoch": 1.5849986926151431, "learning_rate": 2.3904980829480843e-05, "lm_loss": 0.9958, "loss": 0.9958, "step": 3536 }, { "epoch": 1.5854469388517427, "learning_rate": 2.3892889042648332e-05, "lm_loss": 0.5907, "loss": 0.5907, "step": 3537 }, { "epoch": 1.5858951850883418, "learning_rate": 2.388079751531461e-05, "lm_loss": 0.4489, "loss": 0.4489, "step": 3538 }, { "epoch": 1.5863434313249412, "learning_rate": 2.3868706250313845e-05, "lm_loss": 0.898, "loss": 0.898, "step": 3539 }, { "epoch": 1.5867916775615405, "learning_rate": 2.385661525048014e-05, "lm_loss": 0.5003, "loss": 0.5003, "step": 3540 }, { "epoch": 1.5872399237981398, "learning_rate": 2.3844524518647544e-05, "lm_loss": 0.602, "loss": 0.602, "step": 3541 }, { "epoch": 1.5876881700347392, "learning_rate": 2.383243405765003e-05, "lm_loss": 0.525, "loss": 0.525, "step": 3542 }, { "epoch": 1.5881364162713383, "learning_rate": 2.382034387032152e-05, "lm_loss": 0.5536, "loss": 0.5536, "step": 3543 }, { "epoch": 1.5885846625079378, "learning_rate": 2.3808253959495848e-05, "lm_loss": 0.4851, "loss": 0.4851, "step": 3544 }, { "epoch": 1.589032908744537, "learning_rate": 2.3796164328006827e-05, "lm_loss": 0.5747, "loss": 0.5747, "step": 3545 }, { "epoch": 1.5894811549811363, "learning_rate": 2.3784074978688173e-05, "lm_loss": 0.5567, "loss": 0.5567, "step": 3546 }, { "epoch": 1.5899294012177356, "learning_rate": 2.3771985914373538e-05, "lm_loss": 0.468, "loss": 0.468, "step": 3547 }, { "epoch": 1.590377647454335, "learning_rate": 2.3759897137896508e-05, "lm_loss": 0.5913, "loss": 0.5913, "step": 3548 }, { "epoch": 1.5908258936909343, "learning_rate": 2.3747808652090607e-05, "lm_loss": 0.5626, "loss": 0.5626, "step": 3549 }, { "epoch": 1.5912741399275334, "learning_rate": 2.373572045978929e-05, "lm_loss": 0.5437, "loss": 0.5437, "step": 3550 }, { "epoch": 1.591722386164133, "learning_rate": 2.3723632563825945e-05, "lm_loss": 0.5255, "loss": 0.5255, "step": 3551 }, { "epoch": 1.592170632400732, "learning_rate": 2.3711544967033875e-05, "lm_loss": 0.4858, "loss": 0.4858, "step": 3552 }, { "epoch": 1.5926188786373314, "learning_rate": 2.3699457672246335e-05, "lm_loss": 0.4693, "loss": 0.4693, "step": 3553 }, { "epoch": 1.5930671248739308, "learning_rate": 2.36873706822965e-05, "lm_loss": 0.4935, "loss": 0.4935, "step": 3554 }, { "epoch": 1.59351537111053, "learning_rate": 2.367528400001747e-05, "lm_loss": 0.5174, "loss": 0.5174, "step": 3555 }, { "epoch": 1.5939636173471294, "learning_rate": 2.366319762824228e-05, "lm_loss": 0.6687, "loss": 0.6687, "step": 3556 }, { "epoch": 1.5944118635837286, "learning_rate": 2.3651111569803876e-05, "lm_loss": 0.495, "loss": 0.495, "step": 3557 }, { "epoch": 1.594860109820328, "learning_rate": 2.3639025827535157e-05, "lm_loss": 0.4439, "loss": 0.4439, "step": 3558 }, { "epoch": 1.5953083560569272, "learning_rate": 2.362694040426893e-05, "lm_loss": 0.4306, "loss": 0.4306, "step": 3559 }, { "epoch": 1.5957566022935266, "learning_rate": 2.361485530283792e-05, "lm_loss": 0.541, "loss": 0.541, "step": 3560 }, { "epoch": 1.596204848530126, "learning_rate": 2.3602770526074804e-05, "lm_loss": 0.6204, "loss": 0.6204, "step": 3561 }, { "epoch": 1.5966530947667252, "learning_rate": 2.3590686076812145e-05, "lm_loss": 0.5299, "loss": 0.5299, "step": 3562 }, { "epoch": 1.5971013410033246, "learning_rate": 2.3578601957882473e-05, "lm_loss": 0.4536, "loss": 0.4536, "step": 3563 }, { "epoch": 1.5975495872399237, "learning_rate": 2.356651817211821e-05, "lm_loss": 0.6335, "loss": 0.6335, "step": 3564 }, { "epoch": 1.5979978334765232, "learning_rate": 2.35544347223517e-05, "lm_loss": 0.7326, "loss": 0.7326, "step": 3565 }, { "epoch": 1.5984460797131224, "learning_rate": 2.354235161141522e-05, "lm_loss": 0.6131, "loss": 0.6131, "step": 3566 }, { "epoch": 1.5988943259497217, "learning_rate": 2.3530268842140973e-05, "lm_loss": 0.5535, "loss": 0.5535, "step": 3567 }, { "epoch": 1.599342572186321, "learning_rate": 2.3518186417361064e-05, "lm_loss": 0.6864, "loss": 0.6864, "step": 3568 }, { "epoch": 1.5997908184229204, "learning_rate": 2.350610433990752e-05, "lm_loss": 0.413, "loss": 0.413, "step": 3569 }, { "epoch": 1.6002390646595197, "learning_rate": 2.3494022612612306e-05, "lm_loss": 0.4335, "loss": 0.4335, "step": 3570 }, { "epoch": 1.6006873108961188, "learning_rate": 2.3481941238307277e-05, "lm_loss": 0.5468, "loss": 0.5468, "step": 3571 }, { "epoch": 1.6011355571327184, "learning_rate": 2.3469860219824234e-05, "lm_loss": 0.4528, "loss": 0.4528, "step": 3572 }, { "epoch": 1.6015838033693175, "learning_rate": 2.3457779559994874e-05, "lm_loss": 0.6042, "loss": 0.6042, "step": 3573 }, { "epoch": 1.6020320496059168, "learning_rate": 2.344569926165081e-05, "lm_loss": 0.3662, "loss": 0.3662, "step": 3574 }, { "epoch": 1.6024802958425162, "learning_rate": 2.343361932762359e-05, "lm_loss": 0.6007, "loss": 0.6007, "step": 3575 }, { "epoch": 1.6029285420791155, "learning_rate": 2.3421539760744653e-05, "lm_loss": 0.474, "loss": 0.474, "step": 3576 }, { "epoch": 1.6033767883157148, "learning_rate": 2.340946056384536e-05, "lm_loss": 0.6165, "loss": 0.6165, "step": 3577 }, { "epoch": 1.603825034552314, "learning_rate": 2.3397381739757e-05, "lm_loss": 0.6049, "loss": 0.6049, "step": 3578 }, { "epoch": 1.6042732807889135, "learning_rate": 2.338530329131075e-05, "lm_loss": 0.5146, "loss": 0.5146, "step": 3579 }, { "epoch": 1.6047215270255126, "learning_rate": 2.3373225221337705e-05, "lm_loss": 0.4552, "loss": 0.4552, "step": 3580 }, { "epoch": 1.605169773262112, "learning_rate": 2.3361147532668894e-05, "lm_loss": 0.4659, "loss": 0.4659, "step": 3581 }, { "epoch": 1.6056180194987113, "learning_rate": 2.334907022813523e-05, "lm_loss": 0.479, "loss": 0.479, "step": 3582 }, { "epoch": 1.6060662657353106, "learning_rate": 2.3336993310567555e-05, "lm_loss": 0.5605, "loss": 0.5605, "step": 3583 }, { "epoch": 1.60651451197191, "learning_rate": 2.3324916782796602e-05, "lm_loss": 0.4589, "loss": 0.4589, "step": 3584 }, { "epoch": 1.606962758208509, "learning_rate": 2.3312840647653017e-05, "lm_loss": 0.8018, "loss": 0.8018, "step": 3585 }, { "epoch": 1.6074110044451086, "learning_rate": 2.330076490796737e-05, "lm_loss": 0.8498, "loss": 0.8498, "step": 3586 }, { "epoch": 1.6078592506817078, "learning_rate": 2.3288689566570123e-05, "lm_loss": 0.5437, "loss": 0.5437, "step": 3587 }, { "epoch": 1.608307496918307, "learning_rate": 2.327661462629165e-05, "lm_loss": 0.412, "loss": 0.412, "step": 3588 }, { "epoch": 1.6087557431549064, "learning_rate": 2.3264540089962214e-05, "lm_loss": 0.5532, "loss": 0.5532, "step": 3589 }, { "epoch": 1.6092039893915058, "learning_rate": 2.3252465960412016e-05, "lm_loss": 0.4319, "loss": 0.4319, "step": 3590 }, { "epoch": 1.609652235628105, "learning_rate": 2.324039224047115e-05, "lm_loss": 0.5784, "loss": 0.5784, "step": 3591 }, { "epoch": 1.6101004818647042, "learning_rate": 2.3228318932969598e-05, "lm_loss": 0.4976, "loss": 0.4976, "step": 3592 }, { "epoch": 1.6105487281013038, "learning_rate": 2.321624604073726e-05, "lm_loss": 0.8987, "loss": 0.8987, "step": 3593 }, { "epoch": 1.610996974337903, "learning_rate": 2.320417356660392e-05, "lm_loss": 0.8106, "loss": 0.8106, "step": 3594 }, { "epoch": 1.6114452205745022, "learning_rate": 2.3192101513399297e-05, "lm_loss": 0.7599, "loss": 0.7599, "step": 3595 }, { "epoch": 1.6118934668111016, "learning_rate": 2.3180029883952987e-05, "lm_loss": 0.6444, "loss": 0.6444, "step": 3596 }, { "epoch": 1.612341713047701, "learning_rate": 2.3167958681094488e-05, "lm_loss": 0.4014, "loss": 0.4014, "step": 3597 }, { "epoch": 1.6127899592843002, "learning_rate": 2.3155887907653202e-05, "lm_loss": 0.6173, "loss": 0.6173, "step": 3598 }, { "epoch": 1.6132382055208994, "learning_rate": 2.3143817566458437e-05, "lm_loss": 0.5773, "loss": 0.5773, "step": 3599 }, { "epoch": 1.613686451757499, "learning_rate": 2.3131747660339394e-05, "lm_loss": 0.485, "loss": 0.485, "step": 3600 }, { "epoch": 1.614134697994098, "learning_rate": 2.311967819212517e-05, "lm_loss": 0.4981, "loss": 0.4981, "step": 3601 }, { "epoch": 1.6145829442306974, "learning_rate": 2.3107609164644757e-05, "lm_loss": 0.5377, "loss": 0.5377, "step": 3602 }, { "epoch": 1.6150311904672967, "learning_rate": 2.3095540580727055e-05, "lm_loss": 0.532, "loss": 0.532, "step": 3603 }, { "epoch": 1.615479436703896, "learning_rate": 2.3083472443200845e-05, "lm_loss": 0.5866, "loss": 0.5866, "step": 3604 }, { "epoch": 1.6159276829404954, "learning_rate": 2.307140475489482e-05, "lm_loss": 0.4535, "loss": 0.4535, "step": 3605 }, { "epoch": 1.6163759291770945, "learning_rate": 2.305933751863754e-05, "lm_loss": 0.6476, "loss": 0.6476, "step": 3606 }, { "epoch": 1.616824175413694, "learning_rate": 2.3047270737257494e-05, "lm_loss": 0.3391, "loss": 0.3391, "step": 3607 }, { "epoch": 1.6172724216502932, "learning_rate": 2.3035204413583047e-05, "lm_loss": 0.4854, "loss": 0.4854, "step": 3608 }, { "epoch": 1.6177206678868925, "learning_rate": 2.302313855044246e-05, "lm_loss": 0.6555, "loss": 0.6555, "step": 3609 }, { "epoch": 1.6181689141234918, "learning_rate": 2.3011073150663875e-05, "lm_loss": 0.5797, "loss": 0.5797, "step": 3610 }, { "epoch": 1.6186171603600912, "learning_rate": 2.2999008217075344e-05, "lm_loss": 0.5474, "loss": 0.5474, "step": 3611 }, { "epoch": 1.6190654065966905, "learning_rate": 2.2986943752504795e-05, "lm_loss": 0.4044, "loss": 0.4044, "step": 3612 }, { "epoch": 1.6195136528332896, "learning_rate": 2.2974879759780048e-05, "lm_loss": 1.0046, "loss": 1.0046, "step": 3613 }, { "epoch": 1.6199618990698892, "learning_rate": 2.2962816241728817e-05, "lm_loss": 0.5593, "loss": 0.5593, "step": 3614 }, { "epoch": 1.6204101453064883, "learning_rate": 2.295075320117871e-05, "lm_loss": 0.5258, "loss": 0.5258, "step": 3615 }, { "epoch": 1.6208583915430876, "learning_rate": 2.29386906409572e-05, "lm_loss": 0.5663, "loss": 0.5663, "step": 3616 }, { "epoch": 1.621306637779687, "learning_rate": 2.2926628563891685e-05, "lm_loss": 0.4075, "loss": 0.4075, "step": 3617 }, { "epoch": 1.6217548840162863, "learning_rate": 2.291456697280942e-05, "lm_loss": 0.4831, "loss": 0.4831, "step": 3618 }, { "epoch": 1.6222031302528857, "learning_rate": 2.2902505870537546e-05, "lm_loss": 0.5728, "loss": 0.5728, "step": 3619 }, { "epoch": 1.6226513764894848, "learning_rate": 2.2890445259903112e-05, "lm_loss": 0.5375, "loss": 0.5375, "step": 3620 }, { "epoch": 1.6230996227260843, "learning_rate": 2.287838514373303e-05, "lm_loss": 0.6644, "loss": 0.6644, "step": 3621 }, { "epoch": 1.6235478689626834, "learning_rate": 2.2866325524854103e-05, "lm_loss": 0.4986, "loss": 0.4986, "step": 3622 }, { "epoch": 1.6239961151992828, "learning_rate": 2.2854266406093022e-05, "lm_loss": 0.5452, "loss": 0.5452, "step": 3623 }, { "epoch": 1.6244443614358821, "learning_rate": 2.2842207790276355e-05, "lm_loss": 0.5325, "loss": 0.5325, "step": 3624 }, { "epoch": 1.6248926076724814, "learning_rate": 2.2830149680230545e-05, "lm_loss": 0.5326, "loss": 0.5326, "step": 3625 }, { "epoch": 1.6253408539090808, "learning_rate": 2.2818092078781944e-05, "lm_loss": 0.471, "loss": 0.471, "step": 3626 }, { "epoch": 1.62578910014568, "learning_rate": 2.2806034988756756e-05, "lm_loss": 0.5856, "loss": 0.5856, "step": 3627 }, { "epoch": 1.6262373463822795, "learning_rate": 2.279397841298108e-05, "lm_loss": 0.561, "loss": 0.561, "step": 3628 }, { "epoch": 1.6266855926188786, "learning_rate": 2.2781922354280888e-05, "lm_loss": 0.9647, "loss": 0.9647, "step": 3629 }, { "epoch": 1.627133838855478, "learning_rate": 2.2769866815482024e-05, "lm_loss": 0.6513, "loss": 0.6513, "step": 3630 }, { "epoch": 1.6275820850920772, "learning_rate": 2.2757811799410235e-05, "lm_loss": 0.5737, "loss": 0.5737, "step": 3631 }, { "epoch": 1.6280303313286766, "learning_rate": 2.274575730889112e-05, "lm_loss": 0.5053, "loss": 0.5053, "step": 3632 }, { "epoch": 1.628478577565276, "learning_rate": 2.273370334675017e-05, "lm_loss": 0.4607, "loss": 0.4607, "step": 3633 }, { "epoch": 1.628926823801875, "learning_rate": 2.2721649915812727e-05, "lm_loss": 0.5388, "loss": 0.5388, "step": 3634 }, { "epoch": 1.6293750700384746, "learning_rate": 2.270959701890405e-05, "lm_loss": 0.4859, "loss": 0.4859, "step": 3635 }, { "epoch": 1.6298233162750737, "learning_rate": 2.269754465884925e-05, "lm_loss": 0.5479, "loss": 0.5479, "step": 3636 }, { "epoch": 1.630271562511673, "learning_rate": 2.2685492838473303e-05, "lm_loss": 0.5697, "loss": 0.5697, "step": 3637 }, { "epoch": 1.6307198087482724, "learning_rate": 2.2673441560601076e-05, "lm_loss": 0.9372, "loss": 0.9372, "step": 3638 }, { "epoch": 1.6311680549848717, "learning_rate": 2.2661390828057294e-05, "lm_loss": 1.0276, "loss": 1.0276, "step": 3639 }, { "epoch": 1.631616301221471, "learning_rate": 2.2649340643666567e-05, "lm_loss": 0.5567, "loss": 0.5567, "step": 3640 }, { "epoch": 1.6320645474580702, "learning_rate": 2.263729101025337e-05, "lm_loss": 0.3519, "loss": 0.3519, "step": 3641 }, { "epoch": 1.6325127936946697, "learning_rate": 2.2625241930642045e-05, "lm_loss": 0.5129, "loss": 0.5129, "step": 3642 }, { "epoch": 1.6329610399312688, "learning_rate": 2.2613193407656808e-05, "lm_loss": 0.6094, "loss": 0.6094, "step": 3643 }, { "epoch": 1.6334092861678682, "learning_rate": 2.2601145444121752e-05, "lm_loss": 0.4259, "loss": 0.4259, "step": 3644 }, { "epoch": 1.6338575324044675, "learning_rate": 2.2589098042860838e-05, "lm_loss": 0.3905, "loss": 0.3905, "step": 3645 }, { "epoch": 1.6343057786410669, "learning_rate": 2.257705120669788e-05, "lm_loss": 0.6272, "loss": 0.6272, "step": 3646 }, { "epoch": 1.6347540248776662, "learning_rate": 2.2565004938456567e-05, "lm_loss": 0.5057, "loss": 0.5057, "step": 3647 }, { "epoch": 1.6352022711142653, "learning_rate": 2.2552959240960467e-05, "lm_loss": 0.513, "loss": 0.513, "step": 3648 }, { "epoch": 1.6356505173508649, "learning_rate": 2.2540914117032995e-05, "lm_loss": 0.5635, "loss": 0.5635, "step": 3649 }, { "epoch": 1.636098763587464, "learning_rate": 2.252886956949745e-05, "lm_loss": 0.4806, "loss": 0.4806, "step": 3650 }, { "epoch": 1.6365470098240633, "learning_rate": 2.2516825601176975e-05, "lm_loss": 0.5194, "loss": 0.5194, "step": 3651 }, { "epoch": 1.6369952560606627, "learning_rate": 2.250478221489459e-05, "lm_loss": 0.5271, "loss": 0.5271, "step": 3652 }, { "epoch": 1.637443502297262, "learning_rate": 2.249273941347319e-05, "lm_loss": 0.6705, "loss": 0.6705, "step": 3653 }, { "epoch": 1.6378917485338613, "learning_rate": 2.248069719973551e-05, "lm_loss": 0.5933, "loss": 0.5933, "step": 3654 }, { "epoch": 1.6383399947704604, "learning_rate": 2.246865557650416e-05, "lm_loss": 0.5022, "loss": 0.5022, "step": 3655 }, { "epoch": 1.63878824100706, "learning_rate": 2.245661454660162e-05, "lm_loss": 0.5465, "loss": 0.5465, "step": 3656 }, { "epoch": 1.6392364872436591, "learning_rate": 2.2444574112850204e-05, "lm_loss": 0.5811, "loss": 0.5811, "step": 3657 }, { "epoch": 1.6396847334802584, "learning_rate": 2.243253427807211e-05, "lm_loss": 0.6141, "loss": 0.6141, "step": 3658 }, { "epoch": 1.6401329797168578, "learning_rate": 2.242049504508938e-05, "lm_loss": 0.5081, "loss": 0.5081, "step": 3659 }, { "epoch": 1.6405812259534571, "learning_rate": 2.2408456416723937e-05, "lm_loss": 0.9373, "loss": 0.9373, "step": 3660 }, { "epoch": 1.6410294721900565, "learning_rate": 2.239641839579753e-05, "lm_loss": 0.4116, "loss": 0.4116, "step": 3661 }, { "epoch": 1.6414777184266556, "learning_rate": 2.2384380985131807e-05, "lm_loss": 0.5807, "loss": 0.5807, "step": 3662 }, { "epoch": 1.6419259646632551, "learning_rate": 2.2372344187548236e-05, "lm_loss": 0.4362, "loss": 0.4362, "step": 3663 }, { "epoch": 1.6423742108998542, "learning_rate": 2.236030800586815e-05, "lm_loss": 0.5312, "loss": 0.5312, "step": 3664 }, { "epoch": 1.6428224571364536, "learning_rate": 2.234827244291276e-05, "lm_loss": 0.5641, "loss": 0.5641, "step": 3665 }, { "epoch": 1.643270703373053, "learning_rate": 2.23362375015031e-05, "lm_loss": 0.5884, "loss": 0.5884, "step": 3666 }, { "epoch": 1.6437189496096523, "learning_rate": 2.2324203184460073e-05, "lm_loss": 0.5747, "loss": 0.5747, "step": 3667 }, { "epoch": 1.6441671958462516, "learning_rate": 2.231216949460445e-05, "lm_loss": 0.4537, "loss": 0.4537, "step": 3668 }, { "epoch": 1.6446154420828507, "learning_rate": 2.230013643475683e-05, "lm_loss": 0.6966, "loss": 0.6966, "step": 3669 }, { "epoch": 1.6450636883194503, "learning_rate": 2.2288104007737665e-05, "lm_loss": 0.5177, "loss": 0.5177, "step": 3670 }, { "epoch": 1.6455119345560494, "learning_rate": 2.2276072216367287e-05, "lm_loss": 0.4872, "loss": 0.4872, "step": 3671 }, { "epoch": 1.6459601807926487, "learning_rate": 2.2264041063465856e-05, "lm_loss": 0.5516, "loss": 0.5516, "step": 3672 }, { "epoch": 1.646408427029248, "learning_rate": 2.2252010551853384e-05, "lm_loss": 0.4992, "loss": 0.4992, "step": 3673 }, { "epoch": 1.6468566732658474, "learning_rate": 2.223998068434974e-05, "lm_loss": 0.5474, "loss": 0.5474, "step": 3674 }, { "epoch": 1.6473049195024467, "learning_rate": 2.2227951463774632e-05, "lm_loss": 0.3922, "loss": 0.3922, "step": 3675 }, { "epoch": 1.6477531657390458, "learning_rate": 2.221592289294762e-05, "lm_loss": 0.6542, "loss": 0.6542, "step": 3676 }, { "epoch": 1.6482014119756454, "learning_rate": 2.2203894974688124e-05, "lm_loss": 0.8986, "loss": 0.8986, "step": 3677 }, { "epoch": 1.6486496582122445, "learning_rate": 2.21918677118154e-05, "lm_loss": 0.4343, "loss": 0.4343, "step": 3678 }, { "epoch": 1.6490979044488439, "learning_rate": 2.217984110714853e-05, "lm_loss": 0.5059, "loss": 0.5059, "step": 3679 }, { "epoch": 1.6495461506854432, "learning_rate": 2.216781516350649e-05, "lm_loss": 0.5207, "loss": 0.5207, "step": 3680 }, { "epoch": 1.6499943969220425, "learning_rate": 2.2155789883708067e-05, "lm_loss": 0.4651, "loss": 0.4651, "step": 3681 }, { "epoch": 1.6504426431586419, "learning_rate": 2.2143765270571896e-05, "lm_loss": 0.4772, "loss": 0.4772, "step": 3682 }, { "epoch": 1.650890889395241, "learning_rate": 2.213174132691646e-05, "lm_loss": 0.557, "loss": 0.557, "step": 3683 }, { "epoch": 1.6513391356318405, "learning_rate": 2.211971805556008e-05, "lm_loss": 0.4989, "loss": 0.4989, "step": 3684 }, { "epoch": 1.6517873818684397, "learning_rate": 2.210769545932093e-05, "lm_loss": 0.5434, "loss": 0.5434, "step": 3685 }, { "epoch": 1.652235628105039, "learning_rate": 2.209567354101702e-05, "lm_loss": 0.4918, "loss": 0.4918, "step": 3686 }, { "epoch": 1.6526838743416383, "learning_rate": 2.2083652303466195e-05, "lm_loss": 0.8436, "loss": 0.8436, "step": 3687 }, { "epoch": 1.6531321205782377, "learning_rate": 2.2071631749486145e-05, "lm_loss": 0.7716, "loss": 0.7716, "step": 3688 }, { "epoch": 1.653580366814837, "learning_rate": 2.2059611881894406e-05, "lm_loss": 0.882, "loss": 0.882, "step": 3689 }, { "epoch": 1.6540286130514361, "learning_rate": 2.204759270350835e-05, "lm_loss": 0.5255, "loss": 0.5255, "step": 3690 }, { "epoch": 1.6544768592880357, "learning_rate": 2.2035574217145185e-05, "lm_loss": 0.6368, "loss": 0.6368, "step": 3691 }, { "epoch": 1.6549251055246348, "learning_rate": 2.202355642562195e-05, "lm_loss": 0.4499, "loss": 0.4499, "step": 3692 }, { "epoch": 1.6553733517612341, "learning_rate": 2.2011539331755535e-05, "lm_loss": 0.5178, "loss": 0.5178, "step": 3693 }, { "epoch": 1.6558215979978335, "learning_rate": 2.1999522938362657e-05, "lm_loss": 0.5336, "loss": 0.5336, "step": 3694 }, { "epoch": 1.6562698442344328, "learning_rate": 2.198750724825987e-05, "lm_loss": 0.5531, "loss": 0.5531, "step": 3695 }, { "epoch": 1.6567180904710321, "learning_rate": 2.1975492264263564e-05, "lm_loss": 0.6417, "loss": 0.6417, "step": 3696 }, { "epoch": 1.6571663367076312, "learning_rate": 2.1963477989189963e-05, "lm_loss": 0.5085, "loss": 0.5085, "step": 3697 }, { "epoch": 1.6576145829442308, "learning_rate": 2.1951464425855138e-05, "lm_loss": 0.8489, "loss": 0.8489, "step": 3698 }, { "epoch": 1.65806282918083, "learning_rate": 2.193945157707497e-05, "lm_loss": 0.6078, "loss": 0.6078, "step": 3699 }, { "epoch": 1.6585110754174293, "learning_rate": 2.1927439445665192e-05, "lm_loss": 0.4277, "loss": 0.4277, "step": 3700 }, { "epoch": 1.6589593216540286, "learning_rate": 2.191542803444135e-05, "lm_loss": 0.5332, "loss": 0.5332, "step": 3701 }, { "epoch": 1.659407567890628, "learning_rate": 2.190341734621884e-05, "lm_loss": 0.4807, "loss": 0.4807, "step": 3702 }, { "epoch": 1.6598558141272273, "learning_rate": 2.189140738381288e-05, "lm_loss": 0.6298, "loss": 0.6298, "step": 3703 }, { "epoch": 1.6603040603638264, "learning_rate": 2.1879398150038504e-05, "lm_loss": 0.4732, "loss": 0.4732, "step": 3704 }, { "epoch": 1.660752306600426, "learning_rate": 2.186738964771061e-05, "lm_loss": 0.7074, "loss": 0.7074, "step": 3705 }, { "epoch": 1.661200552837025, "learning_rate": 2.1855381879643887e-05, "lm_loss": 0.673, "loss": 0.673, "step": 3706 }, { "epoch": 1.6616487990736244, "learning_rate": 2.1843374848652888e-05, "lm_loss": 0.6723, "loss": 0.6723, "step": 3707 }, { "epoch": 1.6620970453102237, "learning_rate": 2.183136855755196e-05, "lm_loss": 0.4172, "loss": 0.4172, "step": 3708 }, { "epoch": 1.662545291546823, "learning_rate": 2.181936300915529e-05, "lm_loss": 0.817, "loss": 0.817, "step": 3709 }, { "epoch": 1.6629935377834224, "learning_rate": 2.1807358206276903e-05, "lm_loss": 0.6571, "loss": 0.6571, "step": 3710 }, { "epoch": 1.6634417840200215, "learning_rate": 2.179535415173063e-05, "lm_loss": 0.9345, "loss": 0.9345, "step": 3711 }, { "epoch": 1.663890030256621, "learning_rate": 2.1783350848330126e-05, "lm_loss": 0.4129, "loss": 0.4129, "step": 3712 }, { "epoch": 1.6643382764932202, "learning_rate": 2.1771348298888895e-05, "lm_loss": 0.5989, "loss": 0.5989, "step": 3713 }, { "epoch": 1.6647865227298195, "learning_rate": 2.1759346506220235e-05, "lm_loss": 0.5961, "loss": 0.5961, "step": 3714 }, { "epoch": 1.6652347689664189, "learning_rate": 2.174734547313728e-05, "lm_loss": 0.5583, "loss": 0.5583, "step": 3715 }, { "epoch": 1.6656830152030182, "learning_rate": 2.1735345202452995e-05, "lm_loss": 0.5012, "loss": 0.5012, "step": 3716 }, { "epoch": 1.6661312614396175, "learning_rate": 2.1723345696980145e-05, "lm_loss": 0.487, "loss": 0.487, "step": 3717 }, { "epoch": 1.6665795076762167, "learning_rate": 2.1711346959531343e-05, "lm_loss": 0.5398, "loss": 0.5398, "step": 3718 }, { "epoch": 1.6670277539128162, "learning_rate": 2.1699348992918996e-05, "lm_loss": 0.5981, "loss": 0.5981, "step": 3719 }, { "epoch": 1.6674760001494153, "learning_rate": 2.1687351799955335e-05, "lm_loss": 0.5426, "loss": 0.5426, "step": 3720 }, { "epoch": 1.6679242463860147, "learning_rate": 2.167535538345242e-05, "lm_loss": 0.5271, "loss": 0.5271, "step": 3721 }, { "epoch": 1.668372492622614, "learning_rate": 2.166335974622213e-05, "lm_loss": 0.4641, "loss": 0.4641, "step": 3722 }, { "epoch": 1.6688207388592133, "learning_rate": 2.1651364891076135e-05, "lm_loss": 0.53, "loss": 0.53, "step": 3723 }, { "epoch": 1.6692689850958127, "learning_rate": 2.1639370820825974e-05, "lm_loss": 0.5478, "loss": 0.5478, "step": 3724 }, { "epoch": 1.6697172313324118, "learning_rate": 2.1627377538282952e-05, "lm_loss": 0.4234, "loss": 0.4234, "step": 3725 }, { "epoch": 1.6701654775690113, "learning_rate": 2.1615385046258206e-05, "lm_loss": 0.499, "loss": 0.499, "step": 3726 }, { "epoch": 1.6706137238056105, "learning_rate": 2.1603393347562695e-05, "lm_loss": 0.505, "loss": 0.505, "step": 3727 }, { "epoch": 1.6710619700422098, "learning_rate": 2.1591402445007184e-05, "lm_loss": 0.5035, "loss": 0.5035, "step": 3728 }, { "epoch": 1.6715102162788091, "learning_rate": 2.157941234140225e-05, "lm_loss": 0.5542, "loss": 0.5542, "step": 3729 }, { "epoch": 1.6719584625154085, "learning_rate": 2.1567423039558294e-05, "lm_loss": 0.559, "loss": 0.559, "step": 3730 }, { "epoch": 1.6724067087520078, "learning_rate": 2.155543454228552e-05, "lm_loss": 0.5449, "loss": 0.5449, "step": 3731 }, { "epoch": 1.672854954988607, "learning_rate": 2.1543446852393927e-05, "lm_loss": 0.4788, "loss": 0.4788, "step": 3732 }, { "epoch": 1.6733032012252065, "learning_rate": 2.153145997269337e-05, "lm_loss": 0.5007, "loss": 0.5007, "step": 3733 }, { "epoch": 1.6737514474618056, "learning_rate": 2.1519473905993473e-05, "lm_loss": 0.5618, "loss": 0.5618, "step": 3734 }, { "epoch": 1.6741996936984052, "learning_rate": 2.150748865510369e-05, "lm_loss": 0.8971, "loss": 0.8971, "step": 3735 }, { "epoch": 1.6746479399350043, "learning_rate": 2.1495504222833273e-05, "lm_loss": 0.5131, "loss": 0.5131, "step": 3736 }, { "epoch": 1.6750961861716036, "learning_rate": 2.1483520611991276e-05, "lm_loss": 0.6698, "loss": 0.6698, "step": 3737 }, { "epoch": 1.675544432408203, "learning_rate": 2.147153782538659e-05, "lm_loss": 0.5017, "loss": 0.5017, "step": 3738 }, { "epoch": 1.675992678644802, "learning_rate": 2.145955586582788e-05, "lm_loss": 0.5713, "loss": 0.5713, "step": 3739 }, { "epoch": 1.6764409248814016, "learning_rate": 2.1447574736123636e-05, "lm_loss": 0.4704, "loss": 0.4704, "step": 3740 }, { "epoch": 1.6768891711180007, "learning_rate": 2.1435594439082136e-05, "lm_loss": 0.7118, "loss": 0.7118, "step": 3741 }, { "epoch": 1.6773374173546003, "learning_rate": 2.1423614977511486e-05, "lm_loss": 0.6904, "loss": 0.6904, "step": 3742 }, { "epoch": 1.6777856635911994, "learning_rate": 2.1411636354219595e-05, "lm_loss": 0.6985, "loss": 0.6985, "step": 3743 }, { "epoch": 1.6782339098277987, "learning_rate": 2.139965857201415e-05, "lm_loss": 0.4805, "loss": 0.4805, "step": 3744 }, { "epoch": 1.678682156064398, "learning_rate": 2.138768163370266e-05, "lm_loss": 0.5111, "loss": 0.5111, "step": 3745 }, { "epoch": 1.6791304023009972, "learning_rate": 2.1375705542092433e-05, "lm_loss": 0.4714, "loss": 0.4714, "step": 3746 }, { "epoch": 1.6795786485375968, "learning_rate": 2.136373029999058e-05, "lm_loss": 0.4725, "loss": 0.4725, "step": 3747 }, { "epoch": 1.6800268947741959, "learning_rate": 2.1351755910204012e-05, "lm_loss": 0.6134, "loss": 0.6134, "step": 3748 }, { "epoch": 1.6804751410107954, "learning_rate": 2.133978237553943e-05, "lm_loss": 0.5808, "loss": 0.5808, "step": 3749 }, { "epoch": 1.6809233872473945, "learning_rate": 2.132780969880335e-05, "lm_loss": 0.4331, "loss": 0.4331, "step": 3750 }, { "epoch": 1.6813716334839939, "learning_rate": 2.1315837882802083e-05, "lm_loss": 0.4681, "loss": 0.4681, "step": 3751 }, { "epoch": 1.6818198797205932, "learning_rate": 2.130386693034174e-05, "lm_loss": 0.5704, "loss": 0.5704, "step": 3752 }, { "epoch": 1.6822681259571923, "learning_rate": 2.129189684422822e-05, "lm_loss": 0.5474, "loss": 0.5474, "step": 3753 }, { "epoch": 1.6827163721937919, "learning_rate": 2.1279927627267217e-05, "lm_loss": 0.5417, "loss": 0.5417, "step": 3754 }, { "epoch": 1.683164618430391, "learning_rate": 2.126795928226424e-05, "lm_loss": 0.4949, "loss": 0.4949, "step": 3755 }, { "epoch": 1.6836128646669906, "learning_rate": 2.1255991812024584e-05, "lm_loss": 0.4803, "loss": 0.4803, "step": 3756 }, { "epoch": 1.6840611109035897, "learning_rate": 2.1244025219353324e-05, "lm_loss": 0.495, "loss": 0.495, "step": 3757 }, { "epoch": 1.684509357140189, "learning_rate": 2.1232059507055353e-05, "lm_loss": 0.5603, "loss": 0.5603, "step": 3758 }, { "epoch": 1.6849576033767883, "learning_rate": 2.1220094677935336e-05, "lm_loss": 0.5849, "loss": 0.5849, "step": 3759 }, { "epoch": 1.6854058496133875, "learning_rate": 2.120813073479776e-05, "lm_loss": 0.4949, "loss": 0.4949, "step": 3760 }, { "epoch": 1.685854095849987, "learning_rate": 2.119616768044688e-05, "lm_loss": 0.5092, "loss": 0.5092, "step": 3761 }, { "epoch": 1.6863023420865861, "learning_rate": 2.1184205517686744e-05, "lm_loss": 0.9576, "loss": 0.9576, "step": 3762 }, { "epoch": 1.6867505883231857, "learning_rate": 2.1172244249321204e-05, "lm_loss": 0.5536, "loss": 0.5536, "step": 3763 }, { "epoch": 1.6871988345597848, "learning_rate": 2.1160283878153888e-05, "lm_loss": 0.5588, "loss": 0.5588, "step": 3764 }, { "epoch": 1.6876470807963841, "learning_rate": 2.1148324406988223e-05, "lm_loss": 0.41, "loss": 0.41, "step": 3765 }, { "epoch": 1.6880953270329835, "learning_rate": 2.1136365838627417e-05, "lm_loss": 0.5797, "loss": 0.5797, "step": 3766 }, { "epoch": 1.6885435732695826, "learning_rate": 2.112440817587448e-05, "lm_loss": 0.5136, "loss": 0.5136, "step": 3767 }, { "epoch": 1.6889918195061822, "learning_rate": 2.1112451421532188e-05, "lm_loss": 0.5415, "loss": 0.5415, "step": 3768 }, { "epoch": 1.6894400657427813, "learning_rate": 2.110049557840314e-05, "lm_loss": 0.6162, "loss": 0.6162, "step": 3769 }, { "epoch": 1.6898883119793808, "learning_rate": 2.1088540649289686e-05, "lm_loss": 0.4221, "loss": 0.4221, "step": 3770 }, { "epoch": 1.69033655821598, "learning_rate": 2.1076586636993973e-05, "lm_loss": 0.4922, "loss": 0.4922, "step": 3771 }, { "epoch": 1.6907848044525793, "learning_rate": 2.106463354431794e-05, "lm_loss": 0.4998, "loss": 0.4998, "step": 3772 }, { "epoch": 1.6912330506891786, "learning_rate": 2.1052681374063306e-05, "lm_loss": 0.3861, "loss": 0.3861, "step": 3773 }, { "epoch": 1.6916812969257777, "learning_rate": 2.104073012903156e-05, "lm_loss": 0.4424, "loss": 0.4424, "step": 3774 }, { "epoch": 1.6921295431623773, "learning_rate": 2.1028779812024005e-05, "lm_loss": 0.4786, "loss": 0.4786, "step": 3775 }, { "epoch": 1.6925777893989764, "learning_rate": 2.10168304258417e-05, "lm_loss": 0.872, "loss": 0.872, "step": 3776 }, { "epoch": 1.693026035635576, "learning_rate": 2.100488197328549e-05, "lm_loss": 0.8978, "loss": 0.8978, "step": 3777 }, { "epoch": 1.693474281872175, "learning_rate": 2.0992934457156018e-05, "lm_loss": 0.6049, "loss": 0.6049, "step": 3778 }, { "epoch": 1.6939225281087744, "learning_rate": 2.0980987880253683e-05, "lm_loss": 0.5714, "loss": 0.5714, "step": 3779 }, { "epoch": 1.6943707743453738, "learning_rate": 2.0969042245378686e-05, "lm_loss": 0.5, "loss": 0.5, "step": 3780 }, { "epoch": 1.6948190205819729, "learning_rate": 2.0957097555330997e-05, "lm_loss": 0.4541, "loss": 0.4541, "step": 3781 }, { "epoch": 1.6952672668185724, "learning_rate": 2.0945153812910352e-05, "lm_loss": 0.5331, "loss": 0.5331, "step": 3782 }, { "epoch": 1.6957155130551715, "learning_rate": 2.0933211020916294e-05, "lm_loss": 0.4553, "loss": 0.4553, "step": 3783 }, { "epoch": 1.696163759291771, "learning_rate": 2.092126918214812e-05, "lm_loss": 0.8211, "loss": 0.8211, "step": 3784 }, { "epoch": 1.6966120055283702, "learning_rate": 2.090932829940491e-05, "lm_loss": 0.6453, "loss": 0.6453, "step": 3785 }, { "epoch": 1.6970602517649696, "learning_rate": 2.089738837548551e-05, "lm_loss": 0.3157, "loss": 0.3157, "step": 3786 }, { "epoch": 1.6975084980015689, "learning_rate": 2.0885449413188566e-05, "lm_loss": 0.6344, "loss": 0.6344, "step": 3787 }, { "epoch": 1.697956744238168, "learning_rate": 2.0873511415312485e-05, "lm_loss": 0.4745, "loss": 0.4745, "step": 3788 }, { "epoch": 1.6984049904747676, "learning_rate": 2.0861574384655448e-05, "lm_loss": 0.7458, "loss": 0.7458, "step": 3789 }, { "epoch": 1.6988532367113667, "learning_rate": 2.08496383240154e-05, "lm_loss": 0.7166, "loss": 0.7166, "step": 3790 }, { "epoch": 1.6993014829479662, "learning_rate": 2.0837703236190066e-05, "lm_loss": 0.5405, "loss": 0.5405, "step": 3791 }, { "epoch": 1.6997497291845653, "learning_rate": 2.082576912397695e-05, "lm_loss": 0.9021, "loss": 0.9021, "step": 3792 }, { "epoch": 1.7001979754211647, "learning_rate": 2.0813835990173326e-05, "lm_loss": 0.4351, "loss": 0.4351, "step": 3793 }, { "epoch": 1.700646221657764, "learning_rate": 2.080190383757622e-05, "lm_loss": 0.6552, "loss": 0.6552, "step": 3794 }, { "epoch": 1.7010944678943631, "learning_rate": 2.078997266898245e-05, "lm_loss": 0.4679, "loss": 0.4679, "step": 3795 }, { "epoch": 1.7015427141309627, "learning_rate": 2.07780424871886e-05, "lm_loss": 0.5706, "loss": 0.5706, "step": 3796 }, { "epoch": 1.7019909603675618, "learning_rate": 2.0766113294991014e-05, "lm_loss": 0.5624, "loss": 0.5624, "step": 3797 }, { "epoch": 1.7024392066041614, "learning_rate": 2.075418509518581e-05, "lm_loss": 0.5194, "loss": 0.5194, "step": 3798 }, { "epoch": 1.7028874528407605, "learning_rate": 2.0742257890568867e-05, "lm_loss": 0.4546, "loss": 0.4546, "step": 3799 }, { "epoch": 1.7033356990773598, "learning_rate": 2.073033168393584e-05, "lm_loss": 0.6597, "loss": 0.6597, "step": 3800 }, { "epoch": 1.7037839453139592, "learning_rate": 2.0718406478082147e-05, "lm_loss": 0.4598, "loss": 0.4598, "step": 3801 }, { "epoch": 1.7042321915505585, "learning_rate": 2.070648227580296e-05, "lm_loss": 0.4823, "loss": 0.4823, "step": 3802 }, { "epoch": 1.7046804377871578, "learning_rate": 2.069455907989324e-05, "lm_loss": 0.5879, "loss": 0.5879, "step": 3803 }, { "epoch": 1.705128684023757, "learning_rate": 2.0682636893147673e-05, "lm_loss": 0.3729, "loss": 0.3729, "step": 3804 }, { "epoch": 1.7055769302603565, "learning_rate": 2.0670715718360768e-05, "lm_loss": 0.7039, "loss": 0.7039, "step": 3805 }, { "epoch": 1.7060251764969556, "learning_rate": 2.0658795558326743e-05, "lm_loss": 0.4324, "loss": 0.4324, "step": 3806 }, { "epoch": 1.706473422733555, "learning_rate": 2.0646876415839595e-05, "lm_loss": 0.6271, "loss": 0.6271, "step": 3807 }, { "epoch": 1.7069216689701543, "learning_rate": 2.0634958293693092e-05, "lm_loss": 0.4821, "loss": 0.4821, "step": 3808 }, { "epoch": 1.7073699152067536, "learning_rate": 2.062304119468075e-05, "lm_loss": 0.5083, "loss": 0.5083, "step": 3809 }, { "epoch": 1.707818161443353, "learning_rate": 2.0611125121595854e-05, "lm_loss": 0.4214, "loss": 0.4214, "step": 3810 }, { "epoch": 1.708266407679952, "learning_rate": 2.0599210077231436e-05, "lm_loss": 0.4364, "loss": 0.4364, "step": 3811 }, { "epoch": 1.7087146539165516, "learning_rate": 2.0587296064380312e-05, "lm_loss": 0.457, "loss": 0.457, "step": 3812 }, { "epoch": 1.7091629001531508, "learning_rate": 2.0575383085835022e-05, "lm_loss": 0.5148, "loss": 0.5148, "step": 3813 }, { "epoch": 1.70961114638975, "learning_rate": 2.05634711443879e-05, "lm_loss": 0.5433, "loss": 0.5433, "step": 3814 }, { "epoch": 1.7100593926263494, "learning_rate": 2.0551560242831012e-05, "lm_loss": 0.4878, "loss": 0.4878, "step": 3815 }, { "epoch": 1.7105076388629488, "learning_rate": 2.0539650383956182e-05, "lm_loss": 0.4727, "loss": 0.4727, "step": 3816 }, { "epoch": 1.710955885099548, "learning_rate": 2.0527741570555002e-05, "lm_loss": 0.5171, "loss": 0.5171, "step": 3817 }, { "epoch": 1.7114041313361472, "learning_rate": 2.051583380541881e-05, "lm_loss": 0.5168, "loss": 0.5168, "step": 3818 }, { "epoch": 1.7118523775727468, "learning_rate": 2.0503927091338692e-05, "lm_loss": 0.3992, "loss": 0.3992, "step": 3819 }, { "epoch": 1.712300623809346, "learning_rate": 2.0492021431105506e-05, "lm_loss": 0.6577, "loss": 0.6577, "step": 3820 }, { "epoch": 1.7127488700459452, "learning_rate": 2.0480116827509852e-05, "lm_loss": 0.5025, "loss": 0.5025, "step": 3821 }, { "epoch": 1.7131971162825446, "learning_rate": 2.0468213283342066e-05, "lm_loss": 0.7774, "loss": 0.7774, "step": 3822 }, { "epoch": 1.713645362519144, "learning_rate": 2.0456310801392274e-05, "lm_loss": 0.905, "loss": 0.905, "step": 3823 }, { "epoch": 1.7140936087557432, "learning_rate": 2.0444409384450323e-05, "lm_loss": 0.7255, "loss": 0.7255, "step": 3824 }, { "epoch": 1.7145418549923424, "learning_rate": 2.043250903530582e-05, "lm_loss": 0.4407, "loss": 0.4407, "step": 3825 }, { "epoch": 1.714990101228942, "learning_rate": 2.0420609756748122e-05, "lm_loss": 0.5042, "loss": 0.5042, "step": 3826 }, { "epoch": 1.715438347465541, "learning_rate": 2.0408711551566327e-05, "lm_loss": 0.6125, "loss": 0.6125, "step": 3827 }, { "epoch": 1.7158865937021404, "learning_rate": 2.0396814422549297e-05, "lm_loss": 0.4862, "loss": 0.4862, "step": 3828 }, { "epoch": 1.7163348399387397, "learning_rate": 2.0384918372485626e-05, "lm_loss": 0.4122, "loss": 0.4122, "step": 3829 }, { "epoch": 1.716783086175339, "learning_rate": 2.0373023404163667e-05, "lm_loss": 0.6692, "loss": 0.6692, "step": 3830 }, { "epoch": 1.7172313324119384, "learning_rate": 2.03611295203715e-05, "lm_loss": 0.4916, "loss": 0.4916, "step": 3831 }, { "epoch": 1.7176795786485375, "learning_rate": 2.0349236723896983e-05, "lm_loss": 0.8173, "loss": 0.8173, "step": 3832 }, { "epoch": 1.718127824885137, "learning_rate": 2.0337345017527703e-05, "lm_loss": 0.5978, "loss": 0.5978, "step": 3833 }, { "epoch": 1.7185760711217362, "learning_rate": 2.032545440405098e-05, "lm_loss": 0.4493, "loss": 0.4493, "step": 3834 }, { "epoch": 1.7190243173583355, "learning_rate": 2.0313564886253893e-05, "lm_loss": 0.8982, "loss": 0.8982, "step": 3835 }, { "epoch": 1.7194725635949348, "learning_rate": 2.0301676466923243e-05, "lm_loss": 0.6215, "loss": 0.6215, "step": 3836 }, { "epoch": 1.7199208098315342, "learning_rate": 2.0289789148845613e-05, "lm_loss": 0.5887, "loss": 0.5887, "step": 3837 }, { "epoch": 1.7203690560681335, "learning_rate": 2.0277902934807295e-05, "lm_loss": 0.5249, "loss": 0.5249, "step": 3838 }, { "epoch": 1.7208173023047326, "learning_rate": 2.0266017827594326e-05, "lm_loss": 0.4382, "loss": 0.4382, "step": 3839 }, { "epoch": 1.7212655485413322, "learning_rate": 2.0254133829992488e-05, "lm_loss": 0.669, "loss": 0.669, "step": 3840 }, { "epoch": 1.7217137947779313, "learning_rate": 2.024225094478731e-05, "lm_loss": 0.5248, "loss": 0.5248, "step": 3841 }, { "epoch": 1.7221620410145306, "learning_rate": 2.023036917476406e-05, "lm_loss": 0.4673, "loss": 0.4673, "step": 3842 }, { "epoch": 1.72261028725113, "learning_rate": 2.0218488522707732e-05, "lm_loss": 0.8195, "loss": 0.8195, "step": 3843 }, { "epoch": 1.7230585334877293, "learning_rate": 2.020660899140306e-05, "lm_loss": 0.7147, "loss": 0.7147, "step": 3844 }, { "epoch": 1.7235067797243286, "learning_rate": 2.019473058363453e-05, "lm_loss": 0.4123, "loss": 0.4123, "step": 3845 }, { "epoch": 1.7239550259609278, "learning_rate": 2.018285330218635e-05, "lm_loss": 0.4863, "loss": 0.4863, "step": 3846 }, { "epoch": 1.7244032721975273, "learning_rate": 2.0170977149842467e-05, "lm_loss": 0.8714, "loss": 0.8714, "step": 3847 }, { "epoch": 1.7248515184341264, "learning_rate": 2.0159102129386563e-05, "lm_loss": 0.6049, "loss": 0.6049, "step": 3848 }, { "epoch": 1.7252997646707258, "learning_rate": 2.0147228243602057e-05, "lm_loss": 0.5372, "loss": 0.5372, "step": 3849 }, { "epoch": 1.725748010907325, "learning_rate": 2.013535549527211e-05, "lm_loss": 0.3899, "loss": 0.3899, "step": 3850 }, { "epoch": 1.7261962571439244, "learning_rate": 2.012348388717961e-05, "lm_loss": 0.5532, "loss": 0.5532, "step": 3851 }, { "epoch": 1.7266445033805238, "learning_rate": 2.011161342210716e-05, "lm_loss": 0.5417, "loss": 0.5417, "step": 3852 }, { "epoch": 1.727092749617123, "learning_rate": 2.009974410283713e-05, "lm_loss": 0.4066, "loss": 0.4066, "step": 3853 }, { "epoch": 1.7275409958537224, "learning_rate": 2.008787593215159e-05, "lm_loss": 0.4803, "loss": 0.4803, "step": 3854 }, { "epoch": 1.7279892420903216, "learning_rate": 2.0076008912832355e-05, "lm_loss": 0.4958, "loss": 0.4958, "step": 3855 }, { "epoch": 1.728437488326921, "learning_rate": 2.0064143047660966e-05, "lm_loss": 0.4494, "loss": 0.4494, "step": 3856 }, { "epoch": 1.7288857345635202, "learning_rate": 2.0052278339418703e-05, "lm_loss": 0.5001, "loss": 0.5001, "step": 3857 }, { "epoch": 1.7293339808001196, "learning_rate": 2.0040414790886556e-05, "lm_loss": 0.6392, "loss": 0.6392, "step": 3858 }, { "epoch": 1.729782227036719, "learning_rate": 2.0028552404845274e-05, "lm_loss": 0.8156, "loss": 0.8156, "step": 3859 }, { "epoch": 1.730230473273318, "learning_rate": 2.0016691184075302e-05, "lm_loss": 0.5053, "loss": 0.5053, "step": 3860 }, { "epoch": 1.7306787195099176, "learning_rate": 2.000483113135682e-05, "lm_loss": 0.712, "loss": 0.712, "step": 3861 }, { "epoch": 1.7311269657465167, "learning_rate": 1.999297224946975e-05, "lm_loss": 0.763, "loss": 0.763, "step": 3862 }, { "epoch": 1.731575211983116, "learning_rate": 1.998111454119372e-05, "lm_loss": 0.4905, "loss": 0.4905, "step": 3863 }, { "epoch": 1.7320234582197154, "learning_rate": 1.996925800930809e-05, "lm_loss": 0.517, "loss": 0.517, "step": 3864 }, { "epoch": 1.7324717044563147, "learning_rate": 1.9957402656591953e-05, "lm_loss": 0.4577, "loss": 0.4577, "step": 3865 }, { "epoch": 1.732919950692914, "learning_rate": 1.9945548485824114e-05, "lm_loss": 0.5388, "loss": 0.5388, "step": 3866 }, { "epoch": 1.7333681969295132, "learning_rate": 1.993369549978309e-05, "lm_loss": 0.4994, "loss": 0.4994, "step": 3867 }, { "epoch": 1.7338164431661127, "learning_rate": 1.9921843701247163e-05, "lm_loss": 0.5842, "loss": 0.5842, "step": 3868 }, { "epoch": 1.7342646894027118, "learning_rate": 1.990999309299429e-05, "lm_loss": 0.4498, "loss": 0.4498, "step": 3869 }, { "epoch": 1.7347129356393112, "learning_rate": 1.9898143677802178e-05, "lm_loss": 0.3566, "loss": 0.3566, "step": 3870 }, { "epoch": 1.7351611818759105, "learning_rate": 1.9886295458448234e-05, "lm_loss": 0.5576, "loss": 0.5576, "step": 3871 }, { "epoch": 1.7356094281125098, "learning_rate": 1.9874448437709604e-05, "lm_loss": 0.5834, "loss": 0.5834, "step": 3872 }, { "epoch": 1.7360576743491092, "learning_rate": 1.9862602618363133e-05, "lm_loss": 0.4648, "loss": 0.4648, "step": 3873 }, { "epoch": 1.7365059205857083, "learning_rate": 1.9850758003185404e-05, "lm_loss": 0.5618, "loss": 0.5618, "step": 3874 }, { "epoch": 1.7369541668223079, "learning_rate": 1.983891459495271e-05, "lm_loss": 0.5176, "loss": 0.5176, "step": 3875 }, { "epoch": 1.737402413058907, "learning_rate": 1.9827072396441044e-05, "lm_loss": 0.3632, "loss": 0.3632, "step": 3876 }, { "epoch": 1.7378506592955063, "learning_rate": 1.981523141042615e-05, "lm_loss": 0.486, "loss": 0.486, "step": 3877 }, { "epoch": 1.7382989055321056, "learning_rate": 1.9803391639683466e-05, "lm_loss": 0.5508, "loss": 0.5508, "step": 3878 }, { "epoch": 1.738747151768705, "learning_rate": 1.9791553086988153e-05, "lm_loss": 0.4967, "loss": 0.4967, "step": 3879 }, { "epoch": 1.7391953980053043, "learning_rate": 1.977971575511507e-05, "lm_loss": 0.5551, "loss": 0.5551, "step": 3880 }, { "epoch": 1.7396436442419034, "learning_rate": 1.97678796468388e-05, "lm_loss": 0.4912, "loss": 0.4912, "step": 3881 }, { "epoch": 1.740091890478503, "learning_rate": 1.9756044764933658e-05, "lm_loss": 0.5805, "loss": 0.5805, "step": 3882 }, { "epoch": 1.740540136715102, "learning_rate": 1.974421111217364e-05, "lm_loss": 0.7966, "loss": 0.7966, "step": 3883 }, { "epoch": 1.7409883829517014, "learning_rate": 1.973237869133247e-05, "lm_loss": 0.5329, "loss": 0.5329, "step": 3884 }, { "epoch": 1.7414366291883008, "learning_rate": 1.9720547505183583e-05, "lm_loss": 0.6567, "loss": 0.6567, "step": 3885 }, { "epoch": 1.7418848754249001, "learning_rate": 1.9708717556500128e-05, "lm_loss": 0.9228, "loss": 0.9228, "step": 3886 }, { "epoch": 1.7423331216614995, "learning_rate": 1.9696888848054962e-05, "lm_loss": 0.9557, "loss": 0.9557, "step": 3887 }, { "epoch": 1.7427813678980986, "learning_rate": 1.968506138262064e-05, "lm_loss": 0.5338, "loss": 0.5338, "step": 3888 }, { "epoch": 1.7432296141346981, "learning_rate": 1.9673235162969436e-05, "lm_loss": 0.4355, "loss": 0.4355, "step": 3889 }, { "epoch": 1.7436778603712972, "learning_rate": 1.9661410191873337e-05, "lm_loss": 0.8841, "loss": 0.8841, "step": 3890 }, { "epoch": 1.7441261066078966, "learning_rate": 1.964958647210402e-05, "lm_loss": 0.6316, "loss": 0.6316, "step": 3891 }, { "epoch": 1.744574352844496, "learning_rate": 1.9637764006432892e-05, "lm_loss": 0.5594, "loss": 0.5594, "step": 3892 }, { "epoch": 1.7450225990810952, "learning_rate": 1.9625942797631034e-05, "lm_loss": 0.4032, "loss": 0.4032, "step": 3893 }, { "epoch": 1.7454708453176946, "learning_rate": 1.9614122848469257e-05, "lm_loss": 0.6385, "loss": 0.6385, "step": 3894 }, { "epoch": 1.7459190915542937, "learning_rate": 1.9602304161718093e-05, "lm_loss": 0.8371, "loss": 0.8371, "step": 3895 }, { "epoch": 1.7463673377908933, "learning_rate": 1.959048674014774e-05, "lm_loss": 0.4185, "loss": 0.4185, "step": 3896 }, { "epoch": 1.7468155840274924, "learning_rate": 1.957867058652812e-05, "lm_loss": 0.7928, "loss": 0.7928, "step": 3897 }, { "epoch": 1.7472638302640917, "learning_rate": 1.956685570362884e-05, "lm_loss": 0.6343, "loss": 0.6343, "step": 3898 }, { "epoch": 1.747712076500691, "learning_rate": 1.9555042094219246e-05, "lm_loss": 0.5077, "loss": 0.5077, "step": 3899 }, { "epoch": 1.7481603227372904, "learning_rate": 1.9543229761068348e-05, "lm_loss": 0.3813, "loss": 0.3813, "step": 3900 }, { "epoch": 1.7486085689738897, "learning_rate": 1.9531418706944872e-05, "lm_loss": 0.9206, "loss": 0.9206, "step": 3901 }, { "epoch": 1.7490568152104888, "learning_rate": 1.9519608934617235e-05, "lm_loss": 0.6454, "loss": 0.6454, "step": 3902 }, { "epoch": 1.7495050614470884, "learning_rate": 1.9507800446853586e-05, "lm_loss": 0.5925, "loss": 0.5925, "step": 3903 }, { "epoch": 1.7499533076836875, "learning_rate": 1.9495993246421737e-05, "lm_loss": 0.4949, "loss": 0.4949, "step": 3904 }, { "epoch": 1.7504015539202868, "learning_rate": 1.948418733608921e-05, "lm_loss": 0.4007, "loss": 0.4007, "step": 3905 }, { "epoch": 1.7508498001568862, "learning_rate": 1.9472382718623218e-05, "lm_loss": 0.7745, "loss": 0.7745, "step": 3906 }, { "epoch": 1.7512980463934855, "learning_rate": 1.9460579396790684e-05, "lm_loss": 0.7163, "loss": 0.7163, "step": 3907 }, { "epoch": 1.7517462926300849, "learning_rate": 1.9448777373358225e-05, "lm_loss": 0.571, "loss": 0.571, "step": 3908 }, { "epoch": 1.752194538866684, "learning_rate": 1.9436976651092144e-05, "lm_loss": 0.3532, "loss": 0.3532, "step": 3909 }, { "epoch": 1.7526427851032835, "learning_rate": 1.9425177232758446e-05, "lm_loss": 0.6799, "loss": 0.6799, "step": 3910 }, { "epoch": 1.7530910313398826, "learning_rate": 1.9413379121122823e-05, "lm_loss": 0.512, "loss": 0.512, "step": 3911 }, { "epoch": 1.753539277576482, "learning_rate": 1.9401582318950685e-05, "lm_loss": 0.5362, "loss": 0.5362, "step": 3912 }, { "epoch": 1.7539875238130813, "learning_rate": 1.9389786829007104e-05, "lm_loss": 0.5267, "loss": 0.5267, "step": 3913 }, { "epoch": 1.7544357700496807, "learning_rate": 1.9377992654056854e-05, "lm_loss": 0.3835, "loss": 0.3835, "step": 3914 }, { "epoch": 1.75488401628628, "learning_rate": 1.9366199796864414e-05, "lm_loss": 0.6686, "loss": 0.6686, "step": 3915 }, { "epoch": 1.755332262522879, "learning_rate": 1.935440826019394e-05, "lm_loss": 0.7447, "loss": 0.7447, "step": 3916 }, { "epoch": 1.7557805087594787, "learning_rate": 1.9342618046809286e-05, "lm_loss": 0.397, "loss": 0.397, "step": 3917 }, { "epoch": 1.7562287549960778, "learning_rate": 1.933082915947398e-05, "lm_loss": 0.5177, "loss": 0.5177, "step": 3918 }, { "epoch": 1.7566770012326771, "learning_rate": 1.9319041600951266e-05, "lm_loss": 0.4246, "loss": 0.4246, "step": 3919 }, { "epoch": 1.7571252474692765, "learning_rate": 1.9307255374004048e-05, "lm_loss": 0.6354, "loss": 0.6354, "step": 3920 }, { "epoch": 1.7575734937058758, "learning_rate": 1.929547048139495e-05, "lm_loss": 0.5382, "loss": 0.5382, "step": 3921 }, { "epoch": 1.7580217399424751, "learning_rate": 1.928368692588626e-05, "lm_loss": 0.4834, "loss": 0.4834, "step": 3922 }, { "epoch": 1.7584699861790742, "learning_rate": 1.927190471023995e-05, "lm_loss": 0.5102, "loss": 0.5102, "step": 3923 }, { "epoch": 1.7589182324156738, "learning_rate": 1.9260123837217694e-05, "lm_loss": 0.4302, "loss": 0.4302, "step": 3924 }, { "epoch": 1.759366478652273, "learning_rate": 1.9248344309580842e-05, "lm_loss": 0.6181, "loss": 0.6181, "step": 3925 }, { "epoch": 1.7598147248888723, "learning_rate": 1.9236566130090422e-05, "lm_loss": 0.4888, "loss": 0.4888, "step": 3926 }, { "epoch": 1.7602629711254716, "learning_rate": 1.9224789301507167e-05, "lm_loss": 0.6271, "loss": 0.6271, "step": 3927 }, { "epoch": 1.760711217362071, "learning_rate": 1.9213013826591473e-05, "lm_loss": 0.5417, "loss": 0.5417, "step": 3928 }, { "epoch": 1.7611594635986703, "learning_rate": 1.9201239708103415e-05, "lm_loss": 0.4771, "loss": 0.4771, "step": 3929 }, { "epoch": 1.7616077098352694, "learning_rate": 1.9189466948802787e-05, "lm_loss": 0.4434, "loss": 0.4434, "step": 3930 }, { "epoch": 1.762055956071869, "learning_rate": 1.9177695551449014e-05, "lm_loss": 0.5818, "loss": 0.5818, "step": 3931 }, { "epoch": 1.762504202308468, "learning_rate": 1.9165925518801247e-05, "lm_loss": 0.5971, "loss": 0.5971, "step": 3932 }, { "epoch": 1.7629524485450674, "learning_rate": 1.9154156853618283e-05, "lm_loss": 0.5786, "loss": 0.5786, "step": 3933 }, { "epoch": 1.7634006947816667, "learning_rate": 1.914238955865861e-05, "lm_loss": 0.4056, "loss": 0.4056, "step": 3934 }, { "epoch": 1.763848941018266, "learning_rate": 1.9130623636680408e-05, "lm_loss": 0.4542, "loss": 0.4542, "step": 3935 }, { "epoch": 1.7642971872548654, "learning_rate": 1.9118859090441515e-05, "lm_loss": 0.6829, "loss": 0.6829, "step": 3936 }, { "epoch": 1.7647454334914645, "learning_rate": 1.9107095922699463e-05, "lm_loss": 0.8241, "loss": 0.8241, "step": 3937 }, { "epoch": 1.765193679728064, "learning_rate": 1.9095334136211436e-05, "lm_loss": 0.4747, "loss": 0.4747, "step": 3938 }, { "epoch": 1.7656419259646632, "learning_rate": 1.9083573733734326e-05, "lm_loss": 0.4722, "loss": 0.4722, "step": 3939 }, { "epoch": 1.7660901722012625, "learning_rate": 1.9071814718024687e-05, "lm_loss": 0.4555, "loss": 0.4555, "step": 3940 }, { "epoch": 1.7665384184378619, "learning_rate": 1.906005709183875e-05, "lm_loss": 0.5153, "loss": 0.5153, "step": 3941 }, { "epoch": 1.7669866646744612, "learning_rate": 1.904830085793241e-05, "lm_loss": 0.5795, "loss": 0.5795, "step": 3942 }, { "epoch": 1.7674349109110605, "learning_rate": 1.9036546019061242e-05, "lm_loss": 0.4902, "loss": 0.4902, "step": 3943 }, { "epoch": 1.7678831571476596, "learning_rate": 1.90247925779805e-05, "lm_loss": 0.4488, "loss": 0.4488, "step": 3944 }, { "epoch": 1.7683314033842592, "learning_rate": 1.90130405374451e-05, "lm_loss": 0.5633, "loss": 0.5633, "step": 3945 }, { "epoch": 1.7687796496208583, "learning_rate": 1.900128990020964e-05, "lm_loss": 0.5523, "loss": 0.5523, "step": 3946 }, { "epoch": 1.7692278958574577, "learning_rate": 1.8989540669028378e-05, "lm_loss": 0.5392, "loss": 0.5392, "step": 3947 }, { "epoch": 1.769676142094057, "learning_rate": 1.8977792846655253e-05, "lm_loss": 0.3983, "loss": 0.3983, "step": 3948 }, { "epoch": 1.7701243883306563, "learning_rate": 1.8966046435843876e-05, "lm_loss": 0.6473, "loss": 0.6473, "step": 3949 }, { "epoch": 1.7705726345672557, "learning_rate": 1.8954301439347515e-05, "lm_loss": 0.4449, "loss": 0.4449, "step": 3950 }, { "epoch": 1.7710208808038548, "learning_rate": 1.8942557859919106e-05, "lm_loss": 0.4632, "loss": 0.4632, "step": 3951 }, { "epoch": 1.7714691270404543, "learning_rate": 1.8930815700311272e-05, "lm_loss": 0.5122, "loss": 0.5122, "step": 3952 }, { "epoch": 1.7719173732770535, "learning_rate": 1.891907496327628e-05, "lm_loss": 0.5278, "loss": 0.5278, "step": 3953 }, { "epoch": 1.7723656195136528, "learning_rate": 1.8907335651566075e-05, "lm_loss": 0.5498, "loss": 0.5498, "step": 3954 }, { "epoch": 1.7728138657502521, "learning_rate": 1.889559776793227e-05, "lm_loss": 0.7419, "loss": 0.7419, "step": 3955 }, { "epoch": 1.7732621119868515, "learning_rate": 1.8883861315126127e-05, "lm_loss": 0.5862, "loss": 0.5862, "step": 3956 }, { "epoch": 1.7737103582234508, "learning_rate": 1.887212629589861e-05, "lm_loss": 0.5438, "loss": 0.5438, "step": 3957 }, { "epoch": 1.77415860446005, "learning_rate": 1.886039271300031e-05, "lm_loss": 0.4466, "loss": 0.4466, "step": 3958 }, { "epoch": 1.7746068506966495, "learning_rate": 1.884866056918148e-05, "lm_loss": 0.9088, "loss": 0.9088, "step": 3959 }, { "epoch": 1.7750550969332486, "learning_rate": 1.8836929867192076e-05, "lm_loss": 0.5744, "loss": 0.5744, "step": 3960 }, { "epoch": 1.775503343169848, "learning_rate": 1.8825200609781672e-05, "lm_loss": 0.5053, "loss": 0.5053, "step": 3961 }, { "epoch": 1.7759515894064473, "learning_rate": 1.881347279969953e-05, "lm_loss": 0.5162, "loss": 0.5162, "step": 3962 }, { "epoch": 1.7763998356430466, "learning_rate": 1.8801746439694544e-05, "lm_loss": 0.3826, "loss": 0.3826, "step": 3963 }, { "epoch": 1.776848081879646, "learning_rate": 1.8790021532515318e-05, "lm_loss": 0.5667, "loss": 0.5667, "step": 3964 }, { "epoch": 1.777296328116245, "learning_rate": 1.8778298080910055e-05, "lm_loss": 0.5513, "loss": 0.5513, "step": 3965 }, { "epoch": 1.7777445743528446, "learning_rate": 1.8766576087626668e-05, "lm_loss": 0.5678, "loss": 0.5678, "step": 3966 }, { "epoch": 1.7781928205894437, "learning_rate": 1.875485555541271e-05, "lm_loss": 0.404, "loss": 0.404, "step": 3967 }, { "epoch": 1.778641066826043, "learning_rate": 1.874313648701537e-05, "lm_loss": 0.5556, "loss": 0.5556, "step": 3968 }, { "epoch": 1.7790893130626424, "learning_rate": 1.873141888518153e-05, "lm_loss": 0.6513, "loss": 0.6513, "step": 3969 }, { "epoch": 1.7795375592992417, "learning_rate": 1.8719702752657704e-05, "lm_loss": 0.3654, "loss": 0.3654, "step": 3970 }, { "epoch": 1.779985805535841, "learning_rate": 1.870798809219006e-05, "lm_loss": 0.5524, "loss": 0.5524, "step": 3971 }, { "epoch": 1.7804340517724402, "learning_rate": 1.869627490652445e-05, "lm_loss": 0.4545, "loss": 0.4545, "step": 3972 }, { "epoch": 1.7808822980090397, "learning_rate": 1.8684563198406347e-05, "lm_loss": 0.4206, "loss": 0.4206, "step": 3973 }, { "epoch": 1.7813305442456389, "learning_rate": 1.867285297058088e-05, "lm_loss": 0.6135, "loss": 0.6135, "step": 3974 }, { "epoch": 1.7817787904822382, "learning_rate": 1.8661144225792858e-05, "lm_loss": 0.6483, "loss": 0.6483, "step": 3975 }, { "epoch": 1.7822270367188375, "learning_rate": 1.8649436966786723e-05, "lm_loss": 0.3886, "loss": 0.3886, "step": 3976 }, { "epoch": 1.7826752829554369, "learning_rate": 1.8637731196306575e-05, "lm_loss": 0.4431, "loss": 0.4431, "step": 3977 }, { "epoch": 1.7831235291920362, "learning_rate": 1.8626026917096154e-05, "lm_loss": 0.5813, "loss": 0.5813, "step": 3978 }, { "epoch": 1.7835717754286353, "learning_rate": 1.8614324131898857e-05, "lm_loss": 0.4532, "loss": 0.4532, "step": 3979 }, { "epoch": 1.7840200216652349, "learning_rate": 1.8602622843457744e-05, "lm_loss": 0.5127, "loss": 0.5127, "step": 3980 }, { "epoch": 1.784468267901834, "learning_rate": 1.8590923054515503e-05, "lm_loss": 0.5832, "loss": 0.5832, "step": 3981 }, { "epoch": 1.7849165141384333, "learning_rate": 1.8579224767814478e-05, "lm_loss": 0.4324, "loss": 0.4324, "step": 3982 }, { "epoch": 1.7853647603750327, "learning_rate": 1.8567527986096657e-05, "lm_loss": 0.9201, "loss": 0.9201, "step": 3983 }, { "epoch": 1.785813006611632, "learning_rate": 1.85558327121037e-05, "lm_loss": 0.5864, "loss": 0.5864, "step": 3984 }, { "epoch": 1.7862612528482313, "learning_rate": 1.8544138948576885e-05, "lm_loss": 0.404, "loss": 0.404, "step": 3985 }, { "epoch": 1.7867094990848305, "learning_rate": 1.8532446698257144e-05, "lm_loss": 0.4127, "loss": 0.4127, "step": 3986 }, { "epoch": 1.78715774532143, "learning_rate": 1.852075596388506e-05, "lm_loss": 0.5155, "loss": 0.5155, "step": 3987 }, { "epoch": 1.7876059915580291, "learning_rate": 1.8509066748200844e-05, "lm_loss": 0.4672, "loss": 0.4672, "step": 3988 }, { "epoch": 1.7880542377946285, "learning_rate": 1.849737905394438e-05, "lm_loss": 0.6526, "loss": 0.6526, "step": 3989 }, { "epoch": 1.7885024840312278, "learning_rate": 1.848569288385517e-05, "lm_loss": 0.4291, "loss": 0.4291, "step": 3990 }, { "epoch": 1.7889507302678271, "learning_rate": 1.847400824067237e-05, "lm_loss": 0.4965, "loss": 0.4965, "step": 3991 }, { "epoch": 1.7893989765044265, "learning_rate": 1.8462325127134773e-05, "lm_loss": 0.4901, "loss": 0.4901, "step": 3992 }, { "epoch": 1.7898472227410256, "learning_rate": 1.8450643545980812e-05, "lm_loss": 0.506, "loss": 0.506, "step": 3993 }, { "epoch": 1.7902954689776251, "learning_rate": 1.843896349994858e-05, "lm_loss": 0.5838, "loss": 0.5838, "step": 3994 }, { "epoch": 1.7907437152142243, "learning_rate": 1.842728499177579e-05, "lm_loss": 0.54, "loss": 0.54, "step": 3995 }, { "epoch": 1.7911919614508236, "learning_rate": 1.8415608024199788e-05, "lm_loss": 0.5709, "loss": 0.5709, "step": 3996 }, { "epoch": 1.791640207687423, "learning_rate": 1.840393259995758e-05, "lm_loss": 0.3835, "loss": 0.3835, "step": 3997 }, { "epoch": 1.7920884539240223, "learning_rate": 1.8392258721785806e-05, "lm_loss": 0.5317, "loss": 0.5317, "step": 3998 }, { "epoch": 1.7925367001606216, "learning_rate": 1.8380586392420722e-05, "lm_loss": 0.4948, "loss": 0.4948, "step": 3999 }, { "epoch": 1.7929849463972207, "learning_rate": 1.8368915614598254e-05, "lm_loss": 0.6252, "loss": 0.6252, "step": 4000 }, { "epoch": 1.7934331926338203, "learning_rate": 1.8357246391053933e-05, "lm_loss": 0.4095, "loss": 0.4095, "step": 4001 }, { "epoch": 1.7938814388704194, "learning_rate": 1.8345578724522956e-05, "lm_loss": 0.7031, "loss": 0.7031, "step": 4002 }, { "epoch": 1.7943296851070187, "learning_rate": 1.8333912617740136e-05, "lm_loss": 0.4002, "loss": 0.4002, "step": 4003 }, { "epoch": 1.794777931343618, "learning_rate": 1.832224807343991e-05, "lm_loss": 0.5032, "loss": 0.5032, "step": 4004 }, { "epoch": 1.7952261775802174, "learning_rate": 1.8310585094356387e-05, "lm_loss": 0.5188, "loss": 0.5188, "step": 4005 }, { "epoch": 1.7956744238168167, "learning_rate": 1.829892368322326e-05, "lm_loss": 0.6322, "loss": 0.6322, "step": 4006 }, { "epoch": 1.7961226700534159, "learning_rate": 1.82872638427739e-05, "lm_loss": 0.4925, "loss": 0.4925, "step": 4007 }, { "epoch": 1.7965709162900154, "learning_rate": 1.827560557574127e-05, "lm_loss": 1.0357, "loss": 1.0357, "step": 4008 }, { "epoch": 1.7970191625266145, "learning_rate": 1.8263948884857997e-05, "lm_loss": 0.556, "loss": 0.556, "step": 4009 }, { "epoch": 1.797467408763214, "learning_rate": 1.825229377285631e-05, "lm_loss": 0.8024, "loss": 0.8024, "step": 4010 }, { "epoch": 1.7979156549998132, "learning_rate": 1.8240640242468106e-05, "lm_loss": 0.535, "loss": 0.535, "step": 4011 }, { "epoch": 1.7983639012364125, "learning_rate": 1.8228988296424877e-05, "lm_loss": 0.5712, "loss": 0.5712, "step": 4012 }, { "epoch": 1.7988121474730119, "learning_rate": 1.8217337937457747e-05, "lm_loss": 0.5137, "loss": 0.5137, "step": 4013 }, { "epoch": 1.799260393709611, "learning_rate": 1.8205689168297482e-05, "lm_loss": 0.4149, "loss": 0.4149, "step": 4014 }, { "epoch": 1.7997086399462106, "learning_rate": 1.8194041991674473e-05, "lm_loss": 0.5671, "loss": 0.5671, "step": 4015 }, { "epoch": 1.8001568861828097, "learning_rate": 1.8182396410318724e-05, "lm_loss": 0.3685, "loss": 0.3685, "step": 4016 }, { "epoch": 1.8006051324194092, "learning_rate": 1.817075242695989e-05, "lm_loss": 0.5955, "loss": 0.5955, "step": 4017 }, { "epoch": 1.8010533786560083, "learning_rate": 1.8159110044327224e-05, "lm_loss": 0.4904, "loss": 0.4904, "step": 4018 }, { "epoch": 1.8015016248926077, "learning_rate": 1.8147469265149607e-05, "lm_loss": 0.5173, "loss": 0.5173, "step": 4019 }, { "epoch": 1.801949871129207, "learning_rate": 1.813583009215558e-05, "lm_loss": 0.6776, "loss": 0.6776, "step": 4020 }, { "epoch": 1.8023981173658061, "learning_rate": 1.8124192528073265e-05, "lm_loss": 0.8235, "loss": 0.8235, "step": 4021 }, { "epoch": 1.8028463636024057, "learning_rate": 1.8112556575630424e-05, "lm_loss": 0.4203, "loss": 0.4203, "step": 4022 }, { "epoch": 1.8032946098390048, "learning_rate": 1.810092223755444e-05, "lm_loss": 0.594, "loss": 0.594, "step": 4023 }, { "epoch": 1.8037428560756044, "learning_rate": 1.8089289516572316e-05, "lm_loss": 0.451, "loss": 0.451, "step": 4024 }, { "epoch": 1.8041911023122035, "learning_rate": 1.8077658415410683e-05, "lm_loss": 0.4148, "loss": 0.4148, "step": 4025 }, { "epoch": 1.8046393485488028, "learning_rate": 1.8066028936795783e-05, "lm_loss": 0.5451, "loss": 0.5451, "step": 4026 }, { "epoch": 1.8050875947854021, "learning_rate": 1.8054401083453485e-05, "lm_loss": 0.6696, "loss": 0.6696, "step": 4027 }, { "epoch": 1.8055358410220013, "learning_rate": 1.8042774858109257e-05, "lm_loss": 0.4559, "loss": 0.4559, "step": 4028 }, { "epoch": 1.8059840872586008, "learning_rate": 1.8031150263488224e-05, "lm_loss": 0.5529, "loss": 0.5529, "step": 4029 }, { "epoch": 1.8064323334952, "learning_rate": 1.80195273023151e-05, "lm_loss": 0.8255, "loss": 0.8255, "step": 4030 }, { "epoch": 1.8068805797317995, "learning_rate": 1.8007905977314225e-05, "lm_loss": 0.644, "loss": 0.644, "step": 4031 }, { "epoch": 1.8073288259683986, "learning_rate": 1.7996286291209545e-05, "lm_loss": 1.1812, "loss": 1.1812, "step": 4032 }, { "epoch": 1.807777072204998, "learning_rate": 1.7984668246724634e-05, "lm_loss": 0.7543, "loss": 0.7543, "step": 4033 }, { "epoch": 1.8082253184415973, "learning_rate": 1.7973051846582683e-05, "lm_loss": 0.4604, "loss": 0.4604, "step": 4034 }, { "epoch": 1.8086735646781964, "learning_rate": 1.796143709350649e-05, "lm_loss": 0.5032, "loss": 0.5032, "step": 4035 }, { "epoch": 1.809121810914796, "learning_rate": 1.7949823990218455e-05, "lm_loss": 0.6819, "loss": 0.6819, "step": 4036 }, { "epoch": 1.809570057151395, "learning_rate": 1.793821253944062e-05, "lm_loss": 0.5046, "loss": 0.5046, "step": 4037 }, { "epoch": 1.8100183033879946, "learning_rate": 1.7926602743894622e-05, "lm_loss": 1.0648, "loss": 1.0648, "step": 4038 }, { "epoch": 1.8104665496245937, "learning_rate": 1.7914994606301722e-05, "lm_loss": 0.5237, "loss": 0.5237, "step": 4039 }, { "epoch": 1.810914795861193, "learning_rate": 1.790338812938277e-05, "lm_loss": 0.46, "loss": 0.46, "step": 4040 }, { "epoch": 1.8113630420977924, "learning_rate": 1.789178331585824e-05, "lm_loss": 0.3782, "loss": 0.3782, "step": 4041 }, { "epoch": 1.8118112883343915, "learning_rate": 1.7880180168448223e-05, "lm_loss": 0.6875, "loss": 0.6875, "step": 4042 }, { "epoch": 1.812259534570991, "learning_rate": 1.7868578689872415e-05, "lm_loss": 0.4607, "loss": 0.4607, "step": 4043 }, { "epoch": 1.8127077808075902, "learning_rate": 1.7856978882850113e-05, "lm_loss": 0.7016, "loss": 0.7016, "step": 4044 }, { "epoch": 1.8131560270441898, "learning_rate": 1.784538075010022e-05, "lm_loss": 0.843, "loss": 0.843, "step": 4045 }, { "epoch": 1.8136042732807889, "learning_rate": 1.783378429434126e-05, "lm_loss": 1.0912, "loss": 1.0912, "step": 4046 }, { "epoch": 1.8140525195173882, "learning_rate": 1.7822189518291373e-05, "lm_loss": 0.3154, "loss": 0.3154, "step": 4047 }, { "epoch": 1.8145007657539876, "learning_rate": 1.781059642466828e-05, "lm_loss": 0.6483, "loss": 0.6483, "step": 4048 }, { "epoch": 1.8149490119905867, "learning_rate": 1.7799005016189307e-05, "lm_loss": 0.4533, "loss": 0.4533, "step": 4049 }, { "epoch": 1.8153972582271862, "learning_rate": 1.7787415295571415e-05, "lm_loss": 0.4419, "loss": 0.4419, "step": 4050 }, { "epoch": 1.8158455044637853, "learning_rate": 1.777582726553114e-05, "lm_loss": 0.4262, "loss": 0.4262, "step": 4051 }, { "epoch": 1.816293750700385, "learning_rate": 1.7764240928784638e-05, "lm_loss": 0.4943, "loss": 0.4943, "step": 4052 }, { "epoch": 1.816741996936984, "learning_rate": 1.7752656288047648e-05, "lm_loss": 0.6087, "loss": 0.6087, "step": 4053 }, { "epoch": 1.8171902431735834, "learning_rate": 1.7741073346035542e-05, "lm_loss": 0.6973, "loss": 0.6973, "step": 4054 }, { "epoch": 1.8176384894101827, "learning_rate": 1.7729492105463264e-05, "lm_loss": 0.614, "loss": 0.614, "step": 4055 }, { "epoch": 1.8180867356467818, "learning_rate": 1.7717912569045392e-05, "lm_loss": 0.8105, "loss": 0.8105, "step": 4056 }, { "epoch": 1.8185349818833814, "learning_rate": 1.7706334739496073e-05, "lm_loss": 0.4935, "loss": 0.4935, "step": 4057 }, { "epoch": 1.8189832281199805, "learning_rate": 1.7694758619529063e-05, "lm_loss": 0.4891, "loss": 0.4891, "step": 4058 }, { "epoch": 1.81943147435658, "learning_rate": 1.768318421185773e-05, "lm_loss": 0.5961, "loss": 0.5961, "step": 4059 }, { "epoch": 1.8198797205931792, "learning_rate": 1.7671611519195024e-05, "lm_loss": 0.4094, "loss": 0.4094, "step": 4060 }, { "epoch": 1.8203279668297785, "learning_rate": 1.7660040544253502e-05, "lm_loss": 0.4551, "loss": 0.4551, "step": 4061 }, { "epoch": 1.8207762130663778, "learning_rate": 1.7648471289745322e-05, "lm_loss": 0.4674, "loss": 0.4674, "step": 4062 }, { "epoch": 1.821224459302977, "learning_rate": 1.7636903758382227e-05, "lm_loss": 0.5577, "loss": 0.5577, "step": 4063 }, { "epoch": 1.8216727055395765, "learning_rate": 1.762533795287556e-05, "lm_loss": 0.5658, "loss": 0.5658, "step": 4064 }, { "epoch": 1.8221209517761756, "learning_rate": 1.7613773875936273e-05, "lm_loss": 0.4875, "loss": 0.4875, "step": 4065 }, { "epoch": 1.8225691980127752, "learning_rate": 1.7602211530274896e-05, "lm_loss": 0.5479, "loss": 0.5479, "step": 4066 }, { "epoch": 1.8230174442493743, "learning_rate": 1.759065091860156e-05, "lm_loss": 0.4549, "loss": 0.4549, "step": 4067 }, { "epoch": 1.8234656904859736, "learning_rate": 1.757909204362599e-05, "lm_loss": 0.4876, "loss": 0.4876, "step": 4068 }, { "epoch": 1.823913936722573, "learning_rate": 1.7567534908057497e-05, "lm_loss": 0.5292, "loss": 0.5292, "step": 4069 }, { "epoch": 1.824362182959172, "learning_rate": 1.755597951460499e-05, "lm_loss": 0.4378, "loss": 0.4378, "step": 4070 }, { "epoch": 1.8248104291957716, "learning_rate": 1.7544425865976977e-05, "lm_loss": 0.5942, "loss": 0.5942, "step": 4071 }, { "epoch": 1.8252586754323707, "learning_rate": 1.7532873964881548e-05, "lm_loss": 0.3989, "loss": 0.3989, "step": 4072 }, { "epoch": 1.8257069216689703, "learning_rate": 1.7521323814026368e-05, "lm_loss": 0.9031, "loss": 0.9031, "step": 4073 }, { "epoch": 1.8261551679055694, "learning_rate": 1.750977541611873e-05, "lm_loss": 0.6366, "loss": 0.6366, "step": 4074 }, { "epoch": 1.8266034141421688, "learning_rate": 1.749822877386549e-05, "lm_loss": 0.5524, "loss": 0.5524, "step": 4075 }, { "epoch": 1.827051660378768, "learning_rate": 1.74866838899731e-05, "lm_loss": 0.4169, "loss": 0.4169, "step": 4076 }, { "epoch": 1.8274999066153672, "learning_rate": 1.7475140767147587e-05, "lm_loss": 0.4609, "loss": 0.4609, "step": 4077 }, { "epoch": 1.8279481528519668, "learning_rate": 1.7463599408094576e-05, "lm_loss": 0.4812, "loss": 0.4812, "step": 4078 }, { "epoch": 1.8283963990885659, "learning_rate": 1.7452059815519293e-05, "lm_loss": 0.4994, "loss": 0.4994, "step": 4079 }, { "epoch": 1.8288446453251654, "learning_rate": 1.7440521992126517e-05, "lm_loss": 0.6177, "loss": 0.6177, "step": 4080 }, { "epoch": 1.8292928915617646, "learning_rate": 1.7428985940620636e-05, "lm_loss": 0.5268, "loss": 0.5268, "step": 4081 }, { "epoch": 1.829741137798364, "learning_rate": 1.7417451663705625e-05, "lm_loss": 0.4615, "loss": 0.4615, "step": 4082 }, { "epoch": 1.8301893840349632, "learning_rate": 1.740591916408502e-05, "lm_loss": 0.4412, "loss": 0.4412, "step": 4083 }, { "epoch": 1.8306376302715626, "learning_rate": 1.7394388444461973e-05, "lm_loss": 0.4663, "loss": 0.4663, "step": 4084 }, { "epoch": 1.831085876508162, "learning_rate": 1.738285950753919e-05, "lm_loss": 0.6434, "loss": 0.6434, "step": 4085 }, { "epoch": 1.831534122744761, "learning_rate": 1.737133235601897e-05, "lm_loss": 0.9464, "loss": 0.9464, "step": 4086 }, { "epoch": 1.8319823689813606, "learning_rate": 1.7359806992603202e-05, "lm_loss": 0.783, "loss": 0.783, "step": 4087 }, { "epoch": 1.8324306152179597, "learning_rate": 1.734828341999334e-05, "lm_loss": 0.4281, "loss": 0.4281, "step": 4088 }, { "epoch": 1.832878861454559, "learning_rate": 1.7336761640890427e-05, "lm_loss": 0.4612, "loss": 0.4612, "step": 4089 }, { "epoch": 1.8333271076911584, "learning_rate": 1.732524165799508e-05, "lm_loss": 0.4235, "loss": 0.4235, "step": 4090 }, { "epoch": 1.8337753539277577, "learning_rate": 1.7313723474007504e-05, "lm_loss": 0.9237, "loss": 0.9237, "step": 4091 }, { "epoch": 1.834223600164357, "learning_rate": 1.730220709162749e-05, "lm_loss": 0.4815, "loss": 0.4815, "step": 4092 }, { "epoch": 1.8346718464009562, "learning_rate": 1.729069251355438e-05, "lm_loss": 0.5681, "loss": 0.5681, "step": 4093 }, { "epoch": 1.8351200926375557, "learning_rate": 1.727917974248711e-05, "lm_loss": 0.5037, "loss": 0.5037, "step": 4094 }, { "epoch": 1.8355683388741548, "learning_rate": 1.7267668781124192e-05, "lm_loss": 0.7537, "loss": 0.7537, "step": 4095 }, { "epoch": 1.8360165851107542, "learning_rate": 1.725615963216371e-05, "lm_loss": 0.863, "loss": 0.863, "step": 4096 }, { "epoch": 1.8364648313473535, "learning_rate": 1.7244652298303333e-05, "lm_loss": 0.3843, "loss": 0.3843, "step": 4097 }, { "epoch": 1.8369130775839528, "learning_rate": 1.7233146782240282e-05, "lm_loss": 0.5761, "loss": 0.5761, "step": 4098 }, { "epoch": 1.8373613238205522, "learning_rate": 1.7221643086671368e-05, "lm_loss": 0.482, "loss": 0.482, "step": 4099 }, { "epoch": 1.8378095700571513, "learning_rate": 1.7210141214292995e-05, "lm_loss": 0.4532, "loss": 0.4532, "step": 4100 }, { "epoch": 1.8382578162937508, "learning_rate": 1.7198641167801103e-05, "lm_loss": 0.4738, "loss": 0.4738, "step": 4101 }, { "epoch": 1.83870606253035, "learning_rate": 1.7187142949891223e-05, "lm_loss": 0.5714, "loss": 0.5714, "step": 4102 }, { "epoch": 1.8391543087669493, "learning_rate": 1.717564656325844e-05, "lm_loss": 0.5458, "loss": 0.5458, "step": 4103 }, { "epoch": 1.8396025550035486, "learning_rate": 1.7164152010597452e-05, "lm_loss": 0.8037, "loss": 0.8037, "step": 4104 }, { "epoch": 1.840050801240148, "learning_rate": 1.7152659294602474e-05, "lm_loss": 0.5463, "loss": 0.5463, "step": 4105 }, { "epoch": 1.8404990474767473, "learning_rate": 1.7141168417967324e-05, "lm_loss": 0.5016, "loss": 0.5016, "step": 4106 }, { "epoch": 1.8409472937133464, "learning_rate": 1.7129679383385383e-05, "lm_loss": 0.4415, "loss": 0.4415, "step": 4107 }, { "epoch": 1.841395539949946, "learning_rate": 1.7118192193549586e-05, "lm_loss": 0.5424, "loss": 0.5424, "step": 4108 }, { "epoch": 1.841843786186545, "learning_rate": 1.7106706851152465e-05, "lm_loss": 0.464, "loss": 0.464, "step": 4109 }, { "epoch": 1.8422920324231444, "learning_rate": 1.7095223358886093e-05, "lm_loss": 0.7847, "loss": 0.7847, "step": 4110 }, { "epoch": 1.8427402786597438, "learning_rate": 1.708374171944211e-05, "lm_loss": 0.8245, "loss": 0.8245, "step": 4111 }, { "epoch": 1.843188524896343, "learning_rate": 1.7072261935511745e-05, "lm_loss": 0.5353, "loss": 0.5353, "step": 4112 }, { "epoch": 1.8436367711329424, "learning_rate": 1.7060784009785762e-05, "lm_loss": 0.5815, "loss": 0.5815, "step": 4113 }, { "epoch": 1.8440850173695416, "learning_rate": 1.704930794495451e-05, "lm_loss": 0.4275, "loss": 0.4275, "step": 4114 }, { "epoch": 1.8445332636061411, "learning_rate": 1.7037833743707892e-05, "lm_loss": 0.5679, "loss": 0.5679, "step": 4115 }, { "epoch": 1.8449815098427402, "learning_rate": 1.7026361408735383e-05, "lm_loss": 0.6599, "loss": 0.6599, "step": 4116 }, { "epoch": 1.8454297560793396, "learning_rate": 1.7014890942726e-05, "lm_loss": 0.7507, "loss": 0.7507, "step": 4117 }, { "epoch": 1.845878002315939, "learning_rate": 1.7003422348368365e-05, "lm_loss": 0.4788, "loss": 0.4788, "step": 4118 }, { "epoch": 1.8463262485525382, "learning_rate": 1.6991955628350615e-05, "lm_loss": 0.4172, "loss": 0.4172, "step": 4119 }, { "epoch": 1.8467744947891376, "learning_rate": 1.698049078536046e-05, "lm_loss": 0.5331, "loss": 0.5331, "step": 4120 }, { "epoch": 1.8472227410257367, "learning_rate": 1.696902782208519e-05, "lm_loss": 0.4469, "loss": 0.4469, "step": 4121 }, { "epoch": 1.8476709872623363, "learning_rate": 1.695756674121164e-05, "lm_loss": 0.3856, "loss": 0.3856, "step": 4122 }, { "epoch": 1.8481192334989354, "learning_rate": 1.694610754542619e-05, "lm_loss": 0.6015, "loss": 0.6015, "step": 4123 }, { "epoch": 1.8485674797355347, "learning_rate": 1.6934650237414813e-05, "lm_loss": 0.5016, "loss": 0.5016, "step": 4124 }, { "epoch": 1.849015725972134, "learning_rate": 1.6923194819863005e-05, "lm_loss": 0.4325, "loss": 0.4325, "step": 4125 }, { "epoch": 1.8494639722087334, "learning_rate": 1.6911741295455824e-05, "lm_loss": 0.4181, "loss": 0.4181, "step": 4126 }, { "epoch": 1.8499122184453327, "learning_rate": 1.6900289666877918e-05, "lm_loss": 0.6195, "loss": 0.6195, "step": 4127 }, { "epoch": 1.8503604646819318, "learning_rate": 1.688883993681345e-05, "lm_loss": 0.5127, "loss": 0.5127, "step": 4128 }, { "epoch": 1.8508087109185314, "learning_rate": 1.687739210794616e-05, "lm_loss": 0.7627, "loss": 0.7627, "step": 4129 }, { "epoch": 1.8512569571551305, "learning_rate": 1.6865946182959333e-05, "lm_loss": 0.6224, "loss": 0.6224, "step": 4130 }, { "epoch": 1.8517052033917298, "learning_rate": 1.6854502164535806e-05, "lm_loss": 0.4792, "loss": 0.4792, "step": 4131 }, { "epoch": 1.8521534496283292, "learning_rate": 1.6843060055357983e-05, "lm_loss": 0.5251, "loss": 0.5251, "step": 4132 }, { "epoch": 1.8526016958649285, "learning_rate": 1.683161985810781e-05, "lm_loss": 0.384, "loss": 0.384, "step": 4133 }, { "epoch": 1.8530499421015278, "learning_rate": 1.6820181575466788e-05, "lm_loss": 0.5393, "loss": 0.5393, "step": 4134 }, { "epoch": 1.853498188338127, "learning_rate": 1.6808745210115945e-05, "lm_loss": 0.5067, "loss": 0.5067, "step": 4135 }, { "epoch": 1.8539464345747265, "learning_rate": 1.679731076473591e-05, "lm_loss": 0.4942, "loss": 0.4942, "step": 4136 }, { "epoch": 1.8543946808113256, "learning_rate": 1.678587824200683e-05, "lm_loss": 0.4423, "loss": 0.4423, "step": 4137 }, { "epoch": 1.854842927047925, "learning_rate": 1.6774447644608402e-05, "lm_loss": 0.512, "loss": 0.512, "step": 4138 }, { "epoch": 1.8552911732845243, "learning_rate": 1.6763018975219876e-05, "lm_loss": 1.0024, "loss": 1.0024, "step": 4139 }, { "epoch": 1.8557394195211236, "learning_rate": 1.6751592236520038e-05, "lm_loss": 0.5565, "loss": 0.5565, "step": 4140 }, { "epoch": 1.856187665757723, "learning_rate": 1.6740167431187244e-05, "lm_loss": 0.417, "loss": 0.417, "step": 4141 }, { "epoch": 1.856635911994322, "learning_rate": 1.672874456189939e-05, "lm_loss": 0.4394, "loss": 0.4394, "step": 4142 }, { "epoch": 1.8570841582309217, "learning_rate": 1.6717323631333896e-05, "lm_loss": 0.5157, "loss": 0.5157, "step": 4143 }, { "epoch": 1.8575324044675208, "learning_rate": 1.6705904642167763e-05, "lm_loss": 0.5662, "loss": 0.5662, "step": 4144 }, { "epoch": 1.85798065070412, "learning_rate": 1.669448759707751e-05, "lm_loss": 0.5226, "loss": 0.5226, "step": 4145 }, { "epoch": 1.8584288969407194, "learning_rate": 1.668307249873922e-05, "lm_loss": 0.3546, "loss": 0.3546, "step": 4146 }, { "epoch": 1.8588771431773188, "learning_rate": 1.66716593498285e-05, "lm_loss": 0.5429, "loss": 0.5429, "step": 4147 }, { "epoch": 1.8593253894139181, "learning_rate": 1.6660248153020508e-05, "lm_loss": 0.4095, "loss": 0.4095, "step": 4148 }, { "epoch": 1.8597736356505172, "learning_rate": 1.6648838910989955e-05, "lm_loss": 0.3917, "loss": 0.3917, "step": 4149 }, { "epoch": 1.8602218818871168, "learning_rate": 1.6637431626411076e-05, "lm_loss": 0.5249, "loss": 0.5249, "step": 4150 }, { "epoch": 1.860670128123716, "learning_rate": 1.6626026301957658e-05, "lm_loss": 0.4495, "loss": 0.4495, "step": 4151 }, { "epoch": 1.8611183743603152, "learning_rate": 1.661462294030303e-05, "lm_loss": 0.5478, "loss": 0.5478, "step": 4152 }, { "epoch": 1.8615666205969146, "learning_rate": 1.6603221544120048e-05, "lm_loss": 0.469, "loss": 0.469, "step": 4153 }, { "epoch": 1.862014866833514, "learning_rate": 1.659182211608113e-05, "lm_loss": 0.5321, "loss": 0.5321, "step": 4154 }, { "epoch": 1.8624631130701133, "learning_rate": 1.658042465885822e-05, "lm_loss": 0.7875, "loss": 0.7875, "step": 4155 }, { "epoch": 1.8629113593067124, "learning_rate": 1.656902917512278e-05, "lm_loss": 0.5396, "loss": 0.5396, "step": 4156 }, { "epoch": 1.863359605543312, "learning_rate": 1.655763566754585e-05, "lm_loss": 0.5623, "loss": 0.5623, "step": 4157 }, { "epoch": 1.863807851779911, "learning_rate": 1.6546244138797978e-05, "lm_loss": 0.4511, "loss": 0.4511, "step": 4158 }, { "epoch": 1.8642560980165104, "learning_rate": 1.6534854591549254e-05, "lm_loss": 0.4157, "loss": 0.4157, "step": 4159 }, { "epoch": 1.8647043442531097, "learning_rate": 1.6523467028469297e-05, "lm_loss": 0.4672, "loss": 0.4672, "step": 4160 }, { "epoch": 1.865152590489709, "learning_rate": 1.651208145222729e-05, "lm_loss": 0.6007, "loss": 0.6007, "step": 4161 }, { "epoch": 1.8656008367263084, "learning_rate": 1.6500697865491904e-05, "lm_loss": 0.4496, "loss": 0.4496, "step": 4162 }, { "epoch": 1.8660490829629075, "learning_rate": 1.6489316270931395e-05, "lm_loss": 0.5114, "loss": 0.5114, "step": 4163 }, { "epoch": 1.866497329199507, "learning_rate": 1.6477936671213514e-05, "lm_loss": 0.2859, "loss": 0.2859, "step": 4164 }, { "epoch": 1.8669455754361062, "learning_rate": 1.6466559069005555e-05, "lm_loss": 0.623, "loss": 0.623, "step": 4165 }, { "epoch": 1.8673938216727055, "learning_rate": 1.645518346697435e-05, "lm_loss": 0.5203, "loss": 0.5203, "step": 4166 }, { "epoch": 1.8678420679093048, "learning_rate": 1.644380986778626e-05, "lm_loss": 0.5468, "loss": 0.5468, "step": 4167 }, { "epoch": 1.8682903141459042, "learning_rate": 1.643243827410716e-05, "lm_loss": 0.4456, "loss": 0.4456, "step": 4168 }, { "epoch": 1.8687385603825035, "learning_rate": 1.6421068688602485e-05, "lm_loss": 0.4611, "loss": 0.4611, "step": 4169 }, { "epoch": 1.8691868066191026, "learning_rate": 1.6409701113937183e-05, "lm_loss": 0.4199, "loss": 0.4199, "step": 4170 }, { "epoch": 1.8696350528557022, "learning_rate": 1.639833555277571e-05, "lm_loss": 0.559, "loss": 0.559, "step": 4171 }, { "epoch": 1.8700832990923013, "learning_rate": 1.63869720077821e-05, "lm_loss": 0.3657, "loss": 0.3657, "step": 4172 }, { "epoch": 1.8705315453289006, "learning_rate": 1.6375610481619868e-05, "lm_loss": 0.5552, "loss": 0.5552, "step": 4173 }, { "epoch": 1.8709797915655, "learning_rate": 1.6364250976952082e-05, "lm_loss": 0.4708, "loss": 0.4708, "step": 4174 }, { "epoch": 1.8714280378020993, "learning_rate": 1.6352893496441326e-05, "lm_loss": 0.4198, "loss": 0.4198, "step": 4175 }, { "epoch": 1.8718762840386987, "learning_rate": 1.6341538042749705e-05, "lm_loss": 0.5448, "loss": 0.5448, "step": 4176 }, { "epoch": 1.8723245302752978, "learning_rate": 1.6330184618538862e-05, "lm_loss": 0.4054, "loss": 0.4054, "step": 4177 }, { "epoch": 1.8727727765118973, "learning_rate": 1.6318833226469953e-05, "lm_loss": 0.5005, "loss": 0.5005, "step": 4178 }, { "epoch": 1.8732210227484964, "learning_rate": 1.6307483869203665e-05, "lm_loss": 0.4611, "loss": 0.4611, "step": 4179 }, { "epoch": 1.8736692689850958, "learning_rate": 1.62961365494002e-05, "lm_loss": 0.4229, "loss": 0.4229, "step": 4180 }, { "epoch": 1.8741175152216951, "learning_rate": 1.6284791269719288e-05, "lm_loss": 0.9976, "loss": 0.9976, "step": 4181 }, { "epoch": 1.8745657614582945, "learning_rate": 1.6273448032820193e-05, "lm_loss": 0.3952, "loss": 0.3952, "step": 4182 }, { "epoch": 1.8750140076948938, "learning_rate": 1.626210684136168e-05, "lm_loss": 0.4949, "loss": 0.4949, "step": 4183 }, { "epoch": 1.875462253931493, "learning_rate": 1.6250767698002034e-05, "lm_loss": 0.5086, "loss": 0.5086, "step": 4184 }, { "epoch": 1.8759105001680925, "learning_rate": 1.6239430605399077e-05, "lm_loss": 0.3158, "loss": 0.3158, "step": 4185 }, { "epoch": 1.8763587464046916, "learning_rate": 1.622809556621014e-05, "lm_loss": 0.5744, "loss": 0.5744, "step": 4186 }, { "epoch": 1.876806992641291, "learning_rate": 1.6216762583092073e-05, "lm_loss": 0.5497, "loss": 0.5497, "step": 4187 }, { "epoch": 1.8772552388778903, "learning_rate": 1.6205431658701243e-05, "lm_loss": 0.8475, "loss": 0.8475, "step": 4188 }, { "epoch": 1.8777034851144896, "learning_rate": 1.6194102795693535e-05, "lm_loss": 0.4851, "loss": 0.4851, "step": 4189 }, { "epoch": 1.878151731351089, "learning_rate": 1.618277599672436e-05, "lm_loss": 0.5083, "loss": 0.5083, "step": 4190 }, { "epoch": 1.878599977587688, "learning_rate": 1.6171451264448638e-05, "lm_loss": 0.9609, "loss": 0.9609, "step": 4191 }, { "epoch": 1.8790482238242876, "learning_rate": 1.61601286015208e-05, "lm_loss": 0.488, "loss": 0.488, "step": 4192 }, { "epoch": 1.8794964700608867, "learning_rate": 1.6148808010594792e-05, "lm_loss": 0.4241, "loss": 0.4241, "step": 4193 }, { "epoch": 1.879944716297486, "learning_rate": 1.6137489494324087e-05, "lm_loss": 0.5497, "loss": 0.5497, "step": 4194 }, { "epoch": 1.8803929625340854, "learning_rate": 1.612617305536166e-05, "lm_loss": 0.4278, "loss": 0.4278, "step": 4195 }, { "epoch": 1.8808412087706847, "learning_rate": 1.6114858696359998e-05, "lm_loss": 0.5215, "loss": 0.5215, "step": 4196 }, { "epoch": 1.881289455007284, "learning_rate": 1.610354641997111e-05, "lm_loss": 0.7768, "loss": 0.7768, "step": 4197 }, { "epoch": 1.8817377012438832, "learning_rate": 1.6092236228846503e-05, "lm_loss": 0.5252, "loss": 0.5252, "step": 4198 }, { "epoch": 1.8821859474804827, "learning_rate": 1.608092812563722e-05, "lm_loss": 0.9348, "loss": 0.9348, "step": 4199 }, { "epoch": 1.8826341937170818, "learning_rate": 1.6069622112993788e-05, "lm_loss": 0.5013, "loss": 0.5013, "step": 4200 }, { "epoch": 1.8830824399536812, "learning_rate": 1.605831819356625e-05, "lm_loss": 0.5099, "loss": 0.5099, "step": 4201 }, { "epoch": 1.8835306861902805, "learning_rate": 1.6047016370004176e-05, "lm_loss": 0.4019, "loss": 0.4019, "step": 4202 }, { "epoch": 1.8839789324268799, "learning_rate": 1.6035716644956626e-05, "lm_loss": 0.4516, "loss": 0.4516, "step": 4203 }, { "epoch": 1.8844271786634792, "learning_rate": 1.6024419021072168e-05, "lm_loss": 0.4978, "loss": 0.4978, "step": 4204 }, { "epoch": 1.8848754249000783, "learning_rate": 1.6013123500998884e-05, "lm_loss": 1.0379, "loss": 1.0379, "step": 4205 }, { "epoch": 1.8853236711366779, "learning_rate": 1.6001830087384373e-05, "lm_loss": 0.6351, "loss": 0.6351, "step": 4206 }, { "epoch": 1.885771917373277, "learning_rate": 1.5990538782875707e-05, "lm_loss": 0.4481, "loss": 0.4481, "step": 4207 }, { "epoch": 1.8862201636098763, "learning_rate": 1.5979249590119515e-05, "lm_loss": 0.4418, "loss": 0.4418, "step": 4208 }, { "epoch": 1.8866684098464757, "learning_rate": 1.5967962511761887e-05, "lm_loss": 0.4367, "loss": 0.4367, "step": 4209 }, { "epoch": 1.887116656083075, "learning_rate": 1.5956677550448423e-05, "lm_loss": 0.434, "loss": 0.434, "step": 4210 }, { "epoch": 1.8875649023196743, "learning_rate": 1.5945394708824257e-05, "lm_loss": 0.8874, "loss": 0.8874, "step": 4211 }, { "epoch": 1.8880131485562734, "learning_rate": 1.5934113989533992e-05, "lm_loss": 0.4935, "loss": 0.4935, "step": 4212 }, { "epoch": 1.888461394792873, "learning_rate": 1.5922835395221746e-05, "lm_loss": 0.838, "loss": 0.838, "step": 4213 }, { "epoch": 1.8889096410294721, "learning_rate": 1.5911558928531148e-05, "lm_loss": 0.591, "loss": 0.591, "step": 4214 }, { "epoch": 1.8893578872660715, "learning_rate": 1.5900284592105314e-05, "lm_loss": 0.4833, "loss": 0.4833, "step": 4215 }, { "epoch": 1.8898061335026708, "learning_rate": 1.588901238858686e-05, "lm_loss": 0.4133, "loss": 0.4133, "step": 4216 }, { "epoch": 1.8902543797392701, "learning_rate": 1.587774232061793e-05, "lm_loss": 0.4346, "loss": 0.4346, "step": 4217 }, { "epoch": 1.8907026259758695, "learning_rate": 1.5866474390840125e-05, "lm_loss": 0.4659, "loss": 0.4659, "step": 4218 }, { "epoch": 1.8911508722124686, "learning_rate": 1.5855208601894577e-05, "lm_loss": 0.3626, "loss": 0.3626, "step": 4219 }, { "epoch": 1.8915991184490681, "learning_rate": 1.584394495642191e-05, "lm_loss": 0.5467, "loss": 0.5467, "step": 4220 }, { "epoch": 1.8920473646856673, "learning_rate": 1.5832683457062226e-05, "lm_loss": 0.5183, "loss": 0.5183, "step": 4221 }, { "epoch": 1.8924956109222666, "learning_rate": 1.5821424106455148e-05, "lm_loss": 0.7678, "loss": 0.7678, "step": 4222 }, { "epoch": 1.892943857158866, "learning_rate": 1.581016690723979e-05, "lm_loss": 0.6556, "loss": 0.6556, "step": 4223 }, { "epoch": 1.8933921033954653, "learning_rate": 1.579891186205475e-05, "lm_loss": 0.5529, "loss": 0.5529, "step": 4224 }, { "epoch": 1.8938403496320646, "learning_rate": 1.5787658973538124e-05, "lm_loss": 0.7865, "loss": 0.7865, "step": 4225 }, { "epoch": 1.8942885958686637, "learning_rate": 1.577640824432752e-05, "lm_loss": 0.822, "loss": 0.822, "step": 4226 }, { "epoch": 1.8947368421052633, "learning_rate": 1.576515967706003e-05, "lm_loss": 0.6265, "loss": 0.6265, "step": 4227 }, { "epoch": 1.8951850883418624, "learning_rate": 1.5753913274372227e-05, "lm_loss": 0.446, "loss": 0.446, "step": 4228 }, { "epoch": 1.8956333345784617, "learning_rate": 1.574266903890019e-05, "lm_loss": 0.3657, "loss": 0.3657, "step": 4229 }, { "epoch": 1.896081580815061, "learning_rate": 1.5731426973279482e-05, "lm_loss": 0.4034, "loss": 0.4034, "step": 4230 }, { "epoch": 1.8965298270516604, "learning_rate": 1.572018708014517e-05, "lm_loss": 0.6059, "loss": 0.6059, "step": 4231 }, { "epoch": 1.8969780732882597, "learning_rate": 1.5708949362131798e-05, "lm_loss": 0.4445, "loss": 0.4445, "step": 4232 }, { "epoch": 1.8974263195248589, "learning_rate": 1.5697713821873398e-05, "lm_loss": 0.6323, "loss": 0.6323, "step": 4233 }, { "epoch": 1.8978745657614584, "learning_rate": 1.5686480462003514e-05, "lm_loss": 0.6579, "loss": 0.6579, "step": 4234 }, { "epoch": 1.8983228119980575, "learning_rate": 1.5675249285155155e-05, "lm_loss": 0.4711, "loss": 0.4711, "step": 4235 }, { "epoch": 1.8987710582346569, "learning_rate": 1.5664020293960835e-05, "lm_loss": 0.5754, "loss": 0.5754, "step": 4236 }, { "epoch": 1.8992193044712562, "learning_rate": 1.5652793491052546e-05, "lm_loss": 0.3655, "loss": 0.3655, "step": 4237 }, { "epoch": 1.8996675507078555, "learning_rate": 1.5641568879061762e-05, "lm_loss": 0.6243, "loss": 0.6243, "step": 4238 }, { "epoch": 1.9001157969444549, "learning_rate": 1.563034646061946e-05, "lm_loss": 0.4509, "loss": 0.4509, "step": 4239 }, { "epoch": 1.900564043181054, "learning_rate": 1.5619126238356086e-05, "lm_loss": 0.3272, "loss": 0.3272, "step": 4240 }, { "epoch": 1.9010122894176535, "learning_rate": 1.5607908214901588e-05, "lm_loss": 0.6233, "loss": 0.6233, "step": 4241 }, { "epoch": 1.9014605356542527, "learning_rate": 1.5596692392885374e-05, "lm_loss": 0.359, "loss": 0.359, "step": 4242 }, { "epoch": 1.901908781890852, "learning_rate": 1.5585478774936357e-05, "lm_loss": 0.4747, "loss": 0.4747, "step": 4243 }, { "epoch": 1.9023570281274513, "learning_rate": 1.5574267363682944e-05, "lm_loss": 0.6601, "loss": 0.6601, "step": 4244 }, { "epoch": 1.9028052743640507, "learning_rate": 1.5563058161752998e-05, "lm_loss": 0.466, "loss": 0.466, "step": 4245 }, { "epoch": 1.90325352060065, "learning_rate": 1.5551851171773866e-05, "lm_loss": 0.271, "loss": 0.271, "step": 4246 }, { "epoch": 1.9037017668372491, "learning_rate": 1.5540646396372396e-05, "lm_loss": 0.505, "loss": 0.505, "step": 4247 }, { "epoch": 1.9041500130738487, "learning_rate": 1.5529443838174905e-05, "lm_loss": 0.4892, "loss": 0.4892, "step": 4248 }, { "epoch": 1.9045982593104478, "learning_rate": 1.551824349980719e-05, "lm_loss": 0.5014, "loss": 0.5014, "step": 4249 }, { "epoch": 1.9050465055470471, "learning_rate": 1.550704538389452e-05, "lm_loss": 0.9105, "loss": 0.9105, "step": 4250 }, { "epoch": 1.9054947517836465, "learning_rate": 1.549584949306167e-05, "lm_loss": 0.4894, "loss": 0.4894, "step": 4251 }, { "epoch": 1.9059429980202458, "learning_rate": 1.5484655829932854e-05, "lm_loss": 0.7312, "loss": 0.7312, "step": 4252 }, { "epoch": 1.9063912442568451, "learning_rate": 1.547346439713181e-05, "lm_loss": 0.6211, "loss": 0.6211, "step": 4253 }, { "epoch": 1.9068394904934443, "learning_rate": 1.5462275197281716e-05, "lm_loss": 0.467, "loss": 0.467, "step": 4254 }, { "epoch": 1.9072877367300438, "learning_rate": 1.5451088233005234e-05, "lm_loss": 0.4793, "loss": 0.4793, "step": 4255 }, { "epoch": 1.907735982966643, "learning_rate": 1.543990350692452e-05, "lm_loss": 0.4363, "loss": 0.4363, "step": 4256 }, { "epoch": 1.9081842292032423, "learning_rate": 1.5428721021661185e-05, "lm_loss": 0.4305, "loss": 0.4305, "step": 4257 }, { "epoch": 1.9086324754398416, "learning_rate": 1.541754077983632e-05, "lm_loss": 0.49, "loss": 0.49, "step": 4258 }, { "epoch": 1.909080721676441, "learning_rate": 1.5406362784070506e-05, "lm_loss": 0.3948, "loss": 0.3948, "step": 4259 }, { "epoch": 1.9095289679130403, "learning_rate": 1.539518703698376e-05, "lm_loss": 0.4644, "loss": 0.4644, "step": 4260 }, { "epoch": 1.9099772141496394, "learning_rate": 1.538401354119562e-05, "lm_loss": 0.5472, "loss": 0.5472, "step": 4261 }, { "epoch": 1.910425460386239, "learning_rate": 1.537284229932507e-05, "lm_loss": 0.4878, "loss": 0.4878, "step": 4262 }, { "epoch": 1.910873706622838, "learning_rate": 1.5361673313990548e-05, "lm_loss": 0.9354, "loss": 0.9354, "step": 4263 }, { "epoch": 1.9113219528594374, "learning_rate": 1.535050658781001e-05, "lm_loss": 0.4279, "loss": 0.4279, "step": 4264 }, { "epoch": 1.9117701990960367, "learning_rate": 1.533934212340084e-05, "lm_loss": 0.9392, "loss": 0.9392, "step": 4265 }, { "epoch": 1.912218445332636, "learning_rate": 1.532817992337991e-05, "lm_loss": 0.8068, "loss": 0.8068, "step": 4266 }, { "epoch": 1.9126666915692354, "learning_rate": 1.5317019990363557e-05, "lm_loss": 0.3574, "loss": 0.3574, "step": 4267 }, { "epoch": 1.9131149378058345, "learning_rate": 1.530586232696759e-05, "lm_loss": 0.5672, "loss": 0.5672, "step": 4268 }, { "epoch": 1.913563184042434, "learning_rate": 1.529470693580728e-05, "lm_loss": 0.5069, "loss": 0.5069, "step": 4269 }, { "epoch": 1.9140114302790332, "learning_rate": 1.528355381949739e-05, "lm_loss": 0.3027, "loss": 0.3027, "step": 4270 }, { "epoch": 1.9144596765156325, "learning_rate": 1.5272402980652102e-05, "lm_loss": 0.4216, "loss": 0.4216, "step": 4271 }, { "epoch": 1.9149079227522319, "learning_rate": 1.526125442188511e-05, "lm_loss": 0.5772, "loss": 0.5772, "step": 4272 }, { "epoch": 1.9153561689888312, "learning_rate": 1.525010814580955e-05, "lm_loss": 0.4651, "loss": 0.4651, "step": 4273 }, { "epoch": 1.9158044152254305, "learning_rate": 1.5238964155038033e-05, "lm_loss": 0.4653, "loss": 0.4653, "step": 4274 }, { "epoch": 1.9162526614620297, "learning_rate": 1.5227822452182617e-05, "lm_loss": 0.554, "loss": 0.554, "step": 4275 }, { "epoch": 1.9167009076986292, "learning_rate": 1.5216683039854851e-05, "lm_loss": 0.331, "loss": 0.331, "step": 4276 }, { "epoch": 1.9171491539352283, "learning_rate": 1.5205545920665726e-05, "lm_loss": 1.0711, "loss": 1.0711, "step": 4277 }, { "epoch": 1.9175974001718277, "learning_rate": 1.519441109722569e-05, "lm_loss": 0.7879, "loss": 0.7879, "step": 4278 }, { "epoch": 1.918045646408427, "learning_rate": 1.5183278572144693e-05, "lm_loss": 0.5055, "loss": 0.5055, "step": 4279 }, { "epoch": 1.9184938926450263, "learning_rate": 1.5172148348032095e-05, "lm_loss": 0.4232, "loss": 0.4232, "step": 4280 }, { "epoch": 1.9189421388816257, "learning_rate": 1.5161020427496753e-05, "lm_loss": 0.6193, "loss": 0.6193, "step": 4281 }, { "epoch": 1.9193903851182248, "learning_rate": 1.5149894813146965e-05, "lm_loss": 0.6705, "loss": 0.6705, "step": 4282 }, { "epoch": 1.9198386313548244, "learning_rate": 1.5138771507590488e-05, "lm_loss": 0.5237, "loss": 0.5237, "step": 4283 }, { "epoch": 1.9202868775914235, "learning_rate": 1.5127650513434558e-05, "lm_loss": 0.6282, "loss": 0.6282, "step": 4284 }, { "epoch": 1.9207351238280228, "learning_rate": 1.511653183328585e-05, "lm_loss": 0.6968, "loss": 0.6968, "step": 4285 }, { "epoch": 1.9211833700646221, "learning_rate": 1.51054154697505e-05, "lm_loss": 0.5189, "loss": 0.5189, "step": 4286 }, { "epoch": 1.9216316163012215, "learning_rate": 1.5094301425434093e-05, "lm_loss": 0.458, "loss": 0.458, "step": 4287 }, { "epoch": 1.9220798625378208, "learning_rate": 1.5083189702941697e-05, "lm_loss": 0.5187, "loss": 0.5187, "step": 4288 }, { "epoch": 1.92252810877442, "learning_rate": 1.5072080304877816e-05, "lm_loss": 0.3767, "loss": 0.3767, "step": 4289 }, { "epoch": 1.9229763550110195, "learning_rate": 1.5060973233846415e-05, "lm_loss": 0.6087, "loss": 0.6087, "step": 4290 }, { "epoch": 1.9234246012476186, "learning_rate": 1.5049868492450902e-05, "lm_loss": 0.3278, "loss": 0.3278, "step": 4291 }, { "epoch": 1.9238728474842182, "learning_rate": 1.5038766083294151e-05, "lm_loss": 0.5164, "loss": 0.5164, "step": 4292 }, { "epoch": 1.9243210937208173, "learning_rate": 1.5027666008978492e-05, "lm_loss": 0.6717, "loss": 0.6717, "step": 4293 }, { "epoch": 1.9247693399574166, "learning_rate": 1.5016568272105694e-05, "lm_loss": 0.651, "loss": 0.651, "step": 4294 }, { "epoch": 1.925217586194016, "learning_rate": 1.5005472875276989e-05, "lm_loss": 0.367, "loss": 0.367, "step": 4295 }, { "epoch": 1.925665832430615, "learning_rate": 1.4994379821093049e-05, "lm_loss": 0.6396, "loss": 0.6396, "step": 4296 }, { "epoch": 1.9261140786672146, "learning_rate": 1.4983289112154026e-05, "lm_loss": 0.485, "loss": 0.485, "step": 4297 }, { "epoch": 1.9265623249038137, "learning_rate": 1.4972200751059493e-05, "lm_loss": 0.4154, "loss": 0.4154, "step": 4298 }, { "epoch": 1.9270105711404133, "learning_rate": 1.4961114740408477e-05, "lm_loss": 0.512, "loss": 0.512, "step": 4299 }, { "epoch": 1.9274588173770124, "learning_rate": 1.4950031082799457e-05, "lm_loss": 0.3965, "loss": 0.3965, "step": 4300 }, { "epoch": 1.9279070636136117, "learning_rate": 1.493894978083037e-05, "lm_loss": 0.4587, "loss": 0.4587, "step": 4301 }, { "epoch": 1.928355309850211, "learning_rate": 1.492787083709859e-05, "lm_loss": 0.6426, "loss": 0.6426, "step": 4302 }, { "epoch": 1.9288035560868102, "learning_rate": 1.4916794254200933e-05, "lm_loss": 0.6555, "loss": 0.6555, "step": 4303 }, { "epoch": 1.9292518023234098, "learning_rate": 1.4905720034733684e-05, "lm_loss": 0.557, "loss": 0.557, "step": 4304 }, { "epoch": 1.9297000485600089, "learning_rate": 1.4894648181292541e-05, "lm_loss": 0.6087, "loss": 0.6087, "step": 4305 }, { "epoch": 1.9301482947966084, "learning_rate": 1.488357869647269e-05, "lm_loss": 0.7601, "loss": 0.7601, "step": 4306 }, { "epoch": 1.9305965410332075, "learning_rate": 1.4872511582868723e-05, "lm_loss": 0.4296, "loss": 0.4296, "step": 4307 }, { "epoch": 1.9310447872698069, "learning_rate": 1.4861446843074688e-05, "lm_loss": 0.3627, "loss": 0.3627, "step": 4308 }, { "epoch": 1.9314930335064062, "learning_rate": 1.4850384479684098e-05, "lm_loss": 0.4839, "loss": 0.4839, "step": 4309 }, { "epoch": 1.9319412797430053, "learning_rate": 1.4839324495289874e-05, "lm_loss": 0.5762, "loss": 0.5762, "step": 4310 }, { "epoch": 1.932389525979605, "learning_rate": 1.4828266892484402e-05, "lm_loss": 0.3835, "loss": 0.3835, "step": 4311 }, { "epoch": 1.932837772216204, "learning_rate": 1.4817211673859498e-05, "lm_loss": 0.4078, "loss": 0.4078, "step": 4312 }, { "epoch": 1.9332860184528036, "learning_rate": 1.4806158842006434e-05, "lm_loss": 0.5765, "loss": 0.5765, "step": 4313 }, { "epoch": 1.9337342646894027, "learning_rate": 1.4795108399515905e-05, "lm_loss": 0.4796, "loss": 0.4796, "step": 4314 }, { "epoch": 1.934182510926002, "learning_rate": 1.4784060348978067e-05, "lm_loss": 0.4004, "loss": 0.4004, "step": 4315 }, { "epoch": 1.9346307571626014, "learning_rate": 1.4773014692982495e-05, "lm_loss": 0.5453, "loss": 0.5453, "step": 4316 }, { "epoch": 1.9350790033992005, "learning_rate": 1.4761971434118208e-05, "lm_loss": 0.541, "loss": 0.541, "step": 4317 }, { "epoch": 1.9355272496358, "learning_rate": 1.4750930574973673e-05, "lm_loss": 0.4505, "loss": 0.4505, "step": 4318 }, { "epoch": 1.9359754958723991, "learning_rate": 1.4739892118136785e-05, "lm_loss": 0.3895, "loss": 0.3895, "step": 4319 }, { "epoch": 1.9364237421089987, "learning_rate": 1.4728856066194875e-05, "lm_loss": 0.659, "loss": 0.659, "step": 4320 }, { "epoch": 1.9368719883455978, "learning_rate": 1.4717822421734718e-05, "lm_loss": 0.656, "loss": 0.656, "step": 4321 }, { "epoch": 1.9373202345821972, "learning_rate": 1.4706791187342517e-05, "lm_loss": 0.4999, "loss": 0.4999, "step": 4322 }, { "epoch": 1.9377684808187965, "learning_rate": 1.4695762365603905e-05, "lm_loss": 0.4795, "loss": 0.4795, "step": 4323 }, { "epoch": 1.9382167270553956, "learning_rate": 1.4684735959103979e-05, "lm_loss": 0.5713, "loss": 0.5713, "step": 4324 }, { "epoch": 1.9386649732919952, "learning_rate": 1.4673711970427235e-05, "lm_loss": 0.3824, "loss": 0.3824, "step": 4325 }, { "epoch": 1.9391132195285943, "learning_rate": 1.466269040215762e-05, "lm_loss": 0.4348, "loss": 0.4348, "step": 4326 }, { "epoch": 1.9395614657651938, "learning_rate": 1.4651671256878511e-05, "lm_loss": 0.503, "loss": 0.503, "step": 4327 }, { "epoch": 1.940009712001793, "learning_rate": 1.4640654537172704e-05, "lm_loss": 0.9127, "loss": 0.9127, "step": 4328 }, { "epoch": 1.9404579582383923, "learning_rate": 1.4629640245622456e-05, "lm_loss": 0.639, "loss": 0.639, "step": 4329 }, { "epoch": 1.9409062044749916, "learning_rate": 1.461862838480943e-05, "lm_loss": 0.2339, "loss": 0.2339, "step": 4330 }, { "epoch": 1.9413544507115907, "learning_rate": 1.4607618957314722e-05, "lm_loss": 0.4971, "loss": 0.4971, "step": 4331 }, { "epoch": 1.9418026969481903, "learning_rate": 1.459661196571886e-05, "lm_loss": 0.4356, "loss": 0.4356, "step": 4332 }, { "epoch": 1.9422509431847894, "learning_rate": 1.4585607412601813e-05, "lm_loss": 0.4219, "loss": 0.4219, "step": 4333 }, { "epoch": 1.942699189421389, "learning_rate": 1.457460530054296e-05, "lm_loss": 0.4916, "loss": 0.4916, "step": 4334 }, { "epoch": 1.943147435657988, "learning_rate": 1.4563605632121121e-05, "lm_loss": 0.539, "loss": 0.539, "step": 4335 }, { "epoch": 1.9435956818945874, "learning_rate": 1.4552608409914548e-05, "lm_loss": 0.4019, "loss": 0.4019, "step": 4336 }, { "epoch": 1.9440439281311868, "learning_rate": 1.4541613636500893e-05, "lm_loss": 0.9743, "loss": 0.9743, "step": 4337 }, { "epoch": 1.9444921743677859, "learning_rate": 1.4530621314457255e-05, "lm_loss": 0.4356, "loss": 0.4356, "step": 4338 }, { "epoch": 1.9449404206043854, "learning_rate": 1.4519631446360168e-05, "lm_loss": 0.4754, "loss": 0.4754, "step": 4339 }, { "epoch": 1.9453886668409845, "learning_rate": 1.4508644034785557e-05, "lm_loss": 0.3712, "loss": 0.3712, "step": 4340 }, { "epoch": 1.945836913077584, "learning_rate": 1.4497659082308807e-05, "lm_loss": 0.7412, "loss": 0.7412, "step": 4341 }, { "epoch": 1.9462851593141832, "learning_rate": 1.44866765915047e-05, "lm_loss": 0.792, "loss": 0.792, "step": 4342 }, { "epoch": 1.9467334055507826, "learning_rate": 1.4475696564947464e-05, "lm_loss": 0.398, "loss": 0.398, "step": 4343 }, { "epoch": 1.947181651787382, "learning_rate": 1.4464719005210741e-05, "lm_loss": 0.3167, "loss": 0.3167, "step": 4344 }, { "epoch": 1.947629898023981, "learning_rate": 1.4453743914867574e-05, "lm_loss": 0.5762, "loss": 0.5762, "step": 4345 }, { "epoch": 1.9480781442605806, "learning_rate": 1.444277129649046e-05, "lm_loss": 0.9913, "loss": 0.9913, "step": 4346 }, { "epoch": 1.9485263904971797, "learning_rate": 1.4431801152651289e-05, "lm_loss": 0.4062, "loss": 0.4062, "step": 4347 }, { "epoch": 1.9489746367337792, "learning_rate": 1.4420833485921387e-05, "lm_loss": 0.3392, "loss": 0.3392, "step": 4348 }, { "epoch": 1.9494228829703784, "learning_rate": 1.440986829887151e-05, "lm_loss": 0.478, "loss": 0.478, "step": 4349 }, { "epoch": 1.9498711292069777, "learning_rate": 1.4398905594071784e-05, "lm_loss": 0.5007, "loss": 0.5007, "step": 4350 }, { "epoch": 1.950319375443577, "learning_rate": 1.4387945374091833e-05, "lm_loss": 0.6098, "loss": 0.6098, "step": 4351 }, { "epoch": 1.9507676216801761, "learning_rate": 1.4376987641500617e-05, "lm_loss": 0.4713, "loss": 0.4713, "step": 4352 }, { "epoch": 1.9512158679167757, "learning_rate": 1.4366032398866563e-05, "lm_loss": 0.4227, "loss": 0.4227, "step": 4353 }, { "epoch": 1.9516641141533748, "learning_rate": 1.4355079648757513e-05, "lm_loss": 0.4821, "loss": 0.4821, "step": 4354 }, { "epoch": 1.9521123603899744, "learning_rate": 1.4344129393740689e-05, "lm_loss": 0.465, "loss": 0.465, "step": 4355 }, { "epoch": 1.9525606066265735, "learning_rate": 1.4333181636382758e-05, "lm_loss": 0.4497, "loss": 0.4497, "step": 4356 }, { "epoch": 1.9530088528631728, "learning_rate": 1.432223637924981e-05, "lm_loss": 0.7497, "loss": 0.7497, "step": 4357 }, { "epoch": 1.9534570990997722, "learning_rate": 1.4311293624907312e-05, "lm_loss": 0.5892, "loss": 0.5892, "step": 4358 }, { "epoch": 1.9539053453363715, "learning_rate": 1.430035337592018e-05, "lm_loss": 0.5058, "loss": 0.5058, "step": 4359 }, { "epoch": 1.9543535915729708, "learning_rate": 1.4289415634852727e-05, "lm_loss": 0.4677, "loss": 0.4677, "step": 4360 }, { "epoch": 1.95480183780957, "learning_rate": 1.4278480404268674e-05, "lm_loss": 0.3973, "loss": 0.3973, "step": 4361 }, { "epoch": 1.9552500840461695, "learning_rate": 1.4267547686731181e-05, "lm_loss": 0.4811, "loss": 0.4811, "step": 4362 }, { "epoch": 1.9556983302827686, "learning_rate": 1.4256617484802766e-05, "lm_loss": 0.383, "loss": 0.383, "step": 4363 }, { "epoch": 1.956146576519368, "learning_rate": 1.4245689801045409e-05, "lm_loss": 0.561, "loss": 0.561, "step": 4364 }, { "epoch": 1.9565948227559673, "learning_rate": 1.4234764638020488e-05, "lm_loss": 0.3767, "loss": 0.3767, "step": 4365 }, { "epoch": 1.9570430689925666, "learning_rate": 1.4223841998288755e-05, "lm_loss": 0.3799, "loss": 0.3799, "step": 4366 }, { "epoch": 1.957491315229166, "learning_rate": 1.4212921884410419e-05, "lm_loss": 0.4504, "loss": 0.4504, "step": 4367 }, { "epoch": 1.957939561465765, "learning_rate": 1.4202004298945066e-05, "lm_loss": 0.5431, "loss": 0.5431, "step": 4368 }, { "epoch": 1.9583878077023646, "learning_rate": 1.4191089244451705e-05, "lm_loss": 0.3782, "loss": 0.3782, "step": 4369 }, { "epoch": 1.9588360539389638, "learning_rate": 1.4180176723488753e-05, "lm_loss": 0.4341, "loss": 0.4341, "step": 4370 }, { "epoch": 1.959284300175563, "learning_rate": 1.4169266738614004e-05, "lm_loss": 0.688, "loss": 0.688, "step": 4371 }, { "epoch": 1.9597325464121624, "learning_rate": 1.4158359292384694e-05, "lm_loss": 0.2827, "loss": 0.2827, "step": 4372 }, { "epoch": 1.9601807926487618, "learning_rate": 1.4147454387357456e-05, "lm_loss": 0.4412, "loss": 0.4412, "step": 4373 }, { "epoch": 1.960629038885361, "learning_rate": 1.4136552026088301e-05, "lm_loss": 0.6756, "loss": 0.6756, "step": 4374 }, { "epoch": 1.9610772851219602, "learning_rate": 1.4125652211132672e-05, "lm_loss": 0.6977, "loss": 0.6977, "step": 4375 }, { "epoch": 1.9615255313585598, "learning_rate": 1.4114754945045422e-05, "lm_loss": 0.5283, "loss": 0.5283, "step": 4376 }, { "epoch": 1.961973777595159, "learning_rate": 1.410386023038076e-05, "lm_loss": 0.5935, "loss": 0.5935, "step": 4377 }, { "epoch": 1.9624220238317582, "learning_rate": 1.4092968069692364e-05, "lm_loss": 0.8385, "loss": 0.8385, "step": 4378 }, { "epoch": 1.9628702700683576, "learning_rate": 1.408207846553325e-05, "lm_loss": 0.3862, "loss": 0.3862, "step": 4379 }, { "epoch": 1.963318516304957, "learning_rate": 1.4071191420455873e-05, "lm_loss": 0.5285, "loss": 0.5285, "step": 4380 }, { "epoch": 1.9637667625415562, "learning_rate": 1.4060306937012086e-05, "lm_loss": 0.4815, "loss": 0.4815, "step": 4381 }, { "epoch": 1.9642150087781554, "learning_rate": 1.4049425017753116e-05, "lm_loss": 0.4205, "loss": 0.4205, "step": 4382 }, { "epoch": 1.964663255014755, "learning_rate": 1.4038545665229613e-05, "lm_loss": 0.353, "loss": 0.353, "step": 4383 }, { "epoch": 1.965111501251354, "learning_rate": 1.4027668881991629e-05, "lm_loss": 0.5448, "loss": 0.5448, "step": 4384 }, { "epoch": 1.9655597474879534, "learning_rate": 1.4016794670588584e-05, "lm_loss": 0.771, "loss": 0.771, "step": 4385 }, { "epoch": 1.9660079937245527, "learning_rate": 1.4005923033569324e-05, "lm_loss": 0.4809, "loss": 0.4809, "step": 4386 }, { "epoch": 1.966456239961152, "learning_rate": 1.399505397348208e-05, "lm_loss": 0.3195, "loss": 0.3195, "step": 4387 }, { "epoch": 1.9669044861977514, "learning_rate": 1.3984187492874488e-05, "lm_loss": 0.5638, "loss": 0.5638, "step": 4388 }, { "epoch": 1.9673527324343505, "learning_rate": 1.3973323594293574e-05, "lm_loss": 0.5925, "loss": 0.5925, "step": 4389 }, { "epoch": 1.96780097867095, "learning_rate": 1.396246228028574e-05, "lm_loss": 0.6918, "loss": 0.6918, "step": 4390 }, { "epoch": 1.9682492249075492, "learning_rate": 1.3951603553396821e-05, "lm_loss": 0.421, "loss": 0.421, "step": 4391 }, { "epoch": 1.9686974711441485, "learning_rate": 1.3940747416172004e-05, "lm_loss": 0.5734, "loss": 0.5734, "step": 4392 }, { "epoch": 1.9691457173807478, "learning_rate": 1.3929893871155896e-05, "lm_loss": 0.4939, "loss": 0.4939, "step": 4393 }, { "epoch": 1.9695939636173472, "learning_rate": 1.3919042920892502e-05, "lm_loss": 0.4074, "loss": 0.4074, "step": 4394 }, { "epoch": 1.9700422098539465, "learning_rate": 1.3908194567925176e-05, "lm_loss": 0.7235, "loss": 0.7235, "step": 4395 }, { "epoch": 1.9704904560905456, "learning_rate": 1.389734881479673e-05, "lm_loss": 0.7234, "loss": 0.7234, "step": 4396 }, { "epoch": 1.9709387023271452, "learning_rate": 1.3886505664049304e-05, "lm_loss": 0.4188, "loss": 0.4188, "step": 4397 }, { "epoch": 1.9713869485637443, "learning_rate": 1.3875665118224465e-05, "lm_loss": 0.6275, "loss": 0.6275, "step": 4398 }, { "epoch": 1.9718351948003436, "learning_rate": 1.3864827179863162e-05, "lm_loss": 0.7466, "loss": 0.7466, "step": 4399 }, { "epoch": 1.972283441036943, "learning_rate": 1.3853991851505715e-05, "lm_loss": 0.4711, "loss": 0.4711, "step": 4400 }, { "epoch": 1.9727316872735423, "learning_rate": 1.3843159135691857e-05, "lm_loss": 0.8872, "loss": 0.8872, "step": 4401 }, { "epoch": 1.9731799335101416, "learning_rate": 1.3832329034960706e-05, "lm_loss": 0.266, "loss": 0.266, "step": 4402 }, { "epoch": 1.9736281797467408, "learning_rate": 1.3821501551850734e-05, "lm_loss": 0.5306, "loss": 0.5306, "step": 4403 }, { "epoch": 1.9740764259833403, "learning_rate": 1.3810676688899845e-05, "lm_loss": 0.5214, "loss": 0.5214, "step": 4404 }, { "epoch": 1.9745246722199394, "learning_rate": 1.3799854448645299e-05, "lm_loss": 0.4637, "loss": 0.4637, "step": 4405 }, { "epoch": 1.9749729184565388, "learning_rate": 1.378903483362376e-05, "lm_loss": 0.4761, "loss": 0.4761, "step": 4406 }, { "epoch": 1.975421164693138, "learning_rate": 1.3778217846371268e-05, "lm_loss": 0.4673, "loss": 0.4673, "step": 4407 }, { "epoch": 1.9758694109297374, "learning_rate": 1.3767403489423233e-05, "lm_loss": 0.514, "loss": 0.514, "step": 4408 }, { "epoch": 1.9763176571663368, "learning_rate": 1.375659176531447e-05, "lm_loss": 0.4011, "loss": 0.4011, "step": 4409 }, { "epoch": 1.976765903402936, "learning_rate": 1.3745782676579178e-05, "lm_loss": 0.4178, "loss": 0.4178, "step": 4410 }, { "epoch": 1.9772141496395355, "learning_rate": 1.3734976225750912e-05, "lm_loss": 0.5892, "loss": 0.5892, "step": 4411 }, { "epoch": 1.9776623958761346, "learning_rate": 1.372417241536263e-05, "lm_loss": 0.5, "loss": 0.5, "step": 4412 }, { "epoch": 1.978110642112734, "learning_rate": 1.3713371247946671e-05, "lm_loss": 0.7481, "loss": 0.7481, "step": 4413 }, { "epoch": 1.9785588883493332, "learning_rate": 1.3702572726034751e-05, "lm_loss": 0.4825, "loss": 0.4825, "step": 4414 }, { "epoch": 1.9790071345859326, "learning_rate": 1.3691776852157972e-05, "lm_loss": 0.5681, "loss": 0.5681, "step": 4415 }, { "epoch": 1.979455380822532, "learning_rate": 1.3680983628846794e-05, "lm_loss": 0.84, "loss": 0.84, "step": 4416 }, { "epoch": 1.979903627059131, "learning_rate": 1.3670193058631076e-05, "lm_loss": 0.439, "loss": 0.439, "step": 4417 }, { "epoch": 1.9803518732957306, "learning_rate": 1.365940514404006e-05, "lm_loss": 0.4652, "loss": 0.4652, "step": 4418 }, { "epoch": 1.9808001195323297, "learning_rate": 1.3648619887602338e-05, "lm_loss": 0.4363, "loss": 0.4363, "step": 4419 }, { "epoch": 1.981248365768929, "learning_rate": 1.3637837291845901e-05, "lm_loss": 0.5192, "loss": 0.5192, "step": 4420 }, { "epoch": 1.9816966120055284, "learning_rate": 1.3627057359298126e-05, "lm_loss": 0.5717, "loss": 0.5717, "step": 4421 }, { "epoch": 1.9821448582421277, "learning_rate": 1.3616280092485717e-05, "lm_loss": 0.468, "loss": 0.468, "step": 4422 }, { "epoch": 1.982593104478727, "learning_rate": 1.3605505493934833e-05, "lm_loss": 0.7505, "loss": 0.7505, "step": 4423 }, { "epoch": 1.9830413507153262, "learning_rate": 1.3594733566170926e-05, "lm_loss": 0.9399, "loss": 0.9399, "step": 4424 }, { "epoch": 1.9834895969519257, "learning_rate": 1.3583964311718872e-05, "lm_loss": 0.4788, "loss": 0.4788, "step": 4425 }, { "epoch": 1.9839378431885248, "learning_rate": 1.357319773310291e-05, "lm_loss": 0.5121, "loss": 0.5121, "step": 4426 }, { "epoch": 1.9843860894251242, "learning_rate": 1.3562433832846639e-05, "lm_loss": 0.3318, "loss": 0.3318, "step": 4427 }, { "epoch": 1.9848343356617235, "learning_rate": 1.3551672613473049e-05, "lm_loss": 0.4219, "loss": 0.4219, "step": 4428 }, { "epoch": 1.9852825818983229, "learning_rate": 1.3540914077504476e-05, "lm_loss": 0.4946, "loss": 0.4946, "step": 4429 }, { "epoch": 1.9857308281349222, "learning_rate": 1.3530158227462653e-05, "lm_loss": 0.4894, "loss": 0.4894, "step": 4430 }, { "epoch": 1.9861790743715213, "learning_rate": 1.3519405065868674e-05, "lm_loss": 0.4278, "loss": 0.4278, "step": 4431 }, { "epoch": 1.9866273206081209, "learning_rate": 1.3508654595243e-05, "lm_loss": 0.4733, "loss": 0.4733, "step": 4432 }, { "epoch": 1.98707556684472, "learning_rate": 1.3497906818105466e-05, "lm_loss": 0.4095, "loss": 0.4095, "step": 4433 }, { "epoch": 1.9875238130813193, "learning_rate": 1.3487161736975278e-05, "lm_loss": 0.5721, "loss": 0.5721, "step": 4434 }, { "epoch": 1.9879720593179186, "learning_rate": 1.3476419354370994e-05, "lm_loss": 0.9965, "loss": 0.9965, "step": 4435 }, { "epoch": 1.988420305554518, "learning_rate": 1.3465679672810564e-05, "lm_loss": 0.4763, "loss": 0.4763, "step": 4436 }, { "epoch": 1.9888685517911173, "learning_rate": 1.3454942694811273e-05, "lm_loss": 0.6805, "loss": 0.6805, "step": 4437 }, { "epoch": 1.9893167980277164, "learning_rate": 1.3444208422889797e-05, "lm_loss": 0.7003, "loss": 0.7003, "step": 4438 }, { "epoch": 1.989765044264316, "learning_rate": 1.3433476859562183e-05, "lm_loss": 0.4328, "loss": 0.4328, "step": 4439 }, { "epoch": 1.990213290500915, "learning_rate": 1.3422748007343819e-05, "lm_loss": 0.4897, "loss": 0.4897, "step": 4440 }, { "epoch": 1.9906615367375144, "learning_rate": 1.3412021868749488e-05, "lm_loss": 0.4783, "loss": 0.4783, "step": 4441 }, { "epoch": 1.9911097829741138, "learning_rate": 1.3401298446293297e-05, "lm_loss": 0.4718, "loss": 0.4718, "step": 4442 }, { "epoch": 1.9915580292107131, "learning_rate": 1.3390577742488747e-05, "lm_loss": 0.9922, "loss": 0.9922, "step": 4443 }, { "epoch": 1.9920062754473125, "learning_rate": 1.3379859759848707e-05, "lm_loss": 0.905, "loss": 0.905, "step": 4444 }, { "epoch": 1.9924545216839116, "learning_rate": 1.336914450088537e-05, "lm_loss": 0.3089, "loss": 0.3089, "step": 4445 }, { "epoch": 1.9929027679205111, "learning_rate": 1.335843196811033e-05, "lm_loss": 0.5969, "loss": 0.5969, "step": 4446 }, { "epoch": 1.9933510141571102, "learning_rate": 1.3347722164034531e-05, "lm_loss": 0.3368, "loss": 0.3368, "step": 4447 }, { "epoch": 1.9937992603937096, "learning_rate": 1.3337015091168247e-05, "lm_loss": 0.6108, "loss": 0.6108, "step": 4448 }, { "epoch": 1.994247506630309, "learning_rate": 1.3326310752021182e-05, "lm_loss": 0.4971, "loss": 0.4971, "step": 4449 }, { "epoch": 1.9946957528669083, "learning_rate": 1.331560914910232e-05, "lm_loss": 0.3499, "loss": 0.3499, "step": 4450 }, { "epoch": 1.9951439991035076, "learning_rate": 1.3304910284920053e-05, "lm_loss": 0.7271, "loss": 0.7271, "step": 4451 }, { "epoch": 1.9955922453401067, "learning_rate": 1.3294214161982121e-05, "lm_loss": 0.5443, "loss": 0.5443, "step": 4452 }, { "epoch": 1.9960404915767063, "learning_rate": 1.3283520782795606e-05, "lm_loss": 0.3545, "loss": 0.3545, "step": 4453 }, { "epoch": 1.9964887378133054, "learning_rate": 1.327283014986696e-05, "lm_loss": 0.4749, "loss": 0.4749, "step": 4454 }, { "epoch": 1.9969369840499047, "learning_rate": 1.326214226570201e-05, "lm_loss": 0.4731, "loss": 0.4731, "step": 4455 }, { "epoch": 1.997385230286504, "learning_rate": 1.3251457132805892e-05, "lm_loss": 0.6002, "loss": 0.6002, "step": 4456 }, { "epoch": 1.9978334765231034, "learning_rate": 1.3240774753683133e-05, "lm_loss": 0.401, "loss": 0.401, "step": 4457 }, { "epoch": 1.9982817227597027, "learning_rate": 1.323009513083761e-05, "lm_loss": 0.364, "loss": 0.364, "step": 4458 }, { "epoch": 1.9987299689963018, "learning_rate": 1.3219418266772546e-05, "lm_loss": 0.5437, "loss": 0.5437, "step": 4459 }, { "epoch": 1.9991782152329014, "learning_rate": 1.3208744163990534e-05, "lm_loss": 0.4307, "loss": 0.4307, "step": 4460 }, { "epoch": 1.9996264614695005, "learning_rate": 1.3198072824993479e-05, "lm_loss": 0.4614, "loss": 0.4614, "step": 4461 }, { "epoch": 2.0000747077061, "learning_rate": 1.3187404252282681e-05, "lm_loss": 0.6547, "loss": 0.6547, "step": 4462 }, { "epoch": 2.000522953942699, "learning_rate": 1.3176738448358785e-05, "lm_loss": 0.3954, "loss": 0.3954, "step": 4463 }, { "epoch": 2.0009712001792983, "learning_rate": 1.3166075415721762e-05, "lm_loss": 0.4596, "loss": 0.4596, "step": 4464 }, { "epoch": 2.001419446415898, "learning_rate": 1.3155415156870955e-05, "lm_loss": 0.4249, "loss": 0.4249, "step": 4465 }, { "epoch": 2.001867692652497, "learning_rate": 1.3144757674305049e-05, "lm_loss": 0.3838, "loss": 0.3838, "step": 4466 }, { "epoch": 2.0023159388890965, "learning_rate": 1.3134102970522088e-05, "lm_loss": 0.3274, "loss": 0.3274, "step": 4467 }, { "epoch": 2.0027641851256957, "learning_rate": 1.3123451048019463e-05, "lm_loss": 0.4486, "loss": 0.4486, "step": 4468 }, { "epoch": 2.003212431362295, "learning_rate": 1.311280190929389e-05, "lm_loss": 0.4234, "loss": 0.4234, "step": 4469 }, { "epoch": 2.0036606775988943, "learning_rate": 1.3102155556841455e-05, "lm_loss": 0.4839, "loss": 0.4839, "step": 4470 }, { "epoch": 2.0041089238354934, "learning_rate": 1.3091511993157601e-05, "lm_loss": 0.2948, "loss": 0.2948, "step": 4471 }, { "epoch": 2.004557170072093, "learning_rate": 1.3080871220737082e-05, "lm_loss": 0.4515, "loss": 0.4515, "step": 4472 }, { "epoch": 2.005005416308692, "learning_rate": 1.3070233242074037e-05, "lm_loss": 0.3407, "loss": 0.3407, "step": 4473 }, { "epoch": 2.0054536625452917, "learning_rate": 1.3059598059661906e-05, "lm_loss": 0.3406, "loss": 0.3406, "step": 4474 }, { "epoch": 2.005901908781891, "learning_rate": 1.3048965675993508e-05, "lm_loss": 0.5666, "loss": 0.5666, "step": 4475 }, { "epoch": 2.0063501550184903, "learning_rate": 1.3038336093561022e-05, "lm_loss": 0.293, "loss": 0.293, "step": 4476 }, { "epoch": 2.0067984012550895, "learning_rate": 1.3027709314855913e-05, "lm_loss": 0.4367, "loss": 0.4367, "step": 4477 }, { "epoch": 2.0072466474916886, "learning_rate": 1.3017085342369032e-05, "lm_loss": 0.3264, "loss": 0.3264, "step": 4478 }, { "epoch": 2.007694893728288, "learning_rate": 1.3006464178590571e-05, "lm_loss": 0.4125, "loss": 0.4125, "step": 4479 }, { "epoch": 2.0081431399648872, "learning_rate": 1.2995845826010038e-05, "lm_loss": 0.699, "loss": 0.699, "step": 4480 }, { "epoch": 2.008591386201487, "learning_rate": 1.2985230287116318e-05, "lm_loss": 0.3026, "loss": 0.3026, "step": 4481 }, { "epoch": 2.009039632438086, "learning_rate": 1.297461756439759e-05, "lm_loss": 0.4727, "loss": 0.4727, "step": 4482 }, { "epoch": 2.0094878786746855, "learning_rate": 1.2964007660341415e-05, "lm_loss": 0.5919, "loss": 0.5919, "step": 4483 }, { "epoch": 2.0099361249112846, "learning_rate": 1.2953400577434676e-05, "lm_loss": 0.5991, "loss": 0.5991, "step": 4484 }, { "epoch": 2.0103843711478837, "learning_rate": 1.2942796318163595e-05, "lm_loss": 0.4873, "loss": 0.4873, "step": 4485 }, { "epoch": 2.0108326173844833, "learning_rate": 1.2932194885013744e-05, "lm_loss": 0.354, "loss": 0.354, "step": 4486 }, { "epoch": 2.0112808636210824, "learning_rate": 1.2921596280470006e-05, "lm_loss": 0.4011, "loss": 0.4011, "step": 4487 }, { "epoch": 2.011729109857682, "learning_rate": 1.2911000507016618e-05, "lm_loss": 0.4096, "loss": 0.4096, "step": 4488 }, { "epoch": 2.012177356094281, "learning_rate": 1.2900407567137173e-05, "lm_loss": 0.3244, "loss": 0.3244, "step": 4489 }, { "epoch": 2.0126256023308806, "learning_rate": 1.2889817463314552e-05, "lm_loss": 0.3664, "loss": 0.3664, "step": 4490 }, { "epoch": 2.0130738485674797, "learning_rate": 1.2879230198031015e-05, "lm_loss": 0.4743, "loss": 0.4743, "step": 4491 }, { "epoch": 2.013522094804079, "learning_rate": 1.2868645773768145e-05, "lm_loss": 0.3904, "loss": 0.3904, "step": 4492 }, { "epoch": 2.0139703410406784, "learning_rate": 1.2858064193006824e-05, "lm_loss": 0.2956, "loss": 0.2956, "step": 4493 }, { "epoch": 2.0144185872772775, "learning_rate": 1.2847485458227343e-05, "lm_loss": 0.4687, "loss": 0.4687, "step": 4494 }, { "epoch": 2.014866833513877, "learning_rate": 1.283690957190925e-05, "lm_loss": 0.3058, "loss": 0.3058, "step": 4495 }, { "epoch": 2.015315079750476, "learning_rate": 1.2826336536531461e-05, "lm_loss": 0.443, "loss": 0.443, "step": 4496 }, { "epoch": 2.0157633259870757, "learning_rate": 1.281576635457223e-05, "lm_loss": 0.3304, "loss": 0.3304, "step": 4497 }, { "epoch": 2.016211572223675, "learning_rate": 1.2805199028509118e-05, "lm_loss": 0.3932, "loss": 0.3932, "step": 4498 }, { "epoch": 2.016659818460274, "learning_rate": 1.2794634560819033e-05, "lm_loss": 0.5052, "loss": 0.5052, "step": 4499 }, { "epoch": 2.0171080646968735, "learning_rate": 1.2784072953978221e-05, "lm_loss": 0.4653, "loss": 0.4653, "step": 4500 }, { "epoch": 2.0175563109334727, "learning_rate": 1.2773514210462226e-05, "lm_loss": 0.5642, "loss": 0.5642, "step": 4501 }, { "epoch": 2.018004557170072, "learning_rate": 1.2762958332745953e-05, "lm_loss": 0.3519, "loss": 0.3519, "step": 4502 }, { "epoch": 2.0184528034066713, "learning_rate": 1.275240532330362e-05, "lm_loss": 0.4091, "loss": 0.4091, "step": 4503 }, { "epoch": 2.018901049643271, "learning_rate": 1.274185518460878e-05, "lm_loss": 0.2918, "loss": 0.2918, "step": 4504 }, { "epoch": 2.01934929587987, "learning_rate": 1.2731307919134317e-05, "lm_loss": 0.4737, "loss": 0.4737, "step": 4505 }, { "epoch": 2.019797542116469, "learning_rate": 1.2720763529352414e-05, "lm_loss": 0.4812, "loss": 0.4812, "step": 4506 }, { "epoch": 2.0202457883530687, "learning_rate": 1.2710222017734608e-05, "lm_loss": 0.3968, "loss": 0.3968, "step": 4507 }, { "epoch": 2.020694034589668, "learning_rate": 1.2699683386751766e-05, "lm_loss": 0.5035, "loss": 0.5035, "step": 4508 }, { "epoch": 2.0211422808262673, "learning_rate": 1.2689147638874044e-05, "lm_loss": 0.4489, "loss": 0.4489, "step": 4509 }, { "epoch": 2.0215905270628665, "learning_rate": 1.2678614776570952e-05, "lm_loss": 0.3723, "loss": 0.3723, "step": 4510 }, { "epoch": 2.022038773299466, "learning_rate": 1.2668084802311325e-05, "lm_loss": 0.4617, "loss": 0.4617, "step": 4511 }, { "epoch": 2.022487019536065, "learning_rate": 1.2657557718563309e-05, "lm_loss": 0.4061, "loss": 0.4061, "step": 4512 }, { "epoch": 2.0229352657726642, "learning_rate": 1.2647033527794383e-05, "lm_loss": 0.4386, "loss": 0.4386, "step": 4513 }, { "epoch": 2.023383512009264, "learning_rate": 1.2636512232471326e-05, "lm_loss": 0.2125, "loss": 0.2125, "step": 4514 }, { "epoch": 2.023831758245863, "learning_rate": 1.2625993835060262e-05, "lm_loss": 0.548, "loss": 0.548, "step": 4515 }, { "epoch": 2.0242800044824625, "learning_rate": 1.2615478338026634e-05, "lm_loss": 0.3721, "loss": 0.3721, "step": 4516 }, { "epoch": 2.0247282507190616, "learning_rate": 1.260496574383518e-05, "lm_loss": 0.3593, "loss": 0.3593, "step": 4517 }, { "epoch": 2.025176496955661, "learning_rate": 1.259445605495e-05, "lm_loss": 0.4969, "loss": 0.4969, "step": 4518 }, { "epoch": 2.0256247431922603, "learning_rate": 1.2583949273834463e-05, "lm_loss": 0.2858, "loss": 0.2858, "step": 4519 }, { "epoch": 2.0260729894288594, "learning_rate": 1.2573445402951287e-05, "lm_loss": 0.4947, "loss": 0.4947, "step": 4520 }, { "epoch": 2.026521235665459, "learning_rate": 1.2562944444762528e-05, "lm_loss": 0.4908, "loss": 0.4908, "step": 4521 }, { "epoch": 2.026969481902058, "learning_rate": 1.2552446401729512e-05, "lm_loss": 0.4573, "loss": 0.4573, "step": 4522 }, { "epoch": 2.0274177281386576, "learning_rate": 1.2541951276312915e-05, "lm_loss": 0.3128, "loss": 0.3128, "step": 4523 }, { "epoch": 2.0278659743752567, "learning_rate": 1.2531459070972706e-05, "lm_loss": 0.3908, "loss": 0.3908, "step": 4524 }, { "epoch": 2.0283142206118563, "learning_rate": 1.252096978816819e-05, "lm_loss": 0.3692, "loss": 0.3692, "step": 4525 }, { "epoch": 2.0287624668484554, "learning_rate": 1.251048343035799e-05, "lm_loss": 0.4325, "loss": 0.4325, "step": 4526 }, { "epoch": 2.0292107130850545, "learning_rate": 1.2500000000000006e-05, "lm_loss": 0.4787, "loss": 0.4787, "step": 4527 }, { "epoch": 2.029658959321654, "learning_rate": 1.2489519499551494e-05, "lm_loss": 0.5649, "loss": 0.5649, "step": 4528 }, { "epoch": 2.030107205558253, "learning_rate": 1.2479041931469007e-05, "lm_loss": 0.4569, "loss": 0.4569, "step": 4529 }, { "epoch": 2.0305554517948528, "learning_rate": 1.2468567298208411e-05, "lm_loss": 0.4663, "loss": 0.4663, "step": 4530 }, { "epoch": 2.031003698031452, "learning_rate": 1.2458095602224892e-05, "lm_loss": 0.3607, "loss": 0.3607, "step": 4531 }, { "epoch": 2.0314519442680514, "learning_rate": 1.2447626845972923e-05, "lm_loss": 0.4943, "loss": 0.4943, "step": 4532 }, { "epoch": 2.0319001905046505, "learning_rate": 1.2437161031906312e-05, "lm_loss": 0.3587, "loss": 0.3587, "step": 4533 }, { "epoch": 2.0323484367412497, "learning_rate": 1.2426698162478179e-05, "lm_loss": 0.3065, "loss": 0.3065, "step": 4534 }, { "epoch": 2.032796682977849, "learning_rate": 1.2416238240140929e-05, "lm_loss": 0.3712, "loss": 0.3712, "step": 4535 }, { "epoch": 2.0332449292144483, "learning_rate": 1.2405781267346297e-05, "lm_loss": 0.4165, "loss": 0.4165, "step": 4536 }, { "epoch": 2.033693175451048, "learning_rate": 1.2395327246545335e-05, "lm_loss": 0.3489, "loss": 0.3489, "step": 4537 }, { "epoch": 2.034141421687647, "learning_rate": 1.2384876180188362e-05, "lm_loss": 0.5038, "loss": 0.5038, "step": 4538 }, { "epoch": 2.0345896679242466, "learning_rate": 1.2374428070725067e-05, "lm_loss": 0.3218, "loss": 0.3218, "step": 4539 }, { "epoch": 2.0350379141608457, "learning_rate": 1.2363982920604384e-05, "lm_loss": 0.5082, "loss": 0.5082, "step": 4540 }, { "epoch": 2.035486160397445, "learning_rate": 1.235354073227459e-05, "lm_loss": 0.4138, "loss": 0.4138, "step": 4541 }, { "epoch": 2.0359344066340443, "learning_rate": 1.2343101508183266e-05, "lm_loss": 0.6687, "loss": 0.6687, "step": 4542 }, { "epoch": 2.0363826528706435, "learning_rate": 1.2332665250777276e-05, "lm_loss": 0.426, "loss": 0.426, "step": 4543 }, { "epoch": 2.036830899107243, "learning_rate": 1.2322231962502808e-05, "lm_loss": 0.4853, "loss": 0.4853, "step": 4544 }, { "epoch": 2.037279145343842, "learning_rate": 1.2311801645805365e-05, "lm_loss": 0.428, "loss": 0.428, "step": 4545 }, { "epoch": 2.0377273915804417, "learning_rate": 1.2301374303129711e-05, "lm_loss": 0.4831, "loss": 0.4831, "step": 4546 }, { "epoch": 2.038175637817041, "learning_rate": 1.2290949936919952e-05, "lm_loss": 0.3663, "loss": 0.3663, "step": 4547 }, { "epoch": 2.03862388405364, "learning_rate": 1.2280528549619485e-05, "lm_loss": 0.3896, "loss": 0.3896, "step": 4548 }, { "epoch": 2.0390721302902395, "learning_rate": 1.227011014367101e-05, "lm_loss": 0.4478, "loss": 0.4478, "step": 4549 }, { "epoch": 2.0395203765268386, "learning_rate": 1.225969472151653e-05, "lm_loss": 0.4356, "loss": 0.4356, "step": 4550 }, { "epoch": 2.039968622763438, "learning_rate": 1.224928228559733e-05, "lm_loss": 0.4011, "loss": 0.4011, "step": 4551 }, { "epoch": 2.0404168690000373, "learning_rate": 1.2238872838354015e-05, "lm_loss": 0.2977, "loss": 0.2977, "step": 4552 }, { "epoch": 2.040865115236637, "learning_rate": 1.2228466382226498e-05, "lm_loss": 0.4412, "loss": 0.4412, "step": 4553 }, { "epoch": 2.041313361473236, "learning_rate": 1.2218062919653952e-05, "lm_loss": 0.3452, "loss": 0.3452, "step": 4554 }, { "epoch": 2.041761607709835, "learning_rate": 1.2207662453074889e-05, "lm_loss": 0.3914, "loss": 0.3914, "step": 4555 }, { "epoch": 2.0422098539464346, "learning_rate": 1.2197264984927101e-05, "lm_loss": 0.499, "loss": 0.499, "step": 4556 }, { "epoch": 2.0426581001830337, "learning_rate": 1.2186870517647678e-05, "lm_loss": 0.3474, "loss": 0.3474, "step": 4557 }, { "epoch": 2.0431063464196333, "learning_rate": 1.217647905367302e-05, "lm_loss": 0.4801, "loss": 0.4801, "step": 4558 }, { "epoch": 2.0435545926562324, "learning_rate": 1.2166090595438786e-05, "lm_loss": 0.3708, "loss": 0.3708, "step": 4559 }, { "epoch": 2.044002838892832, "learning_rate": 1.2155705145379973e-05, "lm_loss": 0.4583, "loss": 0.4583, "step": 4560 }, { "epoch": 2.044451085129431, "learning_rate": 1.2145322705930859e-05, "lm_loss": 0.526, "loss": 0.526, "step": 4561 }, { "epoch": 2.04489933136603, "learning_rate": 1.2134943279524997e-05, "lm_loss": 0.4593, "loss": 0.4593, "step": 4562 }, { "epoch": 2.0453475776026298, "learning_rate": 1.2124566868595263e-05, "lm_loss": 0.3081, "loss": 0.3081, "step": 4563 }, { "epoch": 2.045795823839229, "learning_rate": 1.2114193475573798e-05, "lm_loss": 0.5979, "loss": 0.5979, "step": 4564 }, { "epoch": 2.0462440700758284, "learning_rate": 1.210382310289205e-05, "lm_loss": 0.4824, "loss": 0.4824, "step": 4565 }, { "epoch": 2.0466923163124275, "learning_rate": 1.209345575298079e-05, "lm_loss": 0.3435, "loss": 0.3435, "step": 4566 }, { "epoch": 2.047140562549027, "learning_rate": 1.2083091428270016e-05, "lm_loss": 0.3724, "loss": 0.3724, "step": 4567 }, { "epoch": 2.047588808785626, "learning_rate": 1.2072730131189072e-05, "lm_loss": 0.5028, "loss": 0.5028, "step": 4568 }, { "epoch": 2.0480370550222253, "learning_rate": 1.2062371864166552e-05, "lm_loss": 0.4199, "loss": 0.4199, "step": 4569 }, { "epoch": 2.048485301258825, "learning_rate": 1.2052016629630369e-05, "lm_loss": 0.4241, "loss": 0.4241, "step": 4570 }, { "epoch": 2.048933547495424, "learning_rate": 1.2041664430007724e-05, "lm_loss": 0.311, "loss": 0.311, "step": 4571 }, { "epoch": 2.0493817937320236, "learning_rate": 1.2031315267725077e-05, "lm_loss": 0.4584, "loss": 0.4584, "step": 4572 }, { "epoch": 2.0498300399686227, "learning_rate": 1.2020969145208207e-05, "lm_loss": 0.3467, "loss": 0.3467, "step": 4573 }, { "epoch": 2.0502782862052222, "learning_rate": 1.2010626064882174e-05, "lm_loss": 0.415, "loss": 0.415, "step": 4574 }, { "epoch": 2.0507265324418213, "learning_rate": 1.2000286029171318e-05, "lm_loss": 0.3418, "loss": 0.3418, "step": 4575 }, { "epoch": 2.0511747786784205, "learning_rate": 1.1989949040499274e-05, "lm_loss": 0.3774, "loss": 0.3774, "step": 4576 }, { "epoch": 2.05162302491502, "learning_rate": 1.1979615101288946e-05, "lm_loss": 0.3322, "loss": 0.3322, "step": 4577 }, { "epoch": 2.052071271151619, "learning_rate": 1.1969284213962537e-05, "lm_loss": 0.5073, "loss": 0.5073, "step": 4578 }, { "epoch": 2.0525195173882187, "learning_rate": 1.1958956380941547e-05, "lm_loss": 0.4449, "loss": 0.4449, "step": 4579 }, { "epoch": 2.052967763624818, "learning_rate": 1.1948631604646723e-05, "lm_loss": 0.3895, "loss": 0.3895, "step": 4580 }, { "epoch": 2.0534160098614174, "learning_rate": 1.193830988749813e-05, "lm_loss": 0.461, "loss": 0.461, "step": 4581 }, { "epoch": 2.0538642560980165, "learning_rate": 1.1927991231915112e-05, "lm_loss": 0.4089, "loss": 0.4089, "step": 4582 }, { "epoch": 2.0543125023346156, "learning_rate": 1.1917675640316257e-05, "lm_loss": 0.3792, "loss": 0.3792, "step": 4583 }, { "epoch": 2.054760748571215, "learning_rate": 1.1907363115119502e-05, "lm_loss": 0.3968, "loss": 0.3968, "step": 4584 }, { "epoch": 2.0552089948078143, "learning_rate": 1.1897053658742005e-05, "lm_loss": 0.3867, "loss": 0.3867, "step": 4585 }, { "epoch": 2.055657241044414, "learning_rate": 1.1886747273600232e-05, "lm_loss": 0.4125, "loss": 0.4125, "step": 4586 }, { "epoch": 2.056105487281013, "learning_rate": 1.187644396210994e-05, "lm_loss": 0.349, "loss": 0.349, "step": 4587 }, { "epoch": 2.0565537335176125, "learning_rate": 1.1866143726686127e-05, "lm_loss": 0.3091, "loss": 0.3091, "step": 4588 }, { "epoch": 2.0570019797542116, "learning_rate": 1.1855846569743104e-05, "lm_loss": 0.4947, "loss": 0.4947, "step": 4589 }, { "epoch": 2.057450225990811, "learning_rate": 1.1845552493694462e-05, "lm_loss": 0.3981, "loss": 0.3981, "step": 4590 }, { "epoch": 2.0578984722274103, "learning_rate": 1.1835261500953035e-05, "lm_loss": 0.5678, "loss": 0.5678, "step": 4591 }, { "epoch": 2.0583467184640094, "learning_rate": 1.1824973593930969e-05, "lm_loss": 0.4654, "loss": 0.4654, "step": 4592 }, { "epoch": 2.058794964700609, "learning_rate": 1.1814688775039676e-05, "lm_loss": 0.3882, "loss": 0.3882, "step": 4593 }, { "epoch": 2.059243210937208, "learning_rate": 1.1804407046689841e-05, "lm_loss": 0.3511, "loss": 0.3511, "step": 4594 }, { "epoch": 2.0596914571738076, "learning_rate": 1.1794128411291436e-05, "lm_loss": 0.3721, "loss": 0.3721, "step": 4595 }, { "epoch": 2.0601397034104068, "learning_rate": 1.1783852871253681e-05, "lm_loss": 0.3203, "loss": 0.3203, "step": 4596 }, { "epoch": 2.0605879496470063, "learning_rate": 1.1773580428985098e-05, "lm_loss": 0.4614, "loss": 0.4614, "step": 4597 }, { "epoch": 2.0610361958836054, "learning_rate": 1.1763311086893481e-05, "lm_loss": 0.4203, "loss": 0.4203, "step": 4598 }, { "epoch": 2.0614844421202045, "learning_rate": 1.175304484738587e-05, "lm_loss": 0.2773, "loss": 0.2773, "step": 4599 }, { "epoch": 2.061932688356804, "learning_rate": 1.1742781712868617e-05, "lm_loss": 0.3675, "loss": 0.3675, "step": 4600 }, { "epoch": 2.062380934593403, "learning_rate": 1.17325216857473e-05, "lm_loss": 0.4507, "loss": 0.4507, "step": 4601 }, { "epoch": 2.0628291808300028, "learning_rate": 1.1722264768426822e-05, "lm_loss": 0.3454, "loss": 0.3454, "step": 4602 }, { "epoch": 2.063277427066602, "learning_rate": 1.171201096331133e-05, "lm_loss": 0.3872, "loss": 0.3872, "step": 4603 }, { "epoch": 2.0637256733032014, "learning_rate": 1.170176027280422e-05, "lm_loss": 0.4223, "loss": 0.4223, "step": 4604 }, { "epoch": 2.0641739195398006, "learning_rate": 1.1691512699308193e-05, "lm_loss": 0.4254, "loss": 0.4254, "step": 4605 }, { "epoch": 2.0646221657763997, "learning_rate": 1.1681268245225215e-05, "lm_loss": 0.2922, "loss": 0.2922, "step": 4606 }, { "epoch": 2.0650704120129992, "learning_rate": 1.1671026912956493e-05, "lm_loss": 0.4413, "loss": 0.4413, "step": 4607 }, { "epoch": 2.0655186582495983, "learning_rate": 1.1660788704902537e-05, "lm_loss": 0.5568, "loss": 0.5568, "step": 4608 }, { "epoch": 2.065966904486198, "learning_rate": 1.1650553623463093e-05, "lm_loss": 0.422, "loss": 0.422, "step": 4609 }, { "epoch": 2.066415150722797, "learning_rate": 1.1640321671037189e-05, "lm_loss": 0.507, "loss": 0.507, "step": 4610 }, { "epoch": 2.0668633969593966, "learning_rate": 1.1630092850023147e-05, "lm_loss": 0.3584, "loss": 0.3584, "step": 4611 }, { "epoch": 2.0673116431959957, "learning_rate": 1.1619867162818504e-05, "lm_loss": 0.3214, "loss": 0.3214, "step": 4612 }, { "epoch": 2.067759889432595, "learning_rate": 1.1609644611820102e-05, "lm_loss": 0.4014, "loss": 0.4014, "step": 4613 }, { "epoch": 2.0682081356691944, "learning_rate": 1.1599425199424019e-05, "lm_loss": 0.3476, "loss": 0.3476, "step": 4614 }, { "epoch": 2.0686563819057935, "learning_rate": 1.1589208928025615e-05, "lm_loss": 0.3229, "loss": 0.3229, "step": 4615 }, { "epoch": 2.069104628142393, "learning_rate": 1.1578995800019527e-05, "lm_loss": 0.3889, "loss": 0.3889, "step": 4616 }, { "epoch": 2.069552874378992, "learning_rate": 1.156878581779961e-05, "lm_loss": 0.4674, "loss": 0.4674, "step": 4617 }, { "epoch": 2.0700011206155917, "learning_rate": 1.1558578983759025e-05, "lm_loss": 0.3511, "loss": 0.3511, "step": 4618 }, { "epoch": 2.070449366852191, "learning_rate": 1.1548375300290179e-05, "lm_loss": 0.3409, "loss": 0.3409, "step": 4619 }, { "epoch": 2.07089761308879, "learning_rate": 1.153817476978474e-05, "lm_loss": 0.4544, "loss": 0.4544, "step": 4620 }, { "epoch": 2.0713458593253895, "learning_rate": 1.1527977394633652e-05, "lm_loss": 0.3161, "loss": 0.3161, "step": 4621 }, { "epoch": 2.0717941055619886, "learning_rate": 1.1517783177227082e-05, "lm_loss": 0.4636, "loss": 0.4636, "step": 4622 }, { "epoch": 2.072242351798588, "learning_rate": 1.1507592119954493e-05, "lm_loss": 0.3157, "loss": 0.3157, "step": 4623 }, { "epoch": 2.0726905980351873, "learning_rate": 1.1497404225204606e-05, "lm_loss": 0.4585, "loss": 0.4585, "step": 4624 }, { "epoch": 2.073138844271787, "learning_rate": 1.1487219495365367e-05, "lm_loss": 0.4418, "loss": 0.4418, "step": 4625 }, { "epoch": 2.073587090508386, "learning_rate": 1.1477037932824013e-05, "lm_loss": 0.422, "loss": 0.422, "step": 4626 }, { "epoch": 2.074035336744985, "learning_rate": 1.1466859539967032e-05, "lm_loss": 0.5167, "loss": 0.5167, "step": 4627 }, { "epoch": 2.0744835829815846, "learning_rate": 1.1456684319180161e-05, "lm_loss": 0.371, "loss": 0.371, "step": 4628 }, { "epoch": 2.0749318292181838, "learning_rate": 1.1446512272848412e-05, "lm_loss": 0.5806, "loss": 0.5806, "step": 4629 }, { "epoch": 2.0753800754547833, "learning_rate": 1.1436343403356017e-05, "lm_loss": 0.4777, "loss": 0.4777, "step": 4630 }, { "epoch": 2.0758283216913824, "learning_rate": 1.1426177713086494e-05, "lm_loss": 0.6415, "loss": 0.6415, "step": 4631 }, { "epoch": 2.076276567927982, "learning_rate": 1.141601520442262e-05, "lm_loss": 0.6152, "loss": 0.6152, "step": 4632 }, { "epoch": 2.076724814164581, "learning_rate": 1.140585587974639e-05, "lm_loss": 0.3171, "loss": 0.3171, "step": 4633 }, { "epoch": 2.07717306040118, "learning_rate": 1.1395699741439092e-05, "lm_loss": 0.3681, "loss": 0.3681, "step": 4634 }, { "epoch": 2.0776213066377798, "learning_rate": 1.1385546791881257e-05, "lm_loss": 0.4193, "loss": 0.4193, "step": 4635 }, { "epoch": 2.078069552874379, "learning_rate": 1.1375397033452634e-05, "lm_loss": 0.4072, "loss": 0.4072, "step": 4636 }, { "epoch": 2.0785177991109784, "learning_rate": 1.136525046853229e-05, "lm_loss": 0.2303, "loss": 0.2303, "step": 4637 }, { "epoch": 2.0789660453475776, "learning_rate": 1.135510709949848e-05, "lm_loss": 0.4318, "loss": 0.4318, "step": 4638 }, { "epoch": 2.079414291584177, "learning_rate": 1.1344966928728746e-05, "lm_loss": 0.4722, "loss": 0.4722, "step": 4639 }, { "epoch": 2.0798625378207762, "learning_rate": 1.1334829958599876e-05, "lm_loss": 0.446, "loss": 0.446, "step": 4640 }, { "epoch": 2.0803107840573754, "learning_rate": 1.1324696191487887e-05, "lm_loss": 0.3793, "loss": 0.3793, "step": 4641 }, { "epoch": 2.080759030293975, "learning_rate": 1.1314565629768072e-05, "lm_loss": 0.3686, "loss": 0.3686, "step": 4642 }, { "epoch": 2.081207276530574, "learning_rate": 1.1304438275814967e-05, "lm_loss": 0.4252, "loss": 0.4252, "step": 4643 }, { "epoch": 2.0816555227671736, "learning_rate": 1.1294314132002332e-05, "lm_loss": 0.5309, "loss": 0.5309, "step": 4644 }, { "epoch": 2.0821037690037727, "learning_rate": 1.1284193200703205e-05, "lm_loss": 0.4883, "loss": 0.4883, "step": 4645 }, { "epoch": 2.0825520152403723, "learning_rate": 1.1274075484289855e-05, "lm_loss": 0.3934, "loss": 0.3934, "step": 4646 }, { "epoch": 2.0830002614769714, "learning_rate": 1.1263960985133807e-05, "lm_loss": 0.296, "loss": 0.296, "step": 4647 }, { "epoch": 2.0834485077135705, "learning_rate": 1.125384970560583e-05, "lm_loss": 0.4613, "loss": 0.4613, "step": 4648 }, { "epoch": 2.08389675395017, "learning_rate": 1.1243741648075917e-05, "lm_loss": 0.327, "loss": 0.327, "step": 4649 }, { "epoch": 2.084345000186769, "learning_rate": 1.1233636814913336e-05, "lm_loss": 0.5721, "loss": 0.5721, "step": 4650 }, { "epoch": 2.0847932464233687, "learning_rate": 1.1223535208486593e-05, "lm_loss": 0.5038, "loss": 0.5038, "step": 4651 }, { "epoch": 2.085241492659968, "learning_rate": 1.1213436831163416e-05, "lm_loss": 0.5916, "loss": 0.5916, "step": 4652 }, { "epoch": 2.0856897388965674, "learning_rate": 1.1203341685310809e-05, "lm_loss": 0.4222, "loss": 0.4222, "step": 4653 }, { "epoch": 2.0861379851331665, "learning_rate": 1.119324977329497e-05, "lm_loss": 0.3062, "loss": 0.3062, "step": 4654 }, { "epoch": 2.0865862313697656, "learning_rate": 1.1183161097481402e-05, "lm_loss": 0.3637, "loss": 0.3637, "step": 4655 }, { "epoch": 2.087034477606365, "learning_rate": 1.1173075660234811e-05, "lm_loss": 0.4232, "loss": 0.4232, "step": 4656 }, { "epoch": 2.0874827238429643, "learning_rate": 1.1162993463919141e-05, "lm_loss": 0.3473, "loss": 0.3473, "step": 4657 }, { "epoch": 2.087930970079564, "learning_rate": 1.11529145108976e-05, "lm_loss": 0.4807, "loss": 0.4807, "step": 4658 }, { "epoch": 2.088379216316163, "learning_rate": 1.1142838803532601e-05, "lm_loss": 0.3492, "loss": 0.3492, "step": 4659 }, { "epoch": 2.0888274625527625, "learning_rate": 1.1132766344185829e-05, "lm_loss": 0.5195, "loss": 0.5195, "step": 4660 }, { "epoch": 2.0892757087893616, "learning_rate": 1.1122697135218205e-05, "lm_loss": 0.2536, "loss": 0.2536, "step": 4661 }, { "epoch": 2.0897239550259608, "learning_rate": 1.1112631178989857e-05, "lm_loss": 0.5071, "loss": 0.5071, "step": 4662 }, { "epoch": 2.0901722012625603, "learning_rate": 1.1102568477860187e-05, "lm_loss": 0.3965, "loss": 0.3965, "step": 4663 }, { "epoch": 2.0906204474991594, "learning_rate": 1.1092509034187814e-05, "lm_loss": 0.3801, "loss": 0.3801, "step": 4664 }, { "epoch": 2.091068693735759, "learning_rate": 1.1082452850330599e-05, "lm_loss": 0.3786, "loss": 0.3786, "step": 4665 }, { "epoch": 2.091516939972358, "learning_rate": 1.1072399928645652e-05, "lm_loss": 0.3735, "loss": 0.3735, "step": 4666 }, { "epoch": 2.0919651862089577, "learning_rate": 1.1062350271489283e-05, "lm_loss": 0.4612, "loss": 0.4612, "step": 4667 }, { "epoch": 2.0924134324455568, "learning_rate": 1.1052303881217068e-05, "lm_loss": 0.3915, "loss": 0.3915, "step": 4668 }, { "epoch": 2.092861678682156, "learning_rate": 1.104226076018382e-05, "lm_loss": 0.6559, "loss": 0.6559, "step": 4669 }, { "epoch": 2.0933099249187554, "learning_rate": 1.1032220910743554e-05, "lm_loss": 0.3671, "loss": 0.3671, "step": 4670 }, { "epoch": 2.0937581711553546, "learning_rate": 1.1022184335249547e-05, "lm_loss": 0.3135, "loss": 0.3135, "step": 4671 }, { "epoch": 2.094206417391954, "learning_rate": 1.10121510360543e-05, "lm_loss": 0.3981, "loss": 0.3981, "step": 4672 }, { "epoch": 2.0946546636285532, "learning_rate": 1.1002121015509544e-05, "lm_loss": 0.5076, "loss": 0.5076, "step": 4673 }, { "epoch": 2.095102909865153, "learning_rate": 1.0992094275966256e-05, "lm_loss": 0.3433, "loss": 0.3433, "step": 4674 }, { "epoch": 2.095551156101752, "learning_rate": 1.098207081977461e-05, "lm_loss": 0.3946, "loss": 0.3946, "step": 4675 }, { "epoch": 2.095999402338351, "learning_rate": 1.0972050649284038e-05, "lm_loss": 0.3806, "loss": 0.3806, "step": 4676 }, { "epoch": 2.0964476485749506, "learning_rate": 1.0962033766843211e-05, "lm_loss": 0.349, "loss": 0.349, "step": 4677 }, { "epoch": 2.0968958948115497, "learning_rate": 1.0952020174799993e-05, "lm_loss": 0.4235, "loss": 0.4235, "step": 4678 }, { "epoch": 2.0973441410481493, "learning_rate": 1.0942009875501502e-05, "lm_loss": 0.3498, "loss": 0.3498, "step": 4679 }, { "epoch": 2.0977923872847484, "learning_rate": 1.0932002871294095e-05, "lm_loss": 0.2831, "loss": 0.2831, "step": 4680 }, { "epoch": 2.098240633521348, "learning_rate": 1.0921999164523311e-05, "lm_loss": 0.4743, "loss": 0.4743, "step": 4681 }, { "epoch": 2.098688879757947, "learning_rate": 1.0911998757533983e-05, "lm_loss": 0.2288, "loss": 0.2288, "step": 4682 }, { "epoch": 2.099137125994546, "learning_rate": 1.0902001652670107e-05, "lm_loss": 0.471, "loss": 0.471, "step": 4683 }, { "epoch": 2.0995853722311457, "learning_rate": 1.0892007852274944e-05, "lm_loss": 0.4839, "loss": 0.4839, "step": 4684 }, { "epoch": 2.100033618467745, "learning_rate": 1.0882017358690972e-05, "lm_loss": 0.5092, "loss": 0.5092, "step": 4685 }, { "epoch": 2.1004818647043444, "learning_rate": 1.0872030174259875e-05, "lm_loss": 0.4484, "loss": 0.4484, "step": 4686 }, { "epoch": 2.1009301109409435, "learning_rate": 1.0862046301322587e-05, "lm_loss": 0.3052, "loss": 0.3052, "step": 4687 }, { "epoch": 2.101378357177543, "learning_rate": 1.085206574221926e-05, "lm_loss": 0.3783, "loss": 0.3783, "step": 4688 }, { "epoch": 2.101826603414142, "learning_rate": 1.0842088499289252e-05, "lm_loss": 0.3636, "loss": 0.3636, "step": 4689 }, { "epoch": 2.1022748496507413, "learning_rate": 1.0832114574871164e-05, "lm_loss": 0.7387, "loss": 0.7387, "step": 4690 }, { "epoch": 2.102723095887341, "learning_rate": 1.082214397130281e-05, "lm_loss": 0.43, "loss": 0.43, "step": 4691 }, { "epoch": 2.10317134212394, "learning_rate": 1.0812176690921227e-05, "lm_loss": 0.3843, "loss": 0.3843, "step": 4692 }, { "epoch": 2.1036195883605395, "learning_rate": 1.0802212736062686e-05, "lm_loss": 0.3541, "loss": 0.3541, "step": 4693 }, { "epoch": 2.1040678345971386, "learning_rate": 1.0792252109062645e-05, "lm_loss": 0.4985, "loss": 0.4985, "step": 4694 }, { "epoch": 2.104516080833738, "learning_rate": 1.078229481225582e-05, "lm_loss": 0.2843, "loss": 0.2843, "step": 4695 }, { "epoch": 2.1049643270703373, "learning_rate": 1.0772340847976115e-05, "lm_loss": 0.5497, "loss": 0.5497, "step": 4696 }, { "epoch": 2.1054125733069364, "learning_rate": 1.076239021855667e-05, "lm_loss": 0.3981, "loss": 0.3981, "step": 4697 }, { "epoch": 2.105860819543536, "learning_rate": 1.0752442926329856e-05, "lm_loss": 0.3273, "loss": 0.3273, "step": 4698 }, { "epoch": 2.106309065780135, "learning_rate": 1.0742498973627216e-05, "lm_loss": 0.4066, "loss": 0.4066, "step": 4699 }, { "epoch": 2.1067573120167347, "learning_rate": 1.0732558362779566e-05, "lm_loss": 0.5218, "loss": 0.5218, "step": 4700 }, { "epoch": 2.1072055582533338, "learning_rate": 1.0722621096116917e-05, "lm_loss": 0.2536, "loss": 0.2536, "step": 4701 }, { "epoch": 2.1076538044899333, "learning_rate": 1.0712687175968472e-05, "lm_loss": 0.3292, "loss": 0.3292, "step": 4702 }, { "epoch": 2.1081020507265325, "learning_rate": 1.0702756604662689e-05, "lm_loss": 0.4316, "loss": 0.4316, "step": 4703 }, { "epoch": 2.1085502969631316, "learning_rate": 1.0692829384527203e-05, "lm_loss": 0.3773, "loss": 0.3773, "step": 4704 }, { "epoch": 2.108998543199731, "learning_rate": 1.0682905517888891e-05, "lm_loss": 0.3943, "loss": 0.3943, "step": 4705 }, { "epoch": 2.1094467894363302, "learning_rate": 1.0672985007073847e-05, "lm_loss": 0.3104, "loss": 0.3104, "step": 4706 }, { "epoch": 2.10989503567293, "learning_rate": 1.0663067854407346e-05, "lm_loss": 0.4486, "loss": 0.4486, "step": 4707 }, { "epoch": 2.110343281909529, "learning_rate": 1.0653154062213908e-05, "lm_loss": 0.5292, "loss": 0.5292, "step": 4708 }, { "epoch": 2.1107915281461285, "learning_rate": 1.0643243632817252e-05, "lm_loss": 0.2839, "loss": 0.2839, "step": 4709 }, { "epoch": 2.1112397743827276, "learning_rate": 1.0633336568540312e-05, "lm_loss": 0.4933, "loss": 0.4933, "step": 4710 }, { "epoch": 2.1116880206193267, "learning_rate": 1.0623432871705241e-05, "lm_loss": 0.4141, "loss": 0.4141, "step": 4711 }, { "epoch": 2.1121362668559263, "learning_rate": 1.0613532544633376e-05, "lm_loss": 0.316, "loss": 0.316, "step": 4712 }, { "epoch": 2.1125845130925254, "learning_rate": 1.0603635589645292e-05, "lm_loss": 0.4238, "loss": 0.4238, "step": 4713 }, { "epoch": 2.113032759329125, "learning_rate": 1.059374200906077e-05, "lm_loss": 0.3466, "loss": 0.3466, "step": 4714 }, { "epoch": 2.113481005565724, "learning_rate": 1.0583851805198777e-05, "lm_loss": 0.4911, "loss": 0.4911, "step": 4715 }, { "epoch": 2.1139292518023236, "learning_rate": 1.0573964980377516e-05, "lm_loss": 0.4631, "loss": 0.4631, "step": 4716 }, { "epoch": 2.1143774980389227, "learning_rate": 1.0564081536914382e-05, "lm_loss": 0.4454, "loss": 0.4454, "step": 4717 }, { "epoch": 2.114825744275522, "learning_rate": 1.055420147712599e-05, "lm_loss": 0.5012, "loss": 0.5012, "step": 4718 }, { "epoch": 2.1152739905121214, "learning_rate": 1.0544324803328157e-05, "lm_loss": 0.4846, "loss": 0.4846, "step": 4719 }, { "epoch": 2.1157222367487205, "learning_rate": 1.053445151783589e-05, "lm_loss": 0.3581, "loss": 0.3581, "step": 4720 }, { "epoch": 2.11617048298532, "learning_rate": 1.0524581622963419e-05, "lm_loss": 0.4888, "loss": 0.4888, "step": 4721 }, { "epoch": 2.116618729221919, "learning_rate": 1.051471512102419e-05, "lm_loss": 0.458, "loss": 0.458, "step": 4722 }, { "epoch": 2.1170669754585187, "learning_rate": 1.050485201433082e-05, "lm_loss": 0.452, "loss": 0.452, "step": 4723 }, { "epoch": 2.117515221695118, "learning_rate": 1.0494992305195161e-05, "lm_loss": 0.5231, "loss": 0.5231, "step": 4724 }, { "epoch": 2.117963467931717, "learning_rate": 1.0485135995928266e-05, "lm_loss": 0.4263, "loss": 0.4263, "step": 4725 }, { "epoch": 2.1184117141683165, "learning_rate": 1.047528308884035e-05, "lm_loss": 0.33, "loss": 0.33, "step": 4726 }, { "epoch": 2.1188599604049156, "learning_rate": 1.0465433586240905e-05, "lm_loss": 0.3675, "loss": 0.3675, "step": 4727 }, { "epoch": 2.119308206641515, "learning_rate": 1.0455587490438554e-05, "lm_loss": 0.4412, "loss": 0.4412, "step": 4728 }, { "epoch": 2.1197564528781143, "learning_rate": 1.0445744803741162e-05, "lm_loss": 0.3604, "loss": 0.3604, "step": 4729 }, { "epoch": 2.120204699114714, "learning_rate": 1.0435905528455787e-05, "lm_loss": 0.3563, "loss": 0.3563, "step": 4730 }, { "epoch": 2.120652945351313, "learning_rate": 1.0426069666888669e-05, "lm_loss": 0.3163, "loss": 0.3163, "step": 4731 }, { "epoch": 2.121101191587912, "learning_rate": 1.0416237221345274e-05, "lm_loss": 0.5191, "loss": 0.5191, "step": 4732 }, { "epoch": 2.1215494378245117, "learning_rate": 1.0406408194130259e-05, "lm_loss": 0.2468, "loss": 0.2468, "step": 4733 }, { "epoch": 2.121997684061111, "learning_rate": 1.0396582587547466e-05, "lm_loss": 0.4945, "loss": 0.4945, "step": 4734 }, { "epoch": 2.1224459302977103, "learning_rate": 1.038676040389995e-05, "lm_loss": 0.2939, "loss": 0.2939, "step": 4735 }, { "epoch": 2.1228941765343095, "learning_rate": 1.037694164548996e-05, "lm_loss": 0.5104, "loss": 0.5104, "step": 4736 }, { "epoch": 2.123342422770909, "learning_rate": 1.0367126314618947e-05, "lm_loss": 0.4088, "loss": 0.4088, "step": 4737 }, { "epoch": 2.123790669007508, "learning_rate": 1.0357314413587555e-05, "lm_loss": 0.2768, "loss": 0.2768, "step": 4738 }, { "epoch": 2.1242389152441072, "learning_rate": 1.034750594469561e-05, "lm_loss": 0.493, "loss": 0.493, "step": 4739 }, { "epoch": 2.124687161480707, "learning_rate": 1.0337700910242162e-05, "lm_loss": 0.5003, "loss": 0.5003, "step": 4740 }, { "epoch": 2.125135407717306, "learning_rate": 1.0327899312525422e-05, "lm_loss": 0.5056, "loss": 0.5056, "step": 4741 }, { "epoch": 2.1255836539539055, "learning_rate": 1.0318101153842821e-05, "lm_loss": 0.4353, "loss": 0.4353, "step": 4742 }, { "epoch": 2.1260319001905046, "learning_rate": 1.030830643649099e-05, "lm_loss": 0.3048, "loss": 0.3048, "step": 4743 }, { "epoch": 2.126480146427104, "learning_rate": 1.029851516276571e-05, "lm_loss": 0.309, "loss": 0.309, "step": 4744 }, { "epoch": 2.1269283926637033, "learning_rate": 1.028872733496202e-05, "lm_loss": 0.3559, "loss": 0.3559, "step": 4745 }, { "epoch": 2.1273766389003024, "learning_rate": 1.0278942955374088e-05, "lm_loss": 0.4507, "loss": 0.4507, "step": 4746 }, { "epoch": 2.127824885136902, "learning_rate": 1.0269162026295315e-05, "lm_loss": 0.4085, "loss": 0.4085, "step": 4747 }, { "epoch": 2.128273131373501, "learning_rate": 1.0259384550018283e-05, "lm_loss": 0.3879, "loss": 0.3879, "step": 4748 }, { "epoch": 2.1287213776101006, "learning_rate": 1.024961052883475e-05, "lm_loss": 0.3646, "loss": 0.3646, "step": 4749 }, { "epoch": 2.1291696238466997, "learning_rate": 1.023983996503568e-05, "lm_loss": 0.3065, "loss": 0.3065, "step": 4750 }, { "epoch": 2.1296178700832993, "learning_rate": 1.0230072860911232e-05, "lm_loss": 0.4607, "loss": 0.4607, "step": 4751 }, { "epoch": 2.1300661163198984, "learning_rate": 1.0220309218750732e-05, "lm_loss": 0.3491, "loss": 0.3491, "step": 4752 }, { "epoch": 2.1305143625564975, "learning_rate": 1.021054904084271e-05, "lm_loss": 0.4342, "loss": 0.4342, "step": 4753 }, { "epoch": 2.130962608793097, "learning_rate": 1.0200792329474878e-05, "lm_loss": 0.3349, "loss": 0.3349, "step": 4754 }, { "epoch": 2.131410855029696, "learning_rate": 1.0191039086934149e-05, "lm_loss": 0.4654, "loss": 0.4654, "step": 4755 }, { "epoch": 2.1318591012662957, "learning_rate": 1.0181289315506612e-05, "lm_loss": 0.3182, "loss": 0.3182, "step": 4756 }, { "epoch": 2.132307347502895, "learning_rate": 1.017154301747753e-05, "lm_loss": 0.3869, "loss": 0.3869, "step": 4757 }, { "epoch": 2.1327555937394944, "learning_rate": 1.0161800195131371e-05, "lm_loss": 0.2783, "loss": 0.2783, "step": 4758 }, { "epoch": 2.1332038399760935, "learning_rate": 1.0152060850751794e-05, "lm_loss": 0.5513, "loss": 0.5513, "step": 4759 }, { "epoch": 2.1336520862126926, "learning_rate": 1.0142324986621607e-05, "lm_loss": 0.2721, "loss": 0.2721, "step": 4760 }, { "epoch": 2.134100332449292, "learning_rate": 1.0132592605022842e-05, "lm_loss": 0.388, "loss": 0.388, "step": 4761 }, { "epoch": 2.1345485786858913, "learning_rate": 1.0122863708236693e-05, "lm_loss": 0.4717, "loss": 0.4717, "step": 4762 }, { "epoch": 2.134996824922491, "learning_rate": 1.0113138298543547e-05, "lm_loss": 0.6075, "loss": 0.6075, "step": 4763 }, { "epoch": 2.13544507115909, "learning_rate": 1.0103416378222977e-05, "lm_loss": 0.2463, "loss": 0.2463, "step": 4764 }, { "epoch": 2.1358933173956896, "learning_rate": 1.0093697949553712e-05, "lm_loss": 0.4421, "loss": 0.4421, "step": 4765 }, { "epoch": 2.1363415636322887, "learning_rate": 1.0083983014813694e-05, "lm_loss": 0.5683, "loss": 0.5683, "step": 4766 }, { "epoch": 2.136789809868888, "learning_rate": 1.0074271576280036e-05, "lm_loss": 0.5135, "loss": 0.5135, "step": 4767 }, { "epoch": 2.1372380561054873, "learning_rate": 1.0064563636229016e-05, "lm_loss": 0.4801, "loss": 0.4801, "step": 4768 }, { "epoch": 2.1376863023420865, "learning_rate": 1.0054859196936111e-05, "lm_loss": 0.5315, "loss": 0.5315, "step": 4769 }, { "epoch": 2.138134548578686, "learning_rate": 1.0045158260675985e-05, "lm_loss": 0.2314, "loss": 0.2314, "step": 4770 }, { "epoch": 2.138582794815285, "learning_rate": 1.0035460829722434e-05, "lm_loss": 0.3166, "loss": 0.3166, "step": 4771 }, { "epoch": 2.1390310410518847, "learning_rate": 1.0025766906348505e-05, "lm_loss": 0.4755, "loss": 0.4755, "step": 4772 }, { "epoch": 2.139479287288484, "learning_rate": 1.0016076492826354e-05, "lm_loss": 0.3885, "loss": 0.3885, "step": 4773 }, { "epoch": 2.139927533525083, "learning_rate": 1.0006389591427353e-05, "lm_loss": 0.3979, "loss": 0.3979, "step": 4774 }, { "epoch": 2.1403757797616825, "learning_rate": 9.996706204422057e-06, "lm_loss": 0.296, "loss": 0.296, "step": 4775 }, { "epoch": 2.1408240259982816, "learning_rate": 9.987026334080154e-06, "lm_loss": 0.3922, "loss": 0.3922, "step": 4776 }, { "epoch": 2.141272272234881, "learning_rate": 9.977349982670553e-06, "lm_loss": 0.364, "loss": 0.364, "step": 4777 }, { "epoch": 2.1417205184714803, "learning_rate": 9.967677152461324e-06, "lm_loss": 0.404, "loss": 0.404, "step": 4778 }, { "epoch": 2.14216876470808, "learning_rate": 9.958007845719691e-06, "lm_loss": 0.3179, "loss": 0.3179, "step": 4779 }, { "epoch": 2.142617010944679, "learning_rate": 9.948342064712083e-06, "lm_loss": 0.4066, "loss": 0.4066, "step": 4780 }, { "epoch": 2.143065257181278, "learning_rate": 9.938679811704082e-06, "lm_loss": 0.4237, "loss": 0.4237, "step": 4781 }, { "epoch": 2.1435135034178776, "learning_rate": 9.929021088960459e-06, "lm_loss": 0.2997, "loss": 0.2997, "step": 4782 }, { "epoch": 2.1439617496544767, "learning_rate": 9.919365898745148e-06, "lm_loss": 0.4132, "loss": 0.4132, "step": 4783 }, { "epoch": 2.1444099958910763, "learning_rate": 9.909714243321243e-06, "lm_loss": 0.3401, "loss": 0.3401, "step": 4784 }, { "epoch": 2.1448582421276754, "learning_rate": 9.90006612495104e-06, "lm_loss": 0.5886, "loss": 0.5886, "step": 4785 }, { "epoch": 2.145306488364275, "learning_rate": 9.89042154589597e-06, "lm_loss": 0.373, "loss": 0.373, "step": 4786 }, { "epoch": 2.145754734600874, "learning_rate": 9.88078050841666e-06, "lm_loss": 0.3426, "loss": 0.3426, "step": 4787 }, { "epoch": 2.146202980837473, "learning_rate": 9.87114301477291e-06, "lm_loss": 0.4251, "loss": 0.4251, "step": 4788 }, { "epoch": 2.1466512270740727, "learning_rate": 9.861509067223652e-06, "lm_loss": 0.3749, "loss": 0.3749, "step": 4789 }, { "epoch": 2.147099473310672, "learning_rate": 9.851878668027045e-06, "lm_loss": 0.4677, "loss": 0.4677, "step": 4790 }, { "epoch": 2.1475477195472714, "learning_rate": 9.842251819440364e-06, "lm_loss": 0.5054, "loss": 0.5054, "step": 4791 }, { "epoch": 2.1479959657838705, "learning_rate": 9.832628523720081e-06, "lm_loss": 0.4635, "loss": 0.4635, "step": 4792 }, { "epoch": 2.14844421202047, "learning_rate": 9.823008783121829e-06, "lm_loss": 0.2949, "loss": 0.2949, "step": 4793 }, { "epoch": 2.148892458257069, "learning_rate": 9.813392599900395e-06, "lm_loss": 0.512, "loss": 0.512, "step": 4794 }, { "epoch": 2.1493407044936683, "learning_rate": 9.803779976309748e-06, "lm_loss": 0.4485, "loss": 0.4485, "step": 4795 }, { "epoch": 2.149788950730268, "learning_rate": 9.794170914603029e-06, "lm_loss": 0.4318, "loss": 0.4318, "step": 4796 }, { "epoch": 2.150237196966867, "learning_rate": 9.784565417032513e-06, "lm_loss": 0.6, "loss": 0.6, "step": 4797 }, { "epoch": 2.1506854432034666, "learning_rate": 9.774963485849666e-06, "lm_loss": 0.393, "loss": 0.393, "step": 4798 }, { "epoch": 2.1511336894400657, "learning_rate": 9.765365123305114e-06, "lm_loss": 0.522, "loss": 0.522, "step": 4799 }, { "epoch": 2.1515819356766652, "learning_rate": 9.755770331648642e-06, "lm_loss": 0.623, "loss": 0.623, "step": 4800 }, { "epoch": 2.1520301819132643, "learning_rate": 9.746179113129211e-06, "lm_loss": 0.3603, "loss": 0.3603, "step": 4801 }, { "epoch": 2.1524784281498635, "learning_rate": 9.736591469994913e-06, "lm_loss": 0.3047, "loss": 0.3047, "step": 4802 }, { "epoch": 2.152926674386463, "learning_rate": 9.72700740449303e-06, "lm_loss": 0.5491, "loss": 0.5491, "step": 4803 }, { "epoch": 2.153374920623062, "learning_rate": 9.71742691887001e-06, "lm_loss": 0.3576, "loss": 0.3576, "step": 4804 }, { "epoch": 2.1538231668596617, "learning_rate": 9.707850015371428e-06, "lm_loss": 0.4277, "loss": 0.4277, "step": 4805 }, { "epoch": 2.154271413096261, "learning_rate": 9.698276696242054e-06, "lm_loss": 0.3993, "loss": 0.3993, "step": 4806 }, { "epoch": 2.1547196593328604, "learning_rate": 9.688706963725799e-06, "lm_loss": 0.3097, "loss": 0.3097, "step": 4807 }, { "epoch": 2.1551679055694595, "learning_rate": 9.679140820065744e-06, "lm_loss": 0.4755, "loss": 0.4755, "step": 4808 }, { "epoch": 2.1556161518060586, "learning_rate": 9.669578267504131e-06, "lm_loss": 0.3239, "loss": 0.3239, "step": 4809 }, { "epoch": 2.156064398042658, "learning_rate": 9.660019308282333e-06, "lm_loss": 0.5198, "loss": 0.5198, "step": 4810 }, { "epoch": 2.1565126442792573, "learning_rate": 9.650463944640913e-06, "lm_loss": 0.3248, "loss": 0.3248, "step": 4811 }, { "epoch": 2.156960890515857, "learning_rate": 9.64091217881959e-06, "lm_loss": 0.5042, "loss": 0.5042, "step": 4812 }, { "epoch": 2.157409136752456, "learning_rate": 9.631364013057203e-06, "lm_loss": 0.4623, "loss": 0.4623, "step": 4813 }, { "epoch": 2.1578573829890555, "learning_rate": 9.621819449591787e-06, "lm_loss": 0.5701, "loss": 0.5701, "step": 4814 }, { "epoch": 2.1583056292256546, "learning_rate": 9.61227849066052e-06, "lm_loss": 0.5049, "loss": 0.5049, "step": 4815 }, { "epoch": 2.1587538754622537, "learning_rate": 9.60274113849973e-06, "lm_loss": 0.441, "loss": 0.441, "step": 4816 }, { "epoch": 2.1592021216988533, "learning_rate": 9.593207395344916e-06, "lm_loss": 0.409, "loss": 0.409, "step": 4817 }, { "epoch": 2.1596503679354524, "learning_rate": 9.583677263430696e-06, "lm_loss": 0.4638, "loss": 0.4638, "step": 4818 }, { "epoch": 2.160098614172052, "learning_rate": 9.574150744990875e-06, "lm_loss": 0.5053, "loss": 0.5053, "step": 4819 }, { "epoch": 2.160546860408651, "learning_rate": 9.564627842258411e-06, "lm_loss": 0.4918, "loss": 0.4918, "step": 4820 }, { "epoch": 2.1609951066452506, "learning_rate": 9.555108557465384e-06, "lm_loss": 0.6155, "loss": 0.6155, "step": 4821 }, { "epoch": 2.1614433528818497, "learning_rate": 9.545592892843053e-06, "lm_loss": 0.3452, "loss": 0.3452, "step": 4822 }, { "epoch": 2.161891599118449, "learning_rate": 9.536080850621831e-06, "lm_loss": 0.4912, "loss": 0.4912, "step": 4823 }, { "epoch": 2.1623398453550484, "learning_rate": 9.526572433031245e-06, "lm_loss": 0.2593, "loss": 0.2593, "step": 4824 }, { "epoch": 2.1627880915916475, "learning_rate": 9.517067642300031e-06, "lm_loss": 0.3576, "loss": 0.3576, "step": 4825 }, { "epoch": 2.163236337828247, "learning_rate": 9.507566480656024e-06, "lm_loss": 0.4603, "loss": 0.4603, "step": 4826 }, { "epoch": 2.163684584064846, "learning_rate": 9.498068950326234e-06, "lm_loss": 0.4796, "loss": 0.4796, "step": 4827 }, { "epoch": 2.1641328303014458, "learning_rate": 9.488575053536817e-06, "lm_loss": 0.4214, "loss": 0.4214, "step": 4828 }, { "epoch": 2.164581076538045, "learning_rate": 9.47908479251306e-06, "lm_loss": 0.3447, "loss": 0.3447, "step": 4829 }, { "epoch": 2.165029322774644, "learning_rate": 9.469598169479429e-06, "lm_loss": 0.4589, "loss": 0.4589, "step": 4830 }, { "epoch": 2.1654775690112436, "learning_rate": 9.460115186659504e-06, "lm_loss": 0.3618, "loss": 0.3618, "step": 4831 }, { "epoch": 2.1659258152478427, "learning_rate": 9.450635846276032e-06, "lm_loss": 0.3516, "loss": 0.3516, "step": 4832 }, { "epoch": 2.1663740614844422, "learning_rate": 9.441160150550903e-06, "lm_loss": 0.411, "loss": 0.411, "step": 4833 }, { "epoch": 2.1668223077210413, "learning_rate": 9.431688101705156e-06, "lm_loss": 0.4448, "loss": 0.4448, "step": 4834 }, { "epoch": 2.167270553957641, "learning_rate": 9.422219701958973e-06, "lm_loss": 0.4049, "loss": 0.4049, "step": 4835 }, { "epoch": 2.16771880019424, "learning_rate": 9.412754953531663e-06, "lm_loss": 0.3378, "loss": 0.3378, "step": 4836 }, { "epoch": 2.168167046430839, "learning_rate": 9.403293858641708e-06, "lm_loss": 0.3462, "loss": 0.3462, "step": 4837 }, { "epoch": 2.1686152926674387, "learning_rate": 9.393836419506718e-06, "lm_loss": 0.2959, "loss": 0.2959, "step": 4838 }, { "epoch": 2.169063538904038, "learning_rate": 9.384382638343442e-06, "lm_loss": 0.4965, "loss": 0.4965, "step": 4839 }, { "epoch": 2.1695117851406374, "learning_rate": 9.37493251736778e-06, "lm_loss": 0.3393, "loss": 0.3393, "step": 4840 }, { "epoch": 2.1699600313772365, "learning_rate": 9.365486058794783e-06, "lm_loss": 0.434, "loss": 0.434, "step": 4841 }, { "epoch": 2.170408277613836, "learning_rate": 9.356043264838608e-06, "lm_loss": 0.3434, "loss": 0.3434, "step": 4842 }, { "epoch": 2.170856523850435, "learning_rate": 9.346604137712608e-06, "lm_loss": 0.2803, "loss": 0.2803, "step": 4843 }, { "epoch": 2.1713047700870343, "learning_rate": 9.337168679629222e-06, "lm_loss": 0.5255, "loss": 0.5255, "step": 4844 }, { "epoch": 2.171753016323634, "learning_rate": 9.327736892800062e-06, "lm_loss": 0.3299, "loss": 0.3299, "step": 4845 }, { "epoch": 2.172201262560233, "learning_rate": 9.31830877943588e-06, "lm_loss": 0.5158, "loss": 0.5158, "step": 4846 }, { "epoch": 2.1726495087968325, "learning_rate": 9.30888434174654e-06, "lm_loss": 0.5617, "loss": 0.5617, "step": 4847 }, { "epoch": 2.1730977550334316, "learning_rate": 9.299463581941068e-06, "lm_loss": 0.6098, "loss": 0.6098, "step": 4848 }, { "epoch": 2.173546001270031, "learning_rate": 9.290046502227634e-06, "lm_loss": 0.4862, "loss": 0.4862, "step": 4849 }, { "epoch": 2.1739942475066303, "learning_rate": 9.280633104813513e-06, "lm_loss": 0.3422, "loss": 0.3422, "step": 4850 }, { "epoch": 2.1744424937432294, "learning_rate": 9.271223391905148e-06, "lm_loss": 0.3389, "loss": 0.3389, "step": 4851 }, { "epoch": 2.174890739979829, "learning_rate": 9.261817365708108e-06, "lm_loss": 0.3909, "loss": 0.3909, "step": 4852 }, { "epoch": 2.175338986216428, "learning_rate": 9.252415028427094e-06, "lm_loss": 0.2766, "loss": 0.2766, "step": 4853 }, { "epoch": 2.1757872324530276, "learning_rate": 9.243016382265959e-06, "lm_loss": 0.3861, "loss": 0.3861, "step": 4854 }, { "epoch": 2.1762354786896267, "learning_rate": 9.233621429427656e-06, "lm_loss": 0.4912, "loss": 0.4912, "step": 4855 }, { "epoch": 2.1766837249262263, "learning_rate": 9.224230172114304e-06, "lm_loss": 0.326, "loss": 0.326, "step": 4856 }, { "epoch": 2.1771319711628254, "learning_rate": 9.214842612527155e-06, "lm_loss": 0.4725, "loss": 0.4725, "step": 4857 }, { "epoch": 2.1775802173994245, "learning_rate": 9.205458752866569e-06, "lm_loss": 0.3193, "loss": 0.3193, "step": 4858 }, { "epoch": 2.178028463636024, "learning_rate": 9.196078595332062e-06, "lm_loss": 0.3349, "loss": 0.3349, "step": 4859 }, { "epoch": 2.178476709872623, "learning_rate": 9.186702142122274e-06, "lm_loss": 0.5154, "loss": 0.5154, "step": 4860 }, { "epoch": 2.1789249561092228, "learning_rate": 9.177329395434977e-06, "lm_loss": 0.3918, "loss": 0.3918, "step": 4861 }, { "epoch": 2.179373202345822, "learning_rate": 9.167960357467089e-06, "lm_loss": 0.4335, "loss": 0.4335, "step": 4862 }, { "epoch": 2.1798214485824214, "learning_rate": 9.15859503041462e-06, "lm_loss": 0.4136, "loss": 0.4136, "step": 4863 }, { "epoch": 2.1802696948190206, "learning_rate": 9.14923341647275e-06, "lm_loss": 0.321, "loss": 0.321, "step": 4864 }, { "epoch": 2.1807179410556197, "learning_rate": 9.139875517835778e-06, "lm_loss": 0.5194, "loss": 0.5194, "step": 4865 }, { "epoch": 2.1811661872922192, "learning_rate": 9.130521336697109e-06, "lm_loss": 0.4641, "loss": 0.4641, "step": 4866 }, { "epoch": 2.1816144335288183, "learning_rate": 9.121170875249319e-06, "lm_loss": 0.3998, "loss": 0.3998, "step": 4867 }, { "epoch": 2.182062679765418, "learning_rate": 9.111824135684067e-06, "lm_loss": 0.3865, "loss": 0.3865, "step": 4868 }, { "epoch": 2.182510926002017, "learning_rate": 9.102481120192159e-06, "lm_loss": 0.3401, "loss": 0.3401, "step": 4869 }, { "epoch": 2.1829591722386166, "learning_rate": 9.093141830963558e-06, "lm_loss": 0.3684, "loss": 0.3684, "step": 4870 }, { "epoch": 2.1834074184752157, "learning_rate": 9.083806270187304e-06, "lm_loss": 0.373, "loss": 0.373, "step": 4871 }, { "epoch": 2.183855664711815, "learning_rate": 9.074474440051587e-06, "lm_loss": 0.3471, "loss": 0.3471, "step": 4872 }, { "epoch": 2.1843039109484144, "learning_rate": 9.065146342743733e-06, "lm_loss": 0.3262, "loss": 0.3262, "step": 4873 }, { "epoch": 2.1847521571850135, "learning_rate": 9.05582198045016e-06, "lm_loss": 0.2963, "loss": 0.2963, "step": 4874 }, { "epoch": 2.185200403421613, "learning_rate": 9.046501355356452e-06, "lm_loss": 0.4802, "loss": 0.4802, "step": 4875 }, { "epoch": 2.185648649658212, "learning_rate": 9.037184469647277e-06, "lm_loss": 0.328, "loss": 0.328, "step": 4876 }, { "epoch": 2.1860968958948117, "learning_rate": 9.027871325506453e-06, "lm_loss": 0.396, "loss": 0.396, "step": 4877 }, { "epoch": 2.186545142131411, "learning_rate": 9.018561925116917e-06, "lm_loss": 0.4007, "loss": 0.4007, "step": 4878 }, { "epoch": 2.18699338836801, "learning_rate": 9.009256270660722e-06, "lm_loss": 0.4892, "loss": 0.4892, "step": 4879 }, { "epoch": 2.1874416346046095, "learning_rate": 8.999954364319051e-06, "lm_loss": 0.699, "loss": 0.699, "step": 4880 }, { "epoch": 2.1878898808412086, "learning_rate": 8.990656208272196e-06, "lm_loss": 0.4612, "loss": 0.4612, "step": 4881 }, { "epoch": 2.188338127077808, "learning_rate": 8.981361804699579e-06, "lm_loss": 0.553, "loss": 0.553, "step": 4882 }, { "epoch": 2.1887863733144073, "learning_rate": 8.972071155779751e-06, "lm_loss": 0.4766, "loss": 0.4766, "step": 4883 }, { "epoch": 2.189234619551007, "learning_rate": 8.962784263690358e-06, "lm_loss": 0.3975, "loss": 0.3975, "step": 4884 }, { "epoch": 2.189682865787606, "learning_rate": 8.953501130608184e-06, "lm_loss": 0.4183, "loss": 0.4183, "step": 4885 }, { "epoch": 2.190131112024205, "learning_rate": 8.94422175870914e-06, "lm_loss": 0.3204, "loss": 0.3204, "step": 4886 }, { "epoch": 2.1905793582608046, "learning_rate": 8.934946150168217e-06, "lm_loss": 0.4963, "loss": 0.4963, "step": 4887 }, { "epoch": 2.1910276044974037, "learning_rate": 8.925674307159585e-06, "lm_loss": 0.3253, "loss": 0.3253, "step": 4888 }, { "epoch": 2.1914758507340033, "learning_rate": 8.91640623185647e-06, "lm_loss": 0.3667, "loss": 0.3667, "step": 4889 }, { "epoch": 2.1919240969706024, "learning_rate": 8.907141926431253e-06, "lm_loss": 0.3659, "loss": 0.3659, "step": 4890 }, { "epoch": 2.192372343207202, "learning_rate": 8.897881393055427e-06, "lm_loss": 0.4663, "loss": 0.4663, "step": 4891 }, { "epoch": 2.192820589443801, "learning_rate": 8.888624633899575e-06, "lm_loss": 0.3215, "loss": 0.3215, "step": 4892 }, { "epoch": 2.1932688356804, "learning_rate": 8.879371651133428e-06, "lm_loss": 0.3505, "loss": 0.3505, "step": 4893 }, { "epoch": 2.1937170819169998, "learning_rate": 8.870122446925822e-06, "lm_loss": 0.4365, "loss": 0.4365, "step": 4894 }, { "epoch": 2.194165328153599, "learning_rate": 8.860877023444685e-06, "lm_loss": 0.416, "loss": 0.416, "step": 4895 }, { "epoch": 2.1946135743901984, "learning_rate": 8.851635382857092e-06, "lm_loss": 0.4306, "loss": 0.4306, "step": 4896 }, { "epoch": 2.1950618206267976, "learning_rate": 8.842397527329213e-06, "lm_loss": 0.4262, "loss": 0.4262, "step": 4897 }, { "epoch": 2.195510066863397, "learning_rate": 8.833163459026334e-06, "lm_loss": 0.3015, "loss": 0.3015, "step": 4898 }, { "epoch": 2.1959583130999962, "learning_rate": 8.823933180112865e-06, "lm_loss": 0.3839, "loss": 0.3839, "step": 4899 }, { "epoch": 2.1964065593365953, "learning_rate": 8.814706692752295e-06, "lm_loss": 0.4558, "loss": 0.4558, "step": 4900 }, { "epoch": 2.196854805573195, "learning_rate": 8.805483999107258e-06, "lm_loss": 0.3892, "loss": 0.3892, "step": 4901 }, { "epoch": 2.197303051809794, "learning_rate": 8.796265101339493e-06, "lm_loss": 0.5356, "loss": 0.5356, "step": 4902 }, { "epoch": 2.1977512980463936, "learning_rate": 8.787050001609828e-06, "lm_loss": 0.3896, "loss": 0.3896, "step": 4903 }, { "epoch": 2.1981995442829927, "learning_rate": 8.777838702078222e-06, "lm_loss": 0.5062, "loss": 0.5062, "step": 4904 }, { "epoch": 2.1986477905195922, "learning_rate": 8.768631204903737e-06, "lm_loss": 0.3921, "loss": 0.3921, "step": 4905 }, { "epoch": 2.1990960367561914, "learning_rate": 8.759427512244547e-06, "lm_loss": 0.388, "loss": 0.388, "step": 4906 }, { "epoch": 2.1995442829927905, "learning_rate": 8.750227626257935e-06, "lm_loss": 0.3266, "loss": 0.3266, "step": 4907 }, { "epoch": 2.19999252922939, "learning_rate": 8.741031549100273e-06, "lm_loss": 0.4392, "loss": 0.4392, "step": 4908 }, { "epoch": 2.200440775465989, "learning_rate": 8.731839282927063e-06, "lm_loss": 0.3853, "loss": 0.3853, "step": 4909 }, { "epoch": 2.2008890217025887, "learning_rate": 8.722650829892914e-06, "lm_loss": 0.3725, "loss": 0.3725, "step": 4910 }, { "epoch": 2.201337267939188, "learning_rate": 8.713466192151515e-06, "lm_loss": 0.4119, "loss": 0.4119, "step": 4911 }, { "epoch": 2.2017855141757874, "learning_rate": 8.704285371855697e-06, "lm_loss": 0.3167, "loss": 0.3167, "step": 4912 }, { "epoch": 2.2022337604123865, "learning_rate": 8.69510837115736e-06, "lm_loss": 0.4626, "loss": 0.4626, "step": 4913 }, { "epoch": 2.2026820066489856, "learning_rate": 8.685935192207529e-06, "lm_loss": 0.3529, "loss": 0.3529, "step": 4914 }, { "epoch": 2.203130252885585, "learning_rate": 8.67676583715635e-06, "lm_loss": 0.3037, "loss": 0.3037, "step": 4915 }, { "epoch": 2.2035784991221843, "learning_rate": 8.667600308153034e-06, "lm_loss": 0.4341, "loss": 0.4341, "step": 4916 }, { "epoch": 2.204026745358784, "learning_rate": 8.65843860734593e-06, "lm_loss": 0.3829, "loss": 0.3829, "step": 4917 }, { "epoch": 2.204474991595383, "learning_rate": 8.649280736882456e-06, "lm_loss": 0.4861, "loss": 0.4861, "step": 4918 }, { "epoch": 2.2049232378319825, "learning_rate": 8.64012669890916e-06, "lm_loss": 0.3903, "loss": 0.3903, "step": 4919 }, { "epoch": 2.2053714840685816, "learning_rate": 8.630976495571691e-06, "lm_loss": 0.3809, "loss": 0.3809, "step": 4920 }, { "epoch": 2.2058197303051807, "learning_rate": 8.621830129014771e-06, "lm_loss": 0.2724, "loss": 0.2724, "step": 4921 }, { "epoch": 2.2062679765417803, "learning_rate": 8.612687601382255e-06, "lm_loss": 0.3918, "loss": 0.3918, "step": 4922 }, { "epoch": 2.2067162227783794, "learning_rate": 8.603548914817084e-06, "lm_loss": 0.3841, "loss": 0.3841, "step": 4923 }, { "epoch": 2.207164469014979, "learning_rate": 8.594414071461293e-06, "lm_loss": 0.465, "loss": 0.465, "step": 4924 }, { "epoch": 2.207612715251578, "learning_rate": 8.585283073456041e-06, "lm_loss": 0.4342, "loss": 0.4342, "step": 4925 }, { "epoch": 2.2080609614881777, "learning_rate": 8.576155922941548e-06, "lm_loss": 0.4149, "loss": 0.4149, "step": 4926 }, { "epoch": 2.2085092077247768, "learning_rate": 8.567032622057158e-06, "lm_loss": 0.3217, "loss": 0.3217, "step": 4927 }, { "epoch": 2.208957453961376, "learning_rate": 8.557913172941318e-06, "lm_loss": 0.3562, "loss": 0.3562, "step": 4928 }, { "epoch": 2.2094057001979754, "learning_rate": 8.548797577731541e-06, "lm_loss": 0.5925, "loss": 0.5925, "step": 4929 }, { "epoch": 2.2098539464345746, "learning_rate": 8.539685838564467e-06, "lm_loss": 0.3593, "loss": 0.3593, "step": 4930 }, { "epoch": 2.210302192671174, "learning_rate": 8.53057795757583e-06, "lm_loss": 0.4815, "loss": 0.4815, "step": 4931 }, { "epoch": 2.2107504389077732, "learning_rate": 8.521473936900426e-06, "lm_loss": 0.4482, "loss": 0.4482, "step": 4932 }, { "epoch": 2.211198685144373, "learning_rate": 8.512373778672206e-06, "lm_loss": 0.3226, "loss": 0.3226, "step": 4933 }, { "epoch": 2.211646931380972, "learning_rate": 8.503277485024153e-06, "lm_loss": 0.3422, "loss": 0.3422, "step": 4934 }, { "epoch": 2.212095177617571, "learning_rate": 8.494185058088383e-06, "lm_loss": 0.3506, "loss": 0.3506, "step": 4935 }, { "epoch": 2.2125434238541706, "learning_rate": 8.485096499996104e-06, "lm_loss": 0.3742, "loss": 0.3742, "step": 4936 }, { "epoch": 2.2129916700907697, "learning_rate": 8.476011812877587e-06, "lm_loss": 0.4085, "loss": 0.4085, "step": 4937 }, { "epoch": 2.2134399163273693, "learning_rate": 8.466930998862233e-06, "lm_loss": 0.3795, "loss": 0.3795, "step": 4938 }, { "epoch": 2.2138881625639684, "learning_rate": 8.45785406007852e-06, "lm_loss": 0.3258, "loss": 0.3258, "step": 4939 }, { "epoch": 2.214336408800568, "learning_rate": 8.448780998654008e-06, "lm_loss": 0.4359, "loss": 0.4359, "step": 4940 }, { "epoch": 2.214784655037167, "learning_rate": 8.439711816715354e-06, "lm_loss": 0.2511, "loss": 0.2511, "step": 4941 }, { "epoch": 2.2152329012737666, "learning_rate": 8.430646516388318e-06, "lm_loss": 0.3427, "loss": 0.3427, "step": 4942 }, { "epoch": 2.2156811475103657, "learning_rate": 8.421585099797738e-06, "lm_loss": 0.464, "loss": 0.464, "step": 4943 }, { "epoch": 2.216129393746965, "learning_rate": 8.412527569067552e-06, "lm_loss": 0.2666, "loss": 0.2666, "step": 4944 }, { "epoch": 2.2165776399835644, "learning_rate": 8.403473926320765e-06, "lm_loss": 0.5807, "loss": 0.5807, "step": 4945 }, { "epoch": 2.2170258862201635, "learning_rate": 8.394424173679489e-06, "lm_loss": 0.4374, "loss": 0.4374, "step": 4946 }, { "epoch": 2.217474132456763, "learning_rate": 8.385378313264933e-06, "lm_loss": 0.3826, "loss": 0.3826, "step": 4947 }, { "epoch": 2.217922378693362, "learning_rate": 8.376336347197363e-06, "lm_loss": 0.422, "loss": 0.422, "step": 4948 }, { "epoch": 2.2183706249299617, "learning_rate": 8.367298277596161e-06, "lm_loss": 0.3319, "loss": 0.3319, "step": 4949 }, { "epoch": 2.218818871166561, "learning_rate": 8.35826410657978e-06, "lm_loss": 0.4144, "loss": 0.4144, "step": 4950 }, { "epoch": 2.21926711740316, "learning_rate": 8.34923383626577e-06, "lm_loss": 0.3603, "loss": 0.3603, "step": 4951 }, { "epoch": 2.2197153636397595, "learning_rate": 8.340207468770763e-06, "lm_loss": 0.3997, "loss": 0.3997, "step": 4952 }, { "epoch": 2.2201636098763586, "learning_rate": 8.331185006210466e-06, "lm_loss": 0.4765, "loss": 0.4765, "step": 4953 }, { "epoch": 2.220611856112958, "learning_rate": 8.322166450699679e-06, "lm_loss": 0.3218, "loss": 0.3218, "step": 4954 }, { "epoch": 2.2210601023495573, "learning_rate": 8.313151804352298e-06, "lm_loss": 0.4123, "loss": 0.4123, "step": 4955 }, { "epoch": 2.221508348586157, "learning_rate": 8.304141069281277e-06, "lm_loss": 0.4235, "loss": 0.4235, "step": 4956 }, { "epoch": 2.221956594822756, "learning_rate": 8.295134247598677e-06, "lm_loss": 0.444, "loss": 0.444, "step": 4957 }, { "epoch": 2.222404841059355, "learning_rate": 8.28613134141562e-06, "lm_loss": 0.2961, "loss": 0.2961, "step": 4958 }, { "epoch": 2.2228530872959547, "learning_rate": 8.277132352842321e-06, "lm_loss": 0.4228, "loss": 0.4228, "step": 4959 }, { "epoch": 2.2233013335325538, "learning_rate": 8.2681372839881e-06, "lm_loss": 0.4567, "loss": 0.4567, "step": 4960 }, { "epoch": 2.2237495797691533, "learning_rate": 8.259146136961315e-06, "lm_loss": 0.2822, "loss": 0.2822, "step": 4961 }, { "epoch": 2.2241978260057524, "learning_rate": 8.250158913869443e-06, "lm_loss": 0.6037, "loss": 0.6037, "step": 4962 }, { "epoch": 2.224646072242352, "learning_rate": 8.241175616819003e-06, "lm_loss": 0.44, "loss": 0.44, "step": 4963 }, { "epoch": 2.225094318478951, "learning_rate": 8.232196247915627e-06, "lm_loss": 0.39, "loss": 0.39, "step": 4964 }, { "epoch": 2.2255425647155502, "learning_rate": 8.223220809264018e-06, "lm_loss": 0.2655, "loss": 0.2655, "step": 4965 }, { "epoch": 2.22599081095215, "learning_rate": 8.214249302967941e-06, "lm_loss": 0.3281, "loss": 0.3281, "step": 4966 }, { "epoch": 2.226439057188749, "learning_rate": 8.20528173113026e-06, "lm_loss": 0.3799, "loss": 0.3799, "step": 4967 }, { "epoch": 2.2268873034253485, "learning_rate": 8.196318095852909e-06, "lm_loss": 0.4356, "loss": 0.4356, "step": 4968 }, { "epoch": 2.2273355496619476, "learning_rate": 8.187358399236897e-06, "lm_loss": 0.455, "loss": 0.455, "step": 4969 }, { "epoch": 2.227783795898547, "learning_rate": 8.178402643382322e-06, "lm_loss": 0.4618, "loss": 0.4618, "step": 4970 }, { "epoch": 2.2282320421351463, "learning_rate": 8.169450830388332e-06, "lm_loss": 0.3427, "loss": 0.3427, "step": 4971 }, { "epoch": 2.2286802883717454, "learning_rate": 8.160502962353172e-06, "lm_loss": 0.5679, "loss": 0.5679, "step": 4972 }, { "epoch": 2.229128534608345, "learning_rate": 8.151559041374171e-06, "lm_loss": 0.3311, "loss": 0.3311, "step": 4973 }, { "epoch": 2.229576780844944, "learning_rate": 8.1426190695477e-06, "lm_loss": 0.4159, "loss": 0.4159, "step": 4974 }, { "epoch": 2.2300250270815436, "learning_rate": 8.133683048969231e-06, "lm_loss": 0.3629, "loss": 0.3629, "step": 4975 }, { "epoch": 2.2304732733181427, "learning_rate": 8.124750981733314e-06, "lm_loss": 0.3632, "loss": 0.3632, "step": 4976 }, { "epoch": 2.2309215195547423, "learning_rate": 8.115822869933531e-06, "lm_loss": 0.3652, "loss": 0.3652, "step": 4977 }, { "epoch": 2.2313697657913414, "learning_rate": 8.106898715662606e-06, "lm_loss": 0.5015, "loss": 0.5015, "step": 4978 }, { "epoch": 2.2318180120279405, "learning_rate": 8.097978521012267e-06, "lm_loss": 0.4338, "loss": 0.4338, "step": 4979 }, { "epoch": 2.23226625826454, "learning_rate": 8.089062288073354e-06, "lm_loss": 0.3922, "loss": 0.3922, "step": 4980 }, { "epoch": 2.232714504501139, "learning_rate": 8.080150018935773e-06, "lm_loss": 0.2884, "loss": 0.2884, "step": 4981 }, { "epoch": 2.2331627507377387, "learning_rate": 8.07124171568848e-06, "lm_loss": 0.5214, "loss": 0.5214, "step": 4982 }, { "epoch": 2.233610996974338, "learning_rate": 8.062337380419524e-06, "lm_loss": 0.3959, "loss": 0.3959, "step": 4983 }, { "epoch": 2.2340592432109374, "learning_rate": 8.053437015216025e-06, "lm_loss": 0.3933, "loss": 0.3933, "step": 4984 }, { "epoch": 2.2345074894475365, "learning_rate": 8.044540622164143e-06, "lm_loss": 0.4305, "loss": 0.4305, "step": 4985 }, { "epoch": 2.2349557356841356, "learning_rate": 8.035648203349157e-06, "lm_loss": 0.3327, "loss": 0.3327, "step": 4986 }, { "epoch": 2.235403981920735, "learning_rate": 8.026759760855363e-06, "lm_loss": 0.5563, "loss": 0.5563, "step": 4987 }, { "epoch": 2.2358522281573343, "learning_rate": 8.017875296766155e-06, "lm_loss": 0.4811, "loss": 0.4811, "step": 4988 }, { "epoch": 2.236300474393934, "learning_rate": 8.008994813163995e-06, "lm_loss": 0.3753, "loss": 0.3753, "step": 4989 }, { "epoch": 2.236748720630533, "learning_rate": 8.00011831213039e-06, "lm_loss": 0.4227, "loss": 0.4227, "step": 4990 }, { "epoch": 2.2371969668671325, "learning_rate": 7.99124579574593e-06, "lm_loss": 0.4197, "loss": 0.4197, "step": 4991 }, { "epoch": 2.2376452131037317, "learning_rate": 7.982377266090283e-06, "lm_loss": 0.39, "loss": 0.39, "step": 4992 }, { "epoch": 2.2380934593403308, "learning_rate": 7.97351272524215e-06, "lm_loss": 0.4214, "loss": 0.4214, "step": 4993 }, { "epoch": 2.2385417055769303, "learning_rate": 7.964652175279318e-06, "lm_loss": 0.332, "loss": 0.332, "step": 4994 }, { "epoch": 2.2389899518135294, "learning_rate": 7.955795618278644e-06, "lm_loss": 0.3772, "loss": 0.3772, "step": 4995 }, { "epoch": 2.239438198050129, "learning_rate": 7.946943056316039e-06, "lm_loss": 0.2936, "loss": 0.2936, "step": 4996 }, { "epoch": 2.239886444286728, "learning_rate": 7.938094491466483e-06, "lm_loss": 0.4773, "loss": 0.4773, "step": 4997 }, { "epoch": 2.2403346905233277, "learning_rate": 7.929249925804002e-06, "lm_loss": 0.3566, "loss": 0.3566, "step": 4998 }, { "epoch": 2.240782936759927, "learning_rate": 7.920409361401707e-06, "lm_loss": 0.3526, "loss": 0.3526, "step": 4999 }, { "epoch": 2.241231182996526, "learning_rate": 7.911572800331768e-06, "lm_loss": 0.4462, "loss": 0.4462, "step": 5000 }, { "epoch": 2.2416794292331255, "learning_rate": 7.9027402446654e-06, "lm_loss": 0.3448, "loss": 0.3448, "step": 5001 }, { "epoch": 2.2421276754697246, "learning_rate": 7.893911696472898e-06, "lm_loss": 0.3447, "loss": 0.3447, "step": 5002 }, { "epoch": 2.242575921706324, "learning_rate": 7.885087157823593e-06, "lm_loss": 0.2594, "loss": 0.2594, "step": 5003 }, { "epoch": 2.2430241679429233, "learning_rate": 7.876266630785913e-06, "lm_loss": 0.4602, "loss": 0.4602, "step": 5004 }, { "epoch": 2.243472414179523, "learning_rate": 7.867450117427327e-06, "lm_loss": 0.3797, "loss": 0.3797, "step": 5005 }, { "epoch": 2.243920660416122, "learning_rate": 7.858637619814346e-06, "lm_loss": 0.301, "loss": 0.301, "step": 5006 }, { "epoch": 2.244368906652721, "learning_rate": 7.849829140012571e-06, "lm_loss": 0.3719, "loss": 0.3719, "step": 5007 }, { "epoch": 2.2448171528893206, "learning_rate": 7.841024680086632e-06, "lm_loss": 0.4709, "loss": 0.4709, "step": 5008 }, { "epoch": 2.2452653991259197, "learning_rate": 7.832224242100233e-06, "lm_loss": 0.2969, "loss": 0.2969, "step": 5009 }, { "epoch": 2.2457136453625193, "learning_rate": 7.823427828116148e-06, "lm_loss": 0.3682, "loss": 0.3682, "step": 5010 }, { "epoch": 2.2461618915991184, "learning_rate": 7.814635440196172e-06, "lm_loss": 0.3391, "loss": 0.3391, "step": 5011 }, { "epoch": 2.246610137835718, "learning_rate": 7.805847080401188e-06, "lm_loss": 0.5332, "loss": 0.5332, "step": 5012 }, { "epoch": 2.247058384072317, "learning_rate": 7.79706275079112e-06, "lm_loss": 0.236, "loss": 0.236, "step": 5013 }, { "epoch": 2.247506630308916, "learning_rate": 7.788282453424958e-06, "lm_loss": 0.4728, "loss": 0.4728, "step": 5014 }, { "epoch": 2.2479548765455157, "learning_rate": 7.779506190360742e-06, "lm_loss": 0.3855, "loss": 0.3855, "step": 5015 }, { "epoch": 2.248403122782115, "learning_rate": 7.770733963655547e-06, "lm_loss": 0.2708, "loss": 0.2708, "step": 5016 }, { "epoch": 2.2488513690187144, "learning_rate": 7.761965775365534e-06, "lm_loss": 0.4096, "loss": 0.4096, "step": 5017 }, { "epoch": 2.2492996152553135, "learning_rate": 7.753201627545906e-06, "lm_loss": 0.3971, "loss": 0.3971, "step": 5018 }, { "epoch": 2.249747861491913, "learning_rate": 7.744441522250903e-06, "lm_loss": 0.4339, "loss": 0.4339, "step": 5019 }, { "epoch": 2.250196107728512, "learning_rate": 7.735685461533835e-06, "lm_loss": 0.314, "loss": 0.314, "step": 5020 }, { "epoch": 2.2506443539651113, "learning_rate": 7.726933447447063e-06, "lm_loss": 0.3242, "loss": 0.3242, "step": 5021 }, { "epoch": 2.251092600201711, "learning_rate": 7.718185482041989e-06, "lm_loss": 0.485, "loss": 0.485, "step": 5022 }, { "epoch": 2.25154084643831, "learning_rate": 7.709441567369089e-06, "lm_loss": 0.4631, "loss": 0.4631, "step": 5023 }, { "epoch": 2.2519890926749095, "learning_rate": 7.700701705477853e-06, "lm_loss": 0.2546, "loss": 0.2546, "step": 5024 }, { "epoch": 2.2524373389115087, "learning_rate": 7.69196589841685e-06, "lm_loss": 0.4266, "loss": 0.4266, "step": 5025 }, { "epoch": 2.252885585148108, "learning_rate": 7.683234148233697e-06, "lm_loss": 0.591, "loss": 0.591, "step": 5026 }, { "epoch": 2.2533338313847073, "learning_rate": 7.67450645697504e-06, "lm_loss": 0.4679, "loss": 0.4679, "step": 5027 }, { "epoch": 2.2537820776213064, "learning_rate": 7.665782826686589e-06, "lm_loss": 0.3856, "loss": 0.3856, "step": 5028 }, { "epoch": 2.254230323857906, "learning_rate": 7.65706325941311e-06, "lm_loss": 0.3829, "loss": 0.3829, "step": 5029 }, { "epoch": 2.254678570094505, "learning_rate": 7.648347757198387e-06, "lm_loss": 0.4636, "loss": 0.4636, "step": 5030 }, { "epoch": 2.2551268163311047, "learning_rate": 7.6396363220853e-06, "lm_loss": 0.4944, "loss": 0.4944, "step": 5031 }, { "epoch": 2.255575062567704, "learning_rate": 7.630928956115719e-06, "lm_loss": 0.3055, "loss": 0.3055, "step": 5032 }, { "epoch": 2.2560233088043034, "learning_rate": 7.622225661330598e-06, "lm_loss": 0.3807, "loss": 0.3807, "step": 5033 }, { "epoch": 2.2564715550409025, "learning_rate": 7.613526439769936e-06, "lm_loss": 0.4178, "loss": 0.4178, "step": 5034 }, { "epoch": 2.2569198012775016, "learning_rate": 7.604831293472747e-06, "lm_loss": 0.4149, "loss": 0.4149, "step": 5035 }, { "epoch": 2.257368047514101, "learning_rate": 7.596140224477122e-06, "lm_loss": 0.37, "loss": 0.37, "step": 5036 }, { "epoch": 2.2578162937507003, "learning_rate": 7.587453234820194e-06, "lm_loss": 0.4463, "loss": 0.4463, "step": 5037 }, { "epoch": 2.2582645399873, "learning_rate": 7.578770326538109e-06, "lm_loss": 0.4311, "loss": 0.4311, "step": 5038 }, { "epoch": 2.258712786223899, "learning_rate": 7.570091501666091e-06, "lm_loss": 0.3042, "loss": 0.3042, "step": 5039 }, { "epoch": 2.2591610324604985, "learning_rate": 7.56141676223839e-06, "lm_loss": 0.4847, "loss": 0.4847, "step": 5040 }, { "epoch": 2.2596092786970976, "learning_rate": 7.552746110288306e-06, "lm_loss": 0.3943, "loss": 0.3943, "step": 5041 }, { "epoch": 2.260057524933697, "learning_rate": 7.5440795478481815e-06, "lm_loss": 0.4168, "loss": 0.4168, "step": 5042 }, { "epoch": 2.2605057711702963, "learning_rate": 7.535417076949383e-06, "lm_loss": 0.2008, "loss": 0.2008, "step": 5043 }, { "epoch": 2.2609540174068954, "learning_rate": 7.526758699622335e-06, "lm_loss": 0.4017, "loss": 0.4017, "step": 5044 }, { "epoch": 2.261402263643495, "learning_rate": 7.51810441789651e-06, "lm_loss": 0.3674, "loss": 0.3674, "step": 5045 }, { "epoch": 2.261850509880094, "learning_rate": 7.5094542338003895e-06, "lm_loss": 0.3685, "loss": 0.3685, "step": 5046 }, { "epoch": 2.2622987561166936, "learning_rate": 7.500808149361535e-06, "lm_loss": 0.3857, "loss": 0.3857, "step": 5047 }, { "epoch": 2.2627470023532927, "learning_rate": 7.492166166606499e-06, "lm_loss": 0.4348, "loss": 0.4348, "step": 5048 }, { "epoch": 2.2631952485898923, "learning_rate": 7.483528287560921e-06, "lm_loss": 0.3698, "loss": 0.3698, "step": 5049 }, { "epoch": 2.2636434948264914, "learning_rate": 7.474894514249462e-06, "lm_loss": 0.4021, "loss": 0.4021, "step": 5050 }, { "epoch": 2.2640917410630905, "learning_rate": 7.466264848695797e-06, "lm_loss": 0.2972, "loss": 0.2972, "step": 5051 }, { "epoch": 2.26453998729969, "learning_rate": 7.457639292922675e-06, "lm_loss": 0.3279, "loss": 0.3279, "step": 5052 }, { "epoch": 2.264988233536289, "learning_rate": 7.449017848951848e-06, "lm_loss": 0.5239, "loss": 0.5239, "step": 5053 }, { "epoch": 2.2654364797728888, "learning_rate": 7.440400518804125e-06, "lm_loss": 0.3502, "loss": 0.3502, "step": 5054 }, { "epoch": 2.265884726009488, "learning_rate": 7.4317873044993575e-06, "lm_loss": 0.4503, "loss": 0.4503, "step": 5055 }, { "epoch": 2.2663329722460874, "learning_rate": 7.423178208056403e-06, "lm_loss": 0.4751, "loss": 0.4751, "step": 5056 }, { "epoch": 2.2667812184826865, "learning_rate": 7.4145732314931774e-06, "lm_loss": 0.3162, "loss": 0.3162, "step": 5057 }, { "epoch": 2.2672294647192857, "learning_rate": 7.405972376826628e-06, "lm_loss": 0.3841, "loss": 0.3841, "step": 5058 }, { "epoch": 2.267677710955885, "learning_rate": 7.397375646072733e-06, "lm_loss": 0.4012, "loss": 0.4012, "step": 5059 }, { "epoch": 2.2681259571924843, "learning_rate": 7.388783041246508e-06, "lm_loss": 0.262, "loss": 0.262, "step": 5060 }, { "epoch": 2.268574203429084, "learning_rate": 7.380194564361983e-06, "lm_loss": 0.5483, "loss": 0.5483, "step": 5061 }, { "epoch": 2.269022449665683, "learning_rate": 7.371610217432245e-06, "lm_loss": 0.4058, "loss": 0.4058, "step": 5062 }, { "epoch": 2.2694706959022826, "learning_rate": 7.363030002469409e-06, "lm_loss": 0.5832, "loss": 0.5832, "step": 5063 }, { "epoch": 2.2699189421388817, "learning_rate": 7.3544539214846e-06, "lm_loss": 0.3242, "loss": 0.3242, "step": 5064 }, { "epoch": 2.270367188375481, "learning_rate": 7.345881976487998e-06, "lm_loss": 0.4962, "loss": 0.4962, "step": 5065 }, { "epoch": 2.2708154346120804, "learning_rate": 7.337314169488802e-06, "lm_loss": 0.529, "loss": 0.529, "step": 5066 }, { "epoch": 2.2712636808486795, "learning_rate": 7.3287505024952435e-06, "lm_loss": 0.3131, "loss": 0.3131, "step": 5067 }, { "epoch": 2.271711927085279, "learning_rate": 7.320190977514596e-06, "lm_loss": 0.3791, "loss": 0.3791, "step": 5068 }, { "epoch": 2.272160173321878, "learning_rate": 7.3116355965531315e-06, "lm_loss": 0.2542, "loss": 0.2542, "step": 5069 }, { "epoch": 2.2726084195584777, "learning_rate": 7.30308436161618e-06, "lm_loss": 0.4816, "loss": 0.4816, "step": 5070 }, { "epoch": 2.273056665795077, "learning_rate": 7.294537274708094e-06, "lm_loss": 0.2594, "loss": 0.2594, "step": 5071 }, { "epoch": 2.273504912031676, "learning_rate": 7.285994337832233e-06, "lm_loss": 0.4383, "loss": 0.4383, "step": 5072 }, { "epoch": 2.2739531582682755, "learning_rate": 7.277455552991011e-06, "lm_loss": 0.5458, "loss": 0.5458, "step": 5073 }, { "epoch": 2.2744014045048746, "learning_rate": 7.26892092218586e-06, "lm_loss": 0.4449, "loss": 0.4449, "step": 5074 }, { "epoch": 2.274849650741474, "learning_rate": 7.260390447417214e-06, "lm_loss": 0.3831, "loss": 0.3831, "step": 5075 }, { "epoch": 2.2752978969780733, "learning_rate": 7.251864130684588e-06, "lm_loss": 0.3826, "loss": 0.3826, "step": 5076 }, { "epoch": 2.275746143214673, "learning_rate": 7.243341973986464e-06, "lm_loss": 0.4187, "loss": 0.4187, "step": 5077 }, { "epoch": 2.276194389451272, "learning_rate": 7.23482397932038e-06, "lm_loss": 0.3855, "loss": 0.3855, "step": 5078 }, { "epoch": 2.276642635687871, "learning_rate": 7.226310148682902e-06, "lm_loss": 0.3588, "loss": 0.3588, "step": 5079 }, { "epoch": 2.2770908819244706, "learning_rate": 7.217800484069595e-06, "lm_loss": 0.3469, "loss": 0.3469, "step": 5080 }, { "epoch": 2.2775391281610697, "learning_rate": 7.209294987475071e-06, "lm_loss": 0.4472, "loss": 0.4472, "step": 5081 }, { "epoch": 2.2779873743976693, "learning_rate": 7.2007936608929625e-06, "lm_loss": 0.3933, "loss": 0.3933, "step": 5082 }, { "epoch": 2.2784356206342684, "learning_rate": 7.192296506315904e-06, "lm_loss": 0.3619, "loss": 0.3619, "step": 5083 }, { "epoch": 2.278883866870868, "learning_rate": 7.183803525735575e-06, "lm_loss": 0.3297, "loss": 0.3297, "step": 5084 }, { "epoch": 2.279332113107467, "learning_rate": 7.1753147211426675e-06, "lm_loss": 0.4479, "loss": 0.4479, "step": 5085 }, { "epoch": 2.279780359344066, "learning_rate": 7.166830094526897e-06, "lm_loss": 0.5055, "loss": 0.5055, "step": 5086 }, { "epoch": 2.2802286055806658, "learning_rate": 7.158349647877008e-06, "lm_loss": 0.2242, "loss": 0.2242, "step": 5087 }, { "epoch": 2.280676851817265, "learning_rate": 7.149873383180739e-06, "lm_loss": 0.3854, "loss": 0.3854, "step": 5088 }, { "epoch": 2.2811250980538644, "learning_rate": 7.1414013024248774e-06, "lm_loss": 0.4097, "loss": 0.4097, "step": 5089 }, { "epoch": 2.2815733442904635, "learning_rate": 7.132933407595205e-06, "lm_loss": 0.4436, "loss": 0.4436, "step": 5090 }, { "epoch": 2.282021590527063, "learning_rate": 7.124469700676545e-06, "lm_loss": 0.3189, "loss": 0.3189, "step": 5091 }, { "epoch": 2.282469836763662, "learning_rate": 7.116010183652732e-06, "lm_loss": 0.2832, "loss": 0.2832, "step": 5092 }, { "epoch": 2.2829180830002613, "learning_rate": 7.107554858506596e-06, "lm_loss": 0.4096, "loss": 0.4096, "step": 5093 }, { "epoch": 2.283366329236861, "learning_rate": 7.0991037272200244e-06, "lm_loss": 0.2751, "loss": 0.2751, "step": 5094 }, { "epoch": 2.28381457547346, "learning_rate": 7.090656791773903e-06, "lm_loss": 0.5733, "loss": 0.5733, "step": 5095 }, { "epoch": 2.2842628217100596, "learning_rate": 7.082214054148115e-06, "lm_loss": 0.434, "loss": 0.434, "step": 5096 }, { "epoch": 2.2847110679466587, "learning_rate": 7.073775516321596e-06, "lm_loss": 0.2415, "loss": 0.2415, "step": 5097 }, { "epoch": 2.2851593141832582, "learning_rate": 7.065341180272258e-06, "lm_loss": 0.4514, "loss": 0.4514, "step": 5098 }, { "epoch": 2.2856075604198574, "learning_rate": 7.05691104797706e-06, "lm_loss": 0.3847, "loss": 0.3847, "step": 5099 }, { "epoch": 2.2860558066564565, "learning_rate": 7.0484851214119715e-06, "lm_loss": 0.2756, "loss": 0.2756, "step": 5100 }, { "epoch": 2.286504052893056, "learning_rate": 7.040063402551949e-06, "lm_loss": 0.5181, "loss": 0.5181, "step": 5101 }, { "epoch": 2.286952299129655, "learning_rate": 7.031645893370995e-06, "lm_loss": 0.3763, "loss": 0.3763, "step": 5102 }, { "epoch": 2.2874005453662547, "learning_rate": 7.0232325958421105e-06, "lm_loss": 0.4539, "loss": 0.4539, "step": 5103 }, { "epoch": 2.287848791602854, "learning_rate": 7.01482351193731e-06, "lm_loss": 0.3431, "loss": 0.3431, "step": 5104 }, { "epoch": 2.2882970378394534, "learning_rate": 7.006418643627632e-06, "lm_loss": 0.418, "loss": 0.418, "step": 5105 }, { "epoch": 2.2887452840760525, "learning_rate": 6.998017992883102e-06, "lm_loss": 0.4706, "loss": 0.4706, "step": 5106 }, { "epoch": 2.2891935303126516, "learning_rate": 6.989621561672777e-06, "lm_loss": 0.2716, "loss": 0.2716, "step": 5107 }, { "epoch": 2.289641776549251, "learning_rate": 6.9812293519647295e-06, "lm_loss": 0.4169, "loss": 0.4169, "step": 5108 }, { "epoch": 2.2900900227858503, "learning_rate": 6.972841365726015e-06, "lm_loss": 0.3652, "loss": 0.3652, "step": 5109 }, { "epoch": 2.29053826902245, "learning_rate": 6.9644576049227254e-06, "lm_loss": 0.5178, "loss": 0.5178, "step": 5110 }, { "epoch": 2.290986515259049, "learning_rate": 6.956078071519953e-06, "lm_loss": 0.3904, "loss": 0.3904, "step": 5111 }, { "epoch": 2.2914347614956485, "learning_rate": 6.947702767481803e-06, "lm_loss": 0.4384, "loss": 0.4384, "step": 5112 }, { "epoch": 2.2918830077322476, "learning_rate": 6.939331694771389e-06, "lm_loss": 0.4232, "loss": 0.4232, "step": 5113 }, { "epoch": 2.2923312539688467, "learning_rate": 6.930964855350816e-06, "lm_loss": 0.2929, "loss": 0.2929, "step": 5114 }, { "epoch": 2.2927795002054463, "learning_rate": 6.92260225118122e-06, "lm_loss": 0.492, "loss": 0.492, "step": 5115 }, { "epoch": 2.2932277464420454, "learning_rate": 6.91424388422274e-06, "lm_loss": 0.3282, "loss": 0.3282, "step": 5116 }, { "epoch": 2.293675992678645, "learning_rate": 6.905889756434506e-06, "lm_loss": 0.4074, "loss": 0.4074, "step": 5117 }, { "epoch": 2.294124238915244, "learning_rate": 6.897539869774666e-06, "lm_loss": 0.3364, "loss": 0.3364, "step": 5118 }, { "epoch": 2.2945724851518436, "learning_rate": 6.889194226200385e-06, "lm_loss": 0.4451, "loss": 0.4451, "step": 5119 }, { "epoch": 2.2950207313884428, "learning_rate": 6.880852827667797e-06, "lm_loss": 0.4925, "loss": 0.4925, "step": 5120 }, { "epoch": 2.295468977625042, "learning_rate": 6.872515676132099e-06, "lm_loss": 0.4715, "loss": 0.4715, "step": 5121 }, { "epoch": 2.2959172238616414, "learning_rate": 6.864182773547431e-06, "lm_loss": 0.3917, "loss": 0.3917, "step": 5122 }, { "epoch": 2.2963654700982405, "learning_rate": 6.855854121866975e-06, "lm_loss": 0.42, "loss": 0.42, "step": 5123 }, { "epoch": 2.29681371633484, "learning_rate": 6.8475297230429145e-06, "lm_loss": 0.3868, "loss": 0.3868, "step": 5124 }, { "epoch": 2.297261962571439, "learning_rate": 6.8392095790264145e-06, "lm_loss": 0.6457, "loss": 0.6457, "step": 5125 }, { "epoch": 2.297710208808039, "learning_rate": 6.830893691767662e-06, "lm_loss": 0.3693, "loss": 0.3693, "step": 5126 }, { "epoch": 2.298158455044638, "learning_rate": 6.822582063215849e-06, "lm_loss": 0.3156, "loss": 0.3156, "step": 5127 }, { "epoch": 2.298606701281237, "learning_rate": 6.8142746953191446e-06, "lm_loss": 0.4328, "loss": 0.4328, "step": 5128 }, { "epoch": 2.2990549475178366, "learning_rate": 6.8059715900247454e-06, "lm_loss": 0.3335, "loss": 0.3335, "step": 5129 }, { "epoch": 2.2995031937544357, "learning_rate": 6.7976727492788385e-06, "lm_loss": 0.4342, "loss": 0.4342, "step": 5130 }, { "epoch": 2.2999514399910352, "learning_rate": 6.7893781750266086e-06, "lm_loss": 0.5356, "loss": 0.5356, "step": 5131 }, { "epoch": 2.3003996862276344, "learning_rate": 6.781087869212257e-06, "lm_loss": 0.539, "loss": 0.539, "step": 5132 }, { "epoch": 2.300847932464234, "learning_rate": 6.772801833778955e-06, "lm_loss": 0.4179, "loss": 0.4179, "step": 5133 }, { "epoch": 2.301296178700833, "learning_rate": 6.764520070668901e-06, "lm_loss": 0.4884, "loss": 0.4884, "step": 5134 }, { "epoch": 2.301744424937432, "learning_rate": 6.756242581823269e-06, "lm_loss": 0.6396, "loss": 0.6396, "step": 5135 }, { "epoch": 2.3021926711740317, "learning_rate": 6.747969369182247e-06, "lm_loss": 0.3928, "loss": 0.3928, "step": 5136 }, { "epoch": 2.302640917410631, "learning_rate": 6.739700434685026e-06, "lm_loss": 0.328, "loss": 0.328, "step": 5137 }, { "epoch": 2.3030891636472304, "learning_rate": 6.73143578026976e-06, "lm_loss": 0.4099, "loss": 0.4099, "step": 5138 }, { "epoch": 2.3035374098838295, "learning_rate": 6.723175407873655e-06, "lm_loss": 0.2571, "loss": 0.2571, "step": 5139 }, { "epoch": 2.303985656120429, "learning_rate": 6.714919319432861e-06, "lm_loss": 0.5506, "loss": 0.5506, "step": 5140 }, { "epoch": 2.304433902357028, "learning_rate": 6.706667516882551e-06, "lm_loss": 0.3199, "loss": 0.3199, "step": 5141 }, { "epoch": 2.3048821485936273, "learning_rate": 6.698420002156894e-06, "lm_loss": 0.3602, "loss": 0.3602, "step": 5142 }, { "epoch": 2.305330394830227, "learning_rate": 6.6901767771890364e-06, "lm_loss": 0.3801, "loss": 0.3801, "step": 5143 }, { "epoch": 2.305778641066826, "learning_rate": 6.681937843911138e-06, "lm_loss": 0.4117, "loss": 0.4117, "step": 5144 }, { "epoch": 2.3062268873034255, "learning_rate": 6.673703204254347e-06, "lm_loss": 0.2849, "loss": 0.2849, "step": 5145 }, { "epoch": 2.3066751335400246, "learning_rate": 6.665472860148797e-06, "lm_loss": 0.4025, "loss": 0.4025, "step": 5146 }, { "epoch": 2.307123379776624, "learning_rate": 6.657246813523621e-06, "lm_loss": 0.3057, "loss": 0.3057, "step": 5147 }, { "epoch": 2.3075716260132233, "learning_rate": 6.649025066306946e-06, "lm_loss": 0.308, "loss": 0.308, "step": 5148 }, { "epoch": 2.3080198722498224, "learning_rate": 6.6408076204258925e-06, "lm_loss": 0.5317, "loss": 0.5317, "step": 5149 }, { "epoch": 2.308468118486422, "learning_rate": 6.632594477806578e-06, "lm_loss": 0.507, "loss": 0.507, "step": 5150 }, { "epoch": 2.308916364723021, "learning_rate": 6.624385640374087e-06, "lm_loss": 0.3098, "loss": 0.3098, "step": 5151 }, { "epoch": 2.3093646109596206, "learning_rate": 6.616181110052519e-06, "lm_loss": 0.4577, "loss": 0.4577, "step": 5152 }, { "epoch": 2.3098128571962198, "learning_rate": 6.607980888764964e-06, "lm_loss": 0.3892, "loss": 0.3892, "step": 5153 }, { "epoch": 2.3102611034328193, "learning_rate": 6.599784978433482e-06, "lm_loss": 0.3493, "loss": 0.3493, "step": 5154 }, { "epoch": 2.3107093496694184, "learning_rate": 6.591593380979139e-06, "lm_loss": 0.3398, "loss": 0.3398, "step": 5155 }, { "epoch": 2.3111575959060175, "learning_rate": 6.583406098321987e-06, "lm_loss": 0.5211, "loss": 0.5211, "step": 5156 }, { "epoch": 2.311605842142617, "learning_rate": 6.575223132381067e-06, "lm_loss": 0.4914, "loss": 0.4914, "step": 5157 }, { "epoch": 2.312054088379216, "learning_rate": 6.567044485074414e-06, "lm_loss": 0.2995, "loss": 0.2995, "step": 5158 }, { "epoch": 2.312502334615816, "learning_rate": 6.558870158319028e-06, "lm_loss": 0.5038, "loss": 0.5038, "step": 5159 }, { "epoch": 2.312950580852415, "learning_rate": 6.55070015403092e-06, "lm_loss": 0.4904, "loss": 0.4904, "step": 5160 }, { "epoch": 2.3133988270890145, "learning_rate": 6.542534474125086e-06, "lm_loss": 0.324, "loss": 0.324, "step": 5161 }, { "epoch": 2.3138470733256136, "learning_rate": 6.534373120515491e-06, "lm_loss": 0.5391, "loss": 0.5391, "step": 5162 }, { "epoch": 2.3142953195622127, "learning_rate": 6.526216095115101e-06, "lm_loss": 0.2218, "loss": 0.2218, "step": 5163 }, { "epoch": 2.3147435657988122, "learning_rate": 6.518063399835867e-06, "lm_loss": 0.3882, "loss": 0.3882, "step": 5164 }, { "epoch": 2.3151918120354114, "learning_rate": 6.509915036588715e-06, "lm_loss": 0.4593, "loss": 0.4593, "step": 5165 }, { "epoch": 2.315640058272011, "learning_rate": 6.501771007283578e-06, "lm_loss": 0.4154, "loss": 0.4154, "step": 5166 }, { "epoch": 2.31608830450861, "learning_rate": 6.493631313829338e-06, "lm_loss": 0.4521, "loss": 0.4521, "step": 5167 }, { "epoch": 2.3165365507452096, "learning_rate": 6.485495958133889e-06, "lm_loss": 0.546, "loss": 0.546, "step": 5168 }, { "epoch": 2.3169847969818087, "learning_rate": 6.4773649421041055e-06, "lm_loss": 0.3343, "loss": 0.3343, "step": 5169 }, { "epoch": 2.317433043218408, "learning_rate": 6.469238267645824e-06, "lm_loss": 0.3006, "loss": 0.3006, "step": 5170 }, { "epoch": 2.3178812894550074, "learning_rate": 6.461115936663889e-06, "lm_loss": 0.4652, "loss": 0.4652, "step": 5171 }, { "epoch": 2.3183295356916065, "learning_rate": 6.45299795106212e-06, "lm_loss": 0.387, "loss": 0.387, "step": 5172 }, { "epoch": 2.318777781928206, "learning_rate": 6.444884312743293e-06, "lm_loss": 0.4032, "loss": 0.4032, "step": 5173 }, { "epoch": 2.319226028164805, "learning_rate": 6.436775023609215e-06, "lm_loss": 0.4467, "loss": 0.4467, "step": 5174 }, { "epoch": 2.3196742744014047, "learning_rate": 6.428670085560623e-06, "lm_loss": 0.3432, "loss": 0.3432, "step": 5175 }, { "epoch": 2.320122520638004, "learning_rate": 6.420569500497262e-06, "lm_loss": 0.5398, "loss": 0.5398, "step": 5176 }, { "epoch": 2.320570766874603, "learning_rate": 6.412473270317862e-06, "lm_loss": 0.3242, "loss": 0.3242, "step": 5177 }, { "epoch": 2.3210190131112025, "learning_rate": 6.4043813969201004e-06, "lm_loss": 0.354, "loss": 0.354, "step": 5178 }, { "epoch": 2.3214672593478016, "learning_rate": 6.396293882200671e-06, "lm_loss": 0.3685, "loss": 0.3685, "step": 5179 }, { "epoch": 2.321915505584401, "learning_rate": 6.388210728055213e-06, "lm_loss": 0.4484, "loss": 0.4484, "step": 5180 }, { "epoch": 2.3223637518210003, "learning_rate": 6.3801319363783695e-06, "lm_loss": 0.4042, "loss": 0.4042, "step": 5181 }, { "epoch": 2.3228119980576, "learning_rate": 6.372057509063747e-06, "lm_loss": 0.3313, "loss": 0.3313, "step": 5182 }, { "epoch": 2.323260244294199, "learning_rate": 6.363987448003936e-06, "lm_loss": 0.36, "loss": 0.36, "step": 5183 }, { "epoch": 2.323708490530798, "learning_rate": 6.3559217550905044e-06, "lm_loss": 0.4873, "loss": 0.4873, "step": 5184 }, { "epoch": 2.3241567367673976, "learning_rate": 6.347860432213981e-06, "lm_loss": 0.3625, "loss": 0.3625, "step": 5185 }, { "epoch": 2.3246049830039968, "learning_rate": 6.3398034812638864e-06, "lm_loss": 0.3301, "loss": 0.3301, "step": 5186 }, { "epoch": 2.3250532292405963, "learning_rate": 6.331750904128722e-06, "lm_loss": 0.4798, "loss": 0.4798, "step": 5187 }, { "epoch": 2.3255014754771954, "learning_rate": 6.323702702695932e-06, "lm_loss": 0.4018, "loss": 0.4018, "step": 5188 }, { "epoch": 2.325949721713795, "learning_rate": 6.315658878851971e-06, "lm_loss": 0.563, "loss": 0.563, "step": 5189 }, { "epoch": 2.326397967950394, "learning_rate": 6.3076194344822594e-06, "lm_loss": 0.3505, "loss": 0.3505, "step": 5190 }, { "epoch": 2.3268462141869932, "learning_rate": 6.2995843714711595e-06, "lm_loss": 0.2856, "loss": 0.2856, "step": 5191 }, { "epoch": 2.327294460423593, "learning_rate": 6.291553691702062e-06, "lm_loss": 0.391, "loss": 0.391, "step": 5192 }, { "epoch": 2.327742706660192, "learning_rate": 6.283527397057279e-06, "lm_loss": 0.3599, "loss": 0.3599, "step": 5193 }, { "epoch": 2.3281909528967915, "learning_rate": 6.275505489418124e-06, "lm_loss": 0.332, "loss": 0.332, "step": 5194 }, { "epoch": 2.3286391991333906, "learning_rate": 6.267487970664878e-06, "lm_loss": 0.3827, "loss": 0.3827, "step": 5195 }, { "epoch": 2.32908744536999, "learning_rate": 6.259474842676779e-06, "lm_loss": 0.4359, "loss": 0.4359, "step": 5196 }, { "epoch": 2.3295356916065892, "learning_rate": 6.251466107332055e-06, "lm_loss": 0.3783, "loss": 0.3783, "step": 5197 }, { "epoch": 2.3299839378431884, "learning_rate": 6.243461766507894e-06, "lm_loss": 0.6368, "loss": 0.6368, "step": 5198 }, { "epoch": 2.330432184079788, "learning_rate": 6.235461822080449e-06, "lm_loss": 0.296, "loss": 0.296, "step": 5199 }, { "epoch": 2.330880430316387, "learning_rate": 6.227466275924854e-06, "lm_loss": 0.5852, "loss": 0.5852, "step": 5200 }, { "epoch": 2.3313286765529866, "learning_rate": 6.219475129915206e-06, "lm_loss": 0.3659, "loss": 0.3659, "step": 5201 }, { "epoch": 2.3317769227895857, "learning_rate": 6.2114883859245715e-06, "lm_loss": 0.3675, "loss": 0.3675, "step": 5202 }, { "epoch": 2.3322251690261853, "learning_rate": 6.203506045824997e-06, "lm_loss": 0.3825, "loss": 0.3825, "step": 5203 }, { "epoch": 2.3326734152627844, "learning_rate": 6.195528111487464e-06, "lm_loss": 0.498, "loss": 0.498, "step": 5204 }, { "epoch": 2.3331216614993835, "learning_rate": 6.187554584781957e-06, "lm_loss": 0.3641, "loss": 0.3641, "step": 5205 }, { "epoch": 2.333569907735983, "learning_rate": 6.179585467577415e-06, "lm_loss": 0.3106, "loss": 0.3106, "step": 5206 }, { "epoch": 2.334018153972582, "learning_rate": 6.171620761741725e-06, "lm_loss": 0.3437, "loss": 0.3437, "step": 5207 }, { "epoch": 2.3344664002091817, "learning_rate": 6.163660469141772e-06, "lm_loss": 0.4383, "loss": 0.4383, "step": 5208 }, { "epoch": 2.334914646445781, "learning_rate": 6.155704591643383e-06, "lm_loss": 0.3469, "loss": 0.3469, "step": 5209 }, { "epoch": 2.3353628926823804, "learning_rate": 6.147753131111361e-06, "lm_loss": 0.4171, "loss": 0.4171, "step": 5210 }, { "epoch": 2.3358111389189795, "learning_rate": 6.139806089409477e-06, "lm_loss": 0.5842, "loss": 0.5842, "step": 5211 }, { "epoch": 2.3362593851555786, "learning_rate": 6.131863468400448e-06, "lm_loss": 0.4028, "loss": 0.4028, "step": 5212 }, { "epoch": 2.336707631392178, "learning_rate": 6.123925269945974e-06, "lm_loss": 0.4123, "loss": 0.4123, "step": 5213 }, { "epoch": 2.3371558776287773, "learning_rate": 6.115991495906715e-06, "lm_loss": 0.3744, "loss": 0.3744, "step": 5214 }, { "epoch": 2.337604123865377, "learning_rate": 6.1080621481422765e-06, "lm_loss": 0.4963, "loss": 0.4963, "step": 5215 }, { "epoch": 2.338052370101976, "learning_rate": 6.1001372285112515e-06, "lm_loss": 0.4056, "loss": 0.4056, "step": 5216 }, { "epoch": 2.3385006163385755, "learning_rate": 6.092216738871187e-06, "lm_loss": 0.3199, "loss": 0.3199, "step": 5217 }, { "epoch": 2.3389488625751746, "learning_rate": 6.084300681078567e-06, "lm_loss": 0.2968, "loss": 0.2968, "step": 5218 }, { "epoch": 2.3393971088117738, "learning_rate": 6.076389056988887e-06, "lm_loss": 0.4099, "loss": 0.4099, "step": 5219 }, { "epoch": 2.3398453550483733, "learning_rate": 6.068481868456558e-06, "lm_loss": 0.2903, "loss": 0.2903, "step": 5220 }, { "epoch": 2.3402936012849724, "learning_rate": 6.060579117334966e-06, "lm_loss": 0.4467, "loss": 0.4467, "step": 5221 }, { "epoch": 2.340741847521572, "learning_rate": 6.052680805476474e-06, "lm_loss": 0.406, "loss": 0.406, "step": 5222 }, { "epoch": 2.341190093758171, "learning_rate": 6.044786934732369e-06, "lm_loss": 0.4186, "loss": 0.4186, "step": 5223 }, { "epoch": 2.3416383399947707, "learning_rate": 6.036897506952932e-06, "lm_loss": 0.4552, "loss": 0.4552, "step": 5224 }, { "epoch": 2.34208658623137, "learning_rate": 6.029012523987376e-06, "lm_loss": 0.2586, "loss": 0.2586, "step": 5225 }, { "epoch": 2.342534832467969, "learning_rate": 6.021131987683893e-06, "lm_loss": 0.4516, "loss": 0.4516, "step": 5226 }, { "epoch": 2.3429830787045685, "learning_rate": 6.0132558998896166e-06, "lm_loss": 0.3415, "loss": 0.3415, "step": 5227 }, { "epoch": 2.3434313249411676, "learning_rate": 6.00538426245065e-06, "lm_loss": 0.5159, "loss": 0.5159, "step": 5228 }, { "epoch": 2.343879571177767, "learning_rate": 5.997517077212056e-06, "lm_loss": 0.3359, "loss": 0.3359, "step": 5229 }, { "epoch": 2.3443278174143662, "learning_rate": 5.989654346017828e-06, "lm_loss": 0.3556, "loss": 0.3556, "step": 5230 }, { "epoch": 2.344776063650966, "learning_rate": 5.9817960707109406e-06, "lm_loss": 0.2842, "loss": 0.2842, "step": 5231 }, { "epoch": 2.345224309887565, "learning_rate": 5.973942253133328e-06, "lm_loss": 0.3213, "loss": 0.3213, "step": 5232 }, { "epoch": 2.345672556124164, "learning_rate": 5.966092895125849e-06, "lm_loss": 0.4829, "loss": 0.4829, "step": 5233 }, { "epoch": 2.3461208023607636, "learning_rate": 5.958247998528346e-06, "lm_loss": 0.1973, "loss": 0.1973, "step": 5234 }, { "epoch": 2.3465690485973627, "learning_rate": 5.950407565179611e-06, "lm_loss": 0.5602, "loss": 0.5602, "step": 5235 }, { "epoch": 2.3470172948339623, "learning_rate": 5.9425715969173676e-06, "lm_loss": 0.3886, "loss": 0.3886, "step": 5236 }, { "epoch": 2.3474655410705614, "learning_rate": 5.934740095578334e-06, "lm_loss": 0.4826, "loss": 0.4826, "step": 5237 }, { "epoch": 2.347913787307161, "learning_rate": 5.926913062998138e-06, "lm_loss": 0.4135, "loss": 0.4135, "step": 5238 }, { "epoch": 2.34836203354376, "learning_rate": 5.919090501011385e-06, "lm_loss": 0.5498, "loss": 0.5498, "step": 5239 }, { "epoch": 2.348810279780359, "learning_rate": 5.9112724114516376e-06, "lm_loss": 0.3705, "loss": 0.3705, "step": 5240 }, { "epoch": 2.3492585260169587, "learning_rate": 5.903458796151381e-06, "lm_loss": 0.458, "loss": 0.458, "step": 5241 }, { "epoch": 2.349706772253558, "learning_rate": 5.895649656942079e-06, "lm_loss": 0.4025, "loss": 0.4025, "step": 5242 }, { "epoch": 2.3501550184901574, "learning_rate": 5.887844995654146e-06, "lm_loss": 0.3727, "loss": 0.3727, "step": 5243 }, { "epoch": 2.3506032647267565, "learning_rate": 5.88004481411692e-06, "lm_loss": 0.379, "loss": 0.379, "step": 5244 }, { "epoch": 2.351051510963356, "learning_rate": 5.872249114158717e-06, "lm_loss": 0.3604, "loss": 0.3604, "step": 5245 }, { "epoch": 2.351499757199955, "learning_rate": 5.864457897606793e-06, "lm_loss": 0.3401, "loss": 0.3401, "step": 5246 }, { "epoch": 2.3519480034365543, "learning_rate": 5.85667116628735e-06, "lm_loss": 0.3844, "loss": 0.3844, "step": 5247 }, { "epoch": 2.352396249673154, "learning_rate": 5.848888922025553e-06, "lm_loss": 0.4851, "loss": 0.4851, "step": 5248 }, { "epoch": 2.352844495909753, "learning_rate": 5.841111166645486e-06, "lm_loss": 0.3455, "loss": 0.3455, "step": 5249 }, { "epoch": 2.3532927421463525, "learning_rate": 5.833337901970204e-06, "lm_loss": 0.4003, "loss": 0.4003, "step": 5250 }, { "epoch": 2.3537409883829516, "learning_rate": 5.825569129821715e-06, "lm_loss": 0.4081, "loss": 0.4081, "step": 5251 }, { "epoch": 2.354189234619551, "learning_rate": 5.817804852020947e-06, "lm_loss": 0.4651, "loss": 0.4651, "step": 5252 }, { "epoch": 2.3546374808561503, "learning_rate": 5.8100450703877984e-06, "lm_loss": 0.266, "loss": 0.266, "step": 5253 }, { "epoch": 2.3550857270927494, "learning_rate": 5.8022897867411025e-06, "lm_loss": 0.3818, "loss": 0.3818, "step": 5254 }, { "epoch": 2.355533973329349, "learning_rate": 5.794539002898647e-06, "lm_loss": 0.3723, "loss": 0.3723, "step": 5255 }, { "epoch": 2.355982219565948, "learning_rate": 5.786792720677161e-06, "lm_loss": 0.3742, "loss": 0.3742, "step": 5256 }, { "epoch": 2.3564304658025477, "learning_rate": 5.779050941892309e-06, "lm_loss": 0.5382, "loss": 0.5382, "step": 5257 }, { "epoch": 2.356878712039147, "learning_rate": 5.77131366835871e-06, "lm_loss": 0.3034, "loss": 0.3034, "step": 5258 }, { "epoch": 2.3573269582757463, "learning_rate": 5.763580901889934e-06, "lm_loss": 0.2935, "loss": 0.2935, "step": 5259 }, { "epoch": 2.3577752045123455, "learning_rate": 5.755852644298471e-06, "lm_loss": 0.2991, "loss": 0.2991, "step": 5260 }, { "epoch": 2.3582234507489446, "learning_rate": 5.74812889739578e-06, "lm_loss": 0.4815, "loss": 0.4815, "step": 5261 }, { "epoch": 2.358671696985544, "learning_rate": 5.740409662992244e-06, "lm_loss": 0.4494, "loss": 0.4494, "step": 5262 }, { "epoch": 2.3591199432221432, "learning_rate": 5.732694942897187e-06, "lm_loss": 0.2721, "loss": 0.2721, "step": 5263 }, { "epoch": 2.359568189458743, "learning_rate": 5.724984738918912e-06, "lm_loss": 0.4358, "loss": 0.4358, "step": 5264 }, { "epoch": 2.360016435695342, "learning_rate": 5.717279052864611e-06, "lm_loss": 0.4377, "loss": 0.4377, "step": 5265 }, { "epoch": 2.3604646819319415, "learning_rate": 5.709577886540446e-06, "lm_loss": 0.3252, "loss": 0.3252, "step": 5266 }, { "epoch": 2.3609129281685406, "learning_rate": 5.701881241751525e-06, "lm_loss": 0.4339, "loss": 0.4339, "step": 5267 }, { "epoch": 2.3613611744051397, "learning_rate": 5.694189120301871e-06, "lm_loss": 0.2884, "loss": 0.2884, "step": 5268 }, { "epoch": 2.3618094206417393, "learning_rate": 5.686501523994475e-06, "lm_loss": 0.4338, "loss": 0.4338, "step": 5269 }, { "epoch": 2.3622576668783384, "learning_rate": 5.678818454631238e-06, "lm_loss": 0.3319, "loss": 0.3319, "step": 5270 }, { "epoch": 2.362705913114938, "learning_rate": 5.671139914013027e-06, "lm_loss": 0.3649, "loss": 0.3649, "step": 5271 }, { "epoch": 2.363154159351537, "learning_rate": 5.663465903939633e-06, "lm_loss": 0.5043, "loss": 0.5043, "step": 5272 }, { "epoch": 2.3636024055881366, "learning_rate": 5.655796426209786e-06, "lm_loss": 0.4842, "loss": 0.4842, "step": 5273 }, { "epoch": 2.3640506518247357, "learning_rate": 5.6481314826211714e-06, "lm_loss": 0.355, "loss": 0.355, "step": 5274 }, { "epoch": 2.364498898061335, "learning_rate": 5.640471074970374e-06, "lm_loss": 0.3729, "loss": 0.3729, "step": 5275 }, { "epoch": 2.3649471442979344, "learning_rate": 5.632815205052952e-06, "lm_loss": 0.5138, "loss": 0.5138, "step": 5276 }, { "epoch": 2.3653953905345335, "learning_rate": 5.625163874663386e-06, "lm_loss": 0.4716, "loss": 0.4716, "step": 5277 }, { "epoch": 2.365843636771133, "learning_rate": 5.617517085595084e-06, "lm_loss": 0.3619, "loss": 0.3619, "step": 5278 }, { "epoch": 2.366291883007732, "learning_rate": 5.6098748396404e-06, "lm_loss": 0.3896, "loss": 0.3896, "step": 5279 }, { "epoch": 2.3667401292443317, "learning_rate": 5.602237138590633e-06, "lm_loss": 0.4946, "loss": 0.4946, "step": 5280 }, { "epoch": 2.367188375480931, "learning_rate": 5.59460398423598e-06, "lm_loss": 0.3393, "loss": 0.3393, "step": 5281 }, { "epoch": 2.36763662171753, "learning_rate": 5.586975378365627e-06, "lm_loss": 0.3986, "loss": 0.3986, "step": 5282 }, { "epoch": 2.3680848679541295, "learning_rate": 5.579351322767643e-06, "lm_loss": 0.3914, "loss": 0.3914, "step": 5283 }, { "epoch": 2.3685331141907287, "learning_rate": 5.571731819229056e-06, "lm_loss": 0.3243, "loss": 0.3243, "step": 5284 }, { "epoch": 2.368981360427328, "learning_rate": 5.564116869535835e-06, "lm_loss": 0.4389, "loss": 0.4389, "step": 5285 }, { "epoch": 2.3694296066639273, "learning_rate": 5.556506475472848e-06, "lm_loss": 0.3053, "loss": 0.3053, "step": 5286 }, { "epoch": 2.369877852900527, "learning_rate": 5.548900638823929e-06, "lm_loss": 0.5137, "loss": 0.5137, "step": 5287 }, { "epoch": 2.370326099137126, "learning_rate": 5.541299361371835e-06, "lm_loss": 0.2001, "loss": 0.2001, "step": 5288 }, { "epoch": 2.370774345373725, "learning_rate": 5.533702644898237e-06, "lm_loss": 0.4469, "loss": 0.4469, "step": 5289 }, { "epoch": 2.3712225916103247, "learning_rate": 5.526110491183758e-06, "lm_loss": 0.3299, "loss": 0.3299, "step": 5290 }, { "epoch": 2.371670837846924, "learning_rate": 5.518522902007944e-06, "lm_loss": 0.3547, "loss": 0.3547, "step": 5291 }, { "epoch": 2.3721190840835233, "learning_rate": 5.510939879149269e-06, "lm_loss": 0.3118, "loss": 0.3118, "step": 5292 }, { "epoch": 2.3725673303201225, "learning_rate": 5.503361424385148e-06, "lm_loss": 0.5098, "loss": 0.5098, "step": 5293 }, { "epoch": 2.373015576556722, "learning_rate": 5.495787539491901e-06, "lm_loss": 0.2121, "loss": 0.2121, "step": 5294 }, { "epoch": 2.373463822793321, "learning_rate": 5.488218226244796e-06, "lm_loss": 0.4725, "loss": 0.4725, "step": 5295 }, { "epoch": 2.3739120690299202, "learning_rate": 5.480653486418039e-06, "lm_loss": 0.3853, "loss": 0.3853, "step": 5296 }, { "epoch": 2.37436031526652, "learning_rate": 5.473093321784728e-06, "lm_loss": 0.4087, "loss": 0.4087, "step": 5297 }, { "epoch": 2.374808561503119, "learning_rate": 5.465537734116924e-06, "lm_loss": 0.4885, "loss": 0.4885, "step": 5298 }, { "epoch": 2.3752568077397185, "learning_rate": 5.457986725185599e-06, "lm_loss": 0.3791, "loss": 0.3791, "step": 5299 }, { "epoch": 2.3757050539763176, "learning_rate": 5.450440296760656e-06, "lm_loss": 0.4954, "loss": 0.4954, "step": 5300 }, { "epoch": 2.376153300212917, "learning_rate": 5.442898450610931e-06, "lm_loss": 0.289, "loss": 0.289, "step": 5301 }, { "epoch": 2.3766015464495163, "learning_rate": 5.435361188504162e-06, "lm_loss": 0.3638, "loss": 0.3638, "step": 5302 }, { "epoch": 2.3770497926861154, "learning_rate": 5.427828512207037e-06, "lm_loss": 0.4695, "loss": 0.4695, "step": 5303 }, { "epoch": 2.377498038922715, "learning_rate": 5.420300423485167e-06, "lm_loss": 0.338, "loss": 0.338, "step": 5304 }, { "epoch": 2.377946285159314, "learning_rate": 5.412776924103066e-06, "lm_loss": 0.3094, "loss": 0.3094, "step": 5305 }, { "epoch": 2.3783945313959136, "learning_rate": 5.4052580158242075e-06, "lm_loss": 0.3513, "loss": 0.3513, "step": 5306 }, { "epoch": 2.3788427776325127, "learning_rate": 5.397743700410948e-06, "lm_loss": 0.4428, "loss": 0.4428, "step": 5307 }, { "epoch": 2.3792910238691123, "learning_rate": 5.3902339796245955e-06, "lm_loss": 0.3564, "loss": 0.3564, "step": 5308 }, { "epoch": 2.3797392701057114, "learning_rate": 5.382728855225388e-06, "lm_loss": 0.3684, "loss": 0.3684, "step": 5309 }, { "epoch": 2.3801875163423105, "learning_rate": 5.3752283289724566e-06, "lm_loss": 0.4726, "loss": 0.4726, "step": 5310 }, { "epoch": 2.38063576257891, "learning_rate": 5.367732402623882e-06, "lm_loss": 0.307, "loss": 0.307, "step": 5311 }, { "epoch": 2.381084008815509, "learning_rate": 5.360241077936642e-06, "lm_loss": 0.5044, "loss": 0.5044, "step": 5312 }, { "epoch": 2.3815322550521087, "learning_rate": 5.352754356666656e-06, "lm_loss": 0.4556, "loss": 0.4556, "step": 5313 }, { "epoch": 2.381980501288708, "learning_rate": 5.3452722405687635e-06, "lm_loss": 0.3937, "loss": 0.3937, "step": 5314 }, { "epoch": 2.3824287475253074, "learning_rate": 5.337794731396703e-06, "lm_loss": 0.4087, "loss": 0.4087, "step": 5315 }, { "epoch": 2.3828769937619065, "learning_rate": 5.330321830903159e-06, "lm_loss": 0.4052, "loss": 0.4052, "step": 5316 }, { "epoch": 2.3833252399985057, "learning_rate": 5.322853540839723e-06, "lm_loss": 0.3508, "loss": 0.3508, "step": 5317 }, { "epoch": 2.383773486235105, "learning_rate": 5.31538986295691e-06, "lm_loss": 0.4303, "loss": 0.4303, "step": 5318 }, { "epoch": 2.3842217324717043, "learning_rate": 5.307930799004155e-06, "lm_loss": 0.4036, "loss": 0.4036, "step": 5319 }, { "epoch": 2.384669978708304, "learning_rate": 5.300476350729796e-06, "lm_loss": 0.2673, "loss": 0.2673, "step": 5320 }, { "epoch": 2.385118224944903, "learning_rate": 5.293026519881114e-06, "lm_loss": 0.4529, "loss": 0.4529, "step": 5321 }, { "epoch": 2.3855664711815026, "learning_rate": 5.2855813082042936e-06, "lm_loss": 0.2321, "loss": 0.2321, "step": 5322 }, { "epoch": 2.3860147174181017, "learning_rate": 5.278140717444433e-06, "lm_loss": 0.4606, "loss": 0.4606, "step": 5323 }, { "epoch": 2.386462963654701, "learning_rate": 5.270704749345553e-06, "lm_loss": 0.4275, "loss": 0.4275, "step": 5324 }, { "epoch": 2.3869112098913003, "learning_rate": 5.263273405650601e-06, "lm_loss": 0.2999, "loss": 0.2999, "step": 5325 }, { "epoch": 2.3873594561278995, "learning_rate": 5.25584668810141e-06, "lm_loss": 0.3706, "loss": 0.3706, "step": 5326 }, { "epoch": 2.387807702364499, "learning_rate": 5.2484245984387725e-06, "lm_loss": 0.6154, "loss": 0.6154, "step": 5327 }, { "epoch": 2.388255948601098, "learning_rate": 5.2410071384023565e-06, "lm_loss": 0.4598, "loss": 0.4598, "step": 5328 }, { "epoch": 2.3887041948376977, "learning_rate": 5.233594309730766e-06, "lm_loss": 0.2797, "loss": 0.2797, "step": 5329 }, { "epoch": 2.389152441074297, "learning_rate": 5.226186114161522e-06, "lm_loss": 0.3938, "loss": 0.3938, "step": 5330 }, { "epoch": 2.389600687310896, "learning_rate": 5.218782553431037e-06, "lm_loss": 0.3919, "loss": 0.3919, "step": 5331 }, { "epoch": 2.3900489335474955, "learning_rate": 5.211383629274658e-06, "lm_loss": 0.376, "loss": 0.376, "step": 5332 }, { "epoch": 2.3904971797840946, "learning_rate": 5.203989343426649e-06, "lm_loss": 0.3849, "loss": 0.3849, "step": 5333 }, { "epoch": 2.390945426020694, "learning_rate": 5.196599697620158e-06, "lm_loss": 0.3652, "loss": 0.3652, "step": 5334 }, { "epoch": 2.3913936722572933, "learning_rate": 5.18921469358728e-06, "lm_loss": 0.3339, "loss": 0.3339, "step": 5335 }, { "epoch": 2.391841918493893, "learning_rate": 5.181834333058997e-06, "lm_loss": 0.5386, "loss": 0.5386, "step": 5336 }, { "epoch": 2.392290164730492, "learning_rate": 5.174458617765218e-06, "lm_loss": 0.2464, "loss": 0.2464, "step": 5337 }, { "epoch": 2.392738410967091, "learning_rate": 5.167087549434762e-06, "lm_loss": 0.3873, "loss": 0.3873, "step": 5338 }, { "epoch": 2.3931866572036906, "learning_rate": 5.159721129795342e-06, "lm_loss": 0.2478, "loss": 0.2478, "step": 5339 }, { "epoch": 2.3936349034402897, "learning_rate": 5.1523593605736e-06, "lm_loss": 0.4344, "loss": 0.4344, "step": 5340 }, { "epoch": 2.3940831496768893, "learning_rate": 5.145002243495084e-06, "lm_loss": 0.2516, "loss": 0.2516, "step": 5341 }, { "epoch": 2.3945313959134884, "learning_rate": 5.1376497802842426e-06, "lm_loss": 0.4103, "loss": 0.4103, "step": 5342 }, { "epoch": 2.394979642150088, "learning_rate": 5.1303019726644395e-06, "lm_loss": 0.3271, "loss": 0.3271, "step": 5343 }, { "epoch": 2.395427888386687, "learning_rate": 5.122958822357954e-06, "lm_loss": 0.3768, "loss": 0.3768, "step": 5344 }, { "epoch": 2.395876134623286, "learning_rate": 5.11562033108596e-06, "lm_loss": 0.3893, "loss": 0.3893, "step": 5345 }, { "epoch": 2.3963243808598857, "learning_rate": 5.108286500568563e-06, "lm_loss": 0.4248, "loss": 0.4248, "step": 5346 }, { "epoch": 2.396772627096485, "learning_rate": 5.100957332524736e-06, "lm_loss": 0.296, "loss": 0.296, "step": 5347 }, { "epoch": 2.3972208733330844, "learning_rate": 5.093632828672398e-06, "lm_loss": 0.3411, "loss": 0.3411, "step": 5348 }, { "epoch": 2.3976691195696835, "learning_rate": 5.086312990728362e-06, "lm_loss": 0.4369, "loss": 0.4369, "step": 5349 }, { "epoch": 2.398117365806283, "learning_rate": 5.078997820408332e-06, "lm_loss": 0.3766, "loss": 0.3766, "step": 5350 }, { "epoch": 2.398565612042882, "learning_rate": 5.071687319426946e-06, "lm_loss": 0.5787, "loss": 0.5787, "step": 5351 }, { "epoch": 2.3990138582794813, "learning_rate": 5.06438148949771e-06, "lm_loss": 0.2794, "loss": 0.2794, "step": 5352 }, { "epoch": 2.399462104516081, "learning_rate": 5.0570803323330776e-06, "lm_loss": 0.5151, "loss": 0.5151, "step": 5353 }, { "epoch": 2.39991035075268, "learning_rate": 5.0497838496443935e-06, "lm_loss": 0.3505, "loss": 0.3505, "step": 5354 }, { "epoch": 2.4003585969892796, "learning_rate": 5.042492043141878e-06, "lm_loss": 0.3327, "loss": 0.3327, "step": 5355 }, { "epoch": 2.4008068432258787, "learning_rate": 5.035204914534697e-06, "lm_loss": 0.4176, "loss": 0.4176, "step": 5356 }, { "epoch": 2.4012550894624782, "learning_rate": 5.027922465530885e-06, "lm_loss": 0.3288, "loss": 0.3288, "step": 5357 }, { "epoch": 2.4017033356990773, "learning_rate": 5.020644697837401e-06, "lm_loss": 0.5836, "loss": 0.5836, "step": 5358 }, { "epoch": 2.4021515819356765, "learning_rate": 5.013371613160112e-06, "lm_loss": 0.3766, "loss": 0.3766, "step": 5359 }, { "epoch": 2.402599828172276, "learning_rate": 5.006103213203758e-06, "lm_loss": 0.356, "loss": 0.356, "step": 5360 }, { "epoch": 2.403048074408875, "learning_rate": 4.9988394996720085e-06, "lm_loss": 0.2811, "loss": 0.2811, "step": 5361 }, { "epoch": 2.4034963206454747, "learning_rate": 4.991580474267427e-06, "lm_loss": 0.4694, "loss": 0.4694, "step": 5362 }, { "epoch": 2.403944566882074, "learning_rate": 4.984326138691472e-06, "lm_loss": 0.3122, "loss": 0.3122, "step": 5363 }, { "epoch": 2.4043928131186734, "learning_rate": 4.97707649464452e-06, "lm_loss": 0.2987, "loss": 0.2987, "step": 5364 }, { "epoch": 2.4048410593552725, "learning_rate": 4.969831543825815e-06, "lm_loss": 0.4215, "loss": 0.4215, "step": 5365 }, { "epoch": 2.4052893055918716, "learning_rate": 4.962591287933532e-06, "lm_loss": 0.3184, "loss": 0.3184, "step": 5366 }, { "epoch": 2.405737551828471, "learning_rate": 4.95535572866474e-06, "lm_loss": 0.4985, "loss": 0.4985, "step": 5367 }, { "epoch": 2.4061857980650703, "learning_rate": 4.948124867715387e-06, "lm_loss": 0.4129, "loss": 0.4129, "step": 5368 }, { "epoch": 2.40663404430167, "learning_rate": 4.940898706780345e-06, "lm_loss": 0.4256, "loss": 0.4256, "step": 5369 }, { "epoch": 2.407082290538269, "learning_rate": 4.93367724755337e-06, "lm_loss": 0.3284, "loss": 0.3284, "step": 5370 }, { "epoch": 2.4075305367748685, "learning_rate": 4.9264604917271195e-06, "lm_loss": 0.2791, "loss": 0.2791, "step": 5371 }, { "epoch": 2.4079787830114676, "learning_rate": 4.91924844099316e-06, "lm_loss": 0.446, "loss": 0.446, "step": 5372 }, { "epoch": 2.4084270292480667, "learning_rate": 4.912041097041925e-06, "lm_loss": 0.2191, "loss": 0.2191, "step": 5373 }, { "epoch": 2.4088752754846663, "learning_rate": 4.904838461562774e-06, "lm_loss": 0.4104, "loss": 0.4104, "step": 5374 }, { "epoch": 2.4093235217212654, "learning_rate": 4.897640536243958e-06, "lm_loss": 0.4443, "loss": 0.4443, "step": 5375 }, { "epoch": 2.409771767957865, "learning_rate": 4.890447322772607e-06, "lm_loss": 0.3928, "loss": 0.3928, "step": 5376 }, { "epoch": 2.410220014194464, "learning_rate": 4.8832588228347605e-06, "lm_loss": 0.5176, "loss": 0.5176, "step": 5377 }, { "epoch": 2.4106682604310636, "learning_rate": 4.8760750381153634e-06, "lm_loss": 0.3343, "loss": 0.3343, "step": 5378 }, { "epoch": 2.4111165066676628, "learning_rate": 4.868895970298218e-06, "lm_loss": 0.4155, "loss": 0.4155, "step": 5379 }, { "epoch": 2.411564752904262, "learning_rate": 4.861721621066076e-06, "lm_loss": 0.4133, "loss": 0.4133, "step": 5380 }, { "epoch": 2.4120129991408614, "learning_rate": 4.854551992100528e-06, "lm_loss": 0.4013, "loss": 0.4013, "step": 5381 }, { "epoch": 2.4124612453774605, "learning_rate": 4.8473870850820944e-06, "lm_loss": 0.3897, "loss": 0.3897, "step": 5382 }, { "epoch": 2.41290949161406, "learning_rate": 4.840226901690184e-06, "lm_loss": 0.3667, "loss": 0.3667, "step": 5383 }, { "epoch": 2.413357737850659, "learning_rate": 4.833071443603077e-06, "lm_loss": 0.4048, "loss": 0.4048, "step": 5384 }, { "epoch": 2.4138059840872588, "learning_rate": 4.825920712497967e-06, "lm_loss": 0.3526, "loss": 0.3526, "step": 5385 }, { "epoch": 2.414254230323858, "learning_rate": 4.818774710050938e-06, "lm_loss": 0.3405, "loss": 0.3405, "step": 5386 }, { "epoch": 2.414702476560457, "learning_rate": 4.811633437936955e-06, "lm_loss": 0.4306, "loss": 0.4306, "step": 5387 }, { "epoch": 2.4151507227970566, "learning_rate": 4.804496897829883e-06, "lm_loss": 0.3392, "loss": 0.3392, "step": 5388 }, { "epoch": 2.4155989690336557, "learning_rate": 4.797365091402475e-06, "lm_loss": 0.3047, "loss": 0.3047, "step": 5389 }, { "epoch": 2.4160472152702552, "learning_rate": 4.790238020326373e-06, "lm_loss": 0.3595, "loss": 0.3595, "step": 5390 }, { "epoch": 2.4164954615068543, "learning_rate": 4.7831156862721215e-06, "lm_loss": 0.4176, "loss": 0.4176, "step": 5391 }, { "epoch": 2.416943707743454, "learning_rate": 4.775998090909131e-06, "lm_loss": 0.358, "loss": 0.358, "step": 5392 }, { "epoch": 2.417391953980053, "learning_rate": 4.7688852359057155e-06, "lm_loss": 0.4312, "loss": 0.4312, "step": 5393 }, { "epoch": 2.417840200216652, "learning_rate": 4.76177712292909e-06, "lm_loss": 0.3798, "loss": 0.3798, "step": 5394 }, { "epoch": 2.4182884464532517, "learning_rate": 4.754673753645325e-06, "lm_loss": 0.3476, "loss": 0.3476, "step": 5395 }, { "epoch": 2.418736692689851, "learning_rate": 4.747575129719417e-06, "lm_loss": 0.4379, "loss": 0.4379, "step": 5396 }, { "epoch": 2.4191849389264504, "learning_rate": 4.74048125281521e-06, "lm_loss": 0.3803, "loss": 0.3803, "step": 5397 }, { "epoch": 2.4196331851630495, "learning_rate": 4.733392124595476e-06, "lm_loss": 0.2776, "loss": 0.2776, "step": 5398 }, { "epoch": 2.420081431399649, "learning_rate": 4.726307746721858e-06, "lm_loss": 0.4129, "loss": 0.4129, "step": 5399 }, { "epoch": 2.420529677636248, "learning_rate": 4.719228120854871e-06, "lm_loss": 0.4331, "loss": 0.4331, "step": 5400 }, { "epoch": 2.4209779238728473, "learning_rate": 4.712153248653936e-06, "lm_loss": 0.3079, "loss": 0.3079, "step": 5401 }, { "epoch": 2.421426170109447, "learning_rate": 4.705083131777343e-06, "lm_loss": 0.4286, "loss": 0.4286, "step": 5402 }, { "epoch": 2.421874416346046, "learning_rate": 4.69801777188228e-06, "lm_loss": 0.4088, "loss": 0.4088, "step": 5403 }, { "epoch": 2.4223226625826455, "learning_rate": 4.690957170624824e-06, "lm_loss": 0.3035, "loss": 0.3035, "step": 5404 }, { "epoch": 2.4227709088192446, "learning_rate": 4.683901329659918e-06, "lm_loss": 0.4057, "loss": 0.4057, "step": 5405 }, { "epoch": 2.423219155055844, "learning_rate": 4.676850250641401e-06, "lm_loss": 0.2807, "loss": 0.2807, "step": 5406 }, { "epoch": 2.4236674012924433, "learning_rate": 4.669803935222e-06, "lm_loss": 0.4618, "loss": 0.4618, "step": 5407 }, { "epoch": 2.4241156475290424, "learning_rate": 4.662762385053318e-06, "lm_loss": 0.4205, "loss": 0.4205, "step": 5408 }, { "epoch": 2.424563893765642, "learning_rate": 4.6557256017858485e-06, "lm_loss": 0.2504, "loss": 0.2504, "step": 5409 }, { "epoch": 2.425012140002241, "learning_rate": 4.648693587068956e-06, "lm_loss": 0.3753, "loss": 0.3753, "step": 5410 }, { "epoch": 2.4254603862388406, "learning_rate": 4.641666342550891e-06, "lm_loss": 0.3328, "loss": 0.3328, "step": 5411 }, { "epoch": 2.4259086324754398, "learning_rate": 4.634643869878802e-06, "lm_loss": 0.4414, "loss": 0.4414, "step": 5412 }, { "epoch": 2.4263568787120393, "learning_rate": 4.62762617069869e-06, "lm_loss": 0.453, "loss": 0.453, "step": 5413 }, { "epoch": 2.4268051249486384, "learning_rate": 4.620613246655459e-06, "lm_loss": 0.3646, "loss": 0.3646, "step": 5414 }, { "epoch": 2.4272533711852375, "learning_rate": 4.61360509939289e-06, "lm_loss": 0.2641, "loss": 0.2641, "step": 5415 }, { "epoch": 2.427701617421837, "learning_rate": 4.606601730553639e-06, "lm_loss": 0.4319, "loss": 0.4319, "step": 5416 }, { "epoch": 2.428149863658436, "learning_rate": 4.5996031417792545e-06, "lm_loss": 0.4415, "loss": 0.4415, "step": 5417 }, { "epoch": 2.4285981098950358, "learning_rate": 4.592609334710141e-06, "lm_loss": 0.5791, "loss": 0.5791, "step": 5418 }, { "epoch": 2.429046356131635, "learning_rate": 4.5856203109856e-06, "lm_loss": 0.6597, "loss": 0.6597, "step": 5419 }, { "epoch": 2.4294946023682344, "learning_rate": 4.578636072243817e-06, "lm_loss": 0.4528, "loss": 0.4528, "step": 5420 }, { "epoch": 2.4299428486048336, "learning_rate": 4.571656620121833e-06, "lm_loss": 0.434, "loss": 0.434, "step": 5421 }, { "epoch": 2.4303910948414327, "learning_rate": 4.564681956255593e-06, "lm_loss": 0.5257, "loss": 0.5257, "step": 5422 }, { "epoch": 2.4308393410780322, "learning_rate": 4.557712082279905e-06, "lm_loss": 0.6328, "loss": 0.6328, "step": 5423 }, { "epoch": 2.4312875873146313, "learning_rate": 4.5507469998284425e-06, "lm_loss": 0.5981, "loss": 0.5981, "step": 5424 }, { "epoch": 2.431735833551231, "learning_rate": 4.543786710533796e-06, "lm_loss": 0.5961, "loss": 0.5961, "step": 5425 }, { "epoch": 2.43218407978783, "learning_rate": 4.536831216027387e-06, "lm_loss": 0.3553, "loss": 0.3553, "step": 5426 }, { "epoch": 2.4326323260244296, "learning_rate": 4.529880517939542e-06, "lm_loss": 0.3279, "loss": 0.3279, "step": 5427 }, { "epoch": 2.4330805722610287, "learning_rate": 4.522934617899455e-06, "lm_loss": 0.3192, "loss": 0.3192, "step": 5428 }, { "epoch": 2.433528818497628, "learning_rate": 4.515993517535189e-06, "lm_loss": 0.3364, "loss": 0.3364, "step": 5429 }, { "epoch": 2.4339770647342274, "learning_rate": 4.5090572184736864e-06, "lm_loss": 0.4707, "loss": 0.4707, "step": 5430 }, { "epoch": 2.4344253109708265, "learning_rate": 4.502125722340778e-06, "lm_loss": 0.3343, "loss": 0.3343, "step": 5431 }, { "epoch": 2.434873557207426, "learning_rate": 4.495199030761141e-06, "lm_loss": 0.2391, "loss": 0.2391, "step": 5432 }, { "epoch": 2.435321803444025, "learning_rate": 4.4882771453583494e-06, "lm_loss": 0.4867, "loss": 0.4867, "step": 5433 }, { "epoch": 2.4357700496806247, "learning_rate": 4.4813600677548386e-06, "lm_loss": 0.3663, "loss": 0.3663, "step": 5434 }, { "epoch": 2.436218295917224, "learning_rate": 4.4744477995719295e-06, "lm_loss": 0.5815, "loss": 0.5815, "step": 5435 }, { "epoch": 2.436666542153823, "learning_rate": 4.467540342429807e-06, "lm_loss": 0.4357, "loss": 0.4357, "step": 5436 }, { "epoch": 2.4371147883904225, "learning_rate": 4.46063769794752e-06, "lm_loss": 0.4921, "loss": 0.4921, "step": 5437 }, { "epoch": 2.4375630346270216, "learning_rate": 4.4537398677430005e-06, "lm_loss": 0.4663, "loss": 0.4663, "step": 5438 }, { "epoch": 2.438011280863621, "learning_rate": 4.4468468534330625e-06, "lm_loss": 0.3664, "loss": 0.3664, "step": 5439 }, { "epoch": 2.4384595271002203, "learning_rate": 4.439958656633364e-06, "lm_loss": 0.3918, "loss": 0.3918, "step": 5440 }, { "epoch": 2.43890777333682, "learning_rate": 4.433075278958457e-06, "lm_loss": 0.417, "loss": 0.417, "step": 5441 }, { "epoch": 2.439356019573419, "learning_rate": 4.426196722021739e-06, "lm_loss": 0.3353, "loss": 0.3353, "step": 5442 }, { "epoch": 2.439804265810018, "learning_rate": 4.4193229874355145e-06, "lm_loss": 0.4732, "loss": 0.4732, "step": 5443 }, { "epoch": 2.4402525120466176, "learning_rate": 4.412454076810937e-06, "lm_loss": 0.3092, "loss": 0.3092, "step": 5444 }, { "epoch": 2.4407007582832168, "learning_rate": 4.405589991758014e-06, "lm_loss": 0.4116, "loss": 0.4116, "step": 5445 }, { "epoch": 2.4411490045198163, "learning_rate": 4.39873073388565e-06, "lm_loss": 0.2881, "loss": 0.2881, "step": 5446 }, { "epoch": 2.4415972507564154, "learning_rate": 4.391876304801598e-06, "lm_loss": 0.4226, "loss": 0.4226, "step": 5447 }, { "epoch": 2.442045496993015, "learning_rate": 4.385026706112486e-06, "lm_loss": 0.3536, "loss": 0.3536, "step": 5448 }, { "epoch": 2.442493743229614, "learning_rate": 4.378181939423823e-06, "lm_loss": 0.3958, "loss": 0.3958, "step": 5449 }, { "epoch": 2.442941989466213, "learning_rate": 4.371342006339954e-06, "lm_loss": 0.3238, "loss": 0.3238, "step": 5450 }, { "epoch": 2.4433902357028128, "learning_rate": 4.364506908464119e-06, "lm_loss": 0.5763, "loss": 0.5763, "step": 5451 }, { "epoch": 2.443838481939412, "learning_rate": 4.3576766473984175e-06, "lm_loss": 0.3576, "loss": 0.3576, "step": 5452 }, { "epoch": 2.4442867281760114, "learning_rate": 4.3508512247438095e-06, "lm_loss": 0.3497, "loss": 0.3497, "step": 5453 }, { "epoch": 2.4447349744126106, "learning_rate": 4.344030642100133e-06, "lm_loss": 0.4794, "loss": 0.4794, "step": 5454 }, { "epoch": 2.44518322064921, "learning_rate": 4.33721490106607e-06, "lm_loss": 0.3777, "loss": 0.3777, "step": 5455 }, { "epoch": 2.4456314668858092, "learning_rate": 4.3304040032391855e-06, "lm_loss": 0.432, "loss": 0.432, "step": 5456 }, { "epoch": 2.4460797131224084, "learning_rate": 4.3235979502159154e-06, "lm_loss": 0.2964, "loss": 0.2964, "step": 5457 }, { "epoch": 2.446527959359008, "learning_rate": 4.3167967435915336e-06, "lm_loss": 0.3774, "loss": 0.3774, "step": 5458 }, { "epoch": 2.446976205595607, "learning_rate": 4.3100003849602e-06, "lm_loss": 0.5524, "loss": 0.5524, "step": 5459 }, { "epoch": 2.4474244518322066, "learning_rate": 4.30320887591493e-06, "lm_loss": 0.3465, "loss": 0.3465, "step": 5460 }, { "epoch": 2.4478726980688057, "learning_rate": 4.2964222180476075e-06, "lm_loss": 0.3645, "loss": 0.3645, "step": 5461 }, { "epoch": 2.4483209443054053, "learning_rate": 4.2896404129489836e-06, "lm_loss": 0.4023, "loss": 0.4023, "step": 5462 }, { "epoch": 2.4487691905420044, "learning_rate": 4.282863462208647e-06, "lm_loss": 0.3946, "loss": 0.3946, "step": 5463 }, { "epoch": 2.4492174367786035, "learning_rate": 4.276091367415075e-06, "lm_loss": 0.3949, "loss": 0.3949, "step": 5464 }, { "epoch": 2.449665683015203, "learning_rate": 4.2693241301556025e-06, "lm_loss": 0.5131, "loss": 0.5131, "step": 5465 }, { "epoch": 2.450113929251802, "learning_rate": 4.2625617520164085e-06, "lm_loss": 0.3673, "loss": 0.3673, "step": 5466 }, { "epoch": 2.4505621754884017, "learning_rate": 4.255804234582553e-06, "lm_loss": 0.3546, "loss": 0.3546, "step": 5467 }, { "epoch": 2.451010421725001, "learning_rate": 4.2490515794379545e-06, "lm_loss": 0.3273, "loss": 0.3273, "step": 5468 }, { "epoch": 2.4514586679616004, "learning_rate": 4.242303788165367e-06, "lm_loss": 0.472, "loss": 0.472, "step": 5469 }, { "epoch": 2.4519069141981995, "learning_rate": 4.235560862346452e-06, "lm_loss": 0.3312, "loss": 0.3312, "step": 5470 }, { "epoch": 2.4523551604347986, "learning_rate": 4.22882280356168e-06, "lm_loss": 0.5543, "loss": 0.5543, "step": 5471 }, { "epoch": 2.452803406671398, "learning_rate": 4.222089613390412e-06, "lm_loss": 0.4047, "loss": 0.4047, "step": 5472 }, { "epoch": 2.4532516529079973, "learning_rate": 4.2153612934108605e-06, "lm_loss": 0.52, "loss": 0.52, "step": 5473 }, { "epoch": 2.453699899144597, "learning_rate": 4.208637845200089e-06, "lm_loss": 0.4919, "loss": 0.4919, "step": 5474 }, { "epoch": 2.454148145381196, "learning_rate": 4.2019192703340285e-06, "lm_loss": 0.2853, "loss": 0.2853, "step": 5475 }, { "epoch": 2.4545963916177955, "learning_rate": 4.1952055703874724e-06, "lm_loss": 0.3185, "loss": 0.3185, "step": 5476 }, { "epoch": 2.4550446378543946, "learning_rate": 4.188496746934045e-06, "lm_loss": 0.3675, "loss": 0.3675, "step": 5477 }, { "epoch": 2.4554928840909938, "learning_rate": 4.181792801546258e-06, "lm_loss": 0.3259, "loss": 0.3259, "step": 5478 }, { "epoch": 2.4559411303275933, "learning_rate": 4.175093735795468e-06, "lm_loss": 0.3584, "loss": 0.3584, "step": 5479 }, { "epoch": 2.4563893765641924, "learning_rate": 4.168399551251881e-06, "lm_loss": 0.3465, "loss": 0.3465, "step": 5480 }, { "epoch": 2.456837622800792, "learning_rate": 4.16171024948458e-06, "lm_loss": 0.4345, "loss": 0.4345, "step": 5481 }, { "epoch": 2.457285869037391, "learning_rate": 4.155025832061468e-06, "lm_loss": 0.3395, "loss": 0.3395, "step": 5482 }, { "epoch": 2.4577341152739907, "learning_rate": 4.148346300549344e-06, "lm_loss": 0.2952, "loss": 0.2952, "step": 5483 }, { "epoch": 2.4581823615105898, "learning_rate": 4.1416716565138265e-06, "lm_loss": 0.4178, "loss": 0.4178, "step": 5484 }, { "epoch": 2.458630607747189, "learning_rate": 4.1350019015194085e-06, "lm_loss": 0.2589, "loss": 0.2589, "step": 5485 }, { "epoch": 2.4590788539837884, "learning_rate": 4.12833703712944e-06, "lm_loss": 0.4562, "loss": 0.4562, "step": 5486 }, { "epoch": 2.4595271002203876, "learning_rate": 4.121677064906096e-06, "lm_loss": 0.3485, "loss": 0.3485, "step": 5487 }, { "epoch": 2.459975346456987, "learning_rate": 4.115021986410447e-06, "lm_loss": 0.3459, "loss": 0.3459, "step": 5488 }, { "epoch": 2.4604235926935862, "learning_rate": 4.108371803202396e-06, "lm_loss": 0.3836, "loss": 0.3836, "step": 5489 }, { "epoch": 2.460871838930186, "learning_rate": 4.1017265168406845e-06, "lm_loss": 0.3262, "loss": 0.3262, "step": 5490 }, { "epoch": 2.461320085166785, "learning_rate": 4.095086128882927e-06, "lm_loss": 0.6227, "loss": 0.6227, "step": 5491 }, { "epoch": 2.461768331403384, "learning_rate": 4.088450640885577e-06, "lm_loss": 0.2899, "loss": 0.2899, "step": 5492 }, { "epoch": 2.4622165776399836, "learning_rate": 4.081820054403948e-06, "lm_loss": 0.4147, "loss": 0.4147, "step": 5493 }, { "epoch": 2.4626648238765827, "learning_rate": 4.075194370992208e-06, "lm_loss": 0.2594, "loss": 0.2594, "step": 5494 }, { "epoch": 2.4631130701131823, "learning_rate": 4.068573592203359e-06, "lm_loss": 0.4081, "loss": 0.4081, "step": 5495 }, { "epoch": 2.4635613163497814, "learning_rate": 4.0619577195892654e-06, "lm_loss": 0.3811, "loss": 0.3811, "step": 5496 }, { "epoch": 2.464009562586381, "learning_rate": 4.055346754700639e-06, "lm_loss": 0.3794, "loss": 0.3794, "step": 5497 }, { "epoch": 2.46445780882298, "learning_rate": 4.04874069908705e-06, "lm_loss": 0.3645, "loss": 0.3645, "step": 5498 }, { "epoch": 2.464906055059579, "learning_rate": 4.042139554296909e-06, "lm_loss": 0.5648, "loss": 0.5648, "step": 5499 }, { "epoch": 2.4653543012961787, "learning_rate": 4.0355433218774665e-06, "lm_loss": 0.358, "loss": 0.358, "step": 5500 }, { "epoch": 2.465802547532778, "learning_rate": 4.028952003374836e-06, "lm_loss": 0.3307, "loss": 0.3307, "step": 5501 }, { "epoch": 2.4662507937693774, "learning_rate": 4.022365600333983e-06, "lm_loss": 0.3823, "loss": 0.3823, "step": 5502 }, { "epoch": 2.4666990400059765, "learning_rate": 4.0157841142987014e-06, "lm_loss": 0.3721, "loss": 0.3721, "step": 5503 }, { "epoch": 2.467147286242576, "learning_rate": 4.0092075468116496e-06, "lm_loss": 0.3501, "loss": 0.3501, "step": 5504 }, { "epoch": 2.467595532479175, "learning_rate": 4.002635899414322e-06, "lm_loss": 0.4657, "loss": 0.4657, "step": 5505 }, { "epoch": 2.4680437787157743, "learning_rate": 3.996069173647074e-06, "lm_loss": 0.4936, "loss": 0.4936, "step": 5506 }, { "epoch": 2.468492024952374, "learning_rate": 3.989507371049098e-06, "lm_loss": 0.4832, "loss": 0.4832, "step": 5507 }, { "epoch": 2.468940271188973, "learning_rate": 3.9829504931584245e-06, "lm_loss": 0.3801, "loss": 0.3801, "step": 5508 }, { "epoch": 2.4693885174255725, "learning_rate": 3.976398541511941e-06, "lm_loss": 0.4681, "loss": 0.4681, "step": 5509 }, { "epoch": 2.4698367636621716, "learning_rate": 3.969851517645387e-06, "lm_loss": 0.3665, "loss": 0.3665, "step": 5510 }, { "epoch": 2.470285009898771, "learning_rate": 3.9633094230933225e-06, "lm_loss": 0.4215, "loss": 0.4215, "step": 5511 }, { "epoch": 2.4707332561353703, "learning_rate": 3.956772259389175e-06, "lm_loss": 0.3536, "loss": 0.3536, "step": 5512 }, { "epoch": 2.4711815023719694, "learning_rate": 3.950240028065214e-06, "lm_loss": 0.4124, "loss": 0.4124, "step": 5513 }, { "epoch": 2.471629748608569, "learning_rate": 3.94371273065253e-06, "lm_loss": 0.5145, "loss": 0.5145, "step": 5514 }, { "epoch": 2.472077994845168, "learning_rate": 3.937190368681093e-06, "lm_loss": 0.426, "loss": 0.426, "step": 5515 }, { "epoch": 2.4725262410817677, "learning_rate": 3.9306729436796865e-06, "lm_loss": 0.317, "loss": 0.317, "step": 5516 }, { "epoch": 2.4729744873183668, "learning_rate": 3.924160457175951e-06, "lm_loss": 0.3227, "loss": 0.3227, "step": 5517 }, { "epoch": 2.4734227335549663, "learning_rate": 3.917652910696371e-06, "lm_loss": 0.4217, "loss": 0.4217, "step": 5518 }, { "epoch": 2.4738709797915654, "learning_rate": 3.911150305766256e-06, "lm_loss": 0.3357, "loss": 0.3357, "step": 5519 }, { "epoch": 2.4743192260281646, "learning_rate": 3.904652643909779e-06, "lm_loss": 0.3456, "loss": 0.3456, "step": 5520 }, { "epoch": 2.474767472264764, "learning_rate": 3.8981599266499495e-06, "lm_loss": 0.3295, "loss": 0.3295, "step": 5521 }, { "epoch": 2.4752157185013632, "learning_rate": 3.891672155508591e-06, "lm_loss": 0.3666, "loss": 0.3666, "step": 5522 }, { "epoch": 2.475663964737963, "learning_rate": 3.88518933200642e-06, "lm_loss": 0.4201, "loss": 0.4201, "step": 5523 }, { "epoch": 2.476112210974562, "learning_rate": 3.878711457662942e-06, "lm_loss": 0.4816, "loss": 0.4816, "step": 5524 }, { "epoch": 2.4765604572111615, "learning_rate": 3.872238533996531e-06, "lm_loss": 0.197, "loss": 0.197, "step": 5525 }, { "epoch": 2.4770087034477606, "learning_rate": 3.865770562524398e-06, "lm_loss": 0.454, "loss": 0.454, "step": 5526 }, { "epoch": 2.4774569496843597, "learning_rate": 3.85930754476258e-06, "lm_loss": 0.3994, "loss": 0.3994, "step": 5527 }, { "epoch": 2.4779051959209593, "learning_rate": 3.85284948222597e-06, "lm_loss": 0.3002, "loss": 0.3002, "step": 5528 }, { "epoch": 2.4783534421575584, "learning_rate": 3.8463963764282824e-06, "lm_loss": 0.3372, "loss": 0.3372, "step": 5529 }, { "epoch": 2.478801688394158, "learning_rate": 3.839948228882082e-06, "lm_loss": 0.421, "loss": 0.421, "step": 5530 }, { "epoch": 2.479249934630757, "learning_rate": 3.833505041098767e-06, "lm_loss": 0.4365, "loss": 0.4365, "step": 5531 }, { "epoch": 2.4796981808673566, "learning_rate": 3.8270668145885755e-06, "lm_loss": 0.3118, "loss": 0.3118, "step": 5532 }, { "epoch": 2.4801464271039557, "learning_rate": 3.820633550860589e-06, "lm_loss": 0.326, "loss": 0.326, "step": 5533 }, { "epoch": 2.480594673340555, "learning_rate": 3.8142052514227007e-06, "lm_loss": 0.4424, "loss": 0.4424, "step": 5534 }, { "epoch": 2.4810429195771544, "learning_rate": 3.8077819177816693e-06, "lm_loss": 0.4155, "loss": 0.4155, "step": 5535 }, { "epoch": 2.4814911658137535, "learning_rate": 3.8013635514430823e-06, "lm_loss": 0.4146, "loss": 0.4146, "step": 5536 }, { "epoch": 2.481939412050353, "learning_rate": 3.794950153911342e-06, "lm_loss": 0.4311, "loss": 0.4311, "step": 5537 }, { "epoch": 2.482387658286952, "learning_rate": 3.788541726689712e-06, "lm_loss": 0.2692, "loss": 0.2692, "step": 5538 }, { "epoch": 2.4828359045235517, "learning_rate": 3.782138271280289e-06, "lm_loss": 0.3407, "loss": 0.3407, "step": 5539 }, { "epoch": 2.483284150760151, "learning_rate": 3.7757397891839742e-06, "lm_loss": 0.372, "loss": 0.372, "step": 5540 }, { "epoch": 2.48373239699675, "learning_rate": 3.7693462819005516e-06, "lm_loss": 0.3355, "loss": 0.3355, "step": 5541 }, { "epoch": 2.4841806432333495, "learning_rate": 3.7629577509285947e-06, "lm_loss": 0.5178, "loss": 0.5178, "step": 5542 }, { "epoch": 2.4846288894699486, "learning_rate": 3.756574197765536e-06, "lm_loss": 0.4284, "loss": 0.4284, "step": 5543 }, { "epoch": 2.485077135706548, "learning_rate": 3.7501956239076403e-06, "lm_loss": 0.2676, "loss": 0.2676, "step": 5544 }, { "epoch": 2.4855253819431473, "learning_rate": 3.7438220308499833e-06, "lm_loss": 0.4358, "loss": 0.4358, "step": 5545 }, { "epoch": 2.485973628179747, "learning_rate": 3.737453420086501e-06, "lm_loss": 0.3109, "loss": 0.3109, "step": 5546 }, { "epoch": 2.486421874416346, "learning_rate": 3.7310897931099486e-06, "lm_loss": 0.5235, "loss": 0.5235, "step": 5547 }, { "epoch": 2.486870120652945, "learning_rate": 3.724731151411909e-06, "lm_loss": 0.355, "loss": 0.355, "step": 5548 }, { "epoch": 2.4873183668895447, "learning_rate": 3.718377496482803e-06, "lm_loss": 0.379, "loss": 0.379, "step": 5549 }, { "epoch": 2.4877666131261438, "learning_rate": 3.7120288298118837e-06, "lm_loss": 0.3636, "loss": 0.3636, "step": 5550 }, { "epoch": 2.4882148593627433, "learning_rate": 3.705685152887234e-06, "lm_loss": 0.4191, "loss": 0.4191, "step": 5551 }, { "epoch": 2.4886631055993425, "learning_rate": 3.6993464671957662e-06, "lm_loss": 0.2592, "loss": 0.2592, "step": 5552 }, { "epoch": 2.489111351835942, "learning_rate": 3.6930127742232173e-06, "lm_loss": 0.3984, "loss": 0.3984, "step": 5553 }, { "epoch": 2.489559598072541, "learning_rate": 3.686684075454158e-06, "lm_loss": 0.3246, "loss": 0.3246, "step": 5554 }, { "epoch": 2.4900078443091402, "learning_rate": 3.6803603723720022e-06, "lm_loss": 0.4425, "loss": 0.4425, "step": 5555 }, { "epoch": 2.49045609054574, "learning_rate": 3.674041666458963e-06, "lm_loss": 0.2455, "loss": 0.2455, "step": 5556 }, { "epoch": 2.490904336782339, "learning_rate": 3.66772795919611e-06, "lm_loss": 0.4209, "loss": 0.4209, "step": 5557 }, { "epoch": 2.4913525830189385, "learning_rate": 3.661419252063325e-06, "lm_loss": 0.3089, "loss": 0.3089, "step": 5558 }, { "epoch": 2.4918008292555376, "learning_rate": 3.655115546539328e-06, "lm_loss": 0.3392, "loss": 0.3392, "step": 5559 }, { "epoch": 2.492249075492137, "learning_rate": 3.6488168441016658e-06, "lm_loss": 0.4087, "loss": 0.4087, "step": 5560 }, { "epoch": 2.4926973217287363, "learning_rate": 3.642523146226695e-06, "lm_loss": 0.3309, "loss": 0.3309, "step": 5561 }, { "epoch": 2.493145567965336, "learning_rate": 3.6362344543896227e-06, "lm_loss": 0.3368, "loss": 0.3368, "step": 5562 }, { "epoch": 2.493593814201935, "learning_rate": 3.6299507700644748e-06, "lm_loss": 0.2994, "loss": 0.2994, "step": 5563 }, { "epoch": 2.494042060438534, "learning_rate": 3.623672094724093e-06, "lm_loss": 0.3185, "loss": 0.3185, "step": 5564 }, { "epoch": 2.4944903066751336, "learning_rate": 3.6173984298401546e-06, "lm_loss": 0.3763, "loss": 0.3763, "step": 5565 }, { "epoch": 2.4949385529117327, "learning_rate": 3.6111297768831738e-06, "lm_loss": 0.3155, "loss": 0.3155, "step": 5566 }, { "epoch": 2.4953867991483323, "learning_rate": 3.6048661373224547e-06, "lm_loss": 0.4028, "loss": 0.4028, "step": 5567 }, { "epoch": 2.4958350453849314, "learning_rate": 3.5986075126261732e-06, "lm_loss": 0.3705, "loss": 0.3705, "step": 5568 }, { "epoch": 2.496283291621531, "learning_rate": 3.592353904261289e-06, "lm_loss": 0.362, "loss": 0.362, "step": 5569 }, { "epoch": 2.49673153785813, "learning_rate": 3.5861053136936077e-06, "lm_loss": 0.4694, "loss": 0.4694, "step": 5570 }, { "epoch": 2.497179784094729, "learning_rate": 3.579861742387761e-06, "lm_loss": 0.3225, "loss": 0.3225, "step": 5571 }, { "epoch": 2.4976280303313287, "learning_rate": 3.573623191807185e-06, "lm_loss": 0.3403, "loss": 0.3403, "step": 5572 }, { "epoch": 2.498076276567928, "learning_rate": 3.5673896634141587e-06, "lm_loss": 0.5535, "loss": 0.5535, "step": 5573 }, { "epoch": 2.4985245228045274, "learning_rate": 3.5611611586697708e-06, "lm_loss": 0.202, "loss": 0.202, "step": 5574 }, { "epoch": 2.4989727690411265, "learning_rate": 3.5549376790339373e-06, "lm_loss": 0.3843, "loss": 0.3843, "step": 5575 }, { "epoch": 2.499421015277726, "learning_rate": 3.5487192259653996e-06, "lm_loss": 0.4348, "loss": 0.4348, "step": 5576 }, { "epoch": 2.499869261514325, "learning_rate": 3.5425058009217195e-06, "lm_loss": 0.4316, "loss": 0.4316, "step": 5577 }, { "epoch": 2.5003175077509248, "learning_rate": 3.536297405359282e-06, "lm_loss": 0.3424, "loss": 0.3424, "step": 5578 }, { "epoch": 2.500765753987524, "learning_rate": 3.5300940407332784e-06, "lm_loss": 0.268, "loss": 0.268, "step": 5579 }, { "epoch": 2.501214000224123, "learning_rate": 3.5238957084977402e-06, "lm_loss": 0.4589, "loss": 0.4589, "step": 5580 }, { "epoch": 2.5016622464607225, "learning_rate": 3.517702410105517e-06, "lm_loss": 0.2951, "loss": 0.2951, "step": 5581 }, { "epoch": 2.5021104926973217, "learning_rate": 3.51151414700826e-06, "lm_loss": 0.3692, "loss": 0.3692, "step": 5582 }, { "epoch": 2.502558738933921, "learning_rate": 3.5053309206564603e-06, "lm_loss": 0.3109, "loss": 0.3109, "step": 5583 }, { "epoch": 2.5030069851705203, "learning_rate": 3.499152732499428e-06, "lm_loss": 0.3596, "loss": 0.3596, "step": 5584 }, { "epoch": 2.50345523140712, "learning_rate": 3.4929795839852645e-06, "lm_loss": 0.3687, "loss": 0.3687, "step": 5585 }, { "epoch": 2.503903477643719, "learning_rate": 3.486811476560939e-06, "lm_loss": 0.3315, "loss": 0.3315, "step": 5586 }, { "epoch": 2.504351723880318, "learning_rate": 3.4806484116721936e-06, "lm_loss": 0.3938, "loss": 0.3938, "step": 5587 }, { "epoch": 2.5047999701169177, "learning_rate": 3.4744903907636074e-06, "lm_loss": 0.3414, "loss": 0.3414, "step": 5588 }, { "epoch": 2.505248216353517, "learning_rate": 3.468337415278583e-06, "lm_loss": 0.4552, "loss": 0.4552, "step": 5589 }, { "epoch": 2.505696462590116, "learning_rate": 3.4621894866593256e-06, "lm_loss": 0.4009, "loss": 0.4009, "step": 5590 }, { "epoch": 2.5061447088267155, "learning_rate": 3.4560466063468688e-06, "lm_loss": 0.4217, "loss": 0.4217, "step": 5591 }, { "epoch": 2.506592955063315, "learning_rate": 3.4499087757810634e-06, "lm_loss": 0.2705, "loss": 0.2705, "step": 5592 }, { "epoch": 2.507041201299914, "learning_rate": 3.443775996400561e-06, "lm_loss": 0.4045, "loss": 0.4045, "step": 5593 }, { "epoch": 2.5074894475365133, "learning_rate": 3.4376482696428485e-06, "lm_loss": 0.4188, "loss": 0.4188, "step": 5594 }, { "epoch": 2.507937693773113, "learning_rate": 3.4315255969442213e-06, "lm_loss": 0.3293, "loss": 0.3293, "step": 5595 }, { "epoch": 2.508385940009712, "learning_rate": 3.4254079797397864e-06, "lm_loss": 0.3251, "loss": 0.3251, "step": 5596 }, { "epoch": 2.508834186246311, "learning_rate": 3.4192954194634753e-06, "lm_loss": 0.5434, "loss": 0.5434, "step": 5597 }, { "epoch": 2.5092824324829106, "learning_rate": 3.413187917548019e-06, "lm_loss": 0.3274, "loss": 0.3274, "step": 5598 }, { "epoch": 2.50973067871951, "learning_rate": 3.407085475424976e-06, "lm_loss": 0.2946, "loss": 0.2946, "step": 5599 }, { "epoch": 2.5101789249561093, "learning_rate": 3.400988094524718e-06, "lm_loss": 0.5332, "loss": 0.5332, "step": 5600 }, { "epoch": 2.5106271711927084, "learning_rate": 3.394895776276419e-06, "lm_loss": 0.2053, "loss": 0.2053, "step": 5601 }, { "epoch": 2.511075417429308, "learning_rate": 3.388808522108081e-06, "lm_loss": 0.3327, "loss": 0.3327, "step": 5602 }, { "epoch": 2.511523663665907, "learning_rate": 3.3827263334465066e-06, "lm_loss": 0.3939, "loss": 0.3939, "step": 5603 }, { "epoch": 2.511971909902506, "learning_rate": 3.37664921171732e-06, "lm_loss": 0.3598, "loss": 0.3598, "step": 5604 }, { "epoch": 2.5124201561391057, "learning_rate": 3.370577158344959e-06, "lm_loss": 0.3518, "loss": 0.3518, "step": 5605 }, { "epoch": 2.5128684023757053, "learning_rate": 3.3645101747526608e-06, "lm_loss": 0.2694, "loss": 0.2694, "step": 5606 }, { "epoch": 2.5133166486123044, "learning_rate": 3.3584482623624876e-06, "lm_loss": 0.469, "loss": 0.469, "step": 5607 }, { "epoch": 2.5137648948489035, "learning_rate": 3.3523914225953107e-06, "lm_loss": 0.5777, "loss": 0.5777, "step": 5608 }, { "epoch": 2.514213141085503, "learning_rate": 3.3463396568707976e-06, "lm_loss": 0.2924, "loss": 0.2924, "step": 5609 }, { "epoch": 2.514661387322102, "learning_rate": 3.3402929666074473e-06, "lm_loss": 0.5409, "loss": 0.5409, "step": 5610 }, { "epoch": 2.5151096335587013, "learning_rate": 3.3342513532225643e-06, "lm_loss": 0.3497, "loss": 0.3497, "step": 5611 }, { "epoch": 2.515557879795301, "learning_rate": 3.3282148181322412e-06, "lm_loss": 0.3548, "loss": 0.3548, "step": 5612 }, { "epoch": 2.5160061260319004, "learning_rate": 3.3221833627514217e-06, "lm_loss": 0.3287, "loss": 0.3287, "step": 5613 }, { "epoch": 2.5164543722684996, "learning_rate": 3.316156988493818e-06, "lm_loss": 0.3147, "loss": 0.3147, "step": 5614 }, { "epoch": 2.5169026185050987, "learning_rate": 3.3101356967719767e-06, "lm_loss": 0.3171, "loss": 0.3171, "step": 5615 }, { "epoch": 2.5173508647416982, "learning_rate": 3.3041194889972456e-06, "lm_loss": 0.3741, "loss": 0.3741, "step": 5616 }, { "epoch": 2.5177991109782973, "learning_rate": 3.298108366579772e-06, "lm_loss": 0.4127, "loss": 0.4127, "step": 5617 }, { "epoch": 2.5182473572148965, "learning_rate": 3.2921023309285315e-06, "lm_loss": 0.3877, "loss": 0.3877, "step": 5618 }, { "epoch": 2.518695603451496, "learning_rate": 3.2861013834512846e-06, "lm_loss": 0.4828, "loss": 0.4828, "step": 5619 }, { "epoch": 2.5191438496880956, "learning_rate": 3.280105525554611e-06, "lm_loss": 0.4736, "loss": 0.4736, "step": 5620 }, { "epoch": 2.5195920959246947, "learning_rate": 3.274114758643901e-06, "lm_loss": 0.4268, "loss": 0.4268, "step": 5621 }, { "epoch": 2.520040342161294, "learning_rate": 3.2681290841233447e-06, "lm_loss": 0.4765, "loss": 0.4765, "step": 5622 }, { "epoch": 2.5204885883978934, "learning_rate": 3.2621485033959455e-06, "lm_loss": 0.3087, "loss": 0.3087, "step": 5623 }, { "epoch": 2.5209368346344925, "learning_rate": 3.2561730178635006e-06, "lm_loss": 0.3345, "loss": 0.3345, "step": 5624 }, { "epoch": 2.5213850808710916, "learning_rate": 3.250202628926624e-06, "lm_loss": 0.3948, "loss": 0.3948, "step": 5625 }, { "epoch": 2.521833327107691, "learning_rate": 3.2442373379847372e-06, "lm_loss": 0.4499, "loss": 0.4499, "step": 5626 }, { "epoch": 2.5222815733442907, "learning_rate": 3.23827714643605e-06, "lm_loss": 0.2955, "loss": 0.2955, "step": 5627 }, { "epoch": 2.52272981958089, "learning_rate": 3.2323220556775946e-06, "lm_loss": 0.6101, "loss": 0.6101, "step": 5628 }, { "epoch": 2.523178065817489, "learning_rate": 3.2263720671052046e-06, "lm_loss": 0.3663, "loss": 0.3663, "step": 5629 }, { "epoch": 2.5236263120540885, "learning_rate": 3.2204271821135e-06, "lm_loss": 0.5304, "loss": 0.5304, "step": 5630 }, { "epoch": 2.5240745582906876, "learning_rate": 3.2144874020959393e-06, "lm_loss": 0.3116, "loss": 0.3116, "step": 5631 }, { "epoch": 2.5245228045272867, "learning_rate": 3.2085527284447496e-06, "lm_loss": 0.4824, "loss": 0.4824, "step": 5632 }, { "epoch": 2.5249710507638863, "learning_rate": 3.202623162550977e-06, "lm_loss": 0.5044, "loss": 0.5044, "step": 5633 }, { "epoch": 2.525419297000486, "learning_rate": 3.196698705804477e-06, "lm_loss": 0.3579, "loss": 0.3579, "step": 5634 }, { "epoch": 2.525867543237085, "learning_rate": 3.19077935959389e-06, "lm_loss": 0.3961, "loss": 0.3961, "step": 5635 }, { "epoch": 2.526315789473684, "learning_rate": 3.1848651253066685e-06, "lm_loss": 0.4789, "loss": 0.4789, "step": 5636 }, { "epoch": 2.5267640357102836, "learning_rate": 3.1789560043290757e-06, "lm_loss": 0.3908, "loss": 0.3908, "step": 5637 }, { "epoch": 2.5272122819468827, "learning_rate": 3.173051998046156e-06, "lm_loss": 0.2914, "loss": 0.2914, "step": 5638 }, { "epoch": 2.527660528183482, "learning_rate": 3.1671531078417667e-06, "lm_loss": 0.404, "loss": 0.404, "step": 5639 }, { "epoch": 2.5281087744200814, "learning_rate": 3.161259335098571e-06, "lm_loss": 0.4064, "loss": 0.4064, "step": 5640 }, { "epoch": 2.528557020656681, "learning_rate": 3.155370681198022e-06, "lm_loss": 0.517, "loss": 0.517, "step": 5641 }, { "epoch": 2.52900526689328, "learning_rate": 3.149487147520386e-06, "lm_loss": 0.4116, "loss": 0.4116, "step": 5642 }, { "epoch": 2.529453513129879, "learning_rate": 3.1436087354447057e-06, "lm_loss": 0.3499, "loss": 0.3499, "step": 5643 }, { "epoch": 2.5299017593664788, "learning_rate": 3.1377354463488493e-06, "lm_loss": 0.4179, "loss": 0.4179, "step": 5644 }, { "epoch": 2.530350005603078, "learning_rate": 3.131867281609474e-06, "lm_loss": 0.5635, "loss": 0.5635, "step": 5645 }, { "epoch": 2.530798251839677, "learning_rate": 3.1260042426020263e-06, "lm_loss": 0.3417, "loss": 0.3417, "step": 5646 }, { "epoch": 2.5312464980762766, "learning_rate": 3.1201463307007657e-06, "lm_loss": 0.3196, "loss": 0.3196, "step": 5647 }, { "epoch": 2.531694744312876, "learning_rate": 3.1142935472787433e-06, "lm_loss": 0.4365, "loss": 0.4365, "step": 5648 }, { "epoch": 2.5321429905494752, "learning_rate": 3.108445893707809e-06, "lm_loss": 0.2615, "loss": 0.2615, "step": 5649 }, { "epoch": 2.5325912367860743, "learning_rate": 3.102603371358617e-06, "lm_loss": 0.3405, "loss": 0.3405, "step": 5650 }, { "epoch": 2.533039483022674, "learning_rate": 3.096765981600602e-06, "lm_loss": 0.4546, "loss": 0.4546, "step": 5651 }, { "epoch": 2.533487729259273, "learning_rate": 3.0909337258020087e-06, "lm_loss": 0.3851, "loss": 0.3851, "step": 5652 }, { "epoch": 2.533935975495872, "learning_rate": 3.085106605329885e-06, "lm_loss": 0.3173, "loss": 0.3173, "step": 5653 }, { "epoch": 2.5343842217324717, "learning_rate": 3.079284621550049e-06, "lm_loss": 0.4069, "loss": 0.4069, "step": 5654 }, { "epoch": 2.5348324679690712, "learning_rate": 3.0734677758271493e-06, "lm_loss": 0.4054, "loss": 0.4054, "step": 5655 }, { "epoch": 2.5352807142056704, "learning_rate": 3.0676560695245964e-06, "lm_loss": 0.2766, "loss": 0.2766, "step": 5656 }, { "epoch": 2.5357289604422695, "learning_rate": 3.061849504004613e-06, "lm_loss": 0.4497, "loss": 0.4497, "step": 5657 }, { "epoch": 2.536177206678869, "learning_rate": 3.056048080628235e-06, "lm_loss": 0.5564, "loss": 0.5564, "step": 5658 }, { "epoch": 2.536625452915468, "learning_rate": 3.050251800755255e-06, "lm_loss": 0.3311, "loss": 0.3311, "step": 5659 }, { "epoch": 2.5370736991520673, "learning_rate": 3.044460665744284e-06, "lm_loss": 0.4031, "loss": 0.4031, "step": 5660 }, { "epoch": 2.537521945388667, "learning_rate": 3.0386746769527323e-06, "lm_loss": 0.319, "loss": 0.319, "step": 5661 }, { "epoch": 2.5379701916252664, "learning_rate": 3.0328938357367776e-06, "lm_loss": 0.331, "loss": 0.331, "step": 5662 }, { "epoch": 2.5384184378618655, "learning_rate": 3.0271181434514182e-06, "lm_loss": 0.3316, "loss": 0.3316, "step": 5663 }, { "epoch": 2.5388666840984646, "learning_rate": 3.0213476014504275e-06, "lm_loss": 0.366, "loss": 0.366, "step": 5664 }, { "epoch": 2.539314930335064, "learning_rate": 3.015582211086382e-06, "lm_loss": 0.3457, "loss": 0.3457, "step": 5665 }, { "epoch": 2.5397631765716633, "learning_rate": 3.009821973710647e-06, "lm_loss": 0.5139, "loss": 0.5139, "step": 5666 }, { "epoch": 2.5402114228082624, "learning_rate": 3.004066890673382e-06, "lm_loss": 0.3777, "loss": 0.3777, "step": 5667 }, { "epoch": 2.540659669044862, "learning_rate": 2.9983169633235416e-06, "lm_loss": 0.2876, "loss": 0.2876, "step": 5668 }, { "epoch": 2.5411079152814615, "learning_rate": 2.992572193008855e-06, "lm_loss": 0.435, "loss": 0.435, "step": 5669 }, { "epoch": 2.5415561615180606, "learning_rate": 2.98683258107586e-06, "lm_loss": 0.3572, "loss": 0.3572, "step": 5670 }, { "epoch": 2.5420044077546597, "learning_rate": 2.9810981288698886e-06, "lm_loss": 0.4338, "loss": 0.4338, "step": 5671 }, { "epoch": 2.5424526539912593, "learning_rate": 2.9753688377350402e-06, "lm_loss": 0.3442, "loss": 0.3442, "step": 5672 }, { "epoch": 2.5429009002278584, "learning_rate": 2.969644709014227e-06, "lm_loss": 0.3483, "loss": 0.3483, "step": 5673 }, { "epoch": 2.5433491464644575, "learning_rate": 2.963925744049148e-06, "lm_loss": 0.4261, "loss": 0.4261, "step": 5674 }, { "epoch": 2.543797392701057, "learning_rate": 2.9582119441802724e-06, "lm_loss": 0.4625, "loss": 0.4625, "step": 5675 }, { "epoch": 2.5442456389376567, "learning_rate": 2.952503310746893e-06, "lm_loss": 0.5147, "loss": 0.5147, "step": 5676 }, { "epoch": 2.5446938851742558, "learning_rate": 2.946799845087056e-06, "lm_loss": 0.3636, "loss": 0.3636, "step": 5677 }, { "epoch": 2.545142131410855, "learning_rate": 2.9411015485376196e-06, "lm_loss": 0.4596, "loss": 0.4596, "step": 5678 }, { "epoch": 2.5455903776474544, "learning_rate": 2.935408422434227e-06, "lm_loss": 0.3497, "loss": 0.3497, "step": 5679 }, { "epoch": 2.5460386238840536, "learning_rate": 2.929720468111294e-06, "lm_loss": 0.2869, "loss": 0.2869, "step": 5680 }, { "epoch": 2.5464868701206527, "learning_rate": 2.924037686902045e-06, "lm_loss": 0.5402, "loss": 0.5402, "step": 5681 }, { "epoch": 2.5469351163572522, "learning_rate": 2.918360080138485e-06, "lm_loss": 0.4578, "loss": 0.4578, "step": 5682 }, { "epoch": 2.547383362593852, "learning_rate": 2.9126876491513937e-06, "lm_loss": 0.3092, "loss": 0.3092, "step": 5683 }, { "epoch": 2.547831608830451, "learning_rate": 2.9070203952703545e-06, "lm_loss": 0.3106, "loss": 0.3106, "step": 5684 }, { "epoch": 2.54827985506705, "learning_rate": 2.9013583198237298e-06, "lm_loss": 0.361, "loss": 0.361, "step": 5685 }, { "epoch": 2.5487281013036496, "learning_rate": 2.895701424138669e-06, "lm_loss": 0.4317, "loss": 0.4317, "step": 5686 }, { "epoch": 2.5491763475402487, "learning_rate": 2.8900497095411123e-06, "lm_loss": 0.3384, "loss": 0.3384, "step": 5687 }, { "epoch": 2.5496245937768482, "learning_rate": 2.884403177355771e-06, "lm_loss": 0.3818, "loss": 0.3818, "step": 5688 }, { "epoch": 2.5500728400134474, "learning_rate": 2.878761828906157e-06, "lm_loss": 0.3571, "loss": 0.3571, "step": 5689 }, { "epoch": 2.550521086250047, "learning_rate": 2.8731256655145644e-06, "lm_loss": 0.3846, "loss": 0.3846, "step": 5690 }, { "epoch": 2.550969332486646, "learning_rate": 2.867494688502059e-06, "lm_loss": 0.5031, "loss": 0.5031, "step": 5691 }, { "epoch": 2.551417578723245, "learning_rate": 2.8618688991885106e-06, "lm_loss": 0.3393, "loss": 0.3393, "step": 5692 }, { "epoch": 2.5518658249598447, "learning_rate": 2.856248298892561e-06, "lm_loss": 0.3857, "loss": 0.3857, "step": 5693 }, { "epoch": 2.552314071196444, "learning_rate": 2.8506328889316343e-06, "lm_loss": 0.4995, "loss": 0.4995, "step": 5694 }, { "epoch": 2.5527623174330434, "learning_rate": 2.8450226706219523e-06, "lm_loss": 0.5019, "loss": 0.5019, "step": 5695 }, { "epoch": 2.5532105636696425, "learning_rate": 2.839417645278497e-06, "lm_loss": 0.3657, "loss": 0.3657, "step": 5696 }, { "epoch": 2.553658809906242, "learning_rate": 2.8338178142150506e-06, "lm_loss": 0.445, "loss": 0.445, "step": 5697 }, { "epoch": 2.554107056142841, "learning_rate": 2.8282231787441797e-06, "lm_loss": 0.3381, "loss": 0.3381, "step": 5698 }, { "epoch": 2.5545553023794403, "learning_rate": 2.8226337401772136e-06, "lm_loss": 0.4557, "loss": 0.4557, "step": 5699 }, { "epoch": 2.55500354861604, "learning_rate": 2.8170494998242897e-06, "lm_loss": 0.4879, "loss": 0.4879, "step": 5700 }, { "epoch": 2.555451794852639, "learning_rate": 2.8114704589942946e-06, "lm_loss": 0.3278, "loss": 0.3278, "step": 5701 }, { "epoch": 2.5559000410892385, "learning_rate": 2.8058966189949324e-06, "lm_loss": 0.4752, "loss": 0.4752, "step": 5702 }, { "epoch": 2.5563482873258376, "learning_rate": 2.8003279811326723e-06, "lm_loss": 0.4486, "loss": 0.4486, "step": 5703 }, { "epoch": 2.556796533562437, "learning_rate": 2.794764546712747e-06, "lm_loss": 0.4107, "loss": 0.4107, "step": 5704 }, { "epoch": 2.5572447797990363, "learning_rate": 2.789206317039203e-06, "lm_loss": 0.5187, "loss": 0.5187, "step": 5705 }, { "epoch": 2.5576930260356354, "learning_rate": 2.783653293414834e-06, "lm_loss": 0.464, "loss": 0.464, "step": 5706 }, { "epoch": 2.558141272272235, "learning_rate": 2.7781054771412326e-06, "lm_loss": 0.5969, "loss": 0.5969, "step": 5707 }, { "epoch": 2.558589518508834, "learning_rate": 2.772562869518777e-06, "lm_loss": 0.3273, "loss": 0.3273, "step": 5708 }, { "epoch": 2.5590377647454337, "learning_rate": 2.767025471846599e-06, "lm_loss": 0.3449, "loss": 0.3449, "step": 5709 }, { "epoch": 2.5594860109820328, "learning_rate": 2.761493285422628e-06, "lm_loss": 0.7125, "loss": 0.7125, "step": 5710 }, { "epoch": 2.5599342572186323, "learning_rate": 2.755966311543581e-06, "lm_loss": 0.3062, "loss": 0.3062, "step": 5711 }, { "epoch": 2.5603825034552314, "learning_rate": 2.7504445515049272e-06, "lm_loss": 0.3574, "loss": 0.3574, "step": 5712 }, { "epoch": 2.5608307496918306, "learning_rate": 2.744928006600936e-06, "lm_loss": 0.4197, "loss": 0.4197, "step": 5713 }, { "epoch": 2.56127899592843, "learning_rate": 2.7394166781246372e-06, "lm_loss": 0.2469, "loss": 0.2469, "step": 5714 }, { "epoch": 2.5617272421650292, "learning_rate": 2.7339105673678504e-06, "lm_loss": 0.5192, "loss": 0.5192, "step": 5715 }, { "epoch": 2.562175488401629, "learning_rate": 2.728409675621174e-06, "lm_loss": 0.462, "loss": 0.462, "step": 5716 }, { "epoch": 2.562623734638228, "learning_rate": 2.7229140041739626e-06, "lm_loss": 0.3314, "loss": 0.3314, "step": 5717 }, { "epoch": 2.5630719808748275, "learning_rate": 2.7174235543143743e-06, "lm_loss": 0.3185, "loss": 0.3185, "step": 5718 }, { "epoch": 2.5635202271114266, "learning_rate": 2.7119383273293248e-06, "lm_loss": 0.3479, "loss": 0.3479, "step": 5719 }, { "epoch": 2.5639684733480257, "learning_rate": 2.7064583245045126e-06, "lm_loss": 0.4934, "loss": 0.4934, "step": 5720 }, { "epoch": 2.5644167195846252, "learning_rate": 2.700983547124419e-06, "lm_loss": 0.3079, "loss": 0.3079, "step": 5721 }, { "epoch": 2.5648649658212244, "learning_rate": 2.695513996472279e-06, "lm_loss": 0.302, "loss": 0.302, "step": 5722 }, { "epoch": 2.565313212057824, "learning_rate": 2.690049673830122e-06, "lm_loss": 0.4657, "loss": 0.4657, "step": 5723 }, { "epoch": 2.565761458294423, "learning_rate": 2.684590580478749e-06, "lm_loss": 0.2821, "loss": 0.2821, "step": 5724 }, { "epoch": 2.5662097045310226, "learning_rate": 2.679136717697725e-06, "lm_loss": 0.3748, "loss": 0.3748, "step": 5725 }, { "epoch": 2.5666579507676217, "learning_rate": 2.6736880867653994e-06, "lm_loss": 0.4748, "loss": 0.4748, "step": 5726 }, { "epoch": 2.567106197004221, "learning_rate": 2.668244688958893e-06, "lm_loss": 0.295, "loss": 0.295, "step": 5727 }, { "epoch": 2.5675544432408204, "learning_rate": 2.66280652555409e-06, "lm_loss": 0.5684, "loss": 0.5684, "step": 5728 }, { "epoch": 2.5680026894774195, "learning_rate": 2.657373597825674e-06, "lm_loss": 0.3793, "loss": 0.3793, "step": 5729 }, { "epoch": 2.568450935714019, "learning_rate": 2.651945907047068e-06, "lm_loss": 0.4063, "loss": 0.4063, "step": 5730 }, { "epoch": 2.568899181950618, "learning_rate": 2.6465234544904903e-06, "lm_loss": 0.4599, "loss": 0.4599, "step": 5731 }, { "epoch": 2.5693474281872177, "learning_rate": 2.641106241426927e-06, "lm_loss": 0.3143, "loss": 0.3143, "step": 5732 }, { "epoch": 2.569795674423817, "learning_rate": 2.6356942691261265e-06, "lm_loss": 0.2123, "loss": 0.2123, "step": 5733 }, { "epoch": 2.570243920660416, "learning_rate": 2.6302875388566183e-06, "lm_loss": 0.4091, "loss": 0.4091, "step": 5734 }, { "epoch": 2.5706921668970155, "learning_rate": 2.6248860518857055e-06, "lm_loss": 0.3261, "loss": 0.3261, "step": 5735 }, { "epoch": 2.5711404131336146, "learning_rate": 2.61948980947945e-06, "lm_loss": 0.4287, "loss": 0.4287, "step": 5736 }, { "epoch": 2.571588659370214, "learning_rate": 2.614098812902696e-06, "lm_loss": 0.3663, "loss": 0.3663, "step": 5737 }, { "epoch": 2.5720369056068133, "learning_rate": 2.608713063419055e-06, "lm_loss": 0.3747, "loss": 0.3747, "step": 5738 }, { "epoch": 2.572485151843413, "learning_rate": 2.6033325622909053e-06, "lm_loss": 0.3892, "loss": 0.3892, "step": 5739 }, { "epoch": 2.572933398080012, "learning_rate": 2.5979573107794057e-06, "lm_loss": 0.3556, "loss": 0.3556, "step": 5740 }, { "epoch": 2.573381644316611, "learning_rate": 2.592587310144462e-06, "lm_loss": 0.4501, "loss": 0.4501, "step": 5741 }, { "epoch": 2.5738298905532107, "learning_rate": 2.5872225616447725e-06, "lm_loss": 0.3296, "loss": 0.3296, "step": 5742 }, { "epoch": 2.5742781367898098, "learning_rate": 2.5818630665377984e-06, "lm_loss": 0.3111, "loss": 0.3111, "step": 5743 }, { "epoch": 2.5747263830264093, "learning_rate": 2.5765088260797544e-06, "lm_loss": 0.3331, "loss": 0.3331, "step": 5744 }, { "epoch": 2.5751746292630084, "learning_rate": 2.57115984152565e-06, "lm_loss": 0.429, "loss": 0.429, "step": 5745 }, { "epoch": 2.575622875499608, "learning_rate": 2.5658161141292296e-06, "lm_loss": 0.3988, "loss": 0.3988, "step": 5746 }, { "epoch": 2.576071121736207, "learning_rate": 2.5604776451430444e-06, "lm_loss": 0.438, "loss": 0.438, "step": 5747 }, { "epoch": 2.5765193679728062, "learning_rate": 2.555144435818388e-06, "lm_loss": 0.4591, "loss": 0.4591, "step": 5748 }, { "epoch": 2.576967614209406, "learning_rate": 2.5498164874053153e-06, "lm_loss": 0.3745, "loss": 0.3745, "step": 5749 }, { "epoch": 2.577415860446005, "learning_rate": 2.5444938011526754e-06, "lm_loss": 0.3959, "loss": 0.3959, "step": 5750 }, { "epoch": 2.5778641066826045, "learning_rate": 2.53917637830805e-06, "lm_loss": 0.2885, "loss": 0.2885, "step": 5751 }, { "epoch": 2.5783123529192036, "learning_rate": 2.5338642201178123e-06, "lm_loss": 0.3845, "loss": 0.3845, "step": 5752 }, { "epoch": 2.578760599155803, "learning_rate": 2.5285573278271e-06, "lm_loss": 0.482, "loss": 0.482, "step": 5753 }, { "epoch": 2.5792088453924022, "learning_rate": 2.5232557026798e-06, "lm_loss": 0.3145, "loss": 0.3145, "step": 5754 }, { "epoch": 2.5796570916290014, "learning_rate": 2.5179593459185773e-06, "lm_loss": 0.348, "loss": 0.348, "step": 5755 }, { "epoch": 2.580105337865601, "learning_rate": 2.5126682587848615e-06, "lm_loss": 0.397, "loss": 0.397, "step": 5756 }, { "epoch": 2.5805535841022, "learning_rate": 2.507382442518846e-06, "lm_loss": 0.3411, "loss": 0.3411, "step": 5757 }, { "epoch": 2.5810018303387996, "learning_rate": 2.5021018983594935e-06, "lm_loss": 0.4079, "loss": 0.4079, "step": 5758 }, { "epoch": 2.5814500765753987, "learning_rate": 2.49682662754451e-06, "lm_loss": 0.2939, "loss": 0.2939, "step": 5759 }, { "epoch": 2.5818983228119983, "learning_rate": 2.4915566313103905e-06, "lm_loss": 0.36, "loss": 0.36, "step": 5760 }, { "epoch": 2.5823465690485974, "learning_rate": 2.486291910892388e-06, "lm_loss": 0.3735, "loss": 0.3735, "step": 5761 }, { "epoch": 2.5827948152851965, "learning_rate": 2.481032467524508e-06, "lm_loss": 0.4309, "loss": 0.4309, "step": 5762 }, { "epoch": 2.583243061521796, "learning_rate": 2.475778302439524e-06, "lm_loss": 0.4752, "loss": 0.4752, "step": 5763 }, { "epoch": 2.583691307758395, "learning_rate": 2.4705294168689773e-06, "lm_loss": 0.4255, "loss": 0.4255, "step": 5764 }, { "epoch": 2.5841395539949947, "learning_rate": 2.465285812043169e-06, "lm_loss": 0.358, "loss": 0.358, "step": 5765 }, { "epoch": 2.584587800231594, "learning_rate": 2.4600474891911694e-06, "lm_loss": 0.4197, "loss": 0.4197, "step": 5766 }, { "epoch": 2.5850360464681934, "learning_rate": 2.4548144495407903e-06, "lm_loss": 0.3636, "loss": 0.3636, "step": 5767 }, { "epoch": 2.5854842927047925, "learning_rate": 2.449586694318626e-06, "lm_loss": 0.3144, "loss": 0.3144, "step": 5768 }, { "epoch": 2.5859325389413916, "learning_rate": 2.4443642247500257e-06, "lm_loss": 0.3283, "loss": 0.3283, "step": 5769 }, { "epoch": 2.586380785177991, "learning_rate": 2.4391470420590922e-06, "lm_loss": 0.468, "loss": 0.468, "step": 5770 }, { "epoch": 2.5868290314145903, "learning_rate": 2.4339351474686966e-06, "lm_loss": 0.2724, "loss": 0.2724, "step": 5771 }, { "epoch": 2.58727727765119, "learning_rate": 2.4287285422004784e-06, "lm_loss": 0.2987, "loss": 0.2987, "step": 5772 }, { "epoch": 2.587725523887789, "learning_rate": 2.423527227474809e-06, "lm_loss": 0.4336, "loss": 0.4336, "step": 5773 }, { "epoch": 2.5881737701243885, "learning_rate": 2.4183312045108624e-06, "lm_loss": 0.3675, "loss": 0.3675, "step": 5774 }, { "epoch": 2.5886220163609877, "learning_rate": 2.4131404745265317e-06, "lm_loss": 0.2834, "loss": 0.2834, "step": 5775 }, { "epoch": 2.5890702625975868, "learning_rate": 2.4079550387384935e-06, "lm_loss": 0.4275, "loss": 0.4275, "step": 5776 }, { "epoch": 2.5895185088341863, "learning_rate": 2.4027748983621785e-06, "lm_loss": 0.4923, "loss": 0.4923, "step": 5777 }, { "epoch": 2.5899667550707854, "learning_rate": 2.397600054611765e-06, "lm_loss": 0.2478, "loss": 0.2478, "step": 5778 }, { "epoch": 2.590415001307385, "learning_rate": 2.392430508700205e-06, "lm_loss": 0.4323, "loss": 0.4323, "step": 5779 }, { "epoch": 2.590863247543984, "learning_rate": 2.387266261839208e-06, "lm_loss": 0.3235, "loss": 0.3235, "step": 5780 }, { "epoch": 2.5913114937805837, "learning_rate": 2.3821073152392237e-06, "lm_loss": 0.3196, "loss": 0.3196, "step": 5781 }, { "epoch": 2.591759740017183, "learning_rate": 2.3769536701094804e-06, "lm_loss": 0.3603, "loss": 0.3603, "step": 5782 }, { "epoch": 2.592207986253782, "learning_rate": 2.37180532765795e-06, "lm_loss": 0.2515, "loss": 0.2515, "step": 5783 }, { "epoch": 2.5926562324903815, "learning_rate": 2.3666622890913733e-06, "lm_loss": 0.3926, "loss": 0.3926, "step": 5784 }, { "epoch": 2.5931044787269806, "learning_rate": 2.361524555615238e-06, "lm_loss": 0.3649, "loss": 0.3649, "step": 5785 }, { "epoch": 2.59355272496358, "learning_rate": 2.3563921284337904e-06, "lm_loss": 0.2648, "loss": 0.2648, "step": 5786 }, { "epoch": 2.5940009712001793, "learning_rate": 2.3512650087500337e-06, "lm_loss": 0.4665, "loss": 0.4665, "step": 5787 }, { "epoch": 2.594449217436779, "learning_rate": 2.346143197765735e-06, "lm_loss": 0.3498, "loss": 0.3498, "step": 5788 }, { "epoch": 2.594897463673378, "learning_rate": 2.3410266966813977e-06, "lm_loss": 0.485, "loss": 0.485, "step": 5789 }, { "epoch": 2.595345709909977, "learning_rate": 2.335915506696304e-06, "lm_loss": 0.2879, "loss": 0.2879, "step": 5790 }, { "epoch": 2.5957939561465766, "learning_rate": 2.330809629008465e-06, "lm_loss": 0.3939, "loss": 0.3939, "step": 5791 }, { "epoch": 2.5962422023831757, "learning_rate": 2.325709064814674e-06, "lm_loss": 0.4512, "loss": 0.4512, "step": 5792 }, { "epoch": 2.5966904486197753, "learning_rate": 2.320613815310471e-06, "lm_loss": 0.2397, "loss": 0.2397, "step": 5793 }, { "epoch": 2.5971386948563744, "learning_rate": 2.315523881690132e-06, "lm_loss": 0.324, "loss": 0.324, "step": 5794 }, { "epoch": 2.597586941092974, "learning_rate": 2.3104392651467103e-06, "lm_loss": 0.4945, "loss": 0.4945, "step": 5795 }, { "epoch": 2.598035187329573, "learning_rate": 2.305359966871995e-06, "lm_loss": 0.4857, "loss": 0.4857, "step": 5796 }, { "epoch": 2.598483433566172, "learning_rate": 2.3002859880565414e-06, "lm_loss": 0.3622, "loss": 0.3622, "step": 5797 }, { "epoch": 2.5989316798027717, "learning_rate": 2.2952173298896566e-06, "lm_loss": 0.3636, "loss": 0.3636, "step": 5798 }, { "epoch": 2.599379926039371, "learning_rate": 2.2901539935593907e-06, "lm_loss": 0.3261, "loss": 0.3261, "step": 5799 }, { "epoch": 2.5998281722759704, "learning_rate": 2.285095980252558e-06, "lm_loss": 0.3245, "loss": 0.3245, "step": 5800 }, { "epoch": 2.6002764185125695, "learning_rate": 2.2800432911547154e-06, "lm_loss": 0.3835, "loss": 0.3835, "step": 5801 }, { "epoch": 2.600724664749169, "learning_rate": 2.2749959274501798e-06, "lm_loss": 0.3981, "loss": 0.3981, "step": 5802 }, { "epoch": 2.601172910985768, "learning_rate": 2.2699538903220242e-06, "lm_loss": 0.4185, "loss": 0.4185, "step": 5803 }, { "epoch": 2.6016211572223673, "learning_rate": 2.2649171809520504e-06, "lm_loss": 0.2542, "loss": 0.2542, "step": 5804 }, { "epoch": 2.602069403458967, "learning_rate": 2.2598858005208362e-06, "lm_loss": 0.3659, "loss": 0.3659, "step": 5805 }, { "epoch": 2.602517649695566, "learning_rate": 2.2548597502077023e-06, "lm_loss": 0.292, "loss": 0.292, "step": 5806 }, { "epoch": 2.6029658959321655, "learning_rate": 2.2498390311907093e-06, "lm_loss": 0.3771, "loss": 0.3771, "step": 5807 }, { "epoch": 2.6034141421687647, "learning_rate": 2.2448236446466846e-06, "lm_loss": 0.3715, "loss": 0.3715, "step": 5808 }, { "epoch": 2.603862388405364, "learning_rate": 2.239813591751197e-06, "lm_loss": 0.392, "loss": 0.392, "step": 5809 }, { "epoch": 2.6043106346419633, "learning_rate": 2.234808873678565e-06, "lm_loss": 0.4084, "loss": 0.4084, "step": 5810 }, { "epoch": 2.6047588808785624, "learning_rate": 2.229809491601864e-06, "lm_loss": 0.3641, "loss": 0.3641, "step": 5811 }, { "epoch": 2.605207127115162, "learning_rate": 2.2248154466929045e-06, "lm_loss": 0.2698, "loss": 0.2698, "step": 5812 }, { "epoch": 2.605655373351761, "learning_rate": 2.2198267401222607e-06, "lm_loss": 0.3773, "loss": 0.3773, "step": 5813 }, { "epoch": 2.6061036195883607, "learning_rate": 2.21484337305925e-06, "lm_loss": 0.3676, "loss": 0.3676, "step": 5814 }, { "epoch": 2.60655186582496, "learning_rate": 2.2098653466719297e-06, "lm_loss": 0.5405, "loss": 0.5405, "step": 5815 }, { "epoch": 2.6070001120615593, "learning_rate": 2.20489266212712e-06, "lm_loss": 0.4082, "loss": 0.4082, "step": 5816 }, { "epoch": 2.6074483582981585, "learning_rate": 2.1999253205903848e-06, "lm_loss": 0.505, "loss": 0.505, "step": 5817 }, { "epoch": 2.6078966045347576, "learning_rate": 2.1949633232260182e-06, "lm_loss": 0.3142, "loss": 0.3142, "step": 5818 }, { "epoch": 2.608344850771357, "learning_rate": 2.1900066711970977e-06, "lm_loss": 0.3122, "loss": 0.3122, "step": 5819 }, { "epoch": 2.6087930970079563, "learning_rate": 2.185055365665409e-06, "lm_loss": 0.3876, "loss": 0.3876, "step": 5820 }, { "epoch": 2.609241343244556, "learning_rate": 2.1801094077915112e-06, "lm_loss": 0.406, "loss": 0.406, "step": 5821 }, { "epoch": 2.609689589481155, "learning_rate": 2.175168798734703e-06, "lm_loss": 0.3934, "loss": 0.3934, "step": 5822 }, { "epoch": 2.6101378357177545, "learning_rate": 2.170233539653016e-06, "lm_loss": 0.2831, "loss": 0.2831, "step": 5823 }, { "epoch": 2.6105860819543536, "learning_rate": 2.16530363170325e-06, "lm_loss": 0.3669, "loss": 0.3669, "step": 5824 }, { "epoch": 2.6110343281909527, "learning_rate": 2.160379076040939e-06, "lm_loss": 0.4573, "loss": 0.4573, "step": 5825 }, { "epoch": 2.6114825744275523, "learning_rate": 2.1554598738203535e-06, "lm_loss": 0.3299, "loss": 0.3299, "step": 5826 }, { "epoch": 2.6119308206641514, "learning_rate": 2.1505460261945278e-06, "lm_loss": 0.2512, "loss": 0.2512, "step": 5827 }, { "epoch": 2.612379066900751, "learning_rate": 2.145637534315231e-06, "lm_loss": 0.4495, "loss": 0.4495, "step": 5828 }, { "epoch": 2.61282731313735, "learning_rate": 2.1407343993329746e-06, "lm_loss": 0.2738, "loss": 0.2738, "step": 5829 }, { "epoch": 2.6132755593739496, "learning_rate": 2.1358366223970277e-06, "lm_loss": 0.3356, "loss": 0.3356, "step": 5830 }, { "epoch": 2.6137238056105487, "learning_rate": 2.13094420465538e-06, "lm_loss": 0.5272, "loss": 0.5272, "step": 5831 }, { "epoch": 2.614172051847148, "learning_rate": 2.126057147254787e-06, "lm_loss": 0.3193, "loss": 0.3193, "step": 5832 }, { "epoch": 2.6146202980837474, "learning_rate": 2.12117545134074e-06, "lm_loss": 0.4111, "loss": 0.4111, "step": 5833 }, { "epoch": 2.6150685443203465, "learning_rate": 2.1162991180574686e-06, "lm_loss": 0.4419, "loss": 0.4419, "step": 5834 }, { "epoch": 2.615516790556946, "learning_rate": 2.111428148547956e-06, "lm_loss": 0.4115, "loss": 0.4115, "step": 5835 }, { "epoch": 2.615965036793545, "learning_rate": 2.1065625439539103e-06, "lm_loss": 0.3204, "loss": 0.3204, "step": 5836 }, { "epoch": 2.6164132830301448, "learning_rate": 2.1017023054158063e-06, "lm_loss": 0.3437, "loss": 0.3437, "step": 5837 }, { "epoch": 2.616861529266744, "learning_rate": 2.096847434072849e-06, "lm_loss": 0.3038, "loss": 0.3038, "step": 5838 }, { "epoch": 2.617309775503343, "learning_rate": 2.091997931062975e-06, "lm_loss": 0.4309, "loss": 0.4309, "step": 5839 }, { "epoch": 2.6177580217399425, "learning_rate": 2.0871537975228837e-06, "lm_loss": 0.4149, "loss": 0.4149, "step": 5840 }, { "epoch": 2.6182062679765417, "learning_rate": 2.082315034587995e-06, "lm_loss": 0.2867, "loss": 0.2867, "step": 5841 }, { "epoch": 2.618654514213141, "learning_rate": 2.077481643392484e-06, "lm_loss": 0.4028, "loss": 0.4028, "step": 5842 }, { "epoch": 2.6191027604497403, "learning_rate": 2.0726536250692707e-06, "lm_loss": 0.2572, "loss": 0.2572, "step": 5843 }, { "epoch": 2.61955100668634, "learning_rate": 2.0678309807499935e-06, "lm_loss": 0.4459, "loss": 0.4459, "step": 5844 }, { "epoch": 2.619999252922939, "learning_rate": 2.063013711565051e-06, "lm_loss": 0.4253, "loss": 0.4253, "step": 5845 }, { "epoch": 2.620447499159538, "learning_rate": 2.0582018186435766e-06, "lm_loss": 0.5592, "loss": 0.5592, "step": 5846 }, { "epoch": 2.6208957453961377, "learning_rate": 2.053395303113445e-06, "lm_loss": 0.4848, "loss": 0.4848, "step": 5847 }, { "epoch": 2.621343991632737, "learning_rate": 2.0485941661012725e-06, "lm_loss": 0.4099, "loss": 0.4099, "step": 5848 }, { "epoch": 2.6217922378693364, "learning_rate": 2.0437984087324e-06, "lm_loss": 0.4199, "loss": 0.4199, "step": 5849 }, { "epoch": 2.6222404841059355, "learning_rate": 2.0390080321309236e-06, "lm_loss": 0.4191, "loss": 0.4191, "step": 5850 }, { "epoch": 2.622688730342535, "learning_rate": 2.0342230374196773e-06, "lm_loss": 0.4234, "loss": 0.4234, "step": 5851 }, { "epoch": 2.623136976579134, "learning_rate": 2.029443425720223e-06, "lm_loss": 0.3122, "loss": 0.3122, "step": 5852 }, { "epoch": 2.6235852228157333, "learning_rate": 2.0246691981528676e-06, "lm_loss": 0.3041, "loss": 0.3041, "step": 5853 }, { "epoch": 2.624033469052333, "learning_rate": 2.019900355836657e-06, "lm_loss": 0.44, "loss": 0.44, "step": 5854 }, { "epoch": 2.624481715288932, "learning_rate": 2.015136899889372e-06, "lm_loss": 0.2988, "loss": 0.2988, "step": 5855 }, { "epoch": 2.6249299615255315, "learning_rate": 2.0103788314275397e-06, "lm_loss": 0.4378, "loss": 0.4378, "step": 5856 }, { "epoch": 2.6253782077621306, "learning_rate": 2.005626151566406e-06, "lm_loss": 0.3415, "loss": 0.3415, "step": 5857 }, { "epoch": 2.62582645399873, "learning_rate": 2.000878861419969e-06, "lm_loss": 0.334, "loss": 0.334, "step": 5858 }, { "epoch": 2.6262747002353293, "learning_rate": 1.996136962100964e-06, "lm_loss": 0.4292, "loss": 0.4292, "step": 5859 }, { "epoch": 2.6267229464719284, "learning_rate": 1.9914004547208486e-06, "lm_loss": 0.3509, "loss": 0.3509, "step": 5860 }, { "epoch": 2.627171192708528, "learning_rate": 1.9866693403898297e-06, "lm_loss": 0.247, "loss": 0.247, "step": 5861 }, { "epoch": 2.627619438945127, "learning_rate": 1.981943620216853e-06, "lm_loss": 0.5894, "loss": 0.5894, "step": 5862 }, { "epoch": 2.6280676851817266, "learning_rate": 1.977223295309577e-06, "lm_loss": 0.6868, "loss": 0.6868, "step": 5863 }, { "epoch": 2.6285159314183257, "learning_rate": 1.972508366774431e-06, "lm_loss": 0.3663, "loss": 0.3663, "step": 5864 }, { "epoch": 2.6289641776549253, "learning_rate": 1.9677988357165472e-06, "lm_loss": 0.3956, "loss": 0.3956, "step": 5865 }, { "epoch": 2.6294124238915244, "learning_rate": 1.9630947032398067e-06, "lm_loss": 0.3604, "loss": 0.3604, "step": 5866 }, { "epoch": 2.6298606701281235, "learning_rate": 1.958395970446833e-06, "lm_loss": 0.3699, "loss": 0.3699, "step": 5867 }, { "epoch": 2.630308916364723, "learning_rate": 1.9537026384389643e-06, "lm_loss": 0.3791, "loss": 0.3791, "step": 5868 }, { "epoch": 2.630757162601322, "learning_rate": 1.949014708316291e-06, "lm_loss": 0.3264, "loss": 0.3264, "step": 5869 }, { "epoch": 2.6312054088379218, "learning_rate": 1.944332181177627e-06, "lm_loss": 0.4751, "loss": 0.4751, "step": 5870 }, { "epoch": 2.631653655074521, "learning_rate": 1.939655058120521e-06, "lm_loss": 0.4159, "loss": 0.4159, "step": 5871 }, { "epoch": 2.6321019013111204, "learning_rate": 1.934983340241256e-06, "lm_loss": 0.3675, "loss": 0.3675, "step": 5872 }, { "epoch": 2.6325501475477195, "learning_rate": 1.9303170286348536e-06, "lm_loss": 0.2777, "loss": 0.2777, "step": 5873 }, { "epoch": 2.6329983937843187, "learning_rate": 1.925656124395059e-06, "lm_loss": 0.4067, "loss": 0.4067, "step": 5874 }, { "epoch": 2.633446640020918, "learning_rate": 1.921000628614364e-06, "lm_loss": 0.4481, "loss": 0.4481, "step": 5875 }, { "epoch": 2.6338948862575173, "learning_rate": 1.9163505423839684e-06, "lm_loss": 0.3918, "loss": 0.3918, "step": 5876 }, { "epoch": 2.634343132494117, "learning_rate": 1.911705866793831e-06, "lm_loss": 0.3075, "loss": 0.3075, "step": 5877 }, { "epoch": 2.634791378730716, "learning_rate": 1.9070666029326223e-06, "lm_loss": 0.3335, "loss": 0.3335, "step": 5878 }, { "epoch": 2.6352396249673156, "learning_rate": 1.9024327518877533e-06, "lm_loss": 0.3546, "loss": 0.3546, "step": 5879 }, { "epoch": 2.6356878712039147, "learning_rate": 1.897804314745369e-06, "lm_loss": 0.2759, "loss": 0.2759, "step": 5880 }, { "epoch": 2.636136117440514, "learning_rate": 1.8931812925903375e-06, "lm_loss": 0.4516, "loss": 0.4516, "step": 5881 }, { "epoch": 2.6365843636771134, "learning_rate": 1.8885636865062644e-06, "lm_loss": 0.3332, "loss": 0.3332, "step": 5882 }, { "epoch": 2.6370326099137125, "learning_rate": 1.883951497575487e-06, "lm_loss": 0.4455, "loss": 0.4455, "step": 5883 }, { "epoch": 2.637480856150312, "learning_rate": 1.8793447268790592e-06, "lm_loss": 0.2697, "loss": 0.2697, "step": 5884 }, { "epoch": 2.637929102386911, "learning_rate": 1.874743375496782e-06, "lm_loss": 0.4137, "loss": 0.4137, "step": 5885 }, { "epoch": 2.6383773486235107, "learning_rate": 1.8701474445071754e-06, "lm_loss": 0.2928, "loss": 0.2928, "step": 5886 }, { "epoch": 2.63882559486011, "learning_rate": 1.8655569349874918e-06, "lm_loss": 0.4144, "loss": 0.4144, "step": 5887 }, { "epoch": 2.639273841096709, "learning_rate": 1.8609718480137207e-06, "lm_loss": 0.4488, "loss": 0.4488, "step": 5888 }, { "epoch": 2.6397220873333085, "learning_rate": 1.8563921846605603e-06, "lm_loss": 0.3719, "loss": 0.3719, "step": 5889 }, { "epoch": 2.6401703335699076, "learning_rate": 1.8518179460014633e-06, "lm_loss": 0.4068, "loss": 0.4068, "step": 5890 }, { "epoch": 2.640618579806507, "learning_rate": 1.8472491331085889e-06, "lm_loss": 0.2715, "loss": 0.2715, "step": 5891 }, { "epoch": 2.6410668260431063, "learning_rate": 1.8426857470528413e-06, "lm_loss": 0.4172, "loss": 0.4172, "step": 5892 }, { "epoch": 2.641515072279706, "learning_rate": 1.8381277889038428e-06, "lm_loss": 0.5925, "loss": 0.5925, "step": 5893 }, { "epoch": 2.641963318516305, "learning_rate": 1.8335752597299387e-06, "lm_loss": 0.4997, "loss": 0.4997, "step": 5894 }, { "epoch": 2.642411564752904, "learning_rate": 1.8290281605982173e-06, "lm_loss": 0.3037, "loss": 0.3037, "step": 5895 }, { "epoch": 2.6428598109895036, "learning_rate": 1.824486492574487e-06, "lm_loss": 0.3421, "loss": 0.3421, "step": 5896 }, { "epoch": 2.6433080572261027, "learning_rate": 1.8199502567232712e-06, "lm_loss": 0.2523, "loss": 0.2523, "step": 5897 }, { "epoch": 2.6437563034627023, "learning_rate": 1.8154194541078418e-06, "lm_loss": 0.4426, "loss": 0.4426, "step": 5898 }, { "epoch": 2.6442045496993014, "learning_rate": 1.8108940857901768e-06, "lm_loss": 0.3084, "loss": 0.3084, "step": 5899 }, { "epoch": 2.644652795935901, "learning_rate": 1.8063741528309974e-06, "lm_loss": 0.5463, "loss": 0.5463, "step": 5900 }, { "epoch": 2.6451010421725, "learning_rate": 1.801859656289745e-06, "lm_loss": 0.2635, "loss": 0.2635, "step": 5901 }, { "epoch": 2.645549288409099, "learning_rate": 1.7973505972245757e-06, "lm_loss": 0.3873, "loss": 0.3873, "step": 5902 }, { "epoch": 2.6459975346456988, "learning_rate": 1.7928469766923833e-06, "lm_loss": 0.2616, "loss": 0.2616, "step": 5903 }, { "epoch": 2.646445780882298, "learning_rate": 1.7883487957487928e-06, "lm_loss": 0.4035, "loss": 0.4035, "step": 5904 }, { "epoch": 2.6468940271188974, "learning_rate": 1.7838560554481327e-06, "lm_loss": 0.2765, "loss": 0.2765, "step": 5905 }, { "epoch": 2.6473422733554965, "learning_rate": 1.7793687568434718e-06, "lm_loss": 0.3719, "loss": 0.3719, "step": 5906 }, { "epoch": 2.647790519592096, "learning_rate": 1.7748869009866054e-06, "lm_loss": 0.402, "loss": 0.402, "step": 5907 }, { "epoch": 2.648238765828695, "learning_rate": 1.7704104889280427e-06, "lm_loss": 0.5074, "loss": 0.5074, "step": 5908 }, { "epoch": 2.6486870120652943, "learning_rate": 1.7659395217170305e-06, "lm_loss": 0.244, "loss": 0.244, "step": 5909 }, { "epoch": 2.649135258301894, "learning_rate": 1.7614740004015196e-06, "lm_loss": 0.3712, "loss": 0.3712, "step": 5910 }, { "epoch": 2.649583504538493, "learning_rate": 1.7570139260282032e-06, "lm_loss": 0.3465, "loss": 0.3465, "step": 5911 }, { "epoch": 2.6500317507750926, "learning_rate": 1.752559299642495e-06, "lm_loss": 0.2538, "loss": 0.2538, "step": 5912 }, { "epoch": 2.6504799970116917, "learning_rate": 1.7481101222885127e-06, "lm_loss": 0.4949, "loss": 0.4949, "step": 5913 }, { "epoch": 2.6509282432482912, "learning_rate": 1.743666395009122e-06, "lm_loss": 0.4383, "loss": 0.4383, "step": 5914 }, { "epoch": 2.6513764894848904, "learning_rate": 1.7392281188459036e-06, "lm_loss": 0.5195, "loss": 0.5195, "step": 5915 }, { "epoch": 2.6518247357214895, "learning_rate": 1.7347952948391422e-06, "lm_loss": 0.3877, "loss": 0.3877, "step": 5916 }, { "epoch": 2.652272981958089, "learning_rate": 1.7303679240278757e-06, "lm_loss": 0.3678, "loss": 0.3678, "step": 5917 }, { "epoch": 2.652721228194688, "learning_rate": 1.7259460074498384e-06, "lm_loss": 0.4255, "loss": 0.4255, "step": 5918 }, { "epoch": 2.6531694744312877, "learning_rate": 1.721529546141498e-06, "lm_loss": 0.247, "loss": 0.247, "step": 5919 }, { "epoch": 2.653617720667887, "learning_rate": 1.7171185411380463e-06, "lm_loss": 0.4231, "loss": 0.4231, "step": 5920 }, { "epoch": 2.6540659669044864, "learning_rate": 1.712712993473381e-06, "lm_loss": 0.376, "loss": 0.376, "step": 5921 }, { "epoch": 2.6545142131410855, "learning_rate": 1.7083129041801398e-06, "lm_loss": 0.3421, "loss": 0.3421, "step": 5922 }, { "epoch": 2.6549624593776846, "learning_rate": 1.7039182742896648e-06, "lm_loss": 0.3319, "loss": 0.3319, "step": 5923 }, { "epoch": 2.655410705614284, "learning_rate": 1.6995291048320262e-06, "lm_loss": 0.458, "loss": 0.458, "step": 5924 }, { "epoch": 2.6558589518508833, "learning_rate": 1.6951453968360175e-06, "lm_loss": 0.3671, "loss": 0.3671, "step": 5925 }, { "epoch": 2.656307198087483, "learning_rate": 1.690767151329145e-06, "lm_loss": 0.4478, "loss": 0.4478, "step": 5926 }, { "epoch": 2.656755444324082, "learning_rate": 1.6863943693376433e-06, "lm_loss": 0.3293, "loss": 0.3293, "step": 5927 }, { "epoch": 2.6572036905606815, "learning_rate": 1.6820270518864506e-06, "lm_loss": 0.4241, "loss": 0.4241, "step": 5928 }, { "epoch": 2.6576519367972806, "learning_rate": 1.6776651999992426e-06, "lm_loss": 0.351, "loss": 0.351, "step": 5929 }, { "epoch": 2.6581001830338797, "learning_rate": 1.6733088146984072e-06, "lm_loss": 0.4746, "loss": 0.4746, "step": 5930 }, { "epoch": 2.6585484292704793, "learning_rate": 1.6689578970050412e-06, "lm_loss": 0.5174, "loss": 0.5174, "step": 5931 }, { "epoch": 2.6589966755070784, "learning_rate": 1.6646124479389708e-06, "lm_loss": 0.3646, "loss": 0.3646, "step": 5932 }, { "epoch": 2.659444921743678, "learning_rate": 1.6602724685187476e-06, "lm_loss": 0.3583, "loss": 0.3583, "step": 5933 }, { "epoch": 2.659893167980277, "learning_rate": 1.6559379597616137e-06, "lm_loss": 0.3054, "loss": 0.3054, "step": 5934 }, { "epoch": 2.6603414142168766, "learning_rate": 1.651608922683562e-06, "lm_loss": 0.4348, "loss": 0.4348, "step": 5935 }, { "epoch": 2.6607896604534758, "learning_rate": 1.6472853582992804e-06, "lm_loss": 0.384, "loss": 0.384, "step": 5936 }, { "epoch": 2.661237906690075, "learning_rate": 1.6429672676221835e-06, "lm_loss": 0.3345, "loss": 0.3345, "step": 5937 }, { "epoch": 2.6616861529266744, "learning_rate": 1.6386546516644008e-06, "lm_loss": 0.356, "loss": 0.356, "step": 5938 }, { "epoch": 2.6621343991632735, "learning_rate": 1.6343475114367762e-06, "lm_loss": 0.4041, "loss": 0.4041, "step": 5939 }, { "epoch": 2.662582645399873, "learning_rate": 1.6300458479488718e-06, "lm_loss": 0.3284, "loss": 0.3284, "step": 5940 }, { "epoch": 2.663030891636472, "learning_rate": 1.6257496622089697e-06, "lm_loss": 0.3639, "loss": 0.3639, "step": 5941 }, { "epoch": 2.663479137873072, "learning_rate": 1.6214589552240588e-06, "lm_loss": 0.5881, "loss": 0.5881, "step": 5942 }, { "epoch": 2.663927384109671, "learning_rate": 1.6171737279998545e-06, "lm_loss": 0.3217, "loss": 0.3217, "step": 5943 }, { "epoch": 2.66437563034627, "learning_rate": 1.6128939815407807e-06, "lm_loss": 0.3486, "loss": 0.3486, "step": 5944 }, { "epoch": 2.6648238765828696, "learning_rate": 1.6086197168499822e-06, "lm_loss": 0.3519, "loss": 0.3519, "step": 5945 }, { "epoch": 2.6652721228194687, "learning_rate": 1.6043509349293162e-06, "lm_loss": 0.3216, "loss": 0.3216, "step": 5946 }, { "epoch": 2.6657203690560682, "learning_rate": 1.6000876367793489e-06, "lm_loss": 0.3478, "loss": 0.3478, "step": 5947 }, { "epoch": 2.6661686152926674, "learning_rate": 1.5958298233993696e-06, "lm_loss": 0.4789, "loss": 0.4789, "step": 5948 }, { "epoch": 2.666616861529267, "learning_rate": 1.5915774957873831e-06, "lm_loss": 0.2923, "loss": 0.2923, "step": 5949 }, { "epoch": 2.667065107765866, "learning_rate": 1.5873306549400946e-06, "lm_loss": 0.4547, "loss": 0.4547, "step": 5950 }, { "epoch": 2.667513354002465, "learning_rate": 1.583089301852936e-06, "lm_loss": 0.2888, "loss": 0.2888, "step": 5951 }, { "epoch": 2.6679616002390647, "learning_rate": 1.5788534375200558e-06, "lm_loss": 0.2973, "loss": 0.2973, "step": 5952 }, { "epoch": 2.668409846475664, "learning_rate": 1.5746230629343023e-06, "lm_loss": 0.3814, "loss": 0.3814, "step": 5953 }, { "epoch": 2.6688580927122634, "learning_rate": 1.570398179087254e-06, "lm_loss": 0.5535, "loss": 0.5535, "step": 5954 }, { "epoch": 2.6693063389488625, "learning_rate": 1.5661787869691857e-06, "lm_loss": 0.3619, "loss": 0.3619, "step": 5955 }, { "epoch": 2.669754585185462, "learning_rate": 1.5619648875690923e-06, "lm_loss": 0.4332, "loss": 0.4332, "step": 5956 }, { "epoch": 2.670202831422061, "learning_rate": 1.5577564818746864e-06, "lm_loss": 0.375, "loss": 0.375, "step": 5957 }, { "epoch": 2.6706510776586603, "learning_rate": 1.553553570872379e-06, "lm_loss": 0.3647, "loss": 0.3647, "step": 5958 }, { "epoch": 2.67109932389526, "learning_rate": 1.5493561555473102e-06, "lm_loss": 0.2744, "loss": 0.2744, "step": 5959 }, { "epoch": 2.671547570131859, "learning_rate": 1.545164236883323e-06, "lm_loss": 0.3556, "loss": 0.3556, "step": 5960 }, { "epoch": 2.6719958163684585, "learning_rate": 1.540977815862965e-06, "lm_loss": 0.4068, "loss": 0.4068, "step": 5961 }, { "epoch": 2.6724440626050576, "learning_rate": 1.5367968934675153e-06, "lm_loss": 0.3144, "loss": 0.3144, "step": 5962 }, { "epoch": 2.672892308841657, "learning_rate": 1.5326214706769397e-06, "lm_loss": 0.3094, "loss": 0.3094, "step": 5963 }, { "epoch": 2.6733405550782563, "learning_rate": 1.5284515484699363e-06, "lm_loss": 0.5664, "loss": 0.5664, "step": 5964 }, { "epoch": 2.6737888013148554, "learning_rate": 1.5242871278239034e-06, "lm_loss": 0.4989, "loss": 0.4989, "step": 5965 }, { "epoch": 2.674237047551455, "learning_rate": 1.5201282097149439e-06, "lm_loss": 0.5007, "loss": 0.5007, "step": 5966 }, { "epoch": 2.674685293788054, "learning_rate": 1.515974795117886e-06, "lm_loss": 0.3684, "loss": 0.3684, "step": 5967 }, { "epoch": 2.6751335400246536, "learning_rate": 1.511826885006254e-06, "lm_loss": 0.4867, "loss": 0.4867, "step": 5968 }, { "epoch": 2.6755817862612528, "learning_rate": 1.5076844803522922e-06, "lm_loss": 0.4237, "loss": 0.4237, "step": 5969 }, { "epoch": 2.6760300324978523, "learning_rate": 1.5035475821269462e-06, "lm_loss": 0.3879, "loss": 0.3879, "step": 5970 }, { "epoch": 2.6764782787344514, "learning_rate": 1.4994161912998795e-06, "lm_loss": 0.4171, "loss": 0.4171, "step": 5971 }, { "epoch": 2.6769265249710505, "learning_rate": 1.4952903088394616e-06, "lm_loss": 0.2775, "loss": 0.2775, "step": 5972 }, { "epoch": 2.67737477120765, "learning_rate": 1.4911699357127635e-06, "lm_loss": 0.2942, "loss": 0.2942, "step": 5973 }, { "epoch": 2.677823017444249, "learning_rate": 1.4870550728855708e-06, "lm_loss": 0.3547, "loss": 0.3547, "step": 5974 }, { "epoch": 2.678271263680849, "learning_rate": 1.4829457213223875e-06, "lm_loss": 0.3432, "loss": 0.3432, "step": 5975 }, { "epoch": 2.678719509917448, "learning_rate": 1.4788418819864037e-06, "lm_loss": 0.4219, "loss": 0.4219, "step": 5976 }, { "epoch": 2.6791677561540475, "learning_rate": 1.4747435558395335e-06, "lm_loss": 0.3552, "loss": 0.3552, "step": 5977 }, { "epoch": 2.6796160023906466, "learning_rate": 1.4706507438424e-06, "lm_loss": 0.3639, "loss": 0.3639, "step": 5978 }, { "epoch": 2.6800642486272457, "learning_rate": 1.4665634469543165e-06, "lm_loss": 0.2928, "loss": 0.2928, "step": 5979 }, { "epoch": 2.6805124948638452, "learning_rate": 1.4624816661333335e-06, "lm_loss": 0.271, "loss": 0.271, "step": 5980 }, { "epoch": 2.6809607411004444, "learning_rate": 1.4584054023361742e-06, "lm_loss": 0.4543, "loss": 0.4543, "step": 5981 }, { "epoch": 2.681408987337044, "learning_rate": 1.454334656518294e-06, "lm_loss": 0.4318, "loss": 0.4318, "step": 5982 }, { "epoch": 2.681857233573643, "learning_rate": 1.4502694296338492e-06, "lm_loss": 0.3707, "loss": 0.3707, "step": 5983 }, { "epoch": 2.6823054798102426, "learning_rate": 1.4462097226356914e-06, "lm_loss": 0.3079, "loss": 0.3079, "step": 5984 }, { "epoch": 2.6827537260468417, "learning_rate": 1.4421555364753896e-06, "lm_loss": 0.3737, "loss": 0.3737, "step": 5985 }, { "epoch": 2.683201972283441, "learning_rate": 1.4381068721032175e-06, "lm_loss": 0.3785, "loss": 0.3785, "step": 5986 }, { "epoch": 2.6836502185200404, "learning_rate": 1.434063730468152e-06, "lm_loss": 0.2895, "loss": 0.2895, "step": 5987 }, { "epoch": 2.6840984647566395, "learning_rate": 1.430026112517871e-06, "lm_loss": 0.4484, "loss": 0.4484, "step": 5988 }, { "epoch": 2.684546710993239, "learning_rate": 1.4259940191987703e-06, "lm_loss": 0.3225, "loss": 0.3225, "step": 5989 }, { "epoch": 2.684994957229838, "learning_rate": 1.4219674514559412e-06, "lm_loss": 0.334, "loss": 0.334, "step": 5990 }, { "epoch": 2.6854432034664377, "learning_rate": 1.4179464102331842e-06, "lm_loss": 0.3151, "loss": 0.3151, "step": 5991 }, { "epoch": 2.685891449703037, "learning_rate": 1.4139308964729952e-06, "lm_loss": 0.3792, "loss": 0.3792, "step": 5992 }, { "epoch": 2.686339695939636, "learning_rate": 1.4099209111165856e-06, "lm_loss": 0.3138, "loss": 0.3138, "step": 5993 }, { "epoch": 2.6867879421762355, "learning_rate": 1.40591645510387e-06, "lm_loss": 0.4992, "loss": 0.4992, "step": 5994 }, { "epoch": 2.6872361884128346, "learning_rate": 1.4019175293734588e-06, "lm_loss": 0.348, "loss": 0.348, "step": 5995 }, { "epoch": 2.687684434649434, "learning_rate": 1.3979241348626716e-06, "lm_loss": 0.3628, "loss": 0.3628, "step": 5996 }, { "epoch": 2.6881326808860333, "learning_rate": 1.3939362725075345e-06, "lm_loss": 0.3804, "loss": 0.3804, "step": 5997 }, { "epoch": 2.688580927122633, "learning_rate": 1.389953943242772e-06, "lm_loss": 0.3303, "loss": 0.3303, "step": 5998 }, { "epoch": 2.689029173359232, "learning_rate": 1.3859771480018157e-06, "lm_loss": 0.3895, "loss": 0.3895, "step": 5999 }, { "epoch": 2.689477419595831, "learning_rate": 1.3820058877167918e-06, "lm_loss": 0.3199, "loss": 0.3199, "step": 6000 }, { "epoch": 2.6899256658324306, "learning_rate": 1.3780401633185391e-06, "lm_loss": 0.2835, "loss": 0.2835, "step": 6001 }, { "epoch": 2.6903739120690298, "learning_rate": 1.3740799757365974e-06, "lm_loss": 0.4776, "loss": 0.4776, "step": 6002 }, { "epoch": 2.6908221583056293, "learning_rate": 1.3701253258991993e-06, "lm_loss": 0.3054, "loss": 0.3054, "step": 6003 }, { "epoch": 2.6912704045422284, "learning_rate": 1.3661762147332862e-06, "lm_loss": 0.449, "loss": 0.449, "step": 6004 }, { "epoch": 2.691718650778828, "learning_rate": 1.3622326431645099e-06, "lm_loss": 0.4076, "loss": 0.4076, "step": 6005 }, { "epoch": 2.692166897015427, "learning_rate": 1.358294612117203e-06, "lm_loss": 0.4953, "loss": 0.4953, "step": 6006 }, { "epoch": 2.692615143252026, "learning_rate": 1.3543621225144242e-06, "lm_loss": 0.3095, "loss": 0.3095, "step": 6007 }, { "epoch": 2.693063389488626, "learning_rate": 1.3504351752779115e-06, "lm_loss": 0.3534, "loss": 0.3534, "step": 6008 }, { "epoch": 2.693511635725225, "learning_rate": 1.3465137713281145e-06, "lm_loss": 0.4755, "loss": 0.4755, "step": 6009 }, { "epoch": 2.6939598819618245, "learning_rate": 1.3425979115841842e-06, "lm_loss": 0.411, "loss": 0.411, "step": 6010 }, { "epoch": 2.6944081281984236, "learning_rate": 1.3386875969639695e-06, "lm_loss": 0.2159, "loss": 0.2159, "step": 6011 }, { "epoch": 2.694856374435023, "learning_rate": 1.334782828384018e-06, "lm_loss": 0.386, "loss": 0.386, "step": 6012 }, { "epoch": 2.6953046206716222, "learning_rate": 1.3308836067595781e-06, "lm_loss": 0.369, "loss": 0.369, "step": 6013 }, { "epoch": 2.6957528669082214, "learning_rate": 1.3269899330046021e-06, "lm_loss": 0.4223, "loss": 0.4223, "step": 6014 }, { "epoch": 2.696201113144821, "learning_rate": 1.3231018080317376e-06, "lm_loss": 0.3192, "loss": 0.3192, "step": 6015 }, { "epoch": 2.69664935938142, "learning_rate": 1.3192192327523333e-06, "lm_loss": 0.4066, "loss": 0.4066, "step": 6016 }, { "epoch": 2.6970976056180196, "learning_rate": 1.3153422080764394e-06, "lm_loss": 0.356, "loss": 0.356, "step": 6017 }, { "epoch": 2.6975458518546187, "learning_rate": 1.3114707349127953e-06, "lm_loss": 0.4036, "loss": 0.4036, "step": 6018 }, { "epoch": 2.6979940980912183, "learning_rate": 1.3076048141688502e-06, "lm_loss": 0.3218, "loss": 0.3218, "step": 6019 }, { "epoch": 2.6984423443278174, "learning_rate": 1.3037444467507543e-06, "lm_loss": 0.3341, "loss": 0.3341, "step": 6020 }, { "epoch": 2.6988905905644165, "learning_rate": 1.2998896335633392e-06, "lm_loss": 0.4404, "loss": 0.4404, "step": 6021 }, { "epoch": 2.699338836801016, "learning_rate": 1.296040375510149e-06, "lm_loss": 0.4373, "loss": 0.4373, "step": 6022 }, { "epoch": 2.699787083037615, "learning_rate": 1.292196673493426e-06, "lm_loss": 0.3542, "loss": 0.3542, "step": 6023 }, { "epoch": 2.7002353292742147, "learning_rate": 1.2883585284140963e-06, "lm_loss": 0.3838, "loss": 0.3838, "step": 6024 }, { "epoch": 2.700683575510814, "learning_rate": 1.2845259411718074e-06, "lm_loss": 0.601, "loss": 0.601, "step": 6025 }, { "epoch": 2.7011318217474134, "learning_rate": 1.2806989126648766e-06, "lm_loss": 0.6394, "loss": 0.6394, "step": 6026 }, { "epoch": 2.7015800679840125, "learning_rate": 1.2768774437903392e-06, "lm_loss": 0.3312, "loss": 0.3312, "step": 6027 }, { "epoch": 2.7020283142206116, "learning_rate": 1.2730615354439228e-06, "lm_loss": 0.3636, "loss": 0.3636, "step": 6028 }, { "epoch": 2.702476560457211, "learning_rate": 1.2692511885200376e-06, "lm_loss": 0.322, "loss": 0.322, "step": 6029 }, { "epoch": 2.7029248066938103, "learning_rate": 1.2654464039118103e-06, "lm_loss": 0.3939, "loss": 0.3939, "step": 6030 }, { "epoch": 2.70337305293041, "learning_rate": 1.2616471825110526e-06, "lm_loss": 0.372, "loss": 0.372, "step": 6031 }, { "epoch": 2.703821299167009, "learning_rate": 1.2578535252082745e-06, "lm_loss": 0.3419, "loss": 0.3419, "step": 6032 }, { "epoch": 2.7042695454036085, "learning_rate": 1.2540654328926787e-06, "lm_loss": 0.4373, "loss": 0.4373, "step": 6033 }, { "epoch": 2.7047177916402076, "learning_rate": 1.2502829064521688e-06, "lm_loss": 0.2451, "loss": 0.2451, "step": 6034 }, { "epoch": 2.7051660378768068, "learning_rate": 1.2465059467733436e-06, "lm_loss": 0.4215, "loss": 0.4215, "step": 6035 }, { "epoch": 2.7056142841134063, "learning_rate": 1.2427345547414981e-06, "lm_loss": 0.3814, "loss": 0.3814, "step": 6036 }, { "epoch": 2.706062530350006, "learning_rate": 1.2389687312406085e-06, "lm_loss": 0.4098, "loss": 0.4098, "step": 6037 }, { "epoch": 2.706510776586605, "learning_rate": 1.2352084771533657e-06, "lm_loss": 0.4524, "loss": 0.4524, "step": 6038 }, { "epoch": 2.706959022823204, "learning_rate": 1.2314537933611424e-06, "lm_loss": 0.3765, "loss": 0.3765, "step": 6039 }, { "epoch": 2.7074072690598037, "learning_rate": 1.2277046807440102e-06, "lm_loss": 0.5458, "loss": 0.5458, "step": 6040 }, { "epoch": 2.707855515296403, "learning_rate": 1.2239611401807322e-06, "lm_loss": 0.1845, "loss": 0.1845, "step": 6041 }, { "epoch": 2.708303761533002, "learning_rate": 1.2202231725487706e-06, "lm_loss": 0.4559, "loss": 0.4559, "step": 6042 }, { "epoch": 2.7087520077696015, "learning_rate": 1.2164907787242747e-06, "lm_loss": 0.3753, "loss": 0.3753, "step": 6043 }, { "epoch": 2.709200254006201, "learning_rate": 1.2127639595820972e-06, "lm_loss": 0.3854, "loss": 0.3854, "step": 6044 }, { "epoch": 2.7096485002428, "learning_rate": 1.2090427159957673e-06, "lm_loss": 0.273, "loss": 0.273, "step": 6045 }, { "epoch": 2.7100967464793992, "learning_rate": 1.2053270488375208e-06, "lm_loss": 0.3482, "loss": 0.3482, "step": 6046 }, { "epoch": 2.710544992715999, "learning_rate": 1.201616958978291e-06, "lm_loss": 0.355, "loss": 0.355, "step": 6047 }, { "epoch": 2.710993238952598, "learning_rate": 1.1979124472876858e-06, "lm_loss": 0.3248, "loss": 0.3248, "step": 6048 }, { "epoch": 2.711441485189197, "learning_rate": 1.1942135146340239e-06, "lm_loss": 0.3648, "loss": 0.3648, "step": 6049 }, { "epoch": 2.7118897314257966, "learning_rate": 1.190520161884301e-06, "lm_loss": 0.2487, "loss": 0.2487, "step": 6050 }, { "epoch": 2.712337977662396, "learning_rate": 1.1868323899042133e-06, "lm_loss": 0.3958, "loss": 0.3958, "step": 6051 }, { "epoch": 2.7127862238989953, "learning_rate": 1.1831501995581556e-06, "lm_loss": 0.4349, "loss": 0.4349, "step": 6052 }, { "epoch": 2.7132344701355944, "learning_rate": 1.1794735917091988e-06, "lm_loss": 0.3621, "loss": 0.3621, "step": 6053 }, { "epoch": 2.713682716372194, "learning_rate": 1.1758025672191142e-06, "lm_loss": 0.2826, "loss": 0.2826, "step": 6054 }, { "epoch": 2.714130962608793, "learning_rate": 1.1721371269483694e-06, "lm_loss": 0.407, "loss": 0.407, "step": 6055 }, { "epoch": 2.714579208845392, "learning_rate": 1.1684772717561105e-06, "lm_loss": 0.3255, "loss": 0.3255, "step": 6056 }, { "epoch": 2.7150274550819917, "learning_rate": 1.1648230025001845e-06, "lm_loss": 0.3403, "loss": 0.3403, "step": 6057 }, { "epoch": 2.7154757013185913, "learning_rate": 1.1611743200371227e-06, "lm_loss": 0.3568, "loss": 0.3568, "step": 6058 }, { "epoch": 2.7159239475551904, "learning_rate": 1.1575312252221499e-06, "lm_loss": 0.3481, "loss": 0.3481, "step": 6059 }, { "epoch": 2.7163721937917895, "learning_rate": 1.1538937189091825e-06, "lm_loss": 0.3402, "loss": 0.3402, "step": 6060 }, { "epoch": 2.716820440028389, "learning_rate": 1.1502618019508276e-06, "lm_loss": 0.375, "loss": 0.375, "step": 6061 }, { "epoch": 2.717268686264988, "learning_rate": 1.1466354751983794e-06, "lm_loss": 0.2706, "loss": 0.2706, "step": 6062 }, { "epoch": 2.7177169325015873, "learning_rate": 1.1430147395018214e-06, "lm_loss": 0.3602, "loss": 0.3602, "step": 6063 }, { "epoch": 2.718165178738187, "learning_rate": 1.1393995957098252e-06, "lm_loss": 0.3496, "loss": 0.3496, "step": 6064 }, { "epoch": 2.7186134249747864, "learning_rate": 1.1357900446697628e-06, "lm_loss": 0.3304, "loss": 0.3304, "step": 6065 }, { "epoch": 2.7190616712113855, "learning_rate": 1.132186087227674e-06, "lm_loss": 0.3419, "loss": 0.3419, "step": 6066 }, { "epoch": 2.7195099174479846, "learning_rate": 1.128587724228311e-06, "lm_loss": 0.5459, "loss": 0.5459, "step": 6067 }, { "epoch": 2.719958163684584, "learning_rate": 1.124994956515099e-06, "lm_loss": 0.3466, "loss": 0.3466, "step": 6068 }, { "epoch": 2.7204064099211833, "learning_rate": 1.1214077849301591e-06, "lm_loss": 0.3169, "loss": 0.3169, "step": 6069 }, { "epoch": 2.7208546561577824, "learning_rate": 1.1178262103143016e-06, "lm_loss": 0.4932, "loss": 0.4932, "step": 6070 }, { "epoch": 2.721302902394382, "learning_rate": 1.1142502335070137e-06, "lm_loss": 0.3076, "loss": 0.3076, "step": 6071 }, { "epoch": 2.7217511486309816, "learning_rate": 1.1106798553464804e-06, "lm_loss": 0.5349, "loss": 0.5349, "step": 6072 }, { "epoch": 2.7221993948675807, "learning_rate": 1.1071150766695793e-06, "lm_loss": 0.4679, "loss": 0.4679, "step": 6073 }, { "epoch": 2.72264764110418, "learning_rate": 1.1035558983118617e-06, "lm_loss": 0.2731, "loss": 0.2731, "step": 6074 }, { "epoch": 2.7230958873407793, "learning_rate": 1.100002321107574e-06, "lm_loss": 0.3855, "loss": 0.3855, "step": 6075 }, { "epoch": 2.7235441335773785, "learning_rate": 1.0964543458896526e-06, "lm_loss": 0.3861, "loss": 0.3861, "step": 6076 }, { "epoch": 2.7239923798139776, "learning_rate": 1.0929119734897103e-06, "lm_loss": 0.2671, "loss": 0.2671, "step": 6077 }, { "epoch": 2.724440626050577, "learning_rate": 1.0893752047380634e-06, "lm_loss": 0.449, "loss": 0.449, "step": 6078 }, { "epoch": 2.7248888722871767, "learning_rate": 1.0858440404636988e-06, "lm_loss": 0.6119, "loss": 0.6119, "step": 6079 }, { "epoch": 2.725337118523776, "learning_rate": 1.0823184814942933e-06, "lm_loss": 0.3932, "loss": 0.3932, "step": 6080 }, { "epoch": 2.725785364760375, "learning_rate": 1.078798528656222e-06, "lm_loss": 0.4597, "loss": 0.4597, "step": 6081 }, { "epoch": 2.7262336109969745, "learning_rate": 1.0752841827745247e-06, "lm_loss": 0.4028, "loss": 0.4028, "step": 6082 }, { "epoch": 2.7266818572335736, "learning_rate": 1.0717754446729456e-06, "lm_loss": 0.3858, "loss": 0.3858, "step": 6083 }, { "epoch": 2.7271301034701727, "learning_rate": 1.0682723151739071e-06, "lm_loss": 0.2604, "loss": 0.2604, "step": 6084 }, { "epoch": 2.7275783497067723, "learning_rate": 1.0647747950985136e-06, "lm_loss": 0.3864, "loss": 0.3864, "step": 6085 }, { "epoch": 2.728026595943372, "learning_rate": 1.0612828852665623e-06, "lm_loss": 0.3278, "loss": 0.3278, "step": 6086 }, { "epoch": 2.728474842179971, "learning_rate": 1.057796586496529e-06, "lm_loss": 0.2238, "loss": 0.2238, "step": 6087 }, { "epoch": 2.72892308841657, "learning_rate": 1.054315899605579e-06, "lm_loss": 0.3224, "loss": 0.3224, "step": 6088 }, { "epoch": 2.7293713346531696, "learning_rate": 1.050840825409563e-06, "lm_loss": 0.4917, "loss": 0.4917, "step": 6089 }, { "epoch": 2.7298195808897687, "learning_rate": 1.0473713647230072e-06, "lm_loss": 0.4058, "loss": 0.4058, "step": 6090 }, { "epoch": 2.730267827126368, "learning_rate": 1.0439075183591274e-06, "lm_loss": 0.3482, "loss": 0.3482, "step": 6091 }, { "epoch": 2.7307160733629674, "learning_rate": 1.0404492871298328e-06, "lm_loss": 0.3027, "loss": 0.3027, "step": 6092 }, { "epoch": 2.731164319599567, "learning_rate": 1.0369966718456968e-06, "lm_loss": 0.4177, "loss": 0.4177, "step": 6093 }, { "epoch": 2.731612565836166, "learning_rate": 1.0335496733159977e-06, "lm_loss": 0.4205, "loss": 0.4205, "step": 6094 }, { "epoch": 2.732060812072765, "learning_rate": 1.0301082923486749e-06, "lm_loss": 0.5195, "loss": 0.5195, "step": 6095 }, { "epoch": 2.7325090583093647, "learning_rate": 1.0266725297503749e-06, "lm_loss": 0.4359, "loss": 0.4359, "step": 6096 }, { "epoch": 2.732957304545964, "learning_rate": 1.0232423863264118e-06, "lm_loss": 0.4386, "loss": 0.4386, "step": 6097 }, { "epoch": 2.733405550782563, "learning_rate": 1.0198178628807842e-06, "lm_loss": 0.3027, "loss": 0.3027, "step": 6098 }, { "epoch": 2.7338537970191625, "learning_rate": 1.0163989602161778e-06, "lm_loss": 0.3466, "loss": 0.3466, "step": 6099 }, { "epoch": 2.734302043255762, "learning_rate": 1.0129856791339571e-06, "lm_loss": 0.3444, "loss": 0.3444, "step": 6100 }, { "epoch": 2.734750289492361, "learning_rate": 1.009578020434171e-06, "lm_loss": 0.4475, "loss": 0.4475, "step": 6101 }, { "epoch": 2.7351985357289603, "learning_rate": 1.00617598491555e-06, "lm_loss": 0.4146, "loss": 0.4146, "step": 6102 }, { "epoch": 2.73564678196556, "learning_rate": 1.0027795733755063e-06, "lm_loss": 0.4235, "loss": 0.4235, "step": 6103 }, { "epoch": 2.736095028202159, "learning_rate": 9.993887866101308e-07, "lm_loss": 0.2723, "loss": 0.2723, "step": 6104 }, { "epoch": 2.736543274438758, "learning_rate": 9.960036254142101e-07, "lm_loss": 0.369, "loss": 0.369, "step": 6105 }, { "epoch": 2.7369915206753577, "learning_rate": 9.926240905811901e-07, "lm_loss": 0.3279, "loss": 0.3279, "step": 6106 }, { "epoch": 2.7374397669119572, "learning_rate": 9.892501829032174e-07, "lm_loss": 0.2866, "loss": 0.2866, "step": 6107 }, { "epoch": 2.7378880131485563, "learning_rate": 9.858819031711069e-07, "lm_loss": 0.4987, "loss": 0.4987, "step": 6108 }, { "epoch": 2.7383362593851555, "learning_rate": 9.825192521743576e-07, "lm_loss": 0.2876, "loss": 0.2876, "step": 6109 }, { "epoch": 2.738784505621755, "learning_rate": 9.791622307011555e-07, "lm_loss": 0.3679, "loss": 0.3679, "step": 6110 }, { "epoch": 2.739232751858354, "learning_rate": 9.758108395383575e-07, "lm_loss": 0.4002, "loss": 0.4002, "step": 6111 }, { "epoch": 2.7396809980949532, "learning_rate": 9.724650794715073e-07, "lm_loss": 0.4406, "loss": 0.4406, "step": 6112 }, { "epoch": 2.740129244331553, "learning_rate": 9.69124951284825e-07, "lm_loss": 0.4579, "loss": 0.4579, "step": 6113 }, { "epoch": 2.7405774905681524, "learning_rate": 9.657904557612174e-07, "lm_loss": 0.2725, "loss": 0.2725, "step": 6114 }, { "epoch": 2.7410257368047515, "learning_rate": 9.624615936822624e-07, "lm_loss": 0.3797, "loss": 0.3797, "step": 6115 }, { "epoch": 2.7414739830413506, "learning_rate": 9.591383658282216e-07, "lm_loss": 0.4716, "loss": 0.4716, "step": 6116 }, { "epoch": 2.74192222927795, "learning_rate": 9.558207729780361e-07, "lm_loss": 0.582, "loss": 0.582, "step": 6117 }, { "epoch": 2.7423704755145493, "learning_rate": 9.525088159093254e-07, "lm_loss": 0.346, "loss": 0.346, "step": 6118 }, { "epoch": 2.7428187217511484, "learning_rate": 9.49202495398388e-07, "lm_loss": 0.4645, "loss": 0.4645, "step": 6119 }, { "epoch": 2.743266967987748, "learning_rate": 9.459018122202013e-07, "lm_loss": 0.326, "loss": 0.326, "step": 6120 }, { "epoch": 2.7437152142243475, "learning_rate": 9.42606767148424e-07, "lm_loss": 0.3704, "loss": 0.3704, "step": 6121 }, { "epoch": 2.7441634604609466, "learning_rate": 9.393173609553829e-07, "lm_loss": 0.4577, "loss": 0.4577, "step": 6122 }, { "epoch": 2.7446117066975457, "learning_rate": 9.360335944121029e-07, "lm_loss": 0.4121, "loss": 0.4121, "step": 6123 }, { "epoch": 2.7450599529341453, "learning_rate": 9.327554682882656e-07, "lm_loss": 0.3009, "loss": 0.3009, "step": 6124 }, { "epoch": 2.7455081991707444, "learning_rate": 9.294829833522423e-07, "lm_loss": 0.4441, "loss": 0.4441, "step": 6125 }, { "epoch": 2.7459564454073435, "learning_rate": 9.262161403710862e-07, "lm_loss": 0.3478, "loss": 0.3478, "step": 6126 }, { "epoch": 2.746404691643943, "learning_rate": 9.229549401105125e-07, "lm_loss": 0.3622, "loss": 0.3622, "step": 6127 }, { "epoch": 2.7468529378805426, "learning_rate": 9.196993833349265e-07, "lm_loss": 0.3369, "loss": 0.3369, "step": 6128 }, { "epoch": 2.7473011841171417, "learning_rate": 9.164494708074095e-07, "lm_loss": 0.3641, "loss": 0.3641, "step": 6129 }, { "epoch": 2.747749430353741, "learning_rate": 9.132052032897131e-07, "lm_loss": 0.3816, "loss": 0.3816, "step": 6130 }, { "epoch": 2.7481976765903404, "learning_rate": 9.099665815422709e-07, "lm_loss": 0.3952, "loss": 0.3952, "step": 6131 }, { "epoch": 2.7486459228269395, "learning_rate": 9.06733606324195e-07, "lm_loss": 0.3435, "loss": 0.3435, "step": 6132 }, { "epoch": 2.7490941690635387, "learning_rate": 9.035062783932707e-07, "lm_loss": 0.3665, "loss": 0.3665, "step": 6133 }, { "epoch": 2.749542415300138, "learning_rate": 9.002845985059627e-07, "lm_loss": 0.3587, "loss": 0.3587, "step": 6134 }, { "epoch": 2.7499906615367378, "learning_rate": 8.970685674174029e-07, "lm_loss": 0.3782, "loss": 0.3782, "step": 6135 }, { "epoch": 2.750438907773337, "learning_rate": 8.938581858814077e-07, "lm_loss": 0.3838, "loss": 0.3838, "step": 6136 }, { "epoch": 2.750887154009936, "learning_rate": 8.906534546504724e-07, "lm_loss": 0.2849, "loss": 0.2849, "step": 6137 }, { "epoch": 2.7513354002465356, "learning_rate": 8.874543744757574e-07, "lm_loss": 0.3969, "loss": 0.3969, "step": 6138 }, { "epoch": 2.7517836464831347, "learning_rate": 8.8426094610711e-07, "lm_loss": 0.3955, "loss": 0.3955, "step": 6139 }, { "epoch": 2.752231892719734, "learning_rate": 8.810731702930341e-07, "lm_loss": 0.3442, "loss": 0.3442, "step": 6140 }, { "epoch": 2.7526801389563333, "learning_rate": 8.778910477807351e-07, "lm_loss": 0.3494, "loss": 0.3494, "step": 6141 }, { "epoch": 2.753128385192933, "learning_rate": 8.747145793160744e-07, "lm_loss": 0.3997, "loss": 0.3997, "step": 6142 }, { "epoch": 2.753576631429532, "learning_rate": 8.715437656435899e-07, "lm_loss": 0.417, "loss": 0.417, "step": 6143 }, { "epoch": 2.754024877666131, "learning_rate": 8.683786075065065e-07, "lm_loss": 0.6066, "loss": 0.6066, "step": 6144 }, { "epoch": 2.7544731239027307, "learning_rate": 8.652191056467029e-07, "lm_loss": 0.4001, "loss": 0.4001, "step": 6145 }, { "epoch": 2.75492137013933, "learning_rate": 8.620652608047481e-07, "lm_loss": 0.4806, "loss": 0.4806, "step": 6146 }, { "epoch": 2.755369616375929, "learning_rate": 8.589170737198837e-07, "lm_loss": 0.4909, "loss": 0.4909, "step": 6147 }, { "epoch": 2.7558178626125285, "learning_rate": 8.55774545130017e-07, "lm_loss": 0.3746, "loss": 0.3746, "step": 6148 }, { "epoch": 2.756266108849128, "learning_rate": 8.526376757717364e-07, "lm_loss": 0.2848, "loss": 0.2848, "step": 6149 }, { "epoch": 2.756714355085727, "learning_rate": 8.49506466380301e-07, "lm_loss": 0.3087, "loss": 0.3087, "step": 6150 }, { "epoch": 2.7571626013223263, "learning_rate": 8.463809176896431e-07, "lm_loss": 0.3351, "loss": 0.3351, "step": 6151 }, { "epoch": 2.757610847558926, "learning_rate": 8.43261030432374e-07, "lm_loss": 0.3807, "loss": 0.3807, "step": 6152 }, { "epoch": 2.758059093795525, "learning_rate": 8.401468053397643e-07, "lm_loss": 0.2776, "loss": 0.2776, "step": 6153 }, { "epoch": 2.758507340032124, "learning_rate": 8.37038243141769e-07, "lm_loss": 0.4216, "loss": 0.4216, "step": 6154 }, { "epoch": 2.7589555862687236, "learning_rate": 8.339353445670162e-07, "lm_loss": 0.4633, "loss": 0.4633, "step": 6155 }, { "epoch": 2.759403832505323, "learning_rate": 8.308381103427965e-07, "lm_loss": 0.2242, "loss": 0.2242, "step": 6156 }, { "epoch": 2.7598520787419223, "learning_rate": 8.277465411950847e-07, "lm_loss": 0.4213, "loss": 0.4213, "step": 6157 }, { "epoch": 2.7603003249785214, "learning_rate": 8.246606378485178e-07, "lm_loss": 0.3379, "loss": 0.3379, "step": 6158 }, { "epoch": 2.760748571215121, "learning_rate": 8.215804010264116e-07, "lm_loss": 0.323, "loss": 0.323, "step": 6159 }, { "epoch": 2.76119681745172, "learning_rate": 8.185058314507554e-07, "lm_loss": 0.3985, "loss": 0.3985, "step": 6160 }, { "epoch": 2.761645063688319, "learning_rate": 8.154369298421976e-07, "lm_loss": 0.314, "loss": 0.314, "step": 6161 }, { "epoch": 2.7620933099249187, "learning_rate": 8.123736969200712e-07, "lm_loss": 0.4932, "loss": 0.4932, "step": 6162 }, { "epoch": 2.7625415561615183, "learning_rate": 8.093161334023769e-07, "lm_loss": 0.3846, "loss": 0.3846, "step": 6163 }, { "epoch": 2.7629898023981174, "learning_rate": 8.062642400057801e-07, "lm_loss": 0.5457, "loss": 0.5457, "step": 6164 }, { "epoch": 2.7634380486347165, "learning_rate": 8.032180174456283e-07, "lm_loss": 0.377, "loss": 0.377, "step": 6165 }, { "epoch": 2.763886294871316, "learning_rate": 8.001774664359335e-07, "lm_loss": 0.3458, "loss": 0.3458, "step": 6166 }, { "epoch": 2.764334541107915, "learning_rate": 7.971425876893729e-07, "lm_loss": 0.3132, "loss": 0.3132, "step": 6167 }, { "epoch": 2.7647827873445143, "learning_rate": 7.941133819173107e-07, "lm_loss": 0.5513, "loss": 0.5513, "step": 6168 }, { "epoch": 2.765231033581114, "learning_rate": 7.910898498297598e-07, "lm_loss": 0.3543, "loss": 0.3543, "step": 6169 }, { "epoch": 2.7656792798177134, "learning_rate": 7.880719921354196e-07, "lm_loss": 0.4479, "loss": 0.4479, "step": 6170 }, { "epoch": 2.7661275260543126, "learning_rate": 7.85059809541655e-07, "lm_loss": 0.4932, "loss": 0.4932, "step": 6171 }, { "epoch": 2.7665757722909117, "learning_rate": 7.820533027544958e-07, "lm_loss": 0.4085, "loss": 0.4085, "step": 6172 }, { "epoch": 2.7670240185275112, "learning_rate": 7.790524724786502e-07, "lm_loss": 0.5623, "loss": 0.5623, "step": 6173 }, { "epoch": 2.7674722647641103, "learning_rate": 7.760573194174891e-07, "lm_loss": 0.4694, "loss": 0.4694, "step": 6174 }, { "epoch": 2.7679205110007095, "learning_rate": 7.730678442730538e-07, "lm_loss": 0.3339, "loss": 0.3339, "step": 6175 }, { "epoch": 2.768368757237309, "learning_rate": 7.700840477460531e-07, "lm_loss": 0.3341, "loss": 0.3341, "step": 6176 }, { "epoch": 2.7688170034739086, "learning_rate": 7.671059305358718e-07, "lm_loss": 0.471, "loss": 0.471, "step": 6177 }, { "epoch": 2.7692652497105077, "learning_rate": 7.641334933405575e-07, "lm_loss": 0.443, "loss": 0.443, "step": 6178 }, { "epoch": 2.769713495947107, "learning_rate": 7.611667368568304e-07, "lm_loss": 0.4498, "loss": 0.4498, "step": 6179 }, { "epoch": 2.7701617421837064, "learning_rate": 7.582056617800731e-07, "lm_loss": 0.2628, "loss": 0.2628, "step": 6180 }, { "epoch": 2.7706099884203055, "learning_rate": 7.552502688043389e-07, "lm_loss": 0.3146, "loss": 0.3146, "step": 6181 }, { "epoch": 2.7710582346569046, "learning_rate": 7.523005586223569e-07, "lm_loss": 0.3543, "loss": 0.3543, "step": 6182 }, { "epoch": 2.771506480893504, "learning_rate": 7.49356531925513e-07, "lm_loss": 0.3655, "loss": 0.3655, "step": 6183 }, { "epoch": 2.7719547271301037, "learning_rate": 7.464181894038663e-07, "lm_loss": 0.4144, "loss": 0.4144, "step": 6184 }, { "epoch": 2.772402973366703, "learning_rate": 7.434855317461381e-07, "lm_loss": 0.3491, "loss": 0.3491, "step": 6185 }, { "epoch": 2.772851219603302, "learning_rate": 7.405585596397313e-07, "lm_loss": 0.493, "loss": 0.493, "step": 6186 }, { "epoch": 2.7732994658399015, "learning_rate": 7.376372737707055e-07, "lm_loss": 0.311, "loss": 0.311, "step": 6187 }, { "epoch": 2.7737477120765006, "learning_rate": 7.347216748237851e-07, "lm_loss": 0.4543, "loss": 0.4543, "step": 6188 }, { "epoch": 2.7741959583130997, "learning_rate": 7.318117634823679e-07, "lm_loss": 0.3712, "loss": 0.3712, "step": 6189 }, { "epoch": 2.7746442045496993, "learning_rate": 7.289075404285112e-07, "lm_loss": 0.3755, "loss": 0.3755, "step": 6190 }, { "epoch": 2.775092450786299, "learning_rate": 7.260090063429509e-07, "lm_loss": 0.4082, "loss": 0.4082, "step": 6191 }, { "epoch": 2.775540697022898, "learning_rate": 7.231161619050769e-07, "lm_loss": 0.3832, "loss": 0.3832, "step": 6192 }, { "epoch": 2.775988943259497, "learning_rate": 7.202290077929552e-07, "lm_loss": 0.572, "loss": 0.572, "step": 6193 }, { "epoch": 2.7764371894960966, "learning_rate": 7.173475446833083e-07, "lm_loss": 0.6256, "loss": 0.6256, "step": 6194 }, { "epoch": 2.7768854357326958, "learning_rate": 7.144717732515349e-07, "lm_loss": 0.3164, "loss": 0.3164, "step": 6195 }, { "epoch": 2.777333681969295, "learning_rate": 7.116016941716929e-07, "lm_loss": 0.3592, "loss": 0.3592, "step": 6196 }, { "epoch": 2.7777819282058944, "learning_rate": 7.087373081165138e-07, "lm_loss": 0.2754, "loss": 0.2754, "step": 6197 }, { "epoch": 2.778230174442494, "learning_rate": 7.058786157573799e-07, "lm_loss": 0.5447, "loss": 0.5447, "step": 6198 }, { "epoch": 2.778678420679093, "learning_rate": 7.030256177643524e-07, "lm_loss": 0.3016, "loss": 0.3016, "step": 6199 }, { "epoch": 2.779126666915692, "learning_rate": 7.001783148061547e-07, "lm_loss": 0.4292, "loss": 0.4292, "step": 6200 }, { "epoch": 2.7795749131522918, "learning_rate": 6.973367075501724e-07, "lm_loss": 0.3438, "loss": 0.3438, "step": 6201 }, { "epoch": 2.780023159388891, "learning_rate": 6.945007966624561e-07, "lm_loss": 0.2897, "loss": 0.2897, "step": 6202 }, { "epoch": 2.78047140562549, "learning_rate": 6.916705828077242e-07, "lm_loss": 0.5335, "loss": 0.5335, "step": 6203 }, { "epoch": 2.7809196518620896, "learning_rate": 6.888460666493596e-07, "lm_loss": 0.3684, "loss": 0.3684, "step": 6204 }, { "epoch": 2.781367898098689, "learning_rate": 6.86027248849408e-07, "lm_loss": 0.2632, "loss": 0.2632, "step": 6205 }, { "epoch": 2.7818161443352882, "learning_rate": 6.832141300685768e-07, "lm_loss": 0.4186, "loss": 0.4186, "step": 6206 }, { "epoch": 2.7822643905718873, "learning_rate": 6.804067109662443e-07, "lm_loss": 0.3242, "loss": 0.3242, "step": 6207 }, { "epoch": 2.782712636808487, "learning_rate": 6.776049922004508e-07, "lm_loss": 0.408, "loss": 0.408, "step": 6208 }, { "epoch": 2.783160883045086, "learning_rate": 6.748089744278929e-07, "lm_loss": 0.3098, "loss": 0.3098, "step": 6209 }, { "epoch": 2.783609129281685, "learning_rate": 6.720186583039412e-07, "lm_loss": 0.3322, "loss": 0.3322, "step": 6210 }, { "epoch": 2.7840573755182847, "learning_rate": 6.692340444826251e-07, "lm_loss": 0.3597, "loss": 0.3597, "step": 6211 }, { "epoch": 2.7845056217548843, "learning_rate": 6.664551336166336e-07, "lm_loss": 0.4396, "loss": 0.4396, "step": 6212 }, { "epoch": 2.7849538679914834, "learning_rate": 6.636819263573318e-07, "lm_loss": 0.6088, "loss": 0.6088, "step": 6213 }, { "epoch": 2.7854021142280825, "learning_rate": 6.609144233547304e-07, "lm_loss": 0.3401, "loss": 0.3401, "step": 6214 }, { "epoch": 2.785850360464682, "learning_rate": 6.581526252575188e-07, "lm_loss": 0.2671, "loss": 0.2671, "step": 6215 }, { "epoch": 2.786298606701281, "learning_rate": 6.553965327130401e-07, "lm_loss": 0.4178, "loss": 0.4178, "step": 6216 }, { "epoch": 2.7867468529378803, "learning_rate": 6.526461463673001e-07, "lm_loss": 0.4139, "loss": 0.4139, "step": 6217 }, { "epoch": 2.78719509917448, "learning_rate": 6.499014668649717e-07, "lm_loss": 0.4699, "loss": 0.4699, "step": 6218 }, { "epoch": 2.7876433454110794, "learning_rate": 6.471624948493931e-07, "lm_loss": 0.4387, "loss": 0.4387, "step": 6219 }, { "epoch": 2.7880915916476785, "learning_rate": 6.444292309625483e-07, "lm_loss": 0.3827, "loss": 0.3827, "step": 6220 }, { "epoch": 2.7885398378842776, "learning_rate": 6.417016758451022e-07, "lm_loss": 0.4645, "loss": 0.4645, "step": 6221 }, { "epoch": 2.788988084120877, "learning_rate": 6.389798301363714e-07, "lm_loss": 0.1944, "loss": 0.1944, "step": 6222 }, { "epoch": 2.7894363303574763, "learning_rate": 6.362636944743372e-07, "lm_loss": 0.3243, "loss": 0.3243, "step": 6223 }, { "epoch": 2.7898845765940754, "learning_rate": 6.335532694956458e-07, "lm_loss": 0.561, "loss": 0.561, "step": 6224 }, { "epoch": 2.790332822830675, "learning_rate": 6.308485558355947e-07, "lm_loss": 0.5237, "loss": 0.5237, "step": 6225 }, { "epoch": 2.7907810690672745, "learning_rate": 6.281495541281545e-07, "lm_loss": 0.3281, "loss": 0.3281, "step": 6226 }, { "epoch": 2.7912293153038736, "learning_rate": 6.254562650059498e-07, "lm_loss": 0.4706, "loss": 0.4706, "step": 6227 }, { "epoch": 2.7916775615404728, "learning_rate": 6.227686891002671e-07, "lm_loss": 0.4172, "loss": 0.4172, "step": 6228 }, { "epoch": 2.7921258077770723, "learning_rate": 6.200868270410553e-07, "lm_loss": 0.3245, "loss": 0.3245, "step": 6229 }, { "epoch": 2.7925740540136714, "learning_rate": 6.174106794569201e-07, "lm_loss": 0.3775, "loss": 0.3775, "step": 6230 }, { "epoch": 2.7930223002502705, "learning_rate": 6.147402469751374e-07, "lm_loss": 0.4524, "loss": 0.4524, "step": 6231 }, { "epoch": 2.79347054648687, "learning_rate": 6.120755302216369e-07, "lm_loss": 0.3204, "loss": 0.3204, "step": 6232 }, { "epoch": 2.7939187927234697, "learning_rate": 6.094165298210053e-07, "lm_loss": 0.3502, "loss": 0.3502, "step": 6233 }, { "epoch": 2.7943670389600688, "learning_rate": 6.067632463964939e-07, "lm_loss": 0.3346, "loss": 0.3346, "step": 6234 }, { "epoch": 2.794815285196668, "learning_rate": 6.041156805700138e-07, "lm_loss": 0.3432, "loss": 0.3432, "step": 6235 }, { "epoch": 2.7952635314332674, "learning_rate": 6.014738329621323e-07, "lm_loss": 0.4834, "loss": 0.4834, "step": 6236 }, { "epoch": 2.7957117776698666, "learning_rate": 5.988377041920873e-07, "lm_loss": 0.398, "loss": 0.398, "step": 6237 }, { "epoch": 2.7961600239064657, "learning_rate": 5.962072948777569e-07, "lm_loss": 0.3656, "loss": 0.3656, "step": 6238 }, { "epoch": 2.7966082701430652, "learning_rate": 5.93582605635698e-07, "lm_loss": 0.3919, "loss": 0.3919, "step": 6239 }, { "epoch": 2.797056516379665, "learning_rate": 5.909636370811184e-07, "lm_loss": 0.2872, "loss": 0.2872, "step": 6240 }, { "epoch": 2.797504762616264, "learning_rate": 5.883503898278825e-07, "lm_loss": 0.3221, "loss": 0.3221, "step": 6241 }, { "epoch": 2.797953008852863, "learning_rate": 5.857428644885199e-07, "lm_loss": 0.3994, "loss": 0.3994, "step": 6242 }, { "epoch": 2.7984012550894626, "learning_rate": 5.831410616742083e-07, "lm_loss": 0.3618, "loss": 0.3618, "step": 6243 }, { "epoch": 2.7988495013260617, "learning_rate": 5.805449819948017e-07, "lm_loss": 0.3645, "loss": 0.3645, "step": 6244 }, { "epoch": 2.7992977475626613, "learning_rate": 5.779546260587964e-07, "lm_loss": 0.3245, "loss": 0.3245, "step": 6245 }, { "epoch": 2.7997459937992604, "learning_rate": 5.753699944733515e-07, "lm_loss": 0.417, "loss": 0.417, "step": 6246 }, { "epoch": 2.80019424003586, "learning_rate": 5.727910878442905e-07, "lm_loss": 0.3442, "loss": 0.3442, "step": 6247 }, { "epoch": 2.800642486272459, "learning_rate": 5.702179067760855e-07, "lm_loss": 0.3018, "loss": 0.3018, "step": 6248 }, { "epoch": 2.801090732509058, "learning_rate": 5.676504518718761e-07, "lm_loss": 0.4115, "loss": 0.4115, "step": 6249 }, { "epoch": 2.8015389787456577, "learning_rate": 5.650887237334563e-07, "lm_loss": 0.3418, "loss": 0.3418, "step": 6250 }, { "epoch": 2.801987224982257, "learning_rate": 5.625327229612704e-07, "lm_loss": 0.2863, "loss": 0.2863, "step": 6251 }, { "epoch": 2.8024354712188564, "learning_rate": 5.59982450154431e-07, "lm_loss": 0.4403, "loss": 0.4403, "step": 6252 }, { "epoch": 2.8028837174554555, "learning_rate": 5.574379059107043e-07, "lm_loss": 0.2792, "loss": 0.2792, "step": 6253 }, { "epoch": 2.803331963692055, "learning_rate": 5.548990908265078e-07, "lm_loss": 0.4344, "loss": 0.4344, "step": 6254 }, { "epoch": 2.803780209928654, "learning_rate": 5.523660054969265e-07, "lm_loss": 0.4467, "loss": 0.4467, "step": 6255 }, { "epoch": 2.8042284561652533, "learning_rate": 5.498386505156938e-07, "lm_loss": 0.4019, "loss": 0.4019, "step": 6256 }, { "epoch": 2.804676702401853, "learning_rate": 5.473170264752054e-07, "lm_loss": 0.2735, "loss": 0.2735, "step": 6257 }, { "epoch": 2.805124948638452, "learning_rate": 5.448011339665132e-07, "lm_loss": 0.3269, "loss": 0.3269, "step": 6258 }, { "epoch": 2.8055731948750515, "learning_rate": 5.422909735793208e-07, "lm_loss": 0.5015, "loss": 0.5015, "step": 6259 }, { "epoch": 2.8060214411116506, "learning_rate": 5.397865459019935e-07, "lm_loss": 0.3147, "loss": 0.3147, "step": 6260 }, { "epoch": 2.80646968734825, "learning_rate": 5.372878515215507e-07, "lm_loss": 0.4624, "loss": 0.4624, "step": 6261 }, { "epoch": 2.8069179335848493, "learning_rate": 5.347948910236683e-07, "lm_loss": 0.3919, "loss": 0.3919, "step": 6262 }, { "epoch": 2.8073661798214484, "learning_rate": 5.323076649926762e-07, "lm_loss": 0.3257, "loss": 0.3257, "step": 6263 }, { "epoch": 2.807814426058048, "learning_rate": 5.29826174011569e-07, "lm_loss": 0.3678, "loss": 0.3678, "step": 6264 }, { "epoch": 2.808262672294647, "learning_rate": 5.273504186619787e-07, "lm_loss": 0.3688, "loss": 0.3688, "step": 6265 }, { "epoch": 2.8087109185312467, "learning_rate": 5.248803995242163e-07, "lm_loss": 0.4721, "loss": 0.4721, "step": 6266 }, { "epoch": 2.8091591647678458, "learning_rate": 5.224161171772268e-07, "lm_loss": 0.431, "loss": 0.431, "step": 6267 }, { "epoch": 2.8096074110044453, "learning_rate": 5.199575721986261e-07, "lm_loss": 0.3959, "loss": 0.3959, "step": 6268 }, { "epoch": 2.8100556572410444, "learning_rate": 5.175047651646808e-07, "lm_loss": 0.325, "loss": 0.325, "step": 6269 }, { "epoch": 2.8105039034776436, "learning_rate": 5.150576966503063e-07, "lm_loss": 0.4459, "loss": 0.4459, "step": 6270 }, { "epoch": 2.810952149714243, "learning_rate": 5.126163672290796e-07, "lm_loss": 0.5767, "loss": 0.5767, "step": 6271 }, { "epoch": 2.8114003959508422, "learning_rate": 5.101807774732292e-07, "lm_loss": 0.2427, "loss": 0.2427, "step": 6272 }, { "epoch": 2.811848642187442, "learning_rate": 5.077509279536402e-07, "lm_loss": 0.3847, "loss": 0.3847, "step": 6273 }, { "epoch": 2.812296888424041, "learning_rate": 5.053268192398541e-07, "lm_loss": 0.3749, "loss": 0.3749, "step": 6274 }, { "epoch": 2.8127451346606405, "learning_rate": 5.02908451900061e-07, "lm_loss": 0.3029, "loss": 0.3029, "step": 6275 }, { "epoch": 2.8131933808972396, "learning_rate": 5.004958265011129e-07, "lm_loss": 0.3629, "loss": 0.3629, "step": 6276 }, { "epoch": 2.8136416271338387, "learning_rate": 4.980889436085101e-07, "lm_loss": 0.3203, "loss": 0.3203, "step": 6277 }, { "epoch": 2.8140898733704383, "learning_rate": 4.956878037864043e-07, "lm_loss": 0.3852, "loss": 0.3852, "step": 6278 }, { "epoch": 2.8145381196070374, "learning_rate": 4.93292407597612e-07, "lm_loss": 0.4052, "loss": 0.4052, "step": 6279 }, { "epoch": 2.814986365843637, "learning_rate": 4.909027556035923e-07, "lm_loss": 0.364, "loss": 0.364, "step": 6280 }, { "epoch": 2.815434612080236, "learning_rate": 4.88518848364461e-07, "lm_loss": 0.2626, "loss": 0.2626, "step": 6281 }, { "epoch": 2.8158828583168356, "learning_rate": 4.861406864389961e-07, "lm_loss": 0.3301, "loss": 0.3301, "step": 6282 }, { "epoch": 2.8163311045534347, "learning_rate": 4.837682703846103e-07, "lm_loss": 0.5481, "loss": 0.5481, "step": 6283 }, { "epoch": 2.816779350790034, "learning_rate": 4.814016007573918e-07, "lm_loss": 0.4164, "loss": 0.4164, "step": 6284 }, { "epoch": 2.8172275970266334, "learning_rate": 4.790406781120638e-07, "lm_loss": 0.4201, "loss": 0.4201, "step": 6285 }, { "epoch": 2.8176758432632325, "learning_rate": 4.766855030020112e-07, "lm_loss": 0.5335, "loss": 0.5335, "step": 6286 }, { "epoch": 2.818124089499832, "learning_rate": 4.743360759792731e-07, "lm_loss": 0.3864, "loss": 0.3864, "step": 6287 }, { "epoch": 2.818572335736431, "learning_rate": 4.7199239759453395e-07, "lm_loss": 0.3621, "loss": 0.3621, "step": 6288 }, { "epoch": 2.8190205819730307, "learning_rate": 4.696544683971377e-07, "lm_loss": 0.2695, "loss": 0.2695, "step": 6289 }, { "epoch": 2.81946882820963, "learning_rate": 4.673222889350765e-07, "lm_loss": 0.4114, "loss": 0.4114, "step": 6290 }, { "epoch": 2.819917074446229, "learning_rate": 4.6499585975499637e-07, "lm_loss": 0.431, "loss": 0.431, "step": 6291 }, { "epoch": 2.8203653206828285, "learning_rate": 4.6267518140219734e-07, "lm_loss": 0.223, "loss": 0.223, "step": 6292 }, { "epoch": 2.8208135669194276, "learning_rate": 4.603602544206248e-07, "lm_loss": 0.4329, "loss": 0.4329, "step": 6293 }, { "epoch": 2.821261813156027, "learning_rate": 4.5805107935288637e-07, "lm_loss": 0.3828, "loss": 0.3828, "step": 6294 }, { "epoch": 2.8217100593926263, "learning_rate": 4.5574765674023523e-07, "lm_loss": 0.4586, "loss": 0.4586, "step": 6295 }, { "epoch": 2.822158305629226, "learning_rate": 4.534499871225728e-07, "lm_loss": 0.2546, "loss": 0.2546, "step": 6296 }, { "epoch": 2.822606551865825, "learning_rate": 4.5115807103845997e-07, "lm_loss": 0.4021, "loss": 0.4021, "step": 6297 }, { "epoch": 2.823054798102424, "learning_rate": 4.488719090251059e-07, "lm_loss": 0.4333, "loss": 0.4333, "step": 6298 }, { "epoch": 2.8235030443390237, "learning_rate": 4.4659150161836806e-07, "lm_loss": 0.2064, "loss": 0.2064, "step": 6299 }, { "epoch": 2.8239512905756228, "learning_rate": 4.4431684935275497e-07, "lm_loss": 0.3953, "loss": 0.3953, "step": 6300 }, { "epoch": 2.8243995368122223, "learning_rate": 4.420479527614346e-07, "lm_loss": 0.4747, "loss": 0.4747, "step": 6301 }, { "epoch": 2.8248477830488214, "learning_rate": 4.3978481237621495e-07, "lm_loss": 0.3533, "loss": 0.3533, "step": 6302 }, { "epoch": 2.825296029285421, "learning_rate": 4.37527428727566e-07, "lm_loss": 0.3856, "loss": 0.3856, "step": 6303 }, { "epoch": 2.82574427552202, "learning_rate": 4.352758023445952e-07, "lm_loss": 0.3288, "loss": 0.3288, "step": 6304 }, { "epoch": 2.8261925217586192, "learning_rate": 4.330299337550692e-07, "lm_loss": 0.4838, "loss": 0.4838, "step": 6305 }, { "epoch": 2.826640767995219, "learning_rate": 4.3078982348540854e-07, "lm_loss": 0.4525, "loss": 0.4525, "step": 6306 }, { "epoch": 2.827089014231818, "learning_rate": 4.28555472060671e-07, "lm_loss": 0.3443, "loss": 0.3443, "step": 6307 }, { "epoch": 2.8275372604684175, "learning_rate": 4.2632688000457386e-07, "lm_loss": 0.3982, "loss": 0.3982, "step": 6308 }, { "epoch": 2.8279855067050166, "learning_rate": 4.2410404783949087e-07, "lm_loss": 0.384, "loss": 0.384, "step": 6309 }, { "epoch": 2.828433752941616, "learning_rate": 4.2188697608642745e-07, "lm_loss": 0.3484, "loss": 0.3484, "step": 6310 }, { "epoch": 2.8288819991782153, "learning_rate": 4.1967566526505684e-07, "lm_loss": 0.4248, "loss": 0.4248, "step": 6311 }, { "epoch": 2.8293302454148144, "learning_rate": 4.1747011589368947e-07, "lm_loss": 0.3899, "loss": 0.3899, "step": 6312 }, { "epoch": 2.829778491651414, "learning_rate": 4.1527032848929225e-07, "lm_loss": 0.3725, "loss": 0.3725, "step": 6313 }, { "epoch": 2.830226737888013, "learning_rate": 4.130763035674806e-07, "lm_loss": 0.4775, "loss": 0.4775, "step": 6314 }, { "epoch": 2.8306749841246126, "learning_rate": 4.108880416425154e-07, "lm_loss": 0.2524, "loss": 0.2524, "step": 6315 }, { "epoch": 2.8311232303612117, "learning_rate": 4.0870554322731123e-07, "lm_loss": 0.4285, "loss": 0.4285, "step": 6316 }, { "epoch": 2.8315714765978113, "learning_rate": 4.065288088334285e-07, "lm_loss": 0.2994, "loss": 0.2994, "step": 6317 }, { "epoch": 2.8320197228344104, "learning_rate": 4.043578389710756e-07, "lm_loss": 0.3884, "loss": 0.3884, "step": 6318 }, { "epoch": 2.8324679690710095, "learning_rate": 4.0219263414911777e-07, "lm_loss": 0.4387, "loss": 0.4387, "step": 6319 }, { "epoch": 2.832916215307609, "learning_rate": 4.0003319487506297e-07, "lm_loss": 0.2982, "loss": 0.2982, "step": 6320 }, { "epoch": 2.833364461544208, "learning_rate": 3.978795216550646e-07, "lm_loss": 0.3446, "loss": 0.3446, "step": 6321 }, { "epoch": 2.8338127077808077, "learning_rate": 3.957316149939272e-07, "lm_loss": 0.4872, "loss": 0.4872, "step": 6322 }, { "epoch": 2.834260954017407, "learning_rate": 3.9358947539510913e-07, "lm_loss": 0.4785, "loss": 0.4785, "step": 6323 }, { "epoch": 2.8347092002540064, "learning_rate": 3.9145310336071164e-07, "lm_loss": 0.2795, "loss": 0.2795, "step": 6324 }, { "epoch": 2.8351574464906055, "learning_rate": 3.8932249939147847e-07, "lm_loss": 0.4409, "loss": 0.4409, "step": 6325 }, { "epoch": 2.8356056927272046, "learning_rate": 3.871976639868158e-07, "lm_loss": 0.4019, "loss": 0.4019, "step": 6326 }, { "epoch": 2.836053938963804, "learning_rate": 3.850785976447668e-07, "lm_loss": 0.3531, "loss": 0.3531, "step": 6327 }, { "epoch": 2.8365021852004033, "learning_rate": 3.829653008620232e-07, "lm_loss": 0.3527, "loss": 0.3527, "step": 6328 }, { "epoch": 2.836950431437003, "learning_rate": 3.808577741339303e-07, "lm_loss": 0.3349, "loss": 0.3349, "step": 6329 }, { "epoch": 2.837398677673602, "learning_rate": 3.787560179544708e-07, "lm_loss": 0.4929, "loss": 0.4929, "step": 6330 }, { "epoch": 2.8378469239102015, "learning_rate": 3.7666003281628946e-07, "lm_loss": 0.4432, "loss": 0.4432, "step": 6331 }, { "epoch": 2.8382951701468007, "learning_rate": 3.745698192106656e-07, "lm_loss": 0.2344, "loss": 0.2344, "step": 6332 }, { "epoch": 2.8387434163833998, "learning_rate": 3.7248537762752667e-07, "lm_loss": 0.4132, "loss": 0.4132, "step": 6333 }, { "epoch": 2.8391916626199993, "learning_rate": 3.704067085554569e-07, "lm_loss": 0.3497, "loss": 0.3497, "step": 6334 }, { "epoch": 2.8396399088565984, "learning_rate": 3.683338124816804e-07, "lm_loss": 0.3632, "loss": 0.3632, "step": 6335 }, { "epoch": 2.840088155093198, "learning_rate": 3.6626668989206415e-07, "lm_loss": 0.3652, "loss": 0.3652, "step": 6336 }, { "epoch": 2.840536401329797, "learning_rate": 3.642053412711316e-07, "lm_loss": 0.4176, "loss": 0.4176, "step": 6337 }, { "epoch": 2.8409846475663967, "learning_rate": 3.621497671020463e-07, "lm_loss": 0.2773, "loss": 0.2773, "step": 6338 }, { "epoch": 2.841432893802996, "learning_rate": 3.6009996786661724e-07, "lm_loss": 0.2933, "loss": 0.2933, "step": 6339 }, { "epoch": 2.841881140039595, "learning_rate": 3.580559440453102e-07, "lm_loss": 0.4218, "loss": 0.4218, "step": 6340 }, { "epoch": 2.8423293862761945, "learning_rate": 3.5601769611722236e-07, "lm_loss": 0.3234, "loss": 0.3234, "step": 6341 }, { "epoch": 2.8427776325127936, "learning_rate": 3.53985224560105e-07, "lm_loss": 0.4209, "loss": 0.4209, "step": 6342 }, { "epoch": 2.843225878749393, "learning_rate": 3.519585298503575e-07, "lm_loss": 0.5082, "loss": 0.5082, "step": 6343 }, { "epoch": 2.8436741249859923, "learning_rate": 3.499376124630194e-07, "lm_loss": 0.2823, "loss": 0.2823, "step": 6344 }, { "epoch": 2.844122371222592, "learning_rate": 3.479224728717839e-07, "lm_loss": 0.3637, "loss": 0.3637, "step": 6345 }, { "epoch": 2.844570617459191, "learning_rate": 3.4591311154897885e-07, "lm_loss": 0.3685, "loss": 0.3685, "step": 6346 }, { "epoch": 2.84501886369579, "learning_rate": 3.439095289655886e-07, "lm_loss": 0.3695, "loss": 0.3695, "step": 6347 }, { "epoch": 2.8454671099323896, "learning_rate": 3.419117255912374e-07, "lm_loss": 0.3263, "loss": 0.3263, "step": 6348 }, { "epoch": 2.8459153561689887, "learning_rate": 3.399197018941952e-07, "lm_loss": 0.324, "loss": 0.324, "step": 6349 }, { "epoch": 2.8463636024055883, "learning_rate": 3.3793345834137737e-07, "lm_loss": 0.4484, "loss": 0.4484, "step": 6350 }, { "epoch": 2.8468118486421874, "learning_rate": 3.359529953983476e-07, "lm_loss": 0.3177, "loss": 0.3177, "step": 6351 }, { "epoch": 2.847260094878787, "learning_rate": 3.339783135293095e-07, "lm_loss": 0.3137, "loss": 0.3137, "step": 6352 }, { "epoch": 2.847708341115386, "learning_rate": 3.320094131971124e-07, "lm_loss": 0.3733, "loss": 0.3733, "step": 6353 }, { "epoch": 2.848156587351985, "learning_rate": 3.300462948632593e-07, "lm_loss": 0.473, "loss": 0.473, "step": 6354 }, { "epoch": 2.8486048335885847, "learning_rate": 3.280889589878822e-07, "lm_loss": 0.3173, "loss": 0.3173, "step": 6355 }, { "epoch": 2.849053079825184, "learning_rate": 3.2613740602977506e-07, "lm_loss": 0.3406, "loss": 0.3406, "step": 6356 }, { "epoch": 2.8495013260617834, "learning_rate": 3.2419163644636095e-07, "lm_loss": 0.335, "loss": 0.335, "step": 6357 }, { "epoch": 2.8499495722983825, "learning_rate": 3.2225165069371397e-07, "lm_loss": 0.3274, "loss": 0.3274, "step": 6358 }, { "epoch": 2.850397818534982, "learning_rate": 3.2031744922656205e-07, "lm_loss": 0.5031, "loss": 0.5031, "step": 6359 }, { "epoch": 2.850846064771581, "learning_rate": 3.1838903249825635e-07, "lm_loss": 0.5297, "loss": 0.5297, "step": 6360 }, { "epoch": 2.8512943110081803, "learning_rate": 3.164664009608104e-07, "lm_loss": 0.3748, "loss": 0.3748, "step": 6361 }, { "epoch": 2.85174255724478, "learning_rate": 3.145495550648747e-07, "lm_loss": 0.3721, "loss": 0.3721, "step": 6362 }, { "epoch": 2.852190803481379, "learning_rate": 3.1263849525973997e-07, "lm_loss": 0.525, "loss": 0.525, "step": 6363 }, { "epoch": 2.8526390497179785, "learning_rate": 3.1073322199334785e-07, "lm_loss": 0.3462, "loss": 0.3462, "step": 6364 }, { "epoch": 2.8530872959545777, "learning_rate": 3.088337357122828e-07, "lm_loss": 0.2695, "loss": 0.2695, "step": 6365 }, { "epoch": 2.853535542191177, "learning_rate": 3.069400368617692e-07, "lm_loss": 0.3287, "loss": 0.3287, "step": 6366 }, { "epoch": 2.8539837884277763, "learning_rate": 3.0505212588567144e-07, "lm_loss": 0.5302, "loss": 0.5302, "step": 6367 }, { "epoch": 2.8544320346643755, "learning_rate": 3.0317000322650777e-07, "lm_loss": 0.2703, "loss": 0.2703, "step": 6368 }, { "epoch": 2.854880280900975, "learning_rate": 3.012936693254337e-07, "lm_loss": 0.3498, "loss": 0.3498, "step": 6369 }, { "epoch": 2.855328527137574, "learning_rate": 2.994231246222473e-07, "lm_loss": 0.4498, "loss": 0.4498, "step": 6370 }, { "epoch": 2.8557767733741737, "learning_rate": 2.9755836955538964e-07, "lm_loss": 0.3785, "loss": 0.3785, "step": 6371 }, { "epoch": 2.856225019610773, "learning_rate": 2.956994045619471e-07, "lm_loss": 0.4155, "loss": 0.4155, "step": 6372 }, { "epoch": 2.8566732658473724, "learning_rate": 2.9384623007764347e-07, "lm_loss": 0.5197, "loss": 0.5197, "step": 6373 }, { "epoch": 2.8571215120839715, "learning_rate": 2.919988465368589e-07, "lm_loss": 0.3432, "loss": 0.3432, "step": 6374 }, { "epoch": 2.8575697583205706, "learning_rate": 2.9015725437259724e-07, "lm_loss": 0.3548, "loss": 0.3548, "step": 6375 }, { "epoch": 2.85801800455717, "learning_rate": 2.8832145401651853e-07, "lm_loss": 0.3353, "loss": 0.3353, "step": 6376 }, { "epoch": 2.8584662507937693, "learning_rate": 2.8649144589892316e-07, "lm_loss": 0.2919, "loss": 0.2919, "step": 6377 }, { "epoch": 2.858914497030369, "learning_rate": 2.8466723044874575e-07, "lm_loss": 0.5149, "loss": 0.5149, "step": 6378 }, { "epoch": 2.859362743266968, "learning_rate": 2.8284880809357484e-07, "lm_loss": 0.3664, "loss": 0.3664, "step": 6379 }, { "epoch": 2.8598109895035675, "learning_rate": 2.810361792596333e-07, "lm_loss": 0.4374, "loss": 0.4374, "step": 6380 }, { "epoch": 2.8602592357401666, "learning_rate": 2.7922934437178695e-07, "lm_loss": 0.3589, "loss": 0.3589, "step": 6381 }, { "epoch": 2.8607074819767657, "learning_rate": 2.774283038535469e-07, "lm_loss": 0.2897, "loss": 0.2897, "step": 6382 }, { "epoch": 2.8611557282133653, "learning_rate": 2.7563305812706163e-07, "lm_loss": 0.3079, "loss": 0.3079, "step": 6383 }, { "epoch": 2.8616039744499644, "learning_rate": 2.738436076131279e-07, "lm_loss": 0.4301, "loss": 0.4301, "step": 6384 }, { "epoch": 2.862052220686564, "learning_rate": 2.720599527311768e-07, "lm_loss": 0.3911, "loss": 0.3911, "step": 6385 }, { "epoch": 2.862500466923163, "learning_rate": 2.7028209389928226e-07, "lm_loss": 0.3105, "loss": 0.3105, "step": 6386 }, { "epoch": 2.8629487131597626, "learning_rate": 2.685100315341665e-07, "lm_loss": 0.5035, "loss": 0.5035, "step": 6387 }, { "epoch": 2.8633969593963617, "learning_rate": 2.667437660511862e-07, "lm_loss": 0.4139, "loss": 0.4139, "step": 6388 }, { "epoch": 2.863845205632961, "learning_rate": 2.6498329786433797e-07, "lm_loss": 0.5618, "loss": 0.5618, "step": 6389 }, { "epoch": 2.8642934518695604, "learning_rate": 2.6322862738626386e-07, "lm_loss": 0.3183, "loss": 0.3183, "step": 6390 }, { "epoch": 2.8647416981061595, "learning_rate": 2.6147975502824886e-07, "lm_loss": 0.3654, "loss": 0.3654, "step": 6391 }, { "epoch": 2.865189944342759, "learning_rate": 2.59736681200215e-07, "lm_loss": 0.4338, "loss": 0.4338, "step": 6392 }, { "epoch": 2.865638190579358, "learning_rate": 2.579994063107272e-07, "lm_loss": 0.4207, "loss": 0.4207, "step": 6393 }, { "epoch": 2.8660864368159578, "learning_rate": 2.562679307669874e-07, "lm_loss": 0.3634, "loss": 0.3634, "step": 6394 }, { "epoch": 2.866534683052557, "learning_rate": 2.545422549748433e-07, "lm_loss": 0.2124, "loss": 0.2124, "step": 6395 }, { "epoch": 2.866982929289156, "learning_rate": 2.528223793387796e-07, "lm_loss": 0.6353, "loss": 0.6353, "step": 6396 }, { "epoch": 2.8674311755257555, "learning_rate": 2.5110830426192113e-07, "lm_loss": 0.307, "loss": 0.307, "step": 6397 }, { "epoch": 2.8678794217623547, "learning_rate": 2.494000301460353e-07, "lm_loss": 0.4311, "loss": 0.4311, "step": 6398 }, { "epoch": 2.8683276679989542, "learning_rate": 2.4769755739153246e-07, "lm_loss": 0.51, "loss": 0.51, "step": 6399 }, { "epoch": 2.8687759142355533, "learning_rate": 2.460008863974544e-07, "lm_loss": 0.4065, "loss": 0.4065, "step": 6400 }, { "epoch": 2.869224160472153, "learning_rate": 2.443100175614943e-07, "lm_loss": 0.5647, "loss": 0.5647, "step": 6401 }, { "epoch": 2.869672406708752, "learning_rate": 2.4262495127997663e-07, "lm_loss": 0.2578, "loss": 0.2578, "step": 6402 }, { "epoch": 2.870120652945351, "learning_rate": 2.4094568794786885e-07, "lm_loss": 0.3863, "loss": 0.3863, "step": 6403 }, { "epoch": 2.8705688991819507, "learning_rate": 2.3927222795877833e-07, "lm_loss": 0.4033, "loss": 0.4033, "step": 6404 }, { "epoch": 2.87101714541855, "learning_rate": 2.3760457170495243e-07, "lm_loss": 0.3545, "loss": 0.3545, "step": 6405 }, { "epoch": 2.8714653916551494, "learning_rate": 2.3594271957727841e-07, "lm_loss": 0.3817, "loss": 0.3817, "step": 6406 }, { "epoch": 2.8719136378917485, "learning_rate": 2.342866719652781e-07, "lm_loss": 0.2645, "loss": 0.2645, "step": 6407 }, { "epoch": 2.872361884128348, "learning_rate": 2.3263642925712147e-07, "lm_loss": 0.5767, "loss": 0.5767, "step": 6408 }, { "epoch": 2.872810130364947, "learning_rate": 2.3099199183961307e-07, "lm_loss": 0.3164, "loss": 0.3164, "step": 6409 }, { "epoch": 2.8732583766015463, "learning_rate": 2.2935336009819453e-07, "lm_loss": 0.4746, "loss": 0.4746, "step": 6410 }, { "epoch": 2.873706622838146, "learning_rate": 2.2772053441695306e-07, "lm_loss": 0.3541, "loss": 0.3541, "step": 6411 }, { "epoch": 2.874154869074745, "learning_rate": 2.2609351517860754e-07, "lm_loss": 0.3167, "loss": 0.3167, "step": 6412 }, { "epoch": 2.8746031153113445, "learning_rate": 2.244723027645196e-07, "lm_loss": 0.3299, "loss": 0.3299, "step": 6413 }, { "epoch": 2.8750513615479436, "learning_rate": 2.2285689755469352e-07, "lm_loss": 0.3921, "loss": 0.3921, "step": 6414 }, { "epoch": 2.875499607784543, "learning_rate": 2.2124729992776537e-07, "lm_loss": 0.3828, "loss": 0.3828, "step": 6415 }, { "epoch": 2.8759478540211423, "learning_rate": 2.1964351026101392e-07, "lm_loss": 0.2967, "loss": 0.2967, "step": 6416 }, { "epoch": 2.8763961002577414, "learning_rate": 2.180455289303579e-07, "lm_loss": 0.5876, "loss": 0.5876, "step": 6417 }, { "epoch": 2.876844346494341, "learning_rate": 2.164533563103477e-07, "lm_loss": 0.4501, "loss": 0.4501, "step": 6418 }, { "epoch": 2.87729259273094, "learning_rate": 2.1486699277418198e-07, "lm_loss": 0.2359, "loss": 0.2359, "step": 6419 }, { "epoch": 2.8777408389675396, "learning_rate": 2.1328643869368837e-07, "lm_loss": 0.4734, "loss": 0.4734, "step": 6420 }, { "epoch": 2.8781890852041387, "learning_rate": 2.117116944393399e-07, "lm_loss": 0.427, "loss": 0.427, "step": 6421 }, { "epoch": 2.8786373314407383, "learning_rate": 2.1014276038024694e-07, "lm_loss": 0.2479, "loss": 0.2479, "step": 6422 }, { "epoch": 2.8790855776773374, "learning_rate": 2.0857963688415415e-07, "lm_loss": 0.3628, "loss": 0.3628, "step": 6423 }, { "epoch": 2.8795338239139365, "learning_rate": 2.070223243174463e-07, "lm_loss": 0.3498, "loss": 0.3498, "step": 6424 }, { "epoch": 2.879982070150536, "learning_rate": 2.054708230451452e-07, "lm_loss": 0.5097, "loss": 0.5097, "step": 6425 }, { "epoch": 2.880430316387135, "learning_rate": 2.039251334309128e-07, "lm_loss": 0.3256, "loss": 0.3256, "step": 6426 }, { "epoch": 2.8808785626237348, "learning_rate": 2.0238525583705092e-07, "lm_loss": 0.3965, "loss": 0.3965, "step": 6427 }, { "epoch": 2.881326808860334, "learning_rate": 2.0085119062448754e-07, "lm_loss": 0.3682, "loss": 0.3682, "step": 6428 }, { "epoch": 2.8817750550969334, "learning_rate": 1.9932293815280446e-07, "lm_loss": 0.2671, "loss": 0.2671, "step": 6429 }, { "epoch": 2.8822233013335326, "learning_rate": 1.9780049878020956e-07, "lm_loss": 0.4203, "loss": 0.4203, "step": 6430 }, { "epoch": 2.8826715475701317, "learning_rate": 1.9628387286355078e-07, "lm_loss": 0.3322, "loss": 0.3322, "step": 6431 }, { "epoch": 2.8831197938067312, "learning_rate": 1.947730607583159e-07, "lm_loss": 0.3624, "loss": 0.3624, "step": 6432 }, { "epoch": 2.8835680400433303, "learning_rate": 1.932680628186273e-07, "lm_loss": 0.5199, "loss": 0.5199, "step": 6433 }, { "epoch": 2.88401628627993, "learning_rate": 1.917688793972472e-07, "lm_loss": 0.3444, "loss": 0.3444, "step": 6434 }, { "epoch": 2.884464532516529, "learning_rate": 1.9027551084556948e-07, "lm_loss": 0.3832, "loss": 0.3832, "step": 6435 }, { "epoch": 2.8849127787531286, "learning_rate": 1.8878795751363364e-07, "lm_loss": 0.4728, "loss": 0.4728, "step": 6436 }, { "epoch": 2.8853610249897277, "learning_rate": 1.8730621975010797e-07, "lm_loss": 0.4483, "loss": 0.4483, "step": 6437 }, { "epoch": 2.885809271226327, "learning_rate": 1.8583029790230355e-07, "lm_loss": 0.1753, "loss": 0.1753, "step": 6438 }, { "epoch": 2.8862575174629264, "learning_rate": 1.843601923161631e-07, "lm_loss": 0.4235, "loss": 0.4235, "step": 6439 }, { "epoch": 2.8867057636995255, "learning_rate": 1.8289590333627204e-07, "lm_loss": 0.3467, "loss": 0.3467, "step": 6440 }, { "epoch": 2.887154009936125, "learning_rate": 1.8143743130584757e-07, "lm_loss": 0.3009, "loss": 0.3009, "step": 6441 }, { "epoch": 2.887602256172724, "learning_rate": 1.7998477656674396e-07, "lm_loss": 0.4188, "loss": 0.4188, "step": 6442 }, { "epoch": 2.8880505024093237, "learning_rate": 1.7853793945945274e-07, "lm_loss": 0.3967, "loss": 0.3967, "step": 6443 }, { "epoch": 2.888498748645923, "learning_rate": 1.7709692032310267e-07, "lm_loss": 0.3059, "loss": 0.3059, "step": 6444 }, { "epoch": 2.888946994882522, "learning_rate": 1.7566171949545963e-07, "lm_loss": 0.2761, "loss": 0.2761, "step": 6445 }, { "epoch": 2.8893952411191215, "learning_rate": 1.74232337312924e-07, "lm_loss": 0.423, "loss": 0.423, "step": 6446 }, { "epoch": 2.8898434873557206, "learning_rate": 1.7280877411053054e-07, "lm_loss": 0.3, "loss": 0.3, "step": 6447 }, { "epoch": 2.89029173359232, "learning_rate": 1.7139103022195403e-07, "lm_loss": 0.3685, "loss": 0.3685, "step": 6448 }, { "epoch": 2.8907399798289193, "learning_rate": 1.699791059795036e-07, "lm_loss": 0.5049, "loss": 0.5049, "step": 6449 }, { "epoch": 2.891188226065519, "learning_rate": 1.6857300171412293e-07, "lm_loss": 0.3734, "loss": 0.3734, "step": 6450 }, { "epoch": 2.891636472302118, "learning_rate": 1.671727177553928e-07, "lm_loss": 0.7469, "loss": 0.7469, "step": 6451 }, { "epoch": 2.892084718538717, "learning_rate": 1.6577825443153116e-07, "lm_loss": 0.2987, "loss": 0.2987, "step": 6452 }, { "epoch": 2.8925329647753166, "learning_rate": 1.6438961206938497e-07, "lm_loss": 0.3142, "loss": 0.3142, "step": 6453 }, { "epoch": 2.8929812110119157, "learning_rate": 1.6300679099444937e-07, "lm_loss": 0.2764, "loss": 0.2764, "step": 6454 }, { "epoch": 2.8934294572485153, "learning_rate": 1.616297915308429e-07, "lm_loss": 0.4509, "loss": 0.4509, "step": 6455 }, { "epoch": 2.8938777034851144, "learning_rate": 1.602586140013268e-07, "lm_loss": 0.3032, "loss": 0.3032, "step": 6456 }, { "epoch": 2.894325949721714, "learning_rate": 1.588932587272912e-07, "lm_loss": 0.4296, "loss": 0.4296, "step": 6457 }, { "epoch": 2.894774195958313, "learning_rate": 1.5753372602876902e-07, "lm_loss": 0.2407, "loss": 0.2407, "step": 6458 }, { "epoch": 2.895222442194912, "learning_rate": 1.5618001622442479e-07, "lm_loss": 0.4027, "loss": 0.4027, "step": 6459 }, { "epoch": 2.8956706884315118, "learning_rate": 1.5483212963155469e-07, "lm_loss": 0.3876, "loss": 0.3876, "step": 6460 }, { "epoch": 2.896118934668111, "learning_rate": 1.5349006656609767e-07, "lm_loss": 0.3213, "loss": 0.3213, "step": 6461 }, { "epoch": 2.8965671809047104, "learning_rate": 1.5215382734262428e-07, "lm_loss": 0.2824, "loss": 0.2824, "step": 6462 }, { "epoch": 2.8970154271413096, "learning_rate": 1.5082341227433405e-07, "lm_loss": 0.4022, "loss": 0.4022, "step": 6463 }, { "epoch": 2.897463673377909, "learning_rate": 1.4949882167307193e-07, "lm_loss": 0.269, "loss": 0.269, "step": 6464 }, { "epoch": 2.8979119196145082, "learning_rate": 1.4818005584930904e-07, "lm_loss": 0.2777, "loss": 0.2777, "step": 6465 }, { "epoch": 2.8983601658511073, "learning_rate": 1.4686711511215646e-07, "lm_loss": 0.3852, "loss": 0.3852, "step": 6466 }, { "epoch": 2.898808412087707, "learning_rate": 1.4555999976935686e-07, "lm_loss": 0.3231, "loss": 0.3231, "step": 6467 }, { "epoch": 2.899256658324306, "learning_rate": 1.442587101272902e-07, "lm_loss": 0.3474, "loss": 0.3474, "step": 6468 }, { "epoch": 2.8997049045609056, "learning_rate": 1.4296324649096526e-07, "lm_loss": 0.3686, "loss": 0.3686, "step": 6469 }, { "epoch": 2.9001531507975047, "learning_rate": 1.416736091640364e-07, "lm_loss": 0.3821, "loss": 0.3821, "step": 6470 }, { "epoch": 2.9006013970341042, "learning_rate": 1.4038979844877842e-07, "lm_loss": 0.3529, "loss": 0.3529, "step": 6471 }, { "epoch": 2.9010496432707034, "learning_rate": 1.39111814646109e-07, "lm_loss": 0.3081, "loss": 0.3081, "step": 6472 }, { "epoch": 2.9014978895073025, "learning_rate": 1.3783965805558018e-07, "lm_loss": 0.4644, "loss": 0.4644, "step": 6473 }, { "epoch": 2.901946135743902, "learning_rate": 1.3657332897537566e-07, "lm_loss": 0.2271, "loss": 0.2271, "step": 6474 }, { "epoch": 2.902394381980501, "learning_rate": 1.3531282770231358e-07, "lm_loss": 0.3309, "loss": 0.3309, "step": 6475 }, { "epoch": 2.9028426282171007, "learning_rate": 1.3405815453184645e-07, "lm_loss": 0.402, "loss": 0.402, "step": 6476 }, { "epoch": 2.9032908744537, "learning_rate": 1.328093097580585e-07, "lm_loss": 0.3705, "loss": 0.3705, "step": 6477 }, { "epoch": 2.9037391206902994, "learning_rate": 1.315662936736739e-07, "lm_loss": 0.5215, "loss": 0.5215, "step": 6478 }, { "epoch": 2.9041873669268985, "learning_rate": 1.303291065700457e-07, "lm_loss": 0.48, "loss": 0.48, "step": 6479 }, { "epoch": 2.9046356131634976, "learning_rate": 1.2909774873715585e-07, "lm_loss": 0.5203, "loss": 0.5203, "step": 6480 }, { "epoch": 2.905083859400097, "learning_rate": 1.278722204636318e-07, "lm_loss": 0.3256, "loss": 0.3256, "step": 6481 }, { "epoch": 2.9055321056366963, "learning_rate": 1.2665252203672706e-07, "lm_loss": 0.3319, "loss": 0.3319, "step": 6482 }, { "epoch": 2.905980351873296, "learning_rate": 1.2543865374233242e-07, "lm_loss": 0.4021, "loss": 0.4021, "step": 6483 }, { "epoch": 2.906428598109895, "learning_rate": 1.2423061586496477e-07, "lm_loss": 0.312, "loss": 0.312, "step": 6484 }, { "epoch": 2.9068768443464945, "learning_rate": 1.2302840868778364e-07, "lm_loss": 0.4951, "loss": 0.4951, "step": 6485 }, { "epoch": 2.9073250905830936, "learning_rate": 1.218320324925748e-07, "lm_loss": 0.2589, "loss": 0.2589, "step": 6486 }, { "epoch": 2.9077733368196927, "learning_rate": 1.2064148755976114e-07, "lm_loss": 0.6347, "loss": 0.6347, "step": 6487 }, { "epoch": 2.9082215830562923, "learning_rate": 1.1945677416839995e-07, "lm_loss": 0.413, "loss": 0.413, "step": 6488 }, { "epoch": 2.9086698292928914, "learning_rate": 1.1827789259617195e-07, "lm_loss": 0.2066, "loss": 0.2066, "step": 6489 }, { "epoch": 2.909118075529491, "learning_rate": 1.1710484311940883e-07, "lm_loss": 0.5053, "loss": 0.5053, "step": 6490 }, { "epoch": 2.90956632176609, "learning_rate": 1.1593762601306013e-07, "lm_loss": 0.3158, "loss": 0.3158, "step": 6491 }, { "epoch": 2.9100145680026896, "learning_rate": 1.1477624155071253e-07, "lm_loss": 0.3469, "loss": 0.3469, "step": 6492 }, { "epoch": 2.9104628142392888, "learning_rate": 1.136206900045872e-07, "lm_loss": 0.4454, "loss": 0.4454, "step": 6493 }, { "epoch": 2.910911060475888, "learning_rate": 1.1247097164553411e-07, "lm_loss": 0.3437, "loss": 0.3437, "step": 6494 }, { "epoch": 2.9113593067124874, "learning_rate": 1.1132708674304326e-07, "lm_loss": 0.3086, "loss": 0.3086, "step": 6495 }, { "epoch": 2.9118075529490866, "learning_rate": 1.1018903556523075e-07, "lm_loss": 0.4276, "loss": 0.4276, "step": 6496 }, { "epoch": 2.912255799185686, "learning_rate": 1.0905681837884708e-07, "lm_loss": 0.3703, "loss": 0.3703, "step": 6497 }, { "epoch": 2.9127040454222852, "learning_rate": 1.0793043544927717e-07, "lm_loss": 0.365, "loss": 0.365, "step": 6498 }, { "epoch": 2.913152291658885, "learning_rate": 1.0680988704053486e-07, "lm_loss": 0.4388, "loss": 0.4388, "step": 6499 }, { "epoch": 2.913600537895484, "learning_rate": 1.056951734152739e-07, "lm_loss": 0.3169, "loss": 0.3169, "step": 6500 }, { "epoch": 2.914048784132083, "learning_rate": 1.0458629483476867e-07, "lm_loss": 0.4006, "loss": 0.4006, "step": 6501 }, { "epoch": 2.9144970303686826, "learning_rate": 1.0348325155893346e-07, "lm_loss": 0.3042, "loss": 0.3042, "step": 6502 }, { "epoch": 2.9149452766052817, "learning_rate": 1.02386043846317e-07, "lm_loss": 0.4195, "loss": 0.4195, "step": 6503 }, { "epoch": 2.9153935228418812, "learning_rate": 1.0129467195409692e-07, "lm_loss": 0.5083, "loss": 0.5083, "step": 6504 }, { "epoch": 2.9158417690784804, "learning_rate": 1.0020913613807692e-07, "lm_loss": 0.3837, "loss": 0.3837, "step": 6505 }, { "epoch": 2.91629001531508, "learning_rate": 9.912943665270347e-08, "lm_loss": 0.3581, "loss": 0.3581, "step": 6506 }, { "epoch": 2.916738261551679, "learning_rate": 9.805557375104912e-08, "lm_loss": 0.2668, "loss": 0.2668, "step": 6507 }, { "epoch": 2.917186507788278, "learning_rate": 9.698754768482088e-08, "lm_loss": 0.4273, "loss": 0.4273, "step": 6508 }, { "epoch": 2.9176347540248777, "learning_rate": 9.592535870435738e-08, "lm_loss": 0.2746, "loss": 0.2746, "step": 6509 }, { "epoch": 2.918083000261477, "learning_rate": 9.486900705862334e-08, "lm_loss": 0.3704, "loss": 0.3704, "step": 6510 }, { "epoch": 2.9185312464980764, "learning_rate": 9.381849299522349e-08, "lm_loss": 0.384, "loss": 0.384, "step": 6511 }, { "epoch": 2.9189794927346755, "learning_rate": 9.277381676039143e-08, "lm_loss": 0.3761, "loss": 0.3761, "step": 6512 }, { "epoch": 2.919427738971275, "learning_rate": 9.173497859898683e-08, "lm_loss": 0.2681, "loss": 0.2681, "step": 6513 }, { "epoch": 2.919875985207874, "learning_rate": 9.070197875451213e-08, "lm_loss": 0.4573, "loss": 0.4573, "step": 6514 }, { "epoch": 2.9203242314444733, "learning_rate": 8.967481746909034e-08, "lm_loss": 0.2856, "loss": 0.2856, "step": 6515 }, { "epoch": 2.920772477681073, "learning_rate": 8.865349498348163e-08, "lm_loss": 0.4516, "loss": 0.4516, "step": 6516 }, { "epoch": 2.921220723917672, "learning_rate": 8.763801153707784e-08, "lm_loss": 0.3668, "loss": 0.3668, "step": 6517 }, { "epoch": 2.9216689701542715, "learning_rate": 8.662836736790247e-08, "lm_loss": 0.3582, "loss": 0.3582, "step": 6518 }, { "epoch": 2.9221172163908706, "learning_rate": 8.562456271260233e-08, "lm_loss": 0.3083, "loss": 0.3083, "step": 6519 }, { "epoch": 2.92256546262747, "learning_rate": 8.462659780646976e-08, "lm_loss": 0.3893, "loss": 0.3893, "step": 6520 }, { "epoch": 2.9230137088640693, "learning_rate": 8.363447288341764e-08, "lm_loss": 0.352, "loss": 0.352, "step": 6521 }, { "epoch": 2.9234619551006684, "learning_rate": 8.264818817599052e-08, "lm_loss": 0.4355, "loss": 0.4355, "step": 6522 }, { "epoch": 2.923910201337268, "learning_rate": 8.166774391536735e-08, "lm_loss": 0.3051, "loss": 0.3051, "step": 6523 }, { "epoch": 2.924358447573867, "learning_rate": 8.069314033135877e-08, "lm_loss": 0.4368, "loss": 0.4368, "step": 6524 }, { "epoch": 2.9248066938104667, "learning_rate": 7.972437765240426e-08, "lm_loss": 0.361, "loss": 0.361, "step": 6525 }, { "epoch": 2.9252549400470658, "learning_rate": 7.876145610557495e-08, "lm_loss": 0.2634, "loss": 0.2634, "step": 6526 }, { "epoch": 2.9257031862836653, "learning_rate": 7.780437591657086e-08, "lm_loss": 0.5003, "loss": 0.5003, "step": 6527 }, { "epoch": 2.9261514325202644, "learning_rate": 7.68531373097292e-08, "lm_loss": 0.2516, "loss": 0.2516, "step": 6528 }, { "epoch": 2.9265996787568636, "learning_rate": 7.590774050800775e-08, "lm_loss": 0.4263, "loss": 0.4263, "step": 6529 }, { "epoch": 2.927047924993463, "learning_rate": 7.496818573300424e-08, "lm_loss": 0.3355, "loss": 0.3355, "step": 6530 }, { "epoch": 2.9274961712300622, "learning_rate": 7.40344732049425e-08, "lm_loss": 0.4722, "loss": 0.4722, "step": 6531 }, { "epoch": 2.927944417466662, "learning_rate": 7.310660314267526e-08, "lm_loss": 0.352, "loss": 0.352, "step": 6532 }, { "epoch": 2.928392663703261, "learning_rate": 7.218457576369519e-08, "lm_loss": 0.4027, "loss": 0.4027, "step": 6533 }, { "epoch": 2.9288409099398605, "learning_rate": 7.126839128411001e-08, "lm_loss": 0.3413, "loss": 0.3413, "step": 6534 }, { "epoch": 2.9292891561764596, "learning_rate": 7.035804991867568e-08, "lm_loss": 0.3524, "loss": 0.3524, "step": 6535 }, { "epoch": 2.9297374024130587, "learning_rate": 6.94535518807632e-08, "lm_loss": 0.4279, "loss": 0.4279, "step": 6536 }, { "epoch": 2.9301856486496582, "learning_rate": 6.855489738238353e-08, "lm_loss": 0.2491, "loss": 0.2491, "step": 6537 }, { "epoch": 2.9306338948862574, "learning_rate": 6.766208663417372e-08, "lm_loss": 0.3953, "loss": 0.3953, "step": 6538 }, { "epoch": 2.931082141122857, "learning_rate": 6.677511984540252e-08, "lm_loss": 0.3359, "loss": 0.3359, "step": 6539 }, { "epoch": 2.931530387359456, "learning_rate": 6.589399722397028e-08, "lm_loss": 0.3611, "loss": 0.3611, "step": 6540 }, { "epoch": 2.9319786335960556, "learning_rate": 6.501871897640066e-08, "lm_loss": 0.436, "loss": 0.436, "step": 6541 }, { "epoch": 2.9324268798326547, "learning_rate": 6.414928530785736e-08, "lm_loss": 0.5914, "loss": 0.5914, "step": 6542 }, { "epoch": 2.932875126069254, "learning_rate": 6.328569642212734e-08, "lm_loss": 0.5123, "loss": 0.5123, "step": 6543 }, { "epoch": 2.9333233723058534, "learning_rate": 6.242795252162925e-08, "lm_loss": 0.5318, "loss": 0.5318, "step": 6544 }, { "epoch": 2.9337716185424525, "learning_rate": 6.157605380741338e-08, "lm_loss": 0.2642, "loss": 0.2642, "step": 6545 }, { "epoch": 2.934219864779052, "learning_rate": 6.073000047915889e-08, "lm_loss": 0.451, "loss": 0.451, "step": 6546 }, { "epoch": 2.934668111015651, "learning_rate": 5.988979273517104e-08, "lm_loss": 0.5449, "loss": 0.5449, "step": 6547 }, { "epoch": 2.9351163572522507, "learning_rate": 5.9055430772392285e-08, "lm_loss": 0.3308, "loss": 0.3308, "step": 6548 }, { "epoch": 2.93556460348885, "learning_rate": 5.822691478639119e-08, "lm_loss": 0.2786, "loss": 0.2786, "step": 6549 }, { "epoch": 2.936012849725449, "learning_rate": 5.740424497136243e-08, "lm_loss": 0.2774, "loss": 0.2774, "step": 6550 }, { "epoch": 2.9364610959620485, "learning_rate": 5.6587421520137854e-08, "lm_loss": 0.3696, "loss": 0.3696, "step": 6551 }, { "epoch": 2.9369093421986476, "learning_rate": 5.577644462417264e-08, "lm_loss": 0.3913, "loss": 0.3913, "step": 6552 }, { "epoch": 2.937357588435247, "learning_rate": 5.497131447355364e-08, "lm_loss": 0.3696, "loss": 0.3696, "step": 6553 }, { "epoch": 2.9378058346718463, "learning_rate": 5.417203125699655e-08, "lm_loss": 0.3142, "loss": 0.3142, "step": 6554 }, { "epoch": 2.938254080908446, "learning_rate": 5.337859516185151e-08, "lm_loss": 0.3148, "loss": 0.3148, "step": 6555 }, { "epoch": 2.938702327145045, "learning_rate": 5.25910063740892e-08, "lm_loss": 0.3751, "loss": 0.3751, "step": 6556 }, { "epoch": 2.939150573381644, "learning_rate": 5.180926507832029e-08, "lm_loss": 0.4944, "loss": 0.4944, "step": 6557 }, { "epoch": 2.9395988196182437, "learning_rate": 5.103337145777598e-08, "lm_loss": 0.4167, "loss": 0.4167, "step": 6558 }, { "epoch": 2.9400470658548428, "learning_rate": 5.026332569431913e-08, "lm_loss": 0.3306, "loss": 0.3306, "step": 6559 }, { "epoch": 2.9404953120914423, "learning_rate": 4.949912796844425e-08, "lm_loss": 0.3037, "loss": 0.3037, "step": 6560 }, { "epoch": 2.9409435583280414, "learning_rate": 4.874077845927472e-08, "lm_loss": 0.429, "loss": 0.429, "step": 6561 }, { "epoch": 2.941391804564641, "learning_rate": 4.798827734456279e-08, "lm_loss": 0.4416, "loss": 0.4416, "step": 6562 }, { "epoch": 2.94184005080124, "learning_rate": 4.724162480068406e-08, "lm_loss": 0.4437, "loss": 0.4437, "step": 6563 }, { "epoch": 2.9422882970378392, "learning_rate": 4.650082100265407e-08, "lm_loss": 0.3525, "loss": 0.3525, "step": 6564 }, { "epoch": 2.942736543274439, "learning_rate": 4.576586612410893e-08, "lm_loss": 0.3189, "loss": 0.3189, "step": 6565 }, { "epoch": 2.943184789511038, "learning_rate": 4.503676033731918e-08, "lm_loss": 0.4395, "loss": 0.4395, "step": 6566 }, { "epoch": 2.9436330357476375, "learning_rate": 4.431350381317867e-08, "lm_loss": 0.2385, "loss": 0.2385, "step": 6567 }, { "epoch": 2.9440812819842366, "learning_rate": 4.359609672121845e-08, "lm_loss": 0.3881, "loss": 0.3881, "step": 6568 }, { "epoch": 2.944529528220836, "learning_rate": 4.288453922958735e-08, "lm_loss": 0.2878, "loss": 0.2878, "step": 6569 }, { "epoch": 2.9449777744574352, "learning_rate": 4.217883150507418e-08, "lm_loss": 0.2813, "loss": 0.2813, "step": 6570 }, { "epoch": 2.9454260206940344, "learning_rate": 4.1478973713088284e-08, "lm_loss": 0.3097, "loss": 0.3097, "step": 6571 }, { "epoch": 2.945874266930634, "learning_rate": 4.0784966017673455e-08, "lm_loss": 0.3804, "loss": 0.3804, "step": 6572 }, { "epoch": 2.946322513167233, "learning_rate": 4.0096808581499557e-08, "lm_loss": 0.339, "loss": 0.339, "step": 6573 }, { "epoch": 2.9467707594038326, "learning_rate": 3.9414501565865346e-08, "lm_loss": 0.3706, "loss": 0.3706, "step": 6574 }, { "epoch": 2.9472190056404317, "learning_rate": 3.873804513069845e-08, "lm_loss": 0.362, "loss": 0.362, "step": 6575 }, { "epoch": 2.9476672518770313, "learning_rate": 3.806743943455815e-08, "lm_loss": 0.3552, "loss": 0.3552, "step": 6576 }, { "epoch": 2.9481154981136304, "learning_rate": 3.740268463462704e-08, "lm_loss": 0.3716, "loss": 0.3716, "step": 6577 }, { "epoch": 2.9485637443502295, "learning_rate": 3.67437808867166e-08, "lm_loss": 0.5055, "loss": 0.5055, "step": 6578 }, { "epoch": 2.949011990586829, "learning_rate": 3.609072834527272e-08, "lm_loss": 0.3118, "loss": 0.3118, "step": 6579 }, { "epoch": 2.949460236823428, "learning_rate": 3.544352716336463e-08, "lm_loss": 0.3209, "loss": 0.3209, "step": 6580 }, { "epoch": 2.9499084830600277, "learning_rate": 3.4802177492693214e-08, "lm_loss": 0.4266, "loss": 0.4266, "step": 6581 }, { "epoch": 2.950356729296627, "learning_rate": 3.4166679483582655e-08, "lm_loss": 0.2454, "loss": 0.2454, "step": 6582 }, { "epoch": 2.9508049755332264, "learning_rate": 3.353703328499436e-08, "lm_loss": 0.3589, "loss": 0.3589, "step": 6583 }, { "epoch": 2.9512532217698255, "learning_rate": 3.291323904450749e-08, "lm_loss": 0.4285, "loss": 0.4285, "step": 6584 }, { "epoch": 2.9517014680064246, "learning_rate": 3.229529690833843e-08, "lm_loss": 0.2833, "loss": 0.2833, "step": 6585 }, { "epoch": 2.952149714243024, "learning_rate": 3.1683207021326876e-08, "lm_loss": 0.3628, "loss": 0.3628, "step": 6586 }, { "epoch": 2.9525979604796233, "learning_rate": 3.107696952694139e-08, "lm_loss": 0.4912, "loss": 0.4912, "step": 6587 }, { "epoch": 2.953046206716223, "learning_rate": 3.047658456728219e-08, "lm_loss": 0.2761, "loss": 0.2761, "step": 6588 }, { "epoch": 2.953494452952822, "learning_rate": 2.988205228307284e-08, "lm_loss": 0.4278, "loss": 0.4278, "step": 6589 }, { "epoch": 2.9539426991894215, "learning_rate": 2.9293372813668507e-08, "lm_loss": 0.4442, "loss": 0.4442, "step": 6590 }, { "epoch": 2.9543909454260207, "learning_rate": 2.87105462970505e-08, "lm_loss": 0.2321, "loss": 0.2321, "step": 6591 }, { "epoch": 2.9548391916626198, "learning_rate": 2.813357286983176e-08, "lm_loss": 0.3035, "loss": 0.3035, "step": 6592 }, { "epoch": 2.9552874378992193, "learning_rate": 2.756245266724855e-08, "lm_loss": 0.4949, "loss": 0.4949, "step": 6593 }, { "epoch": 2.955735684135819, "learning_rate": 2.6997185823166017e-08, "lm_loss": 0.351, "loss": 0.351, "step": 6594 }, { "epoch": 2.956183930372418, "learning_rate": 2.6437772470080947e-08, "lm_loss": 0.3558, "loss": 0.3558, "step": 6595 }, { "epoch": 2.956632176609017, "learning_rate": 2.588421273911623e-08, "lm_loss": 0.3118, "loss": 0.3118, "step": 6596 }, { "epoch": 2.9570804228456167, "learning_rate": 2.533650676002086e-08, "lm_loss": 0.3136, "loss": 0.3136, "step": 6597 }, { "epoch": 2.957528669082216, "learning_rate": 2.4794654661169924e-08, "lm_loss": 0.3737, "loss": 0.3737, "step": 6598 }, { "epoch": 2.957976915318815, "learning_rate": 2.425865656957571e-08, "lm_loss": 0.344, "loss": 0.344, "step": 6599 }, { "epoch": 2.9584251615554145, "learning_rate": 2.3728512610871056e-08, "lm_loss": 0.3413, "loss": 0.3413, "step": 6600 }, { "epoch": 2.958873407792014, "learning_rate": 2.3204222909314898e-08, "lm_loss": 0.5326, "loss": 0.5326, "step": 6601 }, { "epoch": 2.959321654028613, "learning_rate": 2.2685787587797826e-08, "lm_loss": 0.2566, "loss": 0.2566, "step": 6602 }, { "epoch": 2.9597699002652123, "learning_rate": 2.2173206767839293e-08, "lm_loss": 0.4102, "loss": 0.4102, "step": 6603 }, { "epoch": 2.960218146501812, "learning_rate": 2.1666480569582092e-08, "lm_loss": 0.3673, "loss": 0.3673, "step": 6604 }, { "epoch": 2.960666392738411, "learning_rate": 2.116560911179788e-08, "lm_loss": 0.4221, "loss": 0.4221, "step": 6605 }, { "epoch": 2.96111463897501, "learning_rate": 2.067059251189274e-08, "lm_loss": 0.3264, "loss": 0.3264, "step": 6606 }, { "epoch": 2.9615628852116096, "learning_rate": 2.0181430885893305e-08, "lm_loss": 0.357, "loss": 0.357, "step": 6607 }, { "epoch": 2.962011131448209, "learning_rate": 1.969812434844953e-08, "lm_loss": 0.4044, "loss": 0.4044, "step": 6608 }, { "epoch": 2.9624593776848083, "learning_rate": 1.9220673012854127e-08, "lm_loss": 0.3731, "loss": 0.3731, "step": 6609 }, { "epoch": 2.9629076239214074, "learning_rate": 1.8749076991012027e-08, "lm_loss": 0.3401, "loss": 0.3401, "step": 6610 }, { "epoch": 2.963355870158007, "learning_rate": 1.828333639346258e-08, "lm_loss": 0.4998, "loss": 0.4998, "step": 6611 }, { "epoch": 2.963804116394606, "learning_rate": 1.7823451329374017e-08, "lm_loss": 0.2648, "loss": 0.2648, "step": 6612 }, { "epoch": 2.964252362631205, "learning_rate": 1.73694219065379e-08, "lm_loss": 0.4254, "loss": 0.4254, "step": 6613 }, { "epoch": 2.9647006088678047, "learning_rate": 1.692124823137742e-08, "lm_loss": 0.3202, "loss": 0.3202, "step": 6614 }, { "epoch": 2.9651488551044043, "learning_rate": 1.6478930408941884e-08, "lm_loss": 0.4751, "loss": 0.4751, "step": 6615 }, { "epoch": 2.9655971013410034, "learning_rate": 1.604246854290392e-08, "lm_loss": 0.2848, "loss": 0.2848, "step": 6616 }, { "epoch": 2.9660453475776025, "learning_rate": 1.5611862735567805e-08, "lm_loss": 0.3209, "loss": 0.3209, "step": 6617 }, { "epoch": 2.966493593814202, "learning_rate": 1.5187113087869466e-08, "lm_loss": 0.3297, "loss": 0.3297, "step": 6618 }, { "epoch": 2.966941840050801, "learning_rate": 1.476821969935982e-08, "lm_loss": 0.4316, "loss": 0.4316, "step": 6619 }, { "epoch": 2.9673900862874003, "learning_rate": 1.4355182668229772e-08, "lm_loss": 0.3225, "loss": 0.3225, "step": 6620 }, { "epoch": 2.967838332524, "learning_rate": 1.3948002091290768e-08, "lm_loss": 0.3541, "loss": 0.3541, "step": 6621 }, { "epoch": 2.9682865787605994, "learning_rate": 1.3546678063980356e-08, "lm_loss": 0.3282, "loss": 0.3282, "step": 6622 }, { "epoch": 2.9687348249971985, "learning_rate": 1.3151210680370507e-08, "lm_loss": 0.4046, "loss": 0.4046, "step": 6623 }, { "epoch": 2.9691830712337977, "learning_rate": 1.2761600033150967e-08, "lm_loss": 0.2514, "loss": 0.2514, "step": 6624 }, { "epoch": 2.969631317470397, "learning_rate": 1.2377846213648681e-08, "lm_loss": 0.3007, "loss": 0.3007, "step": 6625 }, { "epoch": 2.9700795637069963, "learning_rate": 1.1999949311811144e-08, "lm_loss": 0.331, "loss": 0.331, "step": 6626 }, { "epoch": 2.9705278099435954, "learning_rate": 1.1627909416211947e-08, "lm_loss": 0.393, "loss": 0.393, "step": 6627 }, { "epoch": 2.970976056180195, "learning_rate": 1.1261726614056334e-08, "lm_loss": 0.6028, "loss": 0.6028, "step": 6628 }, { "epoch": 2.9714243024167946, "learning_rate": 1.0901400991175647e-08, "lm_loss": 0.2815, "loss": 0.2815, "step": 6629 }, { "epoch": 2.9718725486533937, "learning_rate": 1.0546932632027328e-08, "lm_loss": 0.318, "loss": 0.318, "step": 6630 }, { "epoch": 2.972320794889993, "learning_rate": 1.0198321619694917e-08, "lm_loss": 0.4696, "loss": 0.4696, "step": 6631 }, { "epoch": 2.9727690411265923, "learning_rate": 9.855568035893604e-09, "lm_loss": 0.4335, "loss": 0.4335, "step": 6632 }, { "epoch": 2.9732172873631915, "learning_rate": 9.51867196095635e-09, "lm_loss": 0.3369, "loss": 0.3369, "step": 6633 }, { "epoch": 2.9736655335997906, "learning_rate": 9.187633473856095e-09, "lm_loss": 0.2582, "loss": 0.2582, "step": 6634 }, { "epoch": 2.97411377983639, "learning_rate": 8.862452652180775e-09, "lm_loss": 0.453, "loss": 0.453, "step": 6635 }, { "epoch": 2.9745620260729897, "learning_rate": 8.543129572152752e-09, "lm_loss": 0.4205, "loss": 0.4205, "step": 6636 }, { "epoch": 2.975010272309589, "learning_rate": 8.229664308620488e-09, "lm_loss": 0.3059, "loss": 0.3059, "step": 6637 }, { "epoch": 2.975458518546188, "learning_rate": 7.92205693505299e-09, "lm_loss": 0.3305, "loss": 0.3305, "step": 6638 }, { "epoch": 2.9759067647827875, "learning_rate": 7.62030752355647e-09, "lm_loss": 0.3293, "loss": 0.3293, "step": 6639 }, { "epoch": 2.9763550110193866, "learning_rate": 7.324416144854906e-09, "lm_loss": 0.2996, "loss": 0.2996, "step": 6640 }, { "epoch": 2.9768032572559857, "learning_rate": 7.0343828683039344e-09, "lm_loss": 0.2971, "loss": 0.2971, "step": 6641 }, { "epoch": 2.9772515034925853, "learning_rate": 6.750207761888061e-09, "lm_loss": 0.4438, "loss": 0.4438, "step": 6642 }, { "epoch": 2.977699749729185, "learning_rate": 6.47189089221234e-09, "lm_loss": 0.3206, "loss": 0.3206, "step": 6643 }, { "epoch": 2.978147995965784, "learning_rate": 6.199432324516252e-09, "lm_loss": 0.2964, "loss": 0.2964, "step": 6644 }, { "epoch": 2.978596242202383, "learning_rate": 5.9328321226570505e-09, "lm_loss": 0.4401, "loss": 0.4401, "step": 6645 }, { "epoch": 2.9790444884389826, "learning_rate": 5.672090349129189e-09, "lm_loss": 0.3056, "loss": 0.3056, "step": 6646 }, { "epoch": 2.9794927346755817, "learning_rate": 5.417207065042118e-09, "lm_loss": 0.3854, "loss": 0.3854, "step": 6647 }, { "epoch": 2.979940980912181, "learning_rate": 5.168182330145266e-09, "lm_loss": 0.3104, "loss": 0.3104, "step": 6648 }, { "epoch": 2.9803892271487804, "learning_rate": 4.925016202805832e-09, "lm_loss": 0.4239, "loss": 0.4239, "step": 6649 }, { "epoch": 2.98083747338538, "learning_rate": 4.687708740017116e-09, "lm_loss": 0.3226, "loss": 0.3226, "step": 6650 }, { "epoch": 2.981285719621979, "learning_rate": 4.456259997406842e-09, "lm_loss": 0.3232, "loss": 0.3232, "step": 6651 }, { "epoch": 2.981733965858578, "learning_rate": 4.2306700292232825e-09, "lm_loss": 0.3894, "loss": 0.3894, "step": 6652 }, { "epoch": 2.9821822120951778, "learning_rate": 4.01093888834081e-09, "lm_loss": 0.4128, "loss": 0.4128, "step": 6653 }, { "epoch": 2.982630458331777, "learning_rate": 3.797066626265444e-09, "lm_loss": 0.3586, "loss": 0.3586, "step": 6654 }, { "epoch": 2.983078704568376, "learning_rate": 3.5890532931293075e-09, "lm_loss": 0.4234, "loss": 0.4234, "step": 6655 }, { "epoch": 2.9835269508049755, "learning_rate": 3.3868989376822922e-09, "lm_loss": 0.3642, "loss": 0.3642, "step": 6656 }, { "epoch": 2.983975197041575, "learning_rate": 3.1906036073142685e-09, "lm_loss": 0.3373, "loss": 0.3373, "step": 6657 }, { "epoch": 2.984423443278174, "learning_rate": 3.000167348035654e-09, "lm_loss": 0.3911, "loss": 0.3911, "step": 6658 }, { "epoch": 2.9848716895147733, "learning_rate": 2.815590204477414e-09, "lm_loss": 0.3343, "loss": 0.3343, "step": 6659 }, { "epoch": 2.985319935751373, "learning_rate": 2.6368722199077158e-09, "lm_loss": 0.4684, "loss": 0.4684, "step": 6660 }, { "epoch": 2.985768181987972, "learning_rate": 2.464013436218049e-09, "lm_loss": 0.4083, "loss": 0.4083, "step": 6661 }, { "epoch": 2.986216428224571, "learning_rate": 2.297013893920452e-09, "lm_loss": 0.3844, "loss": 0.3844, "step": 6662 }, { "epoch": 2.9866646744611707, "learning_rate": 2.1358736321613893e-09, "lm_loss": 0.4549, "loss": 0.4549, "step": 6663 }, { "epoch": 2.9871129206977702, "learning_rate": 1.9805926887106474e-09, "lm_loss": 0.2837, "loss": 0.2837, "step": 6664 }, { "epoch": 2.9875611669343693, "learning_rate": 1.8311710999668885e-09, "lm_loss": 0.4343, "loss": 0.4343, "step": 6665 }, { "epoch": 2.9880094131709685, "learning_rate": 1.6876089009493223e-09, "lm_loss": 0.3563, "loss": 0.3563, "step": 6666 }, { "epoch": 2.988457659407568, "learning_rate": 1.5499061253088087e-09, "lm_loss": 0.3741, "loss": 0.3741, "step": 6667 }, { "epoch": 2.988905905644167, "learning_rate": 1.4180628053250821e-09, "lm_loss": 0.3152, "loss": 0.3152, "step": 6668 }, { "epoch": 2.9893541518807663, "learning_rate": 1.292078971898425e-09, "lm_loss": 0.3944, "loss": 0.3944, "step": 6669 }, { "epoch": 2.989802398117366, "learning_rate": 1.171954654557994e-09, "lm_loss": 0.2604, "loss": 0.2604, "step": 6670 }, { "epoch": 2.9902506443539654, "learning_rate": 1.0576898814645965e-09, "lm_loss": 0.4687, "loss": 0.4687, "step": 6671 }, { "epoch": 2.9906988905905645, "learning_rate": 9.492846793940358e-10, "lm_loss": 0.2818, "loss": 0.2818, "step": 6672 }, { "epoch": 2.9911471368271636, "learning_rate": 8.467390737620928e-10, "lm_loss": 0.56, "loss": 0.56, "step": 6673 }, { "epoch": 2.991595383063763, "learning_rate": 7.500530886023205e-10, "lm_loss": 0.3401, "loss": 0.3401, "step": 6674 }, { "epoch": 2.9920436293003623, "learning_rate": 6.592267465743707e-10, "lm_loss": 0.4089, "loss": 0.4089, "step": 6675 }, { "epoch": 2.9924918755369614, "learning_rate": 5.74260068972321e-10, "lm_loss": 0.4015, "loss": 0.4015, "step": 6676 }, { "epoch": 2.992940121773561, "learning_rate": 4.951530757080214e-10, "lm_loss": 0.5302, "loss": 0.5302, "step": 6677 }, { "epoch": 2.9933883680101605, "learning_rate": 4.219057853277475e-10, "lm_loss": 0.3107, "loss": 0.3107, "step": 6678 }, { "epoch": 2.9938366142467596, "learning_rate": 3.545182149955473e-10, "lm_loss": 0.3755, "loss": 0.3755, "step": 6679 }, { "epoch": 2.9942848604833587, "learning_rate": 2.929903805071188e-10, "lm_loss": 0.4823, "loss": 0.4823, "step": 6680 }, { "epoch": 2.9947331067199583, "learning_rate": 2.373222962870347e-10, "lm_loss": 0.4321, "loss": 0.4321, "step": 6681 }, { "epoch": 2.9951813529565574, "learning_rate": 1.875139753804156e-10, "lm_loss": 0.3441, "loss": 0.3441, "step": 6682 }, { "epoch": 2.9956295991931565, "learning_rate": 1.4356542946403206e-10, "lm_loss": 0.3311, "loss": 0.3311, "step": 6683 }, { "epoch": 2.996077845429756, "learning_rate": 1.0547666883797824e-10, "lm_loss": 0.3981, "loss": 0.3981, "step": 6684 }, { "epoch": 2.9965260916663556, "learning_rate": 7.324770243122281e-11, "lm_loss": 0.3028, "loss": 0.3028, "step": 6685 }, { "epoch": 2.9969743379029548, "learning_rate": 4.687853779605788e-11, "lm_loss": 0.3347, "loss": 0.3347, "step": 6686 }, { "epoch": 2.997422584139554, "learning_rate": 2.6369181116425724e-11, "lm_loss": 0.2725, "loss": 0.2725, "step": 6687 }, { "epoch": 2.9978708303761534, "learning_rate": 1.1719637196816458e-11, "lm_loss": 0.3595, "loss": 0.3595, "step": 6688 }, { "epoch": 2.9983190766127525, "learning_rate": 2.929909470594794e-12, "lm_loss": 0.3643, "loss": 0.3643, "step": 6689 }, { "epoch": 2.9987673228493517, "learning_rate": 0.0, "lm_loss": 0.2748, "loss": 0.2748, "step": 6690 }, { "epoch": 2.9987673228493517, "step": 6690, "total_flos": 2.0837604417110802e+19, "train_loss": 0.71592930310332, "train_runtime": 82170.1138, "train_samples_per_second": 151.168, "train_steps_per_second": 0.081 } ], "logging_steps": 1, "max_steps": 6690, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.0837604417110802e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }