{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9109575116893678, "eval_steps": 500, "global_step": 9400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00020329335230737954, "grad_norm": 0.36621615290641785, "learning_rate": 0.0, "loss": 1.5428, "step": 1 }, { "epoch": 0.0004065867046147591, "grad_norm": 0.38205745816230774, "learning_rate": 4e-05, "loss": 1.3117, "step": 2 }, { "epoch": 0.0006098800569221387, "grad_norm": 0.38206785917282104, "learning_rate": 8e-05, "loss": 1.5818, "step": 3 }, { "epoch": 0.0008131734092295182, "grad_norm": 0.2856779396533966, "learning_rate": 0.00012, "loss": 1.4046, "step": 4 }, { "epoch": 0.0010164667615368977, "grad_norm": 0.28240150213241577, "learning_rate": 0.00016, "loss": 1.1899, "step": 5 }, { "epoch": 0.0012197601138442774, "grad_norm": 0.27537214756011963, "learning_rate": 0.0002, "loss": 1.3741, "step": 6 }, { "epoch": 0.0014230534661516568, "grad_norm": 0.1832859218120575, "learning_rate": 0.00019997966032746873, "loss": 1.2432, "step": 7 }, { "epoch": 0.0016263468184590363, "grad_norm": 0.1194186806678772, "learning_rate": 0.00019995932065493746, "loss": 1.1152, "step": 8 }, { "epoch": 0.001829640170766416, "grad_norm": 0.287241131067276, "learning_rate": 0.0001999389809824062, "loss": 1.2556, "step": 9 }, { "epoch": 0.0020329335230737954, "grad_norm": 0.2129606008529663, "learning_rate": 0.00019991864130987493, "loss": 1.2698, "step": 10 }, { "epoch": 0.002236226875381175, "grad_norm": 0.16557097434997559, "learning_rate": 0.00019989830163734363, "loss": 1.2364, "step": 11 }, { "epoch": 0.002439520227688555, "grad_norm": 0.15639722347259521, "learning_rate": 0.00019987796196481236, "loss": 1.2499, "step": 12 }, { "epoch": 0.002642813579995934, "grad_norm": 0.11634412407875061, "learning_rate": 0.0001998576222922811, "loss": 1.148, "step": 13 }, { "epoch": 0.0028461069323033137, "grad_norm": 0.13249927759170532, "learning_rate": 0.00019983728261974983, "loss": 1.267, "step": 14 }, { "epoch": 0.0030494002846106934, "grad_norm": 0.12251798063516617, "learning_rate": 0.00019981694294721856, "loss": 1.1868, "step": 15 }, { "epoch": 0.0032526936369180726, "grad_norm": 0.1279357224702835, "learning_rate": 0.00019979660327468728, "loss": 1.3611, "step": 16 }, { "epoch": 0.0034559869892254523, "grad_norm": 0.10690166801214218, "learning_rate": 0.00019977626360215603, "loss": 1.2934, "step": 17 }, { "epoch": 0.003659280341532832, "grad_norm": 0.09634354710578918, "learning_rate": 0.00019975592392962476, "loss": 1.1628, "step": 18 }, { "epoch": 0.0038625736938402116, "grad_norm": 0.08993979543447495, "learning_rate": 0.00019973558425709345, "loss": 1.1328, "step": 19 }, { "epoch": 0.004065867046147591, "grad_norm": 0.09834206104278564, "learning_rate": 0.00019971524458456218, "loss": 1.1424, "step": 20 }, { "epoch": 0.0042691603984549705, "grad_norm": 0.10754235088825226, "learning_rate": 0.00019969490491203093, "loss": 1.0843, "step": 21 }, { "epoch": 0.00447245375076235, "grad_norm": 0.09953349828720093, "learning_rate": 0.00019967456523949966, "loss": 1.1475, "step": 22 }, { "epoch": 0.00467574710306973, "grad_norm": 0.09910175204277039, "learning_rate": 0.00019965422556696838, "loss": 1.1532, "step": 23 }, { "epoch": 0.00487904045537711, "grad_norm": 0.10800202935934067, "learning_rate": 0.0001996338858944371, "loss": 1.2336, "step": 24 }, { "epoch": 0.005082333807684488, "grad_norm": 0.10042817890644073, "learning_rate": 0.00019961354622190586, "loss": 1.1235, "step": 25 }, { "epoch": 0.005285627159991868, "grad_norm": 0.10839787125587463, "learning_rate": 0.00019959320654937458, "loss": 1.3561, "step": 26 }, { "epoch": 0.005488920512299248, "grad_norm": 0.10559111088514328, "learning_rate": 0.00019957286687684328, "loss": 1.3255, "step": 27 }, { "epoch": 0.005692213864606627, "grad_norm": 0.12504474818706512, "learning_rate": 0.000199552527204312, "loss": 1.4834, "step": 28 }, { "epoch": 0.005895507216914007, "grad_norm": 0.09099473804235458, "learning_rate": 0.00019953218753178075, "loss": 1.1404, "step": 29 }, { "epoch": 0.006098800569221387, "grad_norm": 0.09648846834897995, "learning_rate": 0.00019951184785924948, "loss": 1.1302, "step": 30 }, { "epoch": 0.006302093921528766, "grad_norm": 0.0992799773812294, "learning_rate": 0.0001994915081867182, "loss": 1.3508, "step": 31 }, { "epoch": 0.006505387273836145, "grad_norm": 0.10332880914211273, "learning_rate": 0.00019947116851418693, "loss": 1.1819, "step": 32 }, { "epoch": 0.006708680626143525, "grad_norm": 0.1172771006822586, "learning_rate": 0.00019945082884165568, "loss": 1.2512, "step": 33 }, { "epoch": 0.006911973978450905, "grad_norm": 0.10649558156728745, "learning_rate": 0.0001994304891691244, "loss": 1.2394, "step": 34 }, { "epoch": 0.007115267330758284, "grad_norm": 0.10849574208259583, "learning_rate": 0.0001994101494965931, "loss": 1.1847, "step": 35 }, { "epoch": 0.007318560683065664, "grad_norm": 0.11581245064735413, "learning_rate": 0.00019938980982406182, "loss": 1.2788, "step": 36 }, { "epoch": 0.007521854035373044, "grad_norm": 0.09857199341058731, "learning_rate": 0.00019936947015153058, "loss": 1.1072, "step": 37 }, { "epoch": 0.007725147387680423, "grad_norm": 0.08895204216241837, "learning_rate": 0.0001993491304789993, "loss": 0.8741, "step": 38 }, { "epoch": 0.007928440739987802, "grad_norm": 0.09715849161148071, "learning_rate": 0.00019932879080646803, "loss": 1.2999, "step": 39 }, { "epoch": 0.008131734092295182, "grad_norm": 0.09145913273096085, "learning_rate": 0.00019930845113393675, "loss": 0.9852, "step": 40 }, { "epoch": 0.008335027444602561, "grad_norm": 0.09298407286405563, "learning_rate": 0.0001992881114614055, "loss": 1.1033, "step": 41 }, { "epoch": 0.008538320796909941, "grad_norm": 0.09957871586084366, "learning_rate": 0.00019926777178887423, "loss": 1.3203, "step": 42 }, { "epoch": 0.00874161414921732, "grad_norm": 0.1212410032749176, "learning_rate": 0.00019924743211634292, "loss": 1.166, "step": 43 }, { "epoch": 0.0089449075015247, "grad_norm": 0.10740375518798828, "learning_rate": 0.00019922709244381165, "loss": 1.1905, "step": 44 }, { "epoch": 0.00914820085383208, "grad_norm": 0.10571859031915665, "learning_rate": 0.0001992067527712804, "loss": 1.07, "step": 45 }, { "epoch": 0.00935149420613946, "grad_norm": 0.11196234077215195, "learning_rate": 0.00019918641309874912, "loss": 1.2516, "step": 46 }, { "epoch": 0.00955478755844684, "grad_norm": 0.1025981530547142, "learning_rate": 0.00019916607342621785, "loss": 1.1118, "step": 47 }, { "epoch": 0.00975808091075422, "grad_norm": 0.10584773868322372, "learning_rate": 0.00019914573375368657, "loss": 1.1551, "step": 48 }, { "epoch": 0.009961374263061597, "grad_norm": 0.10157100111246109, "learning_rate": 0.0001991253940811553, "loss": 0.9638, "step": 49 }, { "epoch": 0.010164667615368977, "grad_norm": 0.10194176435470581, "learning_rate": 0.00019910505440862405, "loss": 1.0024, "step": 50 }, { "epoch": 0.010367960967676356, "grad_norm": 0.10047532618045807, "learning_rate": 0.00019908471473609275, "loss": 1.1767, "step": 51 }, { "epoch": 0.010571254319983736, "grad_norm": 0.10448278486728668, "learning_rate": 0.00019906437506356147, "loss": 1.2346, "step": 52 }, { "epoch": 0.010774547672291116, "grad_norm": 0.09438527375459671, "learning_rate": 0.0001990440353910302, "loss": 1.0517, "step": 53 }, { "epoch": 0.010977841024598495, "grad_norm": 0.12310227751731873, "learning_rate": 0.00019902369571849895, "loss": 1.2809, "step": 54 }, { "epoch": 0.011181134376905875, "grad_norm": 0.10811592638492584, "learning_rate": 0.00019900335604596767, "loss": 1.1171, "step": 55 }, { "epoch": 0.011384427729213255, "grad_norm": 0.12063754349946976, "learning_rate": 0.0001989830163734364, "loss": 1.0719, "step": 56 }, { "epoch": 0.011587721081520634, "grad_norm": 0.10147465020418167, "learning_rate": 0.00019896267670090512, "loss": 1.0643, "step": 57 }, { "epoch": 0.011791014433828014, "grad_norm": 0.10184100270271301, "learning_rate": 0.00019894233702837387, "loss": 1.0148, "step": 58 }, { "epoch": 0.011994307786135394, "grad_norm": 0.1090080663561821, "learning_rate": 0.00019892199735584257, "loss": 1.0762, "step": 59 }, { "epoch": 0.012197601138442773, "grad_norm": 0.0960102528333664, "learning_rate": 0.0001989016576833113, "loss": 1.09, "step": 60 }, { "epoch": 0.012400894490750153, "grad_norm": 0.1056618019938469, "learning_rate": 0.00019888131801078002, "loss": 1.0092, "step": 61 }, { "epoch": 0.012604187843057533, "grad_norm": 0.10064820945262909, "learning_rate": 0.00019886097833824877, "loss": 1.0135, "step": 62 }, { "epoch": 0.01280748119536491, "grad_norm": 0.11083406209945679, "learning_rate": 0.0001988406386657175, "loss": 1.2344, "step": 63 }, { "epoch": 0.01301077454767229, "grad_norm": 0.10507647693157196, "learning_rate": 0.00019882029899318622, "loss": 1.1035, "step": 64 }, { "epoch": 0.01321406789997967, "grad_norm": 0.10840694606304169, "learning_rate": 0.00019879995932065494, "loss": 1.1458, "step": 65 }, { "epoch": 0.01341736125228705, "grad_norm": 0.1149667352437973, "learning_rate": 0.0001987796196481237, "loss": 1.0722, "step": 66 }, { "epoch": 0.01362065460459443, "grad_norm": 0.1219320297241211, "learning_rate": 0.00019875927997559242, "loss": 1.1392, "step": 67 }, { "epoch": 0.01382394795690181, "grad_norm": 0.1296710968017578, "learning_rate": 0.00019873894030306112, "loss": 1.1831, "step": 68 }, { "epoch": 0.014027241309209189, "grad_norm": 0.11214271187782288, "learning_rate": 0.00019871860063052984, "loss": 1.0385, "step": 69 }, { "epoch": 0.014230534661516568, "grad_norm": 0.12374462932348251, "learning_rate": 0.0001986982609579986, "loss": 1.2633, "step": 70 }, { "epoch": 0.014433828013823948, "grad_norm": 0.09856373071670532, "learning_rate": 0.00019867792128546732, "loss": 1.0495, "step": 71 }, { "epoch": 0.014637121366131328, "grad_norm": 0.1356707215309143, "learning_rate": 0.00019865758161293604, "loss": 1.2245, "step": 72 }, { "epoch": 0.014840414718438708, "grad_norm": 0.10265105217695236, "learning_rate": 0.00019863724194040477, "loss": 1.0076, "step": 73 }, { "epoch": 0.015043708070746087, "grad_norm": 0.1307467520236969, "learning_rate": 0.00019861690226787352, "loss": 1.3838, "step": 74 }, { "epoch": 0.015247001423053467, "grad_norm": 0.12013835459947586, "learning_rate": 0.00019859656259534224, "loss": 1.1509, "step": 75 }, { "epoch": 0.015450294775360847, "grad_norm": 0.10897748917341232, "learning_rate": 0.00019857622292281094, "loss": 1.025, "step": 76 }, { "epoch": 0.015653588127668226, "grad_norm": 0.10924937576055527, "learning_rate": 0.00019855588325027966, "loss": 1.06, "step": 77 }, { "epoch": 0.015856881479975604, "grad_norm": 0.10474475473165512, "learning_rate": 0.00019853554357774841, "loss": 0.9267, "step": 78 }, { "epoch": 0.016060174832282986, "grad_norm": 0.12250765413045883, "learning_rate": 0.00019851520390521714, "loss": 1.1231, "step": 79 }, { "epoch": 0.016263468184590364, "grad_norm": 0.12869718670845032, "learning_rate": 0.00019849486423268586, "loss": 1.1219, "step": 80 }, { "epoch": 0.016466761536897745, "grad_norm": 0.11656077951192856, "learning_rate": 0.0001984745245601546, "loss": 1.1494, "step": 81 }, { "epoch": 0.016670054889205123, "grad_norm": 0.12449704855680466, "learning_rate": 0.00019845418488762334, "loss": 1.3177, "step": 82 }, { "epoch": 0.016873348241512504, "grad_norm": 0.10715439915657043, "learning_rate": 0.00019843384521509206, "loss": 1.0944, "step": 83 }, { "epoch": 0.017076641593819882, "grad_norm": 0.11231628060340881, "learning_rate": 0.00019841350554256076, "loss": 1.0681, "step": 84 }, { "epoch": 0.01727993494612726, "grad_norm": 0.1253119856119156, "learning_rate": 0.00019839316587002949, "loss": 1.104, "step": 85 }, { "epoch": 0.01748322829843464, "grad_norm": 0.12725764513015747, "learning_rate": 0.00019837282619749824, "loss": 1.307, "step": 86 }, { "epoch": 0.01768652165074202, "grad_norm": 0.11470405012369156, "learning_rate": 0.00019835248652496696, "loss": 1.0729, "step": 87 }, { "epoch": 0.0178898150030494, "grad_norm": 0.12006914615631104, "learning_rate": 0.0001983321468524357, "loss": 1.1608, "step": 88 }, { "epoch": 0.01809310835535678, "grad_norm": 0.11256147921085358, "learning_rate": 0.0001983118071799044, "loss": 1.0816, "step": 89 }, { "epoch": 0.01829640170766416, "grad_norm": 0.12627887725830078, "learning_rate": 0.00019829146750737314, "loss": 1.2015, "step": 90 }, { "epoch": 0.018499695059971538, "grad_norm": 0.12313251942396164, "learning_rate": 0.0001982711278348419, "loss": 1.0837, "step": 91 }, { "epoch": 0.01870298841227892, "grad_norm": 0.1349901705980301, "learning_rate": 0.00019825078816231058, "loss": 1.2277, "step": 92 }, { "epoch": 0.018906281764586298, "grad_norm": 0.11006023734807968, "learning_rate": 0.0001982304484897793, "loss": 1.0768, "step": 93 }, { "epoch": 0.01910957511689368, "grad_norm": 0.1100686565041542, "learning_rate": 0.00019821010881724803, "loss": 1.1119, "step": 94 }, { "epoch": 0.019312868469201057, "grad_norm": 0.1252383440732956, "learning_rate": 0.00019818976914471678, "loss": 1.1862, "step": 95 }, { "epoch": 0.01951616182150844, "grad_norm": 0.1430596113204956, "learning_rate": 0.0001981694294721855, "loss": 1.1765, "step": 96 }, { "epoch": 0.019719455173815816, "grad_norm": 0.130848690867424, "learning_rate": 0.00019814908979965423, "loss": 1.3255, "step": 97 }, { "epoch": 0.019922748526123194, "grad_norm": 0.13019633293151855, "learning_rate": 0.00019812875012712296, "loss": 1.1853, "step": 98 }, { "epoch": 0.020126041878430576, "grad_norm": 0.11539386957883835, "learning_rate": 0.0001981084104545917, "loss": 1.1897, "step": 99 }, { "epoch": 0.020329335230737954, "grad_norm": 0.11749454587697983, "learning_rate": 0.0001980880707820604, "loss": 1.1189, "step": 100 }, { "epoch": 0.020532628583045335, "grad_norm": 0.12270624190568924, "learning_rate": 0.00019806773110952913, "loss": 1.1821, "step": 101 }, { "epoch": 0.020735921935352713, "grad_norm": 0.1244652196764946, "learning_rate": 0.00019804739143699786, "loss": 1.2847, "step": 102 }, { "epoch": 0.020939215287660094, "grad_norm": 0.1163022369146347, "learning_rate": 0.0001980270517644666, "loss": 0.9405, "step": 103 }, { "epoch": 0.021142508639967472, "grad_norm": 0.13008280098438263, "learning_rate": 0.00019800671209193533, "loss": 1.272, "step": 104 }, { "epoch": 0.021345801992274854, "grad_norm": 0.11667025834321976, "learning_rate": 0.00019798637241940406, "loss": 1.0145, "step": 105 }, { "epoch": 0.02154909534458223, "grad_norm": 0.10680039972066879, "learning_rate": 0.00019796603274687278, "loss": 1.0929, "step": 106 }, { "epoch": 0.021752388696889613, "grad_norm": 0.12161742150783539, "learning_rate": 0.00019794569307434153, "loss": 1.0314, "step": 107 }, { "epoch": 0.02195568204919699, "grad_norm": 0.10798537731170654, "learning_rate": 0.00019792535340181023, "loss": 0.9918, "step": 108 }, { "epoch": 0.022158975401504372, "grad_norm": 0.10750683397054672, "learning_rate": 0.00019790501372927895, "loss": 1.0899, "step": 109 }, { "epoch": 0.02236226875381175, "grad_norm": 0.10349331051111221, "learning_rate": 0.00019788467405674768, "loss": 1.1011, "step": 110 }, { "epoch": 0.022565562106119128, "grad_norm": 0.12401413917541504, "learning_rate": 0.00019786433438421643, "loss": 1.1993, "step": 111 }, { "epoch": 0.02276885545842651, "grad_norm": 0.12651224434375763, "learning_rate": 0.00019784399471168515, "loss": 1.1643, "step": 112 }, { "epoch": 0.022972148810733888, "grad_norm": 0.12746116518974304, "learning_rate": 0.00019782365503915388, "loss": 1.2093, "step": 113 }, { "epoch": 0.02317544216304127, "grad_norm": 0.12016987800598145, "learning_rate": 0.0001978033153666226, "loss": 1.0743, "step": 114 }, { "epoch": 0.023378735515348647, "grad_norm": 0.11947723478078842, "learning_rate": 0.00019778297569409136, "loss": 1.0899, "step": 115 }, { "epoch": 0.02358202886765603, "grad_norm": 0.13821756839752197, "learning_rate": 0.00019776263602156005, "loss": 1.2247, "step": 116 }, { "epoch": 0.023785322219963406, "grad_norm": 0.1487802267074585, "learning_rate": 0.00019774229634902878, "loss": 1.5157, "step": 117 }, { "epoch": 0.023988615572270788, "grad_norm": 0.11825321614742279, "learning_rate": 0.0001977219566764975, "loss": 1.0966, "step": 118 }, { "epoch": 0.024191908924578166, "grad_norm": 0.11644168198108673, "learning_rate": 0.00019770161700396625, "loss": 1.057, "step": 119 }, { "epoch": 0.024395202276885547, "grad_norm": 0.12633183598518372, "learning_rate": 0.00019768127733143498, "loss": 1.039, "step": 120 }, { "epoch": 0.024598495629192925, "grad_norm": 0.11956316977739334, "learning_rate": 0.0001976609376589037, "loss": 1.0974, "step": 121 }, { "epoch": 0.024801788981500306, "grad_norm": 0.11445662379264832, "learning_rate": 0.00019764059798637243, "loss": 1.0289, "step": 122 }, { "epoch": 0.025005082333807684, "grad_norm": 0.11500035226345062, "learning_rate": 0.00019762025831384118, "loss": 1.0781, "step": 123 }, { "epoch": 0.025208375686115066, "grad_norm": 0.11663355678319931, "learning_rate": 0.0001975999186413099, "loss": 1.0258, "step": 124 }, { "epoch": 0.025411669038422444, "grad_norm": 0.13630478084087372, "learning_rate": 0.0001975795789687786, "loss": 1.1725, "step": 125 }, { "epoch": 0.02561496239072982, "grad_norm": 0.09818248450756073, "learning_rate": 0.00019755923929624732, "loss": 0.8459, "step": 126 }, { "epoch": 0.025818255743037203, "grad_norm": 0.12124455720186234, "learning_rate": 0.00019753889962371608, "loss": 1.0427, "step": 127 }, { "epoch": 0.02602154909534458, "grad_norm": 0.12221626192331314, "learning_rate": 0.0001975185599511848, "loss": 1.0463, "step": 128 }, { "epoch": 0.026224842447651962, "grad_norm": 0.12191324681043625, "learning_rate": 0.00019749822027865352, "loss": 1.0816, "step": 129 }, { "epoch": 0.02642813579995934, "grad_norm": 0.12219464778900146, "learning_rate": 0.00019747788060612225, "loss": 1.0725, "step": 130 }, { "epoch": 0.02663142915226672, "grad_norm": 0.11998015642166138, "learning_rate": 0.00019745754093359097, "loss": 1.1146, "step": 131 }, { "epoch": 0.0268347225045741, "grad_norm": 0.1426505446434021, "learning_rate": 0.00019743720126105973, "loss": 1.0739, "step": 132 }, { "epoch": 0.02703801585688148, "grad_norm": 0.11330442875623703, "learning_rate": 0.00019741686158852842, "loss": 1.0534, "step": 133 }, { "epoch": 0.02724130920918886, "grad_norm": 0.11867399513721466, "learning_rate": 0.00019739652191599715, "loss": 1.0519, "step": 134 }, { "epoch": 0.02744460256149624, "grad_norm": 0.11425293982028961, "learning_rate": 0.00019737618224346587, "loss": 1.1641, "step": 135 }, { "epoch": 0.02764789591380362, "grad_norm": 0.12021425366401672, "learning_rate": 0.00019735584257093462, "loss": 0.8639, "step": 136 }, { "epoch": 0.027851189266111, "grad_norm": 0.13119390606880188, "learning_rate": 0.00019733550289840335, "loss": 1.1486, "step": 137 }, { "epoch": 0.028054482618418378, "grad_norm": 0.12350285053253174, "learning_rate": 0.00019731516322587207, "loss": 1.2504, "step": 138 }, { "epoch": 0.028257775970725756, "grad_norm": 0.11073335260152817, "learning_rate": 0.0001972948235533408, "loss": 1.0745, "step": 139 }, { "epoch": 0.028461069323033137, "grad_norm": 0.1184212937951088, "learning_rate": 0.00019727448388080955, "loss": 1.2013, "step": 140 }, { "epoch": 0.028664362675340515, "grad_norm": 0.11723863333463669, "learning_rate": 0.00019725414420827825, "loss": 1.0349, "step": 141 }, { "epoch": 0.028867656027647896, "grad_norm": 0.12323645502328873, "learning_rate": 0.00019723380453574697, "loss": 1.2585, "step": 142 }, { "epoch": 0.029070949379955274, "grad_norm": 0.12688735127449036, "learning_rate": 0.0001972134648632157, "loss": 1.2059, "step": 143 }, { "epoch": 0.029274242732262656, "grad_norm": 0.11595512181520462, "learning_rate": 0.00019719312519068445, "loss": 1.1894, "step": 144 }, { "epoch": 0.029477536084570034, "grad_norm": 0.11859143525362015, "learning_rate": 0.00019717278551815317, "loss": 1.0111, "step": 145 }, { "epoch": 0.029680829436877415, "grad_norm": 0.13400156795978546, "learning_rate": 0.0001971524458456219, "loss": 1.1861, "step": 146 }, { "epoch": 0.029884122789184793, "grad_norm": 0.12621331214904785, "learning_rate": 0.00019713210617309062, "loss": 1.1099, "step": 147 }, { "epoch": 0.030087416141492174, "grad_norm": 0.11988019198179245, "learning_rate": 0.00019711176650055937, "loss": 1.0571, "step": 148 }, { "epoch": 0.030290709493799552, "grad_norm": 0.11711090058088303, "learning_rate": 0.00019709142682802807, "loss": 1.0591, "step": 149 }, { "epoch": 0.030494002846106934, "grad_norm": 0.12458360195159912, "learning_rate": 0.0001970710871554968, "loss": 1.1863, "step": 150 }, { "epoch": 0.03069729619841431, "grad_norm": 0.11751321703195572, "learning_rate": 0.00019705074748296552, "loss": 1.0735, "step": 151 }, { "epoch": 0.030900589550721693, "grad_norm": 0.13113288581371307, "learning_rate": 0.00019703040781043427, "loss": 1.2103, "step": 152 }, { "epoch": 0.03110388290302907, "grad_norm": 0.13107489049434662, "learning_rate": 0.000197010068137903, "loss": 1.17, "step": 153 }, { "epoch": 0.03130717625533645, "grad_norm": 0.12383049726486206, "learning_rate": 0.00019698972846537172, "loss": 1.0346, "step": 154 }, { "epoch": 0.03151046960764383, "grad_norm": 0.11656415462493896, "learning_rate": 0.00019696938879284044, "loss": 1.1545, "step": 155 }, { "epoch": 0.03171376295995121, "grad_norm": 0.12201374024152756, "learning_rate": 0.0001969490491203092, "loss": 0.9697, "step": 156 }, { "epoch": 0.031917056312258586, "grad_norm": 0.12756960093975067, "learning_rate": 0.0001969287094477779, "loss": 1.1668, "step": 157 }, { "epoch": 0.03212034966456597, "grad_norm": 0.13866621255874634, "learning_rate": 0.00019690836977524662, "loss": 1.2137, "step": 158 }, { "epoch": 0.03232364301687335, "grad_norm": 0.1329393833875656, "learning_rate": 0.00019688803010271534, "loss": 1.1864, "step": 159 }, { "epoch": 0.03252693636918073, "grad_norm": 0.14017806947231293, "learning_rate": 0.0001968676904301841, "loss": 1.2564, "step": 160 }, { "epoch": 0.032730229721488105, "grad_norm": 0.13004827499389648, "learning_rate": 0.00019684735075765282, "loss": 1.1301, "step": 161 }, { "epoch": 0.03293352307379549, "grad_norm": 0.11768215149641037, "learning_rate": 0.00019682701108512154, "loss": 1.0638, "step": 162 }, { "epoch": 0.03313681642610287, "grad_norm": 0.12334595620632172, "learning_rate": 0.00019680667141259026, "loss": 1.277, "step": 163 }, { "epoch": 0.033340109778410246, "grad_norm": 0.12338969856500626, "learning_rate": 0.00019678633174005902, "loss": 0.9211, "step": 164 }, { "epoch": 0.033543403130717624, "grad_norm": 0.10570957511663437, "learning_rate": 0.00019676599206752771, "loss": 1.0919, "step": 165 }, { "epoch": 0.03374669648302501, "grad_norm": 0.1223050057888031, "learning_rate": 0.00019674565239499644, "loss": 1.124, "step": 166 }, { "epoch": 0.033949989835332386, "grad_norm": 0.12787429988384247, "learning_rate": 0.00019672531272246516, "loss": 1.1717, "step": 167 }, { "epoch": 0.034153283187639764, "grad_norm": 0.10991297662258148, "learning_rate": 0.00019670497304993391, "loss": 0.9493, "step": 168 }, { "epoch": 0.03435657653994714, "grad_norm": 0.11774353682994843, "learning_rate": 0.00019668463337740264, "loss": 1.1527, "step": 169 }, { "epoch": 0.03455986989225452, "grad_norm": 0.12775689363479614, "learning_rate": 0.00019666429370487136, "loss": 1.389, "step": 170 }, { "epoch": 0.034763163244561905, "grad_norm": 0.12656515836715698, "learning_rate": 0.0001966439540323401, "loss": 1.182, "step": 171 }, { "epoch": 0.03496645659686928, "grad_norm": 0.11234056204557419, "learning_rate": 0.0001966236143598088, "loss": 0.8927, "step": 172 }, { "epoch": 0.03516974994917666, "grad_norm": 0.12165993452072144, "learning_rate": 0.00019660327468727754, "loss": 1.199, "step": 173 }, { "epoch": 0.03537304330148404, "grad_norm": 0.13241636753082275, "learning_rate": 0.00019658293501474626, "loss": 1.1245, "step": 174 }, { "epoch": 0.035576336653791424, "grad_norm": 0.12375210225582123, "learning_rate": 0.00019656259534221499, "loss": 1.0997, "step": 175 }, { "epoch": 0.0357796300060988, "grad_norm": 0.10730253159999847, "learning_rate": 0.0001965422556696837, "loss": 1.0075, "step": 176 }, { "epoch": 0.03598292335840618, "grad_norm": 0.11313315480947495, "learning_rate": 0.00019652191599715246, "loss": 1.003, "step": 177 }, { "epoch": 0.03618621671071356, "grad_norm": 0.11057975888252258, "learning_rate": 0.00019650157632462119, "loss": 1.1361, "step": 178 }, { "epoch": 0.03638951006302094, "grad_norm": 0.13612353801727295, "learning_rate": 0.0001964812366520899, "loss": 1.091, "step": 179 }, { "epoch": 0.03659280341532832, "grad_norm": 0.10917545855045319, "learning_rate": 0.00019646089697955864, "loss": 1.0915, "step": 180 }, { "epoch": 0.0367960967676357, "grad_norm": 0.11874423176050186, "learning_rate": 0.00019644055730702736, "loss": 1.1054, "step": 181 }, { "epoch": 0.036999390119943076, "grad_norm": 0.11719070374965668, "learning_rate": 0.00019642021763449608, "loss": 1.0794, "step": 182 }, { "epoch": 0.037202683472250454, "grad_norm": 0.11768540740013123, "learning_rate": 0.0001963998779619648, "loss": 1.1453, "step": 183 }, { "epoch": 0.03740597682455784, "grad_norm": 0.12951141595840454, "learning_rate": 0.00019637953828943353, "loss": 1.1443, "step": 184 }, { "epoch": 0.03760927017686522, "grad_norm": 0.12669187784194946, "learning_rate": 0.00019635919861690228, "loss": 1.121, "step": 185 }, { "epoch": 0.037812563529172595, "grad_norm": 0.13488180935382843, "learning_rate": 0.000196338858944371, "loss": 1.2476, "step": 186 }, { "epoch": 0.03801585688147997, "grad_norm": 0.1352519690990448, "learning_rate": 0.00019631851927183973, "loss": 1.2076, "step": 187 }, { "epoch": 0.03821915023378736, "grad_norm": 0.11772511899471283, "learning_rate": 0.00019629817959930846, "loss": 1.0063, "step": 188 }, { "epoch": 0.038422443586094736, "grad_norm": 0.12861546874046326, "learning_rate": 0.0001962778399267772, "loss": 1.1495, "step": 189 }, { "epoch": 0.038625736938402114, "grad_norm": 0.1372981071472168, "learning_rate": 0.0001962575002542459, "loss": 1.1458, "step": 190 }, { "epoch": 0.03882903029070949, "grad_norm": 0.11377538740634918, "learning_rate": 0.00019623716058171463, "loss": 0.9751, "step": 191 }, { "epoch": 0.03903232364301688, "grad_norm": 0.11588437110185623, "learning_rate": 0.00019621682090918336, "loss": 1.0187, "step": 192 }, { "epoch": 0.039235616995324255, "grad_norm": 0.12083633244037628, "learning_rate": 0.0001961964812366521, "loss": 1.0916, "step": 193 }, { "epoch": 0.03943891034763163, "grad_norm": 0.12773993611335754, "learning_rate": 0.00019617614156412083, "loss": 1.2734, "step": 194 }, { "epoch": 0.03964220369993901, "grad_norm": 0.11607804894447327, "learning_rate": 0.00019615580189158956, "loss": 1.0007, "step": 195 }, { "epoch": 0.03984549705224639, "grad_norm": 0.12700581550598145, "learning_rate": 0.00019613546221905828, "loss": 1.1465, "step": 196 }, { "epoch": 0.04004879040455377, "grad_norm": 0.12830078601837158, "learning_rate": 0.00019611512254652703, "loss": 1.056, "step": 197 }, { "epoch": 0.04025208375686115, "grad_norm": 0.12503017485141754, "learning_rate": 0.00019609478287399573, "loss": 1.0332, "step": 198 }, { "epoch": 0.04045537710916853, "grad_norm": 0.13521379232406616, "learning_rate": 0.00019607444320146445, "loss": 1.209, "step": 199 }, { "epoch": 0.04065867046147591, "grad_norm": 0.12014853954315186, "learning_rate": 0.00019605410352893318, "loss": 1.0632, "step": 200 }, { "epoch": 0.04086196381378329, "grad_norm": 0.14510953426361084, "learning_rate": 0.00019603376385640193, "loss": 1.2434, "step": 201 }, { "epoch": 0.04106525716609067, "grad_norm": 0.12610237300395966, "learning_rate": 0.00019601342418387065, "loss": 1.1765, "step": 202 }, { "epoch": 0.04126855051839805, "grad_norm": 0.12680204212665558, "learning_rate": 0.00019599308451133938, "loss": 1.1754, "step": 203 }, { "epoch": 0.041471843870705426, "grad_norm": 0.1301220804452896, "learning_rate": 0.0001959727448388081, "loss": 1.2629, "step": 204 }, { "epoch": 0.04167513722301281, "grad_norm": 0.11219633370637894, "learning_rate": 0.00019595240516627685, "loss": 1.0403, "step": 205 }, { "epoch": 0.04187843057532019, "grad_norm": 0.11882266402244568, "learning_rate": 0.00019593206549374555, "loss": 0.9514, "step": 206 }, { "epoch": 0.042081723927627566, "grad_norm": 0.11426525563001633, "learning_rate": 0.00019591172582121428, "loss": 1.163, "step": 207 }, { "epoch": 0.042285017279934944, "grad_norm": 0.12243502587080002, "learning_rate": 0.000195891386148683, "loss": 1.166, "step": 208 }, { "epoch": 0.04248831063224232, "grad_norm": 0.12537699937820435, "learning_rate": 0.00019587104647615175, "loss": 1.1335, "step": 209 }, { "epoch": 0.04269160398454971, "grad_norm": 0.12613898515701294, "learning_rate": 0.00019585070680362048, "loss": 1.2059, "step": 210 }, { "epoch": 0.042894897336857085, "grad_norm": 0.11983931809663773, "learning_rate": 0.0001958303671310892, "loss": 1.0849, "step": 211 }, { "epoch": 0.04309819068916446, "grad_norm": 0.12225540727376938, "learning_rate": 0.00019581002745855793, "loss": 1.1581, "step": 212 }, { "epoch": 0.04330148404147184, "grad_norm": 0.12691259384155273, "learning_rate": 0.00019578968778602668, "loss": 1.2828, "step": 213 }, { "epoch": 0.043504777393779226, "grad_norm": 0.11088678985834122, "learning_rate": 0.00019576934811349538, "loss": 1.0987, "step": 214 }, { "epoch": 0.043708070746086604, "grad_norm": 0.14477139711380005, "learning_rate": 0.0001957490084409641, "loss": 1.3049, "step": 215 }, { "epoch": 0.04391136409839398, "grad_norm": 0.12409314513206482, "learning_rate": 0.00019572866876843282, "loss": 1.1617, "step": 216 }, { "epoch": 0.04411465745070136, "grad_norm": 0.12076637893915176, "learning_rate": 0.00019570832909590155, "loss": 1.1163, "step": 217 }, { "epoch": 0.044317950803008745, "grad_norm": 0.1195930689573288, "learning_rate": 0.0001956879894233703, "loss": 1.3286, "step": 218 }, { "epoch": 0.04452124415531612, "grad_norm": 0.11751043051481247, "learning_rate": 0.00019566764975083902, "loss": 1.0493, "step": 219 }, { "epoch": 0.0447245375076235, "grad_norm": 0.12073373049497604, "learning_rate": 0.00019564731007830775, "loss": 1.088, "step": 220 }, { "epoch": 0.04492783085993088, "grad_norm": 0.11219310760498047, "learning_rate": 0.00019562697040577647, "loss": 1.0455, "step": 221 }, { "epoch": 0.045131124212238256, "grad_norm": 0.11247701942920685, "learning_rate": 0.0001956066307332452, "loss": 1.0703, "step": 222 }, { "epoch": 0.04533441756454564, "grad_norm": 0.11884698271751404, "learning_rate": 0.00019558629106071392, "loss": 1.2793, "step": 223 }, { "epoch": 0.04553771091685302, "grad_norm": 0.12219983339309692, "learning_rate": 0.00019556595138818265, "loss": 1.1819, "step": 224 }, { "epoch": 0.0457410042691604, "grad_norm": 0.1017618402838707, "learning_rate": 0.00019554561171565137, "loss": 0.9348, "step": 225 }, { "epoch": 0.045944297621467775, "grad_norm": 0.11600673943758011, "learning_rate": 0.00019552527204312012, "loss": 1.0054, "step": 226 }, { "epoch": 0.04614759097377516, "grad_norm": 0.13273292779922485, "learning_rate": 0.00019550493237058885, "loss": 1.2805, "step": 227 }, { "epoch": 0.04635088432608254, "grad_norm": 0.11936032027006149, "learning_rate": 0.00019548459269805757, "loss": 1.1255, "step": 228 }, { "epoch": 0.046554177678389916, "grad_norm": 0.12319690734148026, "learning_rate": 0.0001954642530255263, "loss": 1.3222, "step": 229 }, { "epoch": 0.046757471030697294, "grad_norm": 0.11699585616588593, "learning_rate": 0.00019544391335299502, "loss": 1.1299, "step": 230 }, { "epoch": 0.04696076438300468, "grad_norm": 0.11112070828676224, "learning_rate": 0.00019542357368046375, "loss": 1.1836, "step": 231 }, { "epoch": 0.04716405773531206, "grad_norm": 0.13511928915977478, "learning_rate": 0.00019540323400793247, "loss": 1.1319, "step": 232 }, { "epoch": 0.047367351087619435, "grad_norm": 0.12584780156612396, "learning_rate": 0.0001953828943354012, "loss": 1.0862, "step": 233 }, { "epoch": 0.04757064443992681, "grad_norm": 0.12041206657886505, "learning_rate": 0.00019536255466286995, "loss": 1.0866, "step": 234 }, { "epoch": 0.0477739377922342, "grad_norm": 0.1117459088563919, "learning_rate": 0.00019534221499033867, "loss": 1.0334, "step": 235 }, { "epoch": 0.047977231144541575, "grad_norm": 0.11388564109802246, "learning_rate": 0.0001953218753178074, "loss": 1.0335, "step": 236 }, { "epoch": 0.04818052449684895, "grad_norm": 0.11740144342184067, "learning_rate": 0.00019530153564527612, "loss": 1.0301, "step": 237 }, { "epoch": 0.04838381784915633, "grad_norm": 0.1059211865067482, "learning_rate": 0.00019528119597274484, "loss": 0.933, "step": 238 }, { "epoch": 0.04858711120146371, "grad_norm": 0.12493643909692764, "learning_rate": 0.00019526085630021357, "loss": 1.129, "step": 239 }, { "epoch": 0.048790404553771094, "grad_norm": 0.11791351437568665, "learning_rate": 0.0001952405166276823, "loss": 1.0817, "step": 240 }, { "epoch": 0.04899369790607847, "grad_norm": 0.12103426456451416, "learning_rate": 0.00019522017695515102, "loss": 1.0588, "step": 241 }, { "epoch": 0.04919699125838585, "grad_norm": 0.12383697926998138, "learning_rate": 0.00019519983728261977, "loss": 1.1269, "step": 242 }, { "epoch": 0.04940028461069323, "grad_norm": 0.10310048609972, "learning_rate": 0.0001951794976100885, "loss": 0.9393, "step": 243 }, { "epoch": 0.04960357796300061, "grad_norm": 0.11793255805969238, "learning_rate": 0.00019515915793755722, "loss": 1.1511, "step": 244 }, { "epoch": 0.04980687131530799, "grad_norm": 0.12708383798599243, "learning_rate": 0.00019513881826502594, "loss": 1.1525, "step": 245 }, { "epoch": 0.05001016466761537, "grad_norm": 0.13343508541584015, "learning_rate": 0.0001951184785924947, "loss": 1.2057, "step": 246 }, { "epoch": 0.050213458019922746, "grad_norm": 0.12891672551631927, "learning_rate": 0.0001950981389199634, "loss": 1.1788, "step": 247 }, { "epoch": 0.05041675137223013, "grad_norm": 0.11919089406728745, "learning_rate": 0.00019507779924743212, "loss": 0.8968, "step": 248 }, { "epoch": 0.05062004472453751, "grad_norm": 0.11912382394075394, "learning_rate": 0.00019505745957490084, "loss": 1.0667, "step": 249 }, { "epoch": 0.05082333807684489, "grad_norm": 0.12510718405246735, "learning_rate": 0.0001950371199023696, "loss": 0.9301, "step": 250 }, { "epoch": 0.051026631429152265, "grad_norm": 0.13244077563285828, "learning_rate": 0.00019501678022983832, "loss": 1.1302, "step": 251 }, { "epoch": 0.05122992478145964, "grad_norm": 0.11624693870544434, "learning_rate": 0.00019499644055730704, "loss": 1.1317, "step": 252 }, { "epoch": 0.05143321813376703, "grad_norm": 0.11178990453481674, "learning_rate": 0.00019497610088477576, "loss": 1.0696, "step": 253 }, { "epoch": 0.051636511486074406, "grad_norm": 0.12613075971603394, "learning_rate": 0.00019495576121224452, "loss": 1.2195, "step": 254 }, { "epoch": 0.051839804838381784, "grad_norm": 0.13160696625709534, "learning_rate": 0.00019493542153971321, "loss": 1.0333, "step": 255 }, { "epoch": 0.05204309819068916, "grad_norm": 0.11390336602926254, "learning_rate": 0.00019491508186718194, "loss": 0.9062, "step": 256 }, { "epoch": 0.05224639154299655, "grad_norm": 0.12177371233701706, "learning_rate": 0.00019489474219465066, "loss": 1.1687, "step": 257 }, { "epoch": 0.052449684895303925, "grad_norm": 0.12821920216083527, "learning_rate": 0.0001948744025221194, "loss": 1.0509, "step": 258 }, { "epoch": 0.0526529782476113, "grad_norm": 0.11554522067308426, "learning_rate": 0.00019485406284958814, "loss": 0.9503, "step": 259 }, { "epoch": 0.05285627159991868, "grad_norm": 0.1144140213727951, "learning_rate": 0.00019483372317705686, "loss": 1.0419, "step": 260 }, { "epoch": 0.053059564952226065, "grad_norm": 0.12091881781816483, "learning_rate": 0.0001948133835045256, "loss": 1.1362, "step": 261 }, { "epoch": 0.05326285830453344, "grad_norm": 0.1322740912437439, "learning_rate": 0.0001947930438319943, "loss": 1.2769, "step": 262 }, { "epoch": 0.05346615165684082, "grad_norm": 0.12368176877498627, "learning_rate": 0.00019477270415946304, "loss": 1.1871, "step": 263 }, { "epoch": 0.0536694450091482, "grad_norm": 0.11115586012601852, "learning_rate": 0.00019475236448693176, "loss": 1.0479, "step": 264 }, { "epoch": 0.05387273836145558, "grad_norm": 0.1282634437084198, "learning_rate": 0.00019473202481440049, "loss": 1.038, "step": 265 }, { "epoch": 0.05407603171376296, "grad_norm": 0.11252263188362122, "learning_rate": 0.0001947116851418692, "loss": 1.1254, "step": 266 }, { "epoch": 0.05427932506607034, "grad_norm": 0.10750589519739151, "learning_rate": 0.00019469134546933796, "loss": 0.8887, "step": 267 }, { "epoch": 0.05448261841837772, "grad_norm": 0.1257811188697815, "learning_rate": 0.00019467100579680669, "loss": 1.3177, "step": 268 }, { "epoch": 0.054685911770685096, "grad_norm": 0.13415637612342834, "learning_rate": 0.0001946506661242754, "loss": 1.2538, "step": 269 }, { "epoch": 0.05488920512299248, "grad_norm": 0.11637566983699799, "learning_rate": 0.00019463032645174413, "loss": 1.0536, "step": 270 }, { "epoch": 0.05509249847529986, "grad_norm": 0.12544845044612885, "learning_rate": 0.00019460998677921286, "loss": 1.2673, "step": 271 }, { "epoch": 0.05529579182760724, "grad_norm": 0.13013462722301483, "learning_rate": 0.00019458964710668158, "loss": 1.1977, "step": 272 }, { "epoch": 0.055499085179914615, "grad_norm": 0.13211217522621155, "learning_rate": 0.0001945693074341503, "loss": 1.1317, "step": 273 }, { "epoch": 0.055702378532222, "grad_norm": 0.13389961421489716, "learning_rate": 0.00019454896776161903, "loss": 1.1075, "step": 274 }, { "epoch": 0.05590567188452938, "grad_norm": 0.12008912861347198, "learning_rate": 0.00019452862808908778, "loss": 0.9692, "step": 275 }, { "epoch": 0.056108965236836755, "grad_norm": 0.1291409730911255, "learning_rate": 0.0001945082884165565, "loss": 1.2363, "step": 276 }, { "epoch": 0.05631225858914413, "grad_norm": 0.12915107607841492, "learning_rate": 0.00019448794874402523, "loss": 1.3091, "step": 277 }, { "epoch": 0.05651555194145151, "grad_norm": 0.11216573417186737, "learning_rate": 0.00019446760907149396, "loss": 1.0992, "step": 278 }, { "epoch": 0.056718845293758896, "grad_norm": 0.10683475434780121, "learning_rate": 0.00019444726939896268, "loss": 0.9928, "step": 279 }, { "epoch": 0.056922138646066274, "grad_norm": 0.12699760496616364, "learning_rate": 0.0001944269297264314, "loss": 1.1559, "step": 280 }, { "epoch": 0.05712543199837365, "grad_norm": 0.1270214468240738, "learning_rate": 0.00019440659005390013, "loss": 0.9364, "step": 281 }, { "epoch": 0.05732872535068103, "grad_norm": 0.13140781223773956, "learning_rate": 0.00019438625038136886, "loss": 1.1587, "step": 282 }, { "epoch": 0.057532018702988415, "grad_norm": 0.127557173371315, "learning_rate": 0.0001943659107088376, "loss": 1.0203, "step": 283 }, { "epoch": 0.05773531205529579, "grad_norm": 0.13735321164131165, "learning_rate": 0.00019434557103630633, "loss": 1.3412, "step": 284 }, { "epoch": 0.05793860540760317, "grad_norm": 0.11763381958007812, "learning_rate": 0.00019432523136377506, "loss": 1.1305, "step": 285 }, { "epoch": 0.05814189875991055, "grad_norm": 0.1292058527469635, "learning_rate": 0.00019430489169124378, "loss": 1.227, "step": 286 }, { "epoch": 0.05834519211221793, "grad_norm": 0.1357347071170807, "learning_rate": 0.0001942845520187125, "loss": 1.0898, "step": 287 }, { "epoch": 0.05854848546452531, "grad_norm": 0.13546323776245117, "learning_rate": 0.00019426421234618123, "loss": 1.1308, "step": 288 }, { "epoch": 0.05875177881683269, "grad_norm": 0.12612831592559814, "learning_rate": 0.00019424387267364995, "loss": 1.1161, "step": 289 }, { "epoch": 0.05895507216914007, "grad_norm": 0.12061580270528793, "learning_rate": 0.00019422353300111868, "loss": 0.9395, "step": 290 }, { "epoch": 0.059158365521447445, "grad_norm": 0.12118272483348846, "learning_rate": 0.00019420319332858743, "loss": 1.0597, "step": 291 }, { "epoch": 0.05936165887375483, "grad_norm": 0.11357955634593964, "learning_rate": 0.00019418285365605615, "loss": 1.1651, "step": 292 }, { "epoch": 0.05956495222606221, "grad_norm": 0.11546896398067474, "learning_rate": 0.00019416251398352488, "loss": 0.9093, "step": 293 }, { "epoch": 0.059768245578369586, "grad_norm": 0.12699609994888306, "learning_rate": 0.0001941421743109936, "loss": 1.1878, "step": 294 }, { "epoch": 0.059971538930676964, "grad_norm": 0.11789494752883911, "learning_rate": 0.00019412183463846233, "loss": 1.0162, "step": 295 }, { "epoch": 0.06017483228298435, "grad_norm": 0.11362869292497635, "learning_rate": 0.00019410149496593105, "loss": 1.0056, "step": 296 }, { "epoch": 0.06037812563529173, "grad_norm": 0.125663161277771, "learning_rate": 0.00019408115529339978, "loss": 1.083, "step": 297 }, { "epoch": 0.060581418987599105, "grad_norm": 0.11303743720054626, "learning_rate": 0.0001940608156208685, "loss": 1.1573, "step": 298 }, { "epoch": 0.06078471233990648, "grad_norm": 0.11955615878105164, "learning_rate": 0.00019404047594833723, "loss": 0.9637, "step": 299 }, { "epoch": 0.06098800569221387, "grad_norm": 0.11959411948919296, "learning_rate": 0.00019402013627580598, "loss": 1.1023, "step": 300 }, { "epoch": 0.061191299044521245, "grad_norm": 0.1248716339468956, "learning_rate": 0.0001939997966032747, "loss": 1.2881, "step": 301 }, { "epoch": 0.06139459239682862, "grad_norm": 0.1136515811085701, "learning_rate": 0.00019397945693074343, "loss": 1.0921, "step": 302 }, { "epoch": 0.061597885749136, "grad_norm": 0.11583786457777023, "learning_rate": 0.00019395911725821215, "loss": 1.0758, "step": 303 }, { "epoch": 0.061801179101443386, "grad_norm": 0.12685681879520416, "learning_rate": 0.00019393877758568087, "loss": 1.2444, "step": 304 }, { "epoch": 0.062004472453750764, "grad_norm": 0.15549907088279724, "learning_rate": 0.0001939184379131496, "loss": 1.4011, "step": 305 }, { "epoch": 0.06220776580605814, "grad_norm": 0.11548073589801788, "learning_rate": 0.00019389809824061832, "loss": 1.0536, "step": 306 }, { "epoch": 0.06241105915836552, "grad_norm": 0.11526035517454147, "learning_rate": 0.00019387775856808705, "loss": 0.9051, "step": 307 }, { "epoch": 0.0626143525106729, "grad_norm": 0.10682015866041183, "learning_rate": 0.0001938574188955558, "loss": 0.9744, "step": 308 }, { "epoch": 0.06281764586298028, "grad_norm": 0.11594579368829727, "learning_rate": 0.00019383707922302452, "loss": 1.0771, "step": 309 }, { "epoch": 0.06302093921528766, "grad_norm": 0.11397954076528549, "learning_rate": 0.00019381673955049325, "loss": 0.9991, "step": 310 }, { "epoch": 0.06322423256759505, "grad_norm": 0.12746506929397583, "learning_rate": 0.00019379639987796197, "loss": 1.1141, "step": 311 }, { "epoch": 0.06342752591990242, "grad_norm": 0.11370940506458282, "learning_rate": 0.0001937760602054307, "loss": 0.9784, "step": 312 }, { "epoch": 0.0636308192722098, "grad_norm": 0.11094705015420914, "learning_rate": 0.00019375572053289942, "loss": 0.9092, "step": 313 }, { "epoch": 0.06383411262451717, "grad_norm": 0.12067949026823044, "learning_rate": 0.00019373538086036815, "loss": 1.0812, "step": 314 }, { "epoch": 0.06403740597682456, "grad_norm": 0.11797504872083664, "learning_rate": 0.00019371504118783687, "loss": 1.0659, "step": 315 }, { "epoch": 0.06424069932913194, "grad_norm": 0.10436304658651352, "learning_rate": 0.00019369470151530562, "loss": 1.0226, "step": 316 }, { "epoch": 0.06444399268143931, "grad_norm": 0.1373065710067749, "learning_rate": 0.00019367436184277435, "loss": 1.1967, "step": 317 }, { "epoch": 0.0646472860337467, "grad_norm": 0.12204968929290771, "learning_rate": 0.00019365402217024307, "loss": 1.138, "step": 318 }, { "epoch": 0.06485057938605407, "grad_norm": 0.11520784348249435, "learning_rate": 0.0001936336824977118, "loss": 1.0148, "step": 319 }, { "epoch": 0.06505387273836145, "grad_norm": 0.12380523979663849, "learning_rate": 0.00019361334282518052, "loss": 1.1577, "step": 320 }, { "epoch": 0.06525716609066884, "grad_norm": 0.12227565050125122, "learning_rate": 0.00019359300315264924, "loss": 1.0843, "step": 321 }, { "epoch": 0.06546045944297621, "grad_norm": 0.12836994230747223, "learning_rate": 0.00019357266348011797, "loss": 1.137, "step": 322 }, { "epoch": 0.0656637527952836, "grad_norm": 0.1091795489192009, "learning_rate": 0.0001935523238075867, "loss": 1.1794, "step": 323 }, { "epoch": 0.06586704614759098, "grad_norm": 0.11629168689250946, "learning_rate": 0.00019353198413505545, "loss": 1.1052, "step": 324 }, { "epoch": 0.06607033949989835, "grad_norm": 0.12525077164173126, "learning_rate": 0.00019351164446252417, "loss": 1.0891, "step": 325 }, { "epoch": 0.06627363285220574, "grad_norm": 0.12222876399755478, "learning_rate": 0.0001934913047899929, "loss": 1.2059, "step": 326 }, { "epoch": 0.0664769262045131, "grad_norm": 0.12146129459142685, "learning_rate": 0.00019347096511746162, "loss": 1.1067, "step": 327 }, { "epoch": 0.06668021955682049, "grad_norm": 0.11807144433259964, "learning_rate": 0.00019345062544493034, "loss": 1.0953, "step": 328 }, { "epoch": 0.06688351290912788, "grad_norm": 0.11966339498758316, "learning_rate": 0.00019343028577239907, "loss": 1.071, "step": 329 }, { "epoch": 0.06708680626143525, "grad_norm": 0.1203102245926857, "learning_rate": 0.0001934099460998678, "loss": 1.0197, "step": 330 }, { "epoch": 0.06729009961374263, "grad_norm": 0.1138140857219696, "learning_rate": 0.00019338960642733652, "loss": 0.9191, "step": 331 }, { "epoch": 0.06749339296605002, "grad_norm": 0.12846186757087708, "learning_rate": 0.00019336926675480527, "loss": 1.0951, "step": 332 }, { "epoch": 0.06769668631835739, "grad_norm": 0.12961986660957336, "learning_rate": 0.000193348927082274, "loss": 1.1309, "step": 333 }, { "epoch": 0.06789997967066477, "grad_norm": 0.12339945137500763, "learning_rate": 0.00019332858740974272, "loss": 1.0363, "step": 334 }, { "epoch": 0.06810327302297214, "grad_norm": 0.13385798037052155, "learning_rate": 0.00019330824773721144, "loss": 1.114, "step": 335 }, { "epoch": 0.06830656637527953, "grad_norm": 0.13270089030265808, "learning_rate": 0.00019328790806468017, "loss": 1.2388, "step": 336 }, { "epoch": 0.06850985972758691, "grad_norm": 0.11112480610609055, "learning_rate": 0.0001932675683921489, "loss": 1.1178, "step": 337 }, { "epoch": 0.06871315307989428, "grad_norm": 0.12246957421302795, "learning_rate": 0.00019324722871961761, "loss": 1.233, "step": 338 }, { "epoch": 0.06891644643220167, "grad_norm": 0.12208685278892517, "learning_rate": 0.00019322688904708634, "loss": 1.1145, "step": 339 }, { "epoch": 0.06911973978450904, "grad_norm": 0.11839979141950607, "learning_rate": 0.00019320654937455506, "loss": 0.986, "step": 340 }, { "epoch": 0.06932303313681643, "grad_norm": 0.13268662989139557, "learning_rate": 0.00019318620970202382, "loss": 1.0527, "step": 341 }, { "epoch": 0.06952632648912381, "grad_norm": 0.11831391602754593, "learning_rate": 0.00019316587002949254, "loss": 1.2318, "step": 342 }, { "epoch": 0.06972961984143118, "grad_norm": 0.11892188340425491, "learning_rate": 0.00019314553035696126, "loss": 1.2, "step": 343 }, { "epoch": 0.06993291319373857, "grad_norm": 0.13015909492969513, "learning_rate": 0.00019312519068443, "loss": 1.1622, "step": 344 }, { "epoch": 0.07013620654604595, "grad_norm": 0.10422676056623459, "learning_rate": 0.0001931048510118987, "loss": 0.9258, "step": 345 }, { "epoch": 0.07033949989835332, "grad_norm": 0.10162926465272903, "learning_rate": 0.00019308451133936744, "loss": 0.9709, "step": 346 }, { "epoch": 0.0705427932506607, "grad_norm": 0.12753081321716309, "learning_rate": 0.00019306417166683616, "loss": 1.1193, "step": 347 }, { "epoch": 0.07074608660296808, "grad_norm": 0.12309850752353668, "learning_rate": 0.0001930438319943049, "loss": 1.2146, "step": 348 }, { "epoch": 0.07094937995527546, "grad_norm": 0.13199441134929657, "learning_rate": 0.00019302349232177364, "loss": 1.1723, "step": 349 }, { "epoch": 0.07115267330758285, "grad_norm": 0.12041430175304413, "learning_rate": 0.00019300315264924236, "loss": 1.1115, "step": 350 }, { "epoch": 0.07135596665989022, "grad_norm": 0.11456899344921112, "learning_rate": 0.0001929828129767111, "loss": 1.0829, "step": 351 }, { "epoch": 0.0715592600121976, "grad_norm": 0.12147854268550873, "learning_rate": 0.0001929624733041798, "loss": 1.1963, "step": 352 }, { "epoch": 0.07176255336450497, "grad_norm": 0.13312789797782898, "learning_rate": 0.00019294213363164854, "loss": 1.1182, "step": 353 }, { "epoch": 0.07196584671681236, "grad_norm": 0.1078067272901535, "learning_rate": 0.00019292179395911726, "loss": 0.9415, "step": 354 }, { "epoch": 0.07216914006911974, "grad_norm": 0.1231444925069809, "learning_rate": 0.00019290145428658598, "loss": 1.2428, "step": 355 }, { "epoch": 0.07237243342142712, "grad_norm": 0.13848941028118134, "learning_rate": 0.0001928811146140547, "loss": 1.1953, "step": 356 }, { "epoch": 0.0725757267737345, "grad_norm": 0.11954299360513687, "learning_rate": 0.00019286077494152346, "loss": 0.9834, "step": 357 }, { "epoch": 0.07277902012604189, "grad_norm": 0.11029402166604996, "learning_rate": 0.00019284043526899219, "loss": 1.0635, "step": 358 }, { "epoch": 0.07298231347834926, "grad_norm": 0.11941875517368317, "learning_rate": 0.0001928200955964609, "loss": 1.1698, "step": 359 }, { "epoch": 0.07318560683065664, "grad_norm": 0.11633221805095673, "learning_rate": 0.00019279975592392963, "loss": 0.9411, "step": 360 }, { "epoch": 0.07338890018296401, "grad_norm": 0.11820893734693527, "learning_rate": 0.00019277941625139836, "loss": 1.0487, "step": 361 }, { "epoch": 0.0735921935352714, "grad_norm": 0.14069049060344696, "learning_rate": 0.00019275907657886708, "loss": 1.2295, "step": 362 }, { "epoch": 0.07379548688757878, "grad_norm": 0.12828344106674194, "learning_rate": 0.0001927387369063358, "loss": 1.1904, "step": 363 }, { "epoch": 0.07399878023988615, "grad_norm": 0.12259247899055481, "learning_rate": 0.00019271839723380453, "loss": 1.0655, "step": 364 }, { "epoch": 0.07420207359219354, "grad_norm": 0.12864744663238525, "learning_rate": 0.00019269805756127328, "loss": 1.207, "step": 365 }, { "epoch": 0.07440536694450091, "grad_norm": 0.1141364574432373, "learning_rate": 0.000192677717888742, "loss": 0.9853, "step": 366 }, { "epoch": 0.0746086602968083, "grad_norm": 0.10614699870347977, "learning_rate": 0.00019265737821621073, "loss": 1.0003, "step": 367 }, { "epoch": 0.07481195364911568, "grad_norm": 0.1159566193819046, "learning_rate": 0.00019263703854367946, "loss": 1.1753, "step": 368 }, { "epoch": 0.07501524700142305, "grad_norm": 0.11285501718521118, "learning_rate": 0.00019261669887114818, "loss": 1.0592, "step": 369 }, { "epoch": 0.07521854035373043, "grad_norm": 0.11360286176204681, "learning_rate": 0.0001925963591986169, "loss": 0.9719, "step": 370 }, { "epoch": 0.07542183370603782, "grad_norm": 0.1143144741654396, "learning_rate": 0.00019257601952608563, "loss": 1.0623, "step": 371 }, { "epoch": 0.07562512705834519, "grad_norm": 0.11664289981126785, "learning_rate": 0.00019255567985355436, "loss": 0.9849, "step": 372 }, { "epoch": 0.07582842041065257, "grad_norm": 0.11677186191082001, "learning_rate": 0.0001925353401810231, "loss": 0.9926, "step": 373 }, { "epoch": 0.07603171376295995, "grad_norm": 0.12509550154209137, "learning_rate": 0.00019251500050849183, "loss": 1.1648, "step": 374 }, { "epoch": 0.07623500711526733, "grad_norm": 0.13659395277500153, "learning_rate": 0.00019249466083596056, "loss": 1.2005, "step": 375 }, { "epoch": 0.07643830046757472, "grad_norm": 0.11500003188848495, "learning_rate": 0.00019247432116342928, "loss": 1.1287, "step": 376 }, { "epoch": 0.07664159381988209, "grad_norm": 0.11376544088125229, "learning_rate": 0.000192453981490898, "loss": 1.0013, "step": 377 }, { "epoch": 0.07684488717218947, "grad_norm": 0.13335828483104706, "learning_rate": 0.00019243364181836673, "loss": 1.1969, "step": 378 }, { "epoch": 0.07704818052449684, "grad_norm": 0.1245710700750351, "learning_rate": 0.00019241330214583545, "loss": 1.2461, "step": 379 }, { "epoch": 0.07725147387680423, "grad_norm": 0.12159935384988785, "learning_rate": 0.00019239296247330418, "loss": 1.0066, "step": 380 }, { "epoch": 0.07745476722911161, "grad_norm": 0.1263132244348526, "learning_rate": 0.0001923726228007729, "loss": 1.1993, "step": 381 }, { "epoch": 0.07765806058141898, "grad_norm": 0.11738517135381699, "learning_rate": 0.00019235228312824165, "loss": 1.19, "step": 382 }, { "epoch": 0.07786135393372637, "grad_norm": 0.13438478112220764, "learning_rate": 0.00019233194345571038, "loss": 1.1794, "step": 383 }, { "epoch": 0.07806464728603375, "grad_norm": 0.1180570125579834, "learning_rate": 0.0001923116037831791, "loss": 1.0685, "step": 384 }, { "epoch": 0.07826794063834112, "grad_norm": 0.13014809787273407, "learning_rate": 0.00019229126411064783, "loss": 1.1838, "step": 385 }, { "epoch": 0.07847123399064851, "grad_norm": 0.12478948384523392, "learning_rate": 0.00019227092443811655, "loss": 1.1978, "step": 386 }, { "epoch": 0.07867452734295588, "grad_norm": 0.10319990664720535, "learning_rate": 0.00019225058476558528, "loss": 1.1273, "step": 387 }, { "epoch": 0.07887782069526326, "grad_norm": 0.11172400414943695, "learning_rate": 0.000192230245093054, "loss": 0.9054, "step": 388 }, { "epoch": 0.07908111404757065, "grad_norm": 0.12951341271400452, "learning_rate": 0.00019220990542052273, "loss": 1.1554, "step": 389 }, { "epoch": 0.07928440739987802, "grad_norm": 0.13350042700767517, "learning_rate": 0.00019218956574799148, "loss": 1.1787, "step": 390 }, { "epoch": 0.0794877007521854, "grad_norm": 0.11068174242973328, "learning_rate": 0.0001921692260754602, "loss": 1.1072, "step": 391 }, { "epoch": 0.07969099410449278, "grad_norm": 0.09952767938375473, "learning_rate": 0.00019214888640292893, "loss": 1.0071, "step": 392 }, { "epoch": 0.07989428745680016, "grad_norm": 0.10815319418907166, "learning_rate": 0.00019212854673039765, "loss": 0.8681, "step": 393 }, { "epoch": 0.08009758080910755, "grad_norm": 0.1121988445520401, "learning_rate": 0.00019210820705786637, "loss": 0.987, "step": 394 }, { "epoch": 0.08030087416141492, "grad_norm": 0.10137449204921722, "learning_rate": 0.0001920878673853351, "loss": 0.8968, "step": 395 }, { "epoch": 0.0805041675137223, "grad_norm": 0.09827956557273865, "learning_rate": 0.00019206752771280382, "loss": 0.8864, "step": 396 }, { "epoch": 0.08070746086602969, "grad_norm": 0.11967012286186218, "learning_rate": 0.00019204718804027255, "loss": 1.085, "step": 397 }, { "epoch": 0.08091075421833706, "grad_norm": 0.11249358206987381, "learning_rate": 0.0001920268483677413, "loss": 1.0201, "step": 398 }, { "epoch": 0.08111404757064444, "grad_norm": 0.12788376212120056, "learning_rate": 0.00019200650869521002, "loss": 1.0529, "step": 399 }, { "epoch": 0.08131734092295181, "grad_norm": 0.11879412829875946, "learning_rate": 0.00019198616902267875, "loss": 1.0418, "step": 400 }, { "epoch": 0.0815206342752592, "grad_norm": 0.11404243856668472, "learning_rate": 0.00019196582935014747, "loss": 0.9311, "step": 401 }, { "epoch": 0.08172392762756658, "grad_norm": 0.13113105297088623, "learning_rate": 0.0001919454896776162, "loss": 1.2886, "step": 402 }, { "epoch": 0.08192722097987395, "grad_norm": 0.12636548280715942, "learning_rate": 0.00019192515000508492, "loss": 1.0967, "step": 403 }, { "epoch": 0.08213051433218134, "grad_norm": 0.1245994120836258, "learning_rate": 0.00019190481033255365, "loss": 1.0426, "step": 404 }, { "epoch": 0.08233380768448871, "grad_norm": 0.12495577335357666, "learning_rate": 0.00019188447066002237, "loss": 1.1212, "step": 405 }, { "epoch": 0.0825371010367961, "grad_norm": 0.112003855407238, "learning_rate": 0.00019186413098749112, "loss": 0.9948, "step": 406 }, { "epoch": 0.08274039438910348, "grad_norm": 0.11918698996305466, "learning_rate": 0.00019184379131495985, "loss": 1.0927, "step": 407 }, { "epoch": 0.08294368774141085, "grad_norm": 0.11620672792196274, "learning_rate": 0.00019182345164242857, "loss": 1.0805, "step": 408 }, { "epoch": 0.08314698109371824, "grad_norm": 0.12570421397686005, "learning_rate": 0.0001918031119698973, "loss": 1.1484, "step": 409 }, { "epoch": 0.08335027444602562, "grad_norm": 0.12078004330396652, "learning_rate": 0.00019178277229736602, "loss": 1.248, "step": 410 }, { "epoch": 0.08355356779833299, "grad_norm": 0.1178092435002327, "learning_rate": 0.00019176243262483474, "loss": 1.1365, "step": 411 }, { "epoch": 0.08375686115064038, "grad_norm": 0.13181130588054657, "learning_rate": 0.00019174209295230347, "loss": 1.335, "step": 412 }, { "epoch": 0.08396015450294775, "grad_norm": 0.1192195788025856, "learning_rate": 0.0001917217532797722, "loss": 1.119, "step": 413 }, { "epoch": 0.08416344785525513, "grad_norm": 0.12525242567062378, "learning_rate": 0.00019170141360724095, "loss": 1.2269, "step": 414 }, { "epoch": 0.08436674120756252, "grad_norm": 0.12473724037408829, "learning_rate": 0.00019168107393470967, "loss": 1.2479, "step": 415 }, { "epoch": 0.08457003455986989, "grad_norm": 0.1118764728307724, "learning_rate": 0.0001916607342621784, "loss": 1.0089, "step": 416 }, { "epoch": 0.08477332791217727, "grad_norm": 0.11220741271972656, "learning_rate": 0.00019164039458964712, "loss": 0.9793, "step": 417 }, { "epoch": 0.08497662126448464, "grad_norm": 0.1261814385652542, "learning_rate": 0.00019162005491711584, "loss": 1.092, "step": 418 }, { "epoch": 0.08517991461679203, "grad_norm": 0.12782973051071167, "learning_rate": 0.00019159971524458457, "loss": 1.08, "step": 419 }, { "epoch": 0.08538320796909941, "grad_norm": 0.12007841467857361, "learning_rate": 0.0001915793755720533, "loss": 1.0856, "step": 420 }, { "epoch": 0.08558650132140679, "grad_norm": 0.1249847337603569, "learning_rate": 0.00019155903589952202, "loss": 1.1314, "step": 421 }, { "epoch": 0.08578979467371417, "grad_norm": 0.10619431734085083, "learning_rate": 0.00019153869622699074, "loss": 1.0298, "step": 422 }, { "epoch": 0.08599308802602156, "grad_norm": 0.12282367795705795, "learning_rate": 0.0001915183565544595, "loss": 1.1277, "step": 423 }, { "epoch": 0.08619638137832893, "grad_norm": 0.12001215666532516, "learning_rate": 0.00019149801688192822, "loss": 1.0792, "step": 424 }, { "epoch": 0.08639967473063631, "grad_norm": 0.10283269733190536, "learning_rate": 0.00019147767720939694, "loss": 0.9422, "step": 425 }, { "epoch": 0.08660296808294368, "grad_norm": 0.11698923259973526, "learning_rate": 0.00019145733753686567, "loss": 1.0371, "step": 426 }, { "epoch": 0.08680626143525107, "grad_norm": 0.11874233931303024, "learning_rate": 0.0001914369978643344, "loss": 1.042, "step": 427 }, { "epoch": 0.08700955478755845, "grad_norm": 0.10154362767934799, "learning_rate": 0.00019141665819180311, "loss": 0.9436, "step": 428 }, { "epoch": 0.08721284813986582, "grad_norm": 0.10885417461395264, "learning_rate": 0.00019139631851927184, "loss": 1.0823, "step": 429 }, { "epoch": 0.08741614149217321, "grad_norm": 0.11313669383525848, "learning_rate": 0.00019137597884674056, "loss": 1.0905, "step": 430 }, { "epoch": 0.08761943484448058, "grad_norm": 0.12074249237775803, "learning_rate": 0.00019135563917420932, "loss": 1.1466, "step": 431 }, { "epoch": 0.08782272819678796, "grad_norm": 0.12890012562274933, "learning_rate": 0.00019133529950167804, "loss": 1.1222, "step": 432 }, { "epoch": 0.08802602154909535, "grad_norm": 0.12527287006378174, "learning_rate": 0.00019131495982914676, "loss": 1.0391, "step": 433 }, { "epoch": 0.08822931490140272, "grad_norm": 0.11698780208826065, "learning_rate": 0.0001912946201566155, "loss": 0.9235, "step": 434 }, { "epoch": 0.0884326082537101, "grad_norm": 0.11191095411777496, "learning_rate": 0.0001912742804840842, "loss": 0.9763, "step": 435 }, { "epoch": 0.08863590160601749, "grad_norm": 0.1118699237704277, "learning_rate": 0.00019125394081155294, "loss": 0.9919, "step": 436 }, { "epoch": 0.08883919495832486, "grad_norm": 0.10507287830114365, "learning_rate": 0.00019123360113902166, "loss": 0.8505, "step": 437 }, { "epoch": 0.08904248831063225, "grad_norm": 0.1091250404715538, "learning_rate": 0.00019121326146649039, "loss": 0.9453, "step": 438 }, { "epoch": 0.08924578166293962, "grad_norm": 0.10213371366262436, "learning_rate": 0.00019119292179395914, "loss": 0.9082, "step": 439 }, { "epoch": 0.089449075015247, "grad_norm": 0.1446637064218521, "learning_rate": 0.00019117258212142786, "loss": 1.2307, "step": 440 }, { "epoch": 0.08965236836755439, "grad_norm": 0.13018859922885895, "learning_rate": 0.0001911522424488966, "loss": 1.1052, "step": 441 }, { "epoch": 0.08985566171986176, "grad_norm": 0.1239272952079773, "learning_rate": 0.0001911319027763653, "loss": 1.0036, "step": 442 }, { "epoch": 0.09005895507216914, "grad_norm": 0.1135847195982933, "learning_rate": 0.00019111156310383404, "loss": 1.0524, "step": 443 }, { "epoch": 0.09026224842447651, "grad_norm": 0.1171732023358345, "learning_rate": 0.00019109122343130276, "loss": 1.1769, "step": 444 }, { "epoch": 0.0904655417767839, "grad_norm": 0.12947380542755127, "learning_rate": 0.00019107088375877148, "loss": 1.2071, "step": 445 }, { "epoch": 0.09066883512909128, "grad_norm": 0.1240135133266449, "learning_rate": 0.0001910505440862402, "loss": 1.0782, "step": 446 }, { "epoch": 0.09087212848139865, "grad_norm": 0.1232561394572258, "learning_rate": 0.00019103020441370896, "loss": 1.0068, "step": 447 }, { "epoch": 0.09107542183370604, "grad_norm": 0.11200708150863647, "learning_rate": 0.00019100986474117769, "loss": 0.9491, "step": 448 }, { "epoch": 0.09127871518601342, "grad_norm": 0.1400870531797409, "learning_rate": 0.0001909895250686464, "loss": 1.3197, "step": 449 }, { "epoch": 0.0914820085383208, "grad_norm": 0.12712709605693817, "learning_rate": 0.00019096918539611513, "loss": 1.1853, "step": 450 }, { "epoch": 0.09168530189062818, "grad_norm": 0.11399099975824356, "learning_rate": 0.00019094884572358386, "loss": 0.8887, "step": 451 }, { "epoch": 0.09188859524293555, "grad_norm": 0.10861057788133621, "learning_rate": 0.00019092850605105258, "loss": 0.9224, "step": 452 }, { "epoch": 0.09209188859524294, "grad_norm": 0.12274569272994995, "learning_rate": 0.0001909081663785213, "loss": 1.1491, "step": 453 }, { "epoch": 0.09229518194755032, "grad_norm": 0.11641780287027359, "learning_rate": 0.00019088782670599003, "loss": 1.1646, "step": 454 }, { "epoch": 0.09249847529985769, "grad_norm": 0.1300159990787506, "learning_rate": 0.00019086748703345878, "loss": 1.1239, "step": 455 }, { "epoch": 0.09270176865216508, "grad_norm": 0.12116070836782455, "learning_rate": 0.0001908471473609275, "loss": 1.0475, "step": 456 }, { "epoch": 0.09290506200447246, "grad_norm": 0.11318276822566986, "learning_rate": 0.00019082680768839623, "loss": 1.0162, "step": 457 }, { "epoch": 0.09310835535677983, "grad_norm": 0.10791938006877899, "learning_rate": 0.00019080646801586496, "loss": 0.9874, "step": 458 }, { "epoch": 0.09331164870908722, "grad_norm": 0.10658224672079086, "learning_rate": 0.00019078612834333368, "loss": 0.9483, "step": 459 }, { "epoch": 0.09351494206139459, "grad_norm": 0.12912395596504211, "learning_rate": 0.0001907657886708024, "loss": 1.2248, "step": 460 }, { "epoch": 0.09371823541370197, "grad_norm": 0.1268775314092636, "learning_rate": 0.00019074544899827113, "loss": 1.079, "step": 461 }, { "epoch": 0.09392152876600936, "grad_norm": 0.11810900270938873, "learning_rate": 0.00019072510932573985, "loss": 1.1856, "step": 462 }, { "epoch": 0.09412482211831673, "grad_norm": 0.13081328570842743, "learning_rate": 0.00019070476965320858, "loss": 1.125, "step": 463 }, { "epoch": 0.09432811547062411, "grad_norm": 0.11875245720148087, "learning_rate": 0.00019068442998067733, "loss": 1.1341, "step": 464 }, { "epoch": 0.09453140882293148, "grad_norm": 0.10965297371149063, "learning_rate": 0.00019066409030814606, "loss": 0.9892, "step": 465 }, { "epoch": 0.09473470217523887, "grad_norm": 0.1167355626821518, "learning_rate": 0.00019064375063561478, "loss": 1.1234, "step": 466 }, { "epoch": 0.09493799552754625, "grad_norm": 0.1092626228928566, "learning_rate": 0.0001906234109630835, "loss": 0.9734, "step": 467 }, { "epoch": 0.09514128887985362, "grad_norm": 0.12768998742103577, "learning_rate": 0.00019060307129055223, "loss": 1.1349, "step": 468 }, { "epoch": 0.09534458223216101, "grad_norm": 0.13227547705173492, "learning_rate": 0.00019058273161802095, "loss": 1.2362, "step": 469 }, { "epoch": 0.0955478755844684, "grad_norm": 0.11458224058151245, "learning_rate": 0.00019056239194548968, "loss": 0.9707, "step": 470 }, { "epoch": 0.09575116893677577, "grad_norm": 0.11045580357313156, "learning_rate": 0.0001905420522729584, "loss": 0.973, "step": 471 }, { "epoch": 0.09595446228908315, "grad_norm": 0.1274811327457428, "learning_rate": 0.00019052171260042715, "loss": 1.2641, "step": 472 }, { "epoch": 0.09615775564139052, "grad_norm": 0.11694994568824768, "learning_rate": 0.00019050137292789588, "loss": 0.9362, "step": 473 }, { "epoch": 0.0963610489936979, "grad_norm": 0.11511142551898956, "learning_rate": 0.0001904810332553646, "loss": 1.0127, "step": 474 }, { "epoch": 0.09656434234600529, "grad_norm": 0.1253817081451416, "learning_rate": 0.00019046069358283333, "loss": 1.0489, "step": 475 }, { "epoch": 0.09676763569831266, "grad_norm": 0.11795701086521149, "learning_rate": 0.00019044035391030205, "loss": 1.0528, "step": 476 }, { "epoch": 0.09697092905062005, "grad_norm": 0.12703485786914825, "learning_rate": 0.00019042001423777078, "loss": 1.2692, "step": 477 }, { "epoch": 0.09717422240292742, "grad_norm": 0.12391920387744904, "learning_rate": 0.0001903996745652395, "loss": 1.0765, "step": 478 }, { "epoch": 0.0973775157552348, "grad_norm": 0.12939028441905975, "learning_rate": 0.00019037933489270822, "loss": 1.2686, "step": 479 }, { "epoch": 0.09758080910754219, "grad_norm": 0.11955651640892029, "learning_rate": 0.00019035899522017698, "loss": 1.0179, "step": 480 }, { "epoch": 0.09778410245984956, "grad_norm": 0.11481709033250809, "learning_rate": 0.0001903386555476457, "loss": 1.1008, "step": 481 }, { "epoch": 0.09798739581215694, "grad_norm": 0.12216270714998245, "learning_rate": 0.00019031831587511443, "loss": 1.2387, "step": 482 }, { "epoch": 0.09819068916446433, "grad_norm": 0.10991356521844864, "learning_rate": 0.00019029797620258315, "loss": 0.9913, "step": 483 }, { "epoch": 0.0983939825167717, "grad_norm": 0.11534951627254486, "learning_rate": 0.00019027763653005187, "loss": 0.9248, "step": 484 }, { "epoch": 0.09859727586907908, "grad_norm": 0.11887869983911514, "learning_rate": 0.0001902572968575206, "loss": 1.065, "step": 485 }, { "epoch": 0.09880056922138646, "grad_norm": 0.12391136586666107, "learning_rate": 0.00019023695718498932, "loss": 1.1692, "step": 486 }, { "epoch": 0.09900386257369384, "grad_norm": 0.10672067850828171, "learning_rate": 0.00019021661751245805, "loss": 1.154, "step": 487 }, { "epoch": 0.09920715592600123, "grad_norm": 0.14061135053634644, "learning_rate": 0.0001901962778399268, "loss": 1.168, "step": 488 }, { "epoch": 0.0994104492783086, "grad_norm": 0.11371248215436935, "learning_rate": 0.00019017593816739552, "loss": 0.9905, "step": 489 }, { "epoch": 0.09961374263061598, "grad_norm": 0.11754601448774338, "learning_rate": 0.00019015559849486425, "loss": 1.01, "step": 490 }, { "epoch": 0.09981703598292335, "grad_norm": 0.12492667138576508, "learning_rate": 0.00019013525882233297, "loss": 1.1024, "step": 491 }, { "epoch": 0.10002032933523074, "grad_norm": 0.12676015496253967, "learning_rate": 0.0001901149191498017, "loss": 1.3976, "step": 492 }, { "epoch": 0.10022362268753812, "grad_norm": 0.13545620441436768, "learning_rate": 0.00019009457947727042, "loss": 1.1542, "step": 493 }, { "epoch": 0.10042691603984549, "grad_norm": 0.12883707880973816, "learning_rate": 0.00019007423980473915, "loss": 1.1068, "step": 494 }, { "epoch": 0.10063020939215288, "grad_norm": 0.11707032471895218, "learning_rate": 0.00019005390013220787, "loss": 0.9906, "step": 495 }, { "epoch": 0.10083350274446026, "grad_norm": 0.13158461451530457, "learning_rate": 0.00019003356045967662, "loss": 1.1453, "step": 496 }, { "epoch": 0.10103679609676763, "grad_norm": 0.1244715005159378, "learning_rate": 0.00019001322078714535, "loss": 1.1244, "step": 497 }, { "epoch": 0.10124008944907502, "grad_norm": 0.12620943784713745, "learning_rate": 0.00018999288111461407, "loss": 1.1018, "step": 498 }, { "epoch": 0.10144338280138239, "grad_norm": 0.1192685067653656, "learning_rate": 0.0001899725414420828, "loss": 1.1069, "step": 499 }, { "epoch": 0.10164667615368977, "grad_norm": 0.12764599919319153, "learning_rate": 0.00018995220176955152, "loss": 1.2122, "step": 500 }, { "epoch": 0.10184996950599716, "grad_norm": 0.12098994851112366, "learning_rate": 0.00018993186209702024, "loss": 1.1113, "step": 501 }, { "epoch": 0.10205326285830453, "grad_norm": 0.14677678048610687, "learning_rate": 0.00018991152242448897, "loss": 1.399, "step": 502 }, { "epoch": 0.10225655621061192, "grad_norm": 0.1371246576309204, "learning_rate": 0.0001898911827519577, "loss": 1.2582, "step": 503 }, { "epoch": 0.10245984956291929, "grad_norm": 0.11643920093774796, "learning_rate": 0.00018987084307942642, "loss": 1.0707, "step": 504 }, { "epoch": 0.10266314291522667, "grad_norm": 0.1150643602013588, "learning_rate": 0.00018985050340689517, "loss": 1.0886, "step": 505 }, { "epoch": 0.10286643626753406, "grad_norm": 0.10518593341112137, "learning_rate": 0.0001898301637343639, "loss": 0.8955, "step": 506 }, { "epoch": 0.10306972961984143, "grad_norm": 0.11445560306310654, "learning_rate": 0.00018980982406183262, "loss": 0.93, "step": 507 }, { "epoch": 0.10327302297214881, "grad_norm": 0.11920091509819031, "learning_rate": 0.00018978948438930134, "loss": 1.0148, "step": 508 }, { "epoch": 0.1034763163244562, "grad_norm": 0.12822504341602325, "learning_rate": 0.00018976914471677007, "loss": 1.2004, "step": 509 }, { "epoch": 0.10367960967676357, "grad_norm": 0.12469658255577087, "learning_rate": 0.0001897488050442388, "loss": 1.0902, "step": 510 }, { "epoch": 0.10388290302907095, "grad_norm": 0.12136801332235336, "learning_rate": 0.00018972846537170752, "loss": 1.059, "step": 511 }, { "epoch": 0.10408619638137832, "grad_norm": 0.10618099570274353, "learning_rate": 0.00018970812569917624, "loss": 0.9972, "step": 512 }, { "epoch": 0.10428948973368571, "grad_norm": 0.12111090868711472, "learning_rate": 0.000189687786026645, "loss": 1.0789, "step": 513 }, { "epoch": 0.1044927830859931, "grad_norm": 0.1108577698469162, "learning_rate": 0.00018966744635411372, "loss": 0.9024, "step": 514 }, { "epoch": 0.10469607643830046, "grad_norm": 0.1184157282114029, "learning_rate": 0.00018964710668158244, "loss": 0.9548, "step": 515 }, { "epoch": 0.10489936979060785, "grad_norm": 0.1288694143295288, "learning_rate": 0.00018962676700905117, "loss": 1.2128, "step": 516 }, { "epoch": 0.10510266314291522, "grad_norm": 0.12015259265899658, "learning_rate": 0.0001896064273365199, "loss": 1.1964, "step": 517 }, { "epoch": 0.1053059564952226, "grad_norm": 0.13204379379749298, "learning_rate": 0.00018958608766398861, "loss": 1.0473, "step": 518 }, { "epoch": 0.10550924984752999, "grad_norm": 0.11321057379245758, "learning_rate": 0.00018956574799145734, "loss": 1.0961, "step": 519 }, { "epoch": 0.10571254319983736, "grad_norm": 0.13245680928230286, "learning_rate": 0.00018954540831892606, "loss": 1.0835, "step": 520 }, { "epoch": 0.10591583655214475, "grad_norm": 0.12220027297735214, "learning_rate": 0.00018952506864639481, "loss": 1.1246, "step": 521 }, { "epoch": 0.10611912990445213, "grad_norm": 0.11933163553476334, "learning_rate": 0.00018950472897386354, "loss": 1.0739, "step": 522 }, { "epoch": 0.1063224232567595, "grad_norm": 0.14022572338581085, "learning_rate": 0.00018948438930133226, "loss": 1.1557, "step": 523 }, { "epoch": 0.10652571660906689, "grad_norm": 0.13287031650543213, "learning_rate": 0.000189464049628801, "loss": 1.2597, "step": 524 }, { "epoch": 0.10672900996137426, "grad_norm": 0.11653829365968704, "learning_rate": 0.0001894437099562697, "loss": 0.9564, "step": 525 }, { "epoch": 0.10693230331368164, "grad_norm": 0.11488767713308334, "learning_rate": 0.00018942337028373844, "loss": 0.9883, "step": 526 }, { "epoch": 0.10713559666598903, "grad_norm": 0.11149357259273529, "learning_rate": 0.00018940303061120716, "loss": 1.0004, "step": 527 }, { "epoch": 0.1073388900182964, "grad_norm": 0.11848779767751694, "learning_rate": 0.00018938269093867589, "loss": 1.0462, "step": 528 }, { "epoch": 0.10754218337060378, "grad_norm": 0.11932095140218735, "learning_rate": 0.00018936235126614464, "loss": 1.1001, "step": 529 }, { "epoch": 0.10774547672291115, "grad_norm": 0.11937075853347778, "learning_rate": 0.00018934201159361336, "loss": 1.2032, "step": 530 }, { "epoch": 0.10794877007521854, "grad_norm": 0.10601939260959625, "learning_rate": 0.00018932167192108209, "loss": 0.9689, "step": 531 }, { "epoch": 0.10815206342752592, "grad_norm": 0.11901092529296875, "learning_rate": 0.0001893013322485508, "loss": 1.1913, "step": 532 }, { "epoch": 0.1083553567798333, "grad_norm": 0.1308038979768753, "learning_rate": 0.00018928099257601954, "loss": 1.2393, "step": 533 }, { "epoch": 0.10855865013214068, "grad_norm": 0.1222740039229393, "learning_rate": 0.00018926065290348826, "loss": 0.9574, "step": 534 }, { "epoch": 0.10876194348444806, "grad_norm": 0.12856149673461914, "learning_rate": 0.00018924031323095698, "loss": 1.1296, "step": 535 }, { "epoch": 0.10896523683675544, "grad_norm": 0.12045751512050629, "learning_rate": 0.0001892199735584257, "loss": 1.153, "step": 536 }, { "epoch": 0.10916853018906282, "grad_norm": 0.11606315523386002, "learning_rate": 0.00018919963388589446, "loss": 0.9028, "step": 537 }, { "epoch": 0.10937182354137019, "grad_norm": 0.10877380520105362, "learning_rate": 0.00018917929421336318, "loss": 0.9954, "step": 538 }, { "epoch": 0.10957511689367758, "grad_norm": 0.10476227104663849, "learning_rate": 0.0001891589545408319, "loss": 0.9486, "step": 539 }, { "epoch": 0.10977841024598496, "grad_norm": 0.12538990378379822, "learning_rate": 0.00018913861486830063, "loss": 1.1749, "step": 540 }, { "epoch": 0.10998170359829233, "grad_norm": 0.13290320336818695, "learning_rate": 0.00018911827519576936, "loss": 1.0933, "step": 541 }, { "epoch": 0.11018499695059972, "grad_norm": 0.11773636192083359, "learning_rate": 0.00018909793552323808, "loss": 1.0693, "step": 542 }, { "epoch": 0.11038829030290709, "grad_norm": 0.11466556787490845, "learning_rate": 0.0001890775958507068, "loss": 1.0589, "step": 543 }, { "epoch": 0.11059158365521447, "grad_norm": 0.1275825798511505, "learning_rate": 0.00018905725617817553, "loss": 1.0582, "step": 544 }, { "epoch": 0.11079487700752186, "grad_norm": 0.1283504068851471, "learning_rate": 0.00018903691650564428, "loss": 1.1702, "step": 545 }, { "epoch": 0.11099817035982923, "grad_norm": 0.13250254094600677, "learning_rate": 0.000189016576833113, "loss": 1.1467, "step": 546 }, { "epoch": 0.11120146371213661, "grad_norm": 0.15396709740161896, "learning_rate": 0.00018899623716058173, "loss": 1.1299, "step": 547 }, { "epoch": 0.111404757064444, "grad_norm": 0.13014012575149536, "learning_rate": 0.00018897589748805046, "loss": 1.1633, "step": 548 }, { "epoch": 0.11160805041675137, "grad_norm": 0.11697974056005478, "learning_rate": 0.00018895555781551918, "loss": 1.1052, "step": 549 }, { "epoch": 0.11181134376905875, "grad_norm": 0.13976189494132996, "learning_rate": 0.0001889352181429879, "loss": 1.1199, "step": 550 }, { "epoch": 0.11201463712136613, "grad_norm": 0.13051995635032654, "learning_rate": 0.00018891487847045663, "loss": 1.2532, "step": 551 }, { "epoch": 0.11221793047367351, "grad_norm": 0.11212155967950821, "learning_rate": 0.00018889453879792535, "loss": 0.9873, "step": 552 }, { "epoch": 0.1124212238259809, "grad_norm": 0.1334063857793808, "learning_rate": 0.00018887419912539408, "loss": 1.1102, "step": 553 }, { "epoch": 0.11262451717828827, "grad_norm": 0.1290140599012375, "learning_rate": 0.00018885385945286283, "loss": 0.9598, "step": 554 }, { "epoch": 0.11282781053059565, "grad_norm": 0.12794511020183563, "learning_rate": 0.00018883351978033155, "loss": 1.2875, "step": 555 }, { "epoch": 0.11303110388290302, "grad_norm": 0.11270211637020111, "learning_rate": 0.00018881318010780028, "loss": 0.9414, "step": 556 }, { "epoch": 0.11323439723521041, "grad_norm": 0.12074756622314453, "learning_rate": 0.000188792840435269, "loss": 1.0734, "step": 557 }, { "epoch": 0.11343769058751779, "grad_norm": 0.11245666444301605, "learning_rate": 0.00018877250076273773, "loss": 1.2024, "step": 558 }, { "epoch": 0.11364098393982516, "grad_norm": 0.10953640192747116, "learning_rate": 0.00018875216109020645, "loss": 0.9572, "step": 559 }, { "epoch": 0.11384427729213255, "grad_norm": 0.11975332349538803, "learning_rate": 0.00018873182141767518, "loss": 1.1559, "step": 560 }, { "epoch": 0.11404757064443993, "grad_norm": 0.10940812528133392, "learning_rate": 0.0001887114817451439, "loss": 0.926, "step": 561 }, { "epoch": 0.1142508639967473, "grad_norm": 0.139595165848732, "learning_rate": 0.00018869114207261265, "loss": 1.3275, "step": 562 }, { "epoch": 0.11445415734905469, "grad_norm": 0.10891355574131012, "learning_rate": 0.00018867080240008138, "loss": 0.9736, "step": 563 }, { "epoch": 0.11465745070136206, "grad_norm": 0.1192033439874649, "learning_rate": 0.0001886504627275501, "loss": 0.9881, "step": 564 }, { "epoch": 0.11486074405366944, "grad_norm": 0.12635888159275055, "learning_rate": 0.00018863012305501883, "loss": 1.1483, "step": 565 }, { "epoch": 0.11506403740597683, "grad_norm": 0.13440972566604614, "learning_rate": 0.00018860978338248755, "loss": 1.0852, "step": 566 }, { "epoch": 0.1152673307582842, "grad_norm": 0.12328968942165375, "learning_rate": 0.00018858944370995628, "loss": 1.1048, "step": 567 }, { "epoch": 0.11547062411059159, "grad_norm": 0.12037025392055511, "learning_rate": 0.000188569104037425, "loss": 1.1171, "step": 568 }, { "epoch": 0.11567391746289896, "grad_norm": 0.11991129070520401, "learning_rate": 0.00018854876436489372, "loss": 1.0827, "step": 569 }, { "epoch": 0.11587721081520634, "grad_norm": 0.11372412741184235, "learning_rate": 0.00018852842469236248, "loss": 0.9993, "step": 570 }, { "epoch": 0.11608050416751373, "grad_norm": 0.10992924124002457, "learning_rate": 0.0001885080850198312, "loss": 0.9812, "step": 571 }, { "epoch": 0.1162837975198211, "grad_norm": 0.11675936728715897, "learning_rate": 0.00018848774534729992, "loss": 0.9844, "step": 572 }, { "epoch": 0.11648709087212848, "grad_norm": 0.10757414996623993, "learning_rate": 0.00018846740567476865, "loss": 1.0607, "step": 573 }, { "epoch": 0.11669038422443587, "grad_norm": 0.11255379766225815, "learning_rate": 0.00018844706600223737, "loss": 1.1638, "step": 574 }, { "epoch": 0.11689367757674324, "grad_norm": 0.10737176239490509, "learning_rate": 0.0001884267263297061, "loss": 1.0055, "step": 575 }, { "epoch": 0.11709697092905062, "grad_norm": 0.1193508729338646, "learning_rate": 0.00018840638665717482, "loss": 1.0924, "step": 576 }, { "epoch": 0.117300264281358, "grad_norm": 0.12564769387245178, "learning_rate": 0.00018838604698464355, "loss": 1.3088, "step": 577 }, { "epoch": 0.11750355763366538, "grad_norm": 0.12675485014915466, "learning_rate": 0.0001883657073121123, "loss": 1.0682, "step": 578 }, { "epoch": 0.11770685098597276, "grad_norm": 0.12016987055540085, "learning_rate": 0.00018834536763958102, "loss": 0.9511, "step": 579 }, { "epoch": 0.11791014433828013, "grad_norm": 0.11664092540740967, "learning_rate": 0.00018832502796704975, "loss": 1.0758, "step": 580 }, { "epoch": 0.11811343769058752, "grad_norm": 0.11402445286512375, "learning_rate": 0.00018830468829451847, "loss": 1.0959, "step": 581 }, { "epoch": 0.11831673104289489, "grad_norm": 0.12505365908145905, "learning_rate": 0.0001882843486219872, "loss": 1.1621, "step": 582 }, { "epoch": 0.11852002439520228, "grad_norm": 0.13434186577796936, "learning_rate": 0.00018826400894945592, "loss": 1.273, "step": 583 }, { "epoch": 0.11872331774750966, "grad_norm": 0.1284523904323578, "learning_rate": 0.00018824366927692465, "loss": 1.1161, "step": 584 }, { "epoch": 0.11892661109981703, "grad_norm": 0.1141962930560112, "learning_rate": 0.00018822332960439337, "loss": 1.012, "step": 585 }, { "epoch": 0.11912990445212442, "grad_norm": 0.1280459314584732, "learning_rate": 0.00018820298993186212, "loss": 1.1797, "step": 586 }, { "epoch": 0.1193331978044318, "grad_norm": 0.12705819308757782, "learning_rate": 0.00018818265025933085, "loss": 1.2323, "step": 587 }, { "epoch": 0.11953649115673917, "grad_norm": 0.1341540366411209, "learning_rate": 0.00018816231058679957, "loss": 1.1219, "step": 588 }, { "epoch": 0.11973978450904656, "grad_norm": 0.1307908147573471, "learning_rate": 0.0001881419709142683, "loss": 0.992, "step": 589 }, { "epoch": 0.11994307786135393, "grad_norm": 0.127479687333107, "learning_rate": 0.00018812163124173702, "loss": 1.0326, "step": 590 }, { "epoch": 0.12014637121366131, "grad_norm": 0.09779065102338791, "learning_rate": 0.00018810129156920574, "loss": 0.7614, "step": 591 }, { "epoch": 0.1203496645659687, "grad_norm": 0.14188863337039948, "learning_rate": 0.00018808095189667447, "loss": 1.23, "step": 592 }, { "epoch": 0.12055295791827607, "grad_norm": 0.12969130277633667, "learning_rate": 0.0001880606122241432, "loss": 1.1229, "step": 593 }, { "epoch": 0.12075625127058345, "grad_norm": 0.13516603410243988, "learning_rate": 0.00018804027255161192, "loss": 1.0147, "step": 594 }, { "epoch": 0.12095954462289084, "grad_norm": 0.13307668268680573, "learning_rate": 0.00018801993287908067, "loss": 1.1908, "step": 595 }, { "epoch": 0.12116283797519821, "grad_norm": 0.11288546770811081, "learning_rate": 0.0001879995932065494, "loss": 0.9319, "step": 596 }, { "epoch": 0.1213661313275056, "grad_norm": 0.12034857273101807, "learning_rate": 0.00018797925353401812, "loss": 1.0976, "step": 597 }, { "epoch": 0.12156942467981297, "grad_norm": 0.136747807264328, "learning_rate": 0.00018795891386148684, "loss": 1.2298, "step": 598 }, { "epoch": 0.12177271803212035, "grad_norm": 0.11699377000331879, "learning_rate": 0.00018793857418895557, "loss": 1.0377, "step": 599 }, { "epoch": 0.12197601138442773, "grad_norm": 0.15257331728935242, "learning_rate": 0.0001879182345164243, "loss": 1.2306, "step": 600 }, { "epoch": 0.1221793047367351, "grad_norm": 0.1361241340637207, "learning_rate": 0.00018789789484389302, "loss": 1.1563, "step": 601 }, { "epoch": 0.12238259808904249, "grad_norm": 0.11735684424638748, "learning_rate": 0.00018787755517136174, "loss": 1.104, "step": 602 }, { "epoch": 0.12258589144134986, "grad_norm": 0.11648523807525635, "learning_rate": 0.0001878572154988305, "loss": 1.0008, "step": 603 }, { "epoch": 0.12278918479365725, "grad_norm": 0.12473436444997787, "learning_rate": 0.00018783687582629922, "loss": 1.0741, "step": 604 }, { "epoch": 0.12299247814596463, "grad_norm": 0.11664781719446182, "learning_rate": 0.00018781653615376794, "loss": 1.1155, "step": 605 }, { "epoch": 0.123195771498272, "grad_norm": 0.12415888160467148, "learning_rate": 0.00018779619648123666, "loss": 1.158, "step": 606 }, { "epoch": 0.12339906485057939, "grad_norm": 0.1223251074552536, "learning_rate": 0.0001877758568087054, "loss": 1.1045, "step": 607 }, { "epoch": 0.12360235820288677, "grad_norm": 0.12289747595787048, "learning_rate": 0.00018775551713617411, "loss": 1.0768, "step": 608 }, { "epoch": 0.12380565155519414, "grad_norm": 0.1316901594400406, "learning_rate": 0.00018773517746364284, "loss": 1.2156, "step": 609 }, { "epoch": 0.12400894490750153, "grad_norm": 0.12060056626796722, "learning_rate": 0.00018771483779111156, "loss": 1.0221, "step": 610 }, { "epoch": 0.1242122382598089, "grad_norm": 0.1384373903274536, "learning_rate": 0.00018769449811858031, "loss": 1.1059, "step": 611 }, { "epoch": 0.12441553161211628, "grad_norm": 0.12399812787771225, "learning_rate": 0.00018767415844604904, "loss": 1.0193, "step": 612 }, { "epoch": 0.12461882496442367, "grad_norm": 0.13406959176063538, "learning_rate": 0.00018765381877351776, "loss": 1.1572, "step": 613 }, { "epoch": 0.12482211831673104, "grad_norm": 0.12881499528884888, "learning_rate": 0.0001876334791009865, "loss": 1.1914, "step": 614 }, { "epoch": 0.1250254116690384, "grad_norm": 0.11472728103399277, "learning_rate": 0.0001876131394284552, "loss": 1.0822, "step": 615 }, { "epoch": 0.1252287050213458, "grad_norm": 0.1251503825187683, "learning_rate": 0.00018759279975592394, "loss": 1.1783, "step": 616 }, { "epoch": 0.12543199837365318, "grad_norm": 0.1414482593536377, "learning_rate": 0.00018757246008339266, "loss": 1.1925, "step": 617 }, { "epoch": 0.12563529172596055, "grad_norm": 0.122686967253685, "learning_rate": 0.00018755212041086139, "loss": 1.091, "step": 618 }, { "epoch": 0.12583858507826795, "grad_norm": 0.12301596254110336, "learning_rate": 0.00018753178073833014, "loss": 1.108, "step": 619 }, { "epoch": 0.12604187843057532, "grad_norm": 0.1191742941737175, "learning_rate": 0.00018751144106579886, "loss": 1.0413, "step": 620 }, { "epoch": 0.1262451717828827, "grad_norm": 0.0971694141626358, "learning_rate": 0.00018749110139326759, "loss": 0.8473, "step": 621 }, { "epoch": 0.1264484651351901, "grad_norm": 0.12381591647863388, "learning_rate": 0.0001874707617207363, "loss": 1.1503, "step": 622 }, { "epoch": 0.12665175848749746, "grad_norm": 0.13411198556423187, "learning_rate": 0.00018745042204820504, "loss": 1.164, "step": 623 }, { "epoch": 0.12685505183980483, "grad_norm": 0.12838509678840637, "learning_rate": 0.00018743008237567376, "loss": 1.1768, "step": 624 }, { "epoch": 0.1270583451921122, "grad_norm": 0.11623813211917877, "learning_rate": 0.00018740974270314248, "loss": 1.1611, "step": 625 }, { "epoch": 0.1272616385444196, "grad_norm": 0.11001920700073242, "learning_rate": 0.0001873894030306112, "loss": 1.0182, "step": 626 }, { "epoch": 0.12746493189672697, "grad_norm": 0.11987441778182983, "learning_rate": 0.00018736906335807996, "loss": 1.0509, "step": 627 }, { "epoch": 0.12766822524903434, "grad_norm": 0.13036808371543884, "learning_rate": 0.00018734872368554868, "loss": 1.2035, "step": 628 }, { "epoch": 0.12787151860134174, "grad_norm": 0.12546774744987488, "learning_rate": 0.0001873283840130174, "loss": 1.1434, "step": 629 }, { "epoch": 0.12807481195364911, "grad_norm": 0.1025729849934578, "learning_rate": 0.00018730804434048613, "loss": 0.9868, "step": 630 }, { "epoch": 0.12827810530595649, "grad_norm": 0.1013616994023323, "learning_rate": 0.00018728770466795483, "loss": 0.9281, "step": 631 }, { "epoch": 0.12848139865826388, "grad_norm": 0.11066362261772156, "learning_rate": 0.00018726736499542358, "loss": 1.0345, "step": 632 }, { "epoch": 0.12868469201057126, "grad_norm": 0.1280633807182312, "learning_rate": 0.0001872470253228923, "loss": 1.2335, "step": 633 }, { "epoch": 0.12888798536287863, "grad_norm": 0.11954978853464127, "learning_rate": 0.00018722668565036103, "loss": 1.0298, "step": 634 }, { "epoch": 0.12909127871518603, "grad_norm": 0.11124943196773529, "learning_rate": 0.00018720634597782976, "loss": 1.0896, "step": 635 }, { "epoch": 0.1292945720674934, "grad_norm": 0.12496782839298248, "learning_rate": 0.0001871860063052985, "loss": 1.0897, "step": 636 }, { "epoch": 0.12949786541980077, "grad_norm": 0.1257556527853012, "learning_rate": 0.00018716566663276723, "loss": 1.0148, "step": 637 }, { "epoch": 0.12970115877210814, "grad_norm": 0.11928705126047134, "learning_rate": 0.00018714532696023596, "loss": 1.1415, "step": 638 }, { "epoch": 0.12990445212441554, "grad_norm": 0.1109057068824768, "learning_rate": 0.00018712498728770468, "loss": 1.063, "step": 639 }, { "epoch": 0.1301077454767229, "grad_norm": 0.13905195891857147, "learning_rate": 0.0001871046476151734, "loss": 1.2346, "step": 640 }, { "epoch": 0.13031103882903028, "grad_norm": 0.12306763231754303, "learning_rate": 0.00018708430794264213, "loss": 1.0504, "step": 641 }, { "epoch": 0.13051433218133768, "grad_norm": 0.1077868863940239, "learning_rate": 0.00018706396827011085, "loss": 0.9143, "step": 642 }, { "epoch": 0.13071762553364505, "grad_norm": 0.1328214555978775, "learning_rate": 0.00018704362859757958, "loss": 1.1223, "step": 643 }, { "epoch": 0.13092091888595242, "grad_norm": 0.12459075450897217, "learning_rate": 0.00018702328892504833, "loss": 1.1896, "step": 644 }, { "epoch": 0.13112421223825982, "grad_norm": 0.11860411614179611, "learning_rate": 0.00018700294925251705, "loss": 1.0472, "step": 645 }, { "epoch": 0.1313275055905672, "grad_norm": 0.11825944483280182, "learning_rate": 0.00018698260957998578, "loss": 1.2391, "step": 646 }, { "epoch": 0.13153079894287456, "grad_norm": 0.12103937566280365, "learning_rate": 0.0001869622699074545, "loss": 1.0087, "step": 647 }, { "epoch": 0.13173409229518196, "grad_norm": 0.12289803475141525, "learning_rate": 0.00018694193023492323, "loss": 1.0867, "step": 648 }, { "epoch": 0.13193738564748933, "grad_norm": 0.12652850151062012, "learning_rate": 0.00018692159056239195, "loss": 1.2047, "step": 649 }, { "epoch": 0.1321406789997967, "grad_norm": 0.12258271127939224, "learning_rate": 0.00018690125088986068, "loss": 0.9806, "step": 650 }, { "epoch": 0.1323439723521041, "grad_norm": 0.1285620778799057, "learning_rate": 0.0001868809112173294, "loss": 0.9993, "step": 651 }, { "epoch": 0.13254726570441147, "grad_norm": 0.11906328797340393, "learning_rate": 0.00018686057154479815, "loss": 1.1029, "step": 652 }, { "epoch": 0.13275055905671884, "grad_norm": 0.13393160700798035, "learning_rate": 0.00018684023187226688, "loss": 1.1263, "step": 653 }, { "epoch": 0.1329538524090262, "grad_norm": 0.13850244879722595, "learning_rate": 0.0001868198921997356, "loss": 1.0878, "step": 654 }, { "epoch": 0.1331571457613336, "grad_norm": 0.13923142850399017, "learning_rate": 0.00018679955252720433, "loss": 1.1637, "step": 655 }, { "epoch": 0.13336043911364098, "grad_norm": 0.11642129719257355, "learning_rate": 0.00018677921285467305, "loss": 1.1134, "step": 656 }, { "epoch": 0.13356373246594835, "grad_norm": 0.12743037939071655, "learning_rate": 0.00018675887318214178, "loss": 1.0345, "step": 657 }, { "epoch": 0.13376702581825575, "grad_norm": 0.11360882222652435, "learning_rate": 0.0001867385335096105, "loss": 1.072, "step": 658 }, { "epoch": 0.13397031917056312, "grad_norm": 0.1262228637933731, "learning_rate": 0.00018671819383707922, "loss": 1.1546, "step": 659 }, { "epoch": 0.1341736125228705, "grad_norm": 0.1144820973277092, "learning_rate": 0.00018669785416454798, "loss": 1.0152, "step": 660 }, { "epoch": 0.1343769058751779, "grad_norm": 0.12834620475769043, "learning_rate": 0.0001866775144920167, "loss": 1.0456, "step": 661 }, { "epoch": 0.13458019922748526, "grad_norm": 0.11835994571447372, "learning_rate": 0.00018665717481948542, "loss": 0.991, "step": 662 }, { "epoch": 0.13478349257979264, "grad_norm": 0.11445319652557373, "learning_rate": 0.00018663683514695415, "loss": 1.0116, "step": 663 }, { "epoch": 0.13498678593210003, "grad_norm": 0.13939061760902405, "learning_rate": 0.00018661649547442287, "loss": 1.153, "step": 664 }, { "epoch": 0.1351900792844074, "grad_norm": 0.1149614006280899, "learning_rate": 0.0001865961558018916, "loss": 0.9255, "step": 665 }, { "epoch": 0.13539337263671478, "grad_norm": 0.13376334309577942, "learning_rate": 0.00018657581612936032, "loss": 1.1502, "step": 666 }, { "epoch": 0.13559666598902215, "grad_norm": 0.13265709578990936, "learning_rate": 0.00018655547645682905, "loss": 1.1292, "step": 667 }, { "epoch": 0.13579995934132955, "grad_norm": 0.11729206144809723, "learning_rate": 0.0001865351367842978, "loss": 1.2166, "step": 668 }, { "epoch": 0.13600325269363692, "grad_norm": 0.11903608590364456, "learning_rate": 0.00018651479711176652, "loss": 1.1808, "step": 669 }, { "epoch": 0.1362065460459443, "grad_norm": 0.11009612679481506, "learning_rate": 0.00018649445743923525, "loss": 0.9364, "step": 670 }, { "epoch": 0.1364098393982517, "grad_norm": 0.13966090977191925, "learning_rate": 0.00018647411776670397, "loss": 1.2463, "step": 671 }, { "epoch": 0.13661313275055906, "grad_norm": 0.12319371849298477, "learning_rate": 0.00018645377809417267, "loss": 1.1192, "step": 672 }, { "epoch": 0.13681642610286643, "grad_norm": 0.13469716906547546, "learning_rate": 0.00018643343842164142, "loss": 1.2376, "step": 673 }, { "epoch": 0.13701971945517383, "grad_norm": 0.124245285987854, "learning_rate": 0.00018641309874911015, "loss": 1.1145, "step": 674 }, { "epoch": 0.1372230128074812, "grad_norm": 0.1325312852859497, "learning_rate": 0.00018639275907657887, "loss": 1.1453, "step": 675 }, { "epoch": 0.13742630615978857, "grad_norm": 0.13344690203666687, "learning_rate": 0.0001863724194040476, "loss": 1.2191, "step": 676 }, { "epoch": 0.13762959951209597, "grad_norm": 0.1301363855600357, "learning_rate": 0.00018635207973151635, "loss": 0.9982, "step": 677 }, { "epoch": 0.13783289286440334, "grad_norm": 0.10880762338638306, "learning_rate": 0.00018633174005898507, "loss": 0.8772, "step": 678 }, { "epoch": 0.1380361862167107, "grad_norm": 0.13281653821468353, "learning_rate": 0.0001863114003864538, "loss": 1.0529, "step": 679 }, { "epoch": 0.13823947956901808, "grad_norm": 0.13998745381832123, "learning_rate": 0.0001862910607139225, "loss": 1.0996, "step": 680 }, { "epoch": 0.13844277292132548, "grad_norm": 0.1195378452539444, "learning_rate": 0.00018627072104139124, "loss": 0.9776, "step": 681 }, { "epoch": 0.13864606627363285, "grad_norm": 0.10932020843029022, "learning_rate": 0.00018625038136885997, "loss": 1.1026, "step": 682 }, { "epoch": 0.13884935962594022, "grad_norm": 0.1420464664697647, "learning_rate": 0.0001862300416963287, "loss": 1.1428, "step": 683 }, { "epoch": 0.13905265297824762, "grad_norm": 0.11747555434703827, "learning_rate": 0.00018620970202379742, "loss": 0.9985, "step": 684 }, { "epoch": 0.139255946330555, "grad_norm": 0.11964225023984909, "learning_rate": 0.00018618936235126617, "loss": 1.0268, "step": 685 }, { "epoch": 0.13945923968286236, "grad_norm": 0.11939354985952377, "learning_rate": 0.0001861690226787349, "loss": 0.993, "step": 686 }, { "epoch": 0.13966253303516976, "grad_norm": 0.14188724756240845, "learning_rate": 0.00018614868300620362, "loss": 1.0672, "step": 687 }, { "epoch": 0.13986582638747713, "grad_norm": 0.12218412756919861, "learning_rate": 0.00018612834333367231, "loss": 1.0664, "step": 688 }, { "epoch": 0.1400691197397845, "grad_norm": 0.12363380193710327, "learning_rate": 0.00018610800366114107, "loss": 1.1202, "step": 689 }, { "epoch": 0.1402724130920919, "grad_norm": 0.12523901462554932, "learning_rate": 0.0001860876639886098, "loss": 0.9601, "step": 690 }, { "epoch": 0.14047570644439927, "grad_norm": 0.1359613537788391, "learning_rate": 0.00018606732431607852, "loss": 1.2674, "step": 691 }, { "epoch": 0.14067899979670664, "grad_norm": 0.12229263782501221, "learning_rate": 0.00018604698464354724, "loss": 1.0817, "step": 692 }, { "epoch": 0.14088229314901402, "grad_norm": 0.12188601493835449, "learning_rate": 0.000186026644971016, "loss": 1.1176, "step": 693 }, { "epoch": 0.1410855865013214, "grad_norm": 0.10588016360998154, "learning_rate": 0.00018600630529848472, "loss": 0.9546, "step": 694 }, { "epoch": 0.14128887985362878, "grad_norm": 0.11985071748495102, "learning_rate": 0.00018598596562595344, "loss": 1.0765, "step": 695 }, { "epoch": 0.14149217320593616, "grad_norm": 0.13118812441825867, "learning_rate": 0.00018596562595342216, "loss": 1.0117, "step": 696 }, { "epoch": 0.14169546655824355, "grad_norm": 0.11992435902357101, "learning_rate": 0.0001859452862808909, "loss": 0.9618, "step": 697 }, { "epoch": 0.14189875991055093, "grad_norm": 0.11617527902126312, "learning_rate": 0.00018592494660835961, "loss": 1.0459, "step": 698 }, { "epoch": 0.1421020532628583, "grad_norm": 0.12465415149927139, "learning_rate": 0.00018590460693582834, "loss": 1.0635, "step": 699 }, { "epoch": 0.1423053466151657, "grad_norm": 0.12672793865203857, "learning_rate": 0.00018588426726329706, "loss": 1.26, "step": 700 }, { "epoch": 0.14250863996747307, "grad_norm": 0.12191738188266754, "learning_rate": 0.00018586392759076581, "loss": 0.9333, "step": 701 }, { "epoch": 0.14271193331978044, "grad_norm": 0.13285742700099945, "learning_rate": 0.00018584358791823454, "loss": 1.2199, "step": 702 }, { "epoch": 0.14291522667208784, "grad_norm": 0.11525557935237885, "learning_rate": 0.00018582324824570326, "loss": 1.1212, "step": 703 }, { "epoch": 0.1431185200243952, "grad_norm": 0.12379605323076248, "learning_rate": 0.000185802908573172, "loss": 0.9767, "step": 704 }, { "epoch": 0.14332181337670258, "grad_norm": 0.13637319207191467, "learning_rate": 0.0001857825689006407, "loss": 1.1399, "step": 705 }, { "epoch": 0.14352510672900995, "grad_norm": 0.12638236582279205, "learning_rate": 0.00018576222922810944, "loss": 1.2348, "step": 706 }, { "epoch": 0.14372840008131735, "grad_norm": 0.11840532720088959, "learning_rate": 0.00018574188955557816, "loss": 1.0475, "step": 707 }, { "epoch": 0.14393169343362472, "grad_norm": 0.11270745098590851, "learning_rate": 0.00018572154988304689, "loss": 1.0503, "step": 708 }, { "epoch": 0.1441349867859321, "grad_norm": 0.12445101141929626, "learning_rate": 0.00018570121021051564, "loss": 1.0658, "step": 709 }, { "epoch": 0.1443382801382395, "grad_norm": 0.11352977156639099, "learning_rate": 0.00018568087053798436, "loss": 0.9473, "step": 710 }, { "epoch": 0.14454157349054686, "grad_norm": 0.11230108141899109, "learning_rate": 0.00018566053086545309, "loss": 1.0519, "step": 711 }, { "epoch": 0.14474486684285423, "grad_norm": 0.14274398982524872, "learning_rate": 0.0001856401911929218, "loss": 1.1135, "step": 712 }, { "epoch": 0.14494816019516163, "grad_norm": 0.11553295701742172, "learning_rate": 0.0001856198515203905, "loss": 1.044, "step": 713 }, { "epoch": 0.145151453547469, "grad_norm": 0.11737996339797974, "learning_rate": 0.00018559951184785926, "loss": 1.0154, "step": 714 }, { "epoch": 0.14535474689977637, "grad_norm": 0.1481630802154541, "learning_rate": 0.00018557917217532798, "loss": 1.1544, "step": 715 }, { "epoch": 0.14555804025208377, "grad_norm": 0.12081188708543777, "learning_rate": 0.0001855588325027967, "loss": 1.0034, "step": 716 }, { "epoch": 0.14576133360439114, "grad_norm": 0.13458681106567383, "learning_rate": 0.00018553849283026543, "loss": 1.1627, "step": 717 }, { "epoch": 0.1459646269566985, "grad_norm": 0.13506878912448883, "learning_rate": 0.00018551815315773418, "loss": 1.1927, "step": 718 }, { "epoch": 0.14616792030900588, "grad_norm": 0.10834948718547821, "learning_rate": 0.0001854978134852029, "loss": 1.0943, "step": 719 }, { "epoch": 0.14637121366131328, "grad_norm": 0.13779957592487335, "learning_rate": 0.00018547747381267163, "loss": 1.2356, "step": 720 }, { "epoch": 0.14657450701362065, "grad_norm": 0.12655863165855408, "learning_rate": 0.00018545713414014033, "loss": 1.1085, "step": 721 }, { "epoch": 0.14677780036592802, "grad_norm": 0.1144525483250618, "learning_rate": 0.00018543679446760908, "loss": 1.0517, "step": 722 }, { "epoch": 0.14698109371823542, "grad_norm": 0.12001293152570724, "learning_rate": 0.0001854164547950778, "loss": 1.1439, "step": 723 }, { "epoch": 0.1471843870705428, "grad_norm": 0.12786982953548431, "learning_rate": 0.00018539611512254653, "loss": 1.1846, "step": 724 }, { "epoch": 0.14738768042285016, "grad_norm": 0.1154879704117775, "learning_rate": 0.00018537577545001526, "loss": 0.941, "step": 725 }, { "epoch": 0.14759097377515756, "grad_norm": 0.10635704547166824, "learning_rate": 0.000185355435777484, "loss": 0.915, "step": 726 }, { "epoch": 0.14779426712746493, "grad_norm": 0.11456220597028732, "learning_rate": 0.00018533509610495273, "loss": 1.0387, "step": 727 }, { "epoch": 0.1479975604797723, "grad_norm": 0.11217451840639114, "learning_rate": 0.00018531475643242146, "loss": 1.0938, "step": 728 }, { "epoch": 0.1482008538320797, "grad_norm": 0.1105191633105278, "learning_rate": 0.00018529441675989015, "loss": 1.0398, "step": 729 }, { "epoch": 0.14840414718438708, "grad_norm": 0.11848670989274979, "learning_rate": 0.0001852740770873589, "loss": 1.04, "step": 730 }, { "epoch": 0.14860744053669445, "grad_norm": 0.11965551227331161, "learning_rate": 0.00018525373741482763, "loss": 0.966, "step": 731 }, { "epoch": 0.14881073388900182, "grad_norm": 0.12252170592546463, "learning_rate": 0.00018523339774229635, "loss": 1.1997, "step": 732 }, { "epoch": 0.14901402724130922, "grad_norm": 0.11600001901388168, "learning_rate": 0.00018521305806976508, "loss": 1.2425, "step": 733 }, { "epoch": 0.1492173205936166, "grad_norm": 0.11161402612924576, "learning_rate": 0.00018519271839723383, "loss": 0.9978, "step": 734 }, { "epoch": 0.14942061394592396, "grad_norm": 0.12365563958883286, "learning_rate": 0.00018517237872470255, "loss": 0.9652, "step": 735 }, { "epoch": 0.14962390729823136, "grad_norm": 0.11252112686634064, "learning_rate": 0.00018515203905217128, "loss": 0.948, "step": 736 }, { "epoch": 0.14982720065053873, "grad_norm": 0.12211350351572037, "learning_rate": 0.00018513169937963998, "loss": 1.0636, "step": 737 }, { "epoch": 0.1500304940028461, "grad_norm": 0.13200169801712036, "learning_rate": 0.00018511135970710873, "loss": 1.158, "step": 738 }, { "epoch": 0.1502337873551535, "grad_norm": 0.11223406344652176, "learning_rate": 0.00018509102003457745, "loss": 1.1194, "step": 739 }, { "epoch": 0.15043708070746087, "grad_norm": 0.11996794492006302, "learning_rate": 0.00018507068036204618, "loss": 1.0485, "step": 740 }, { "epoch": 0.15064037405976824, "grad_norm": 0.13017338514328003, "learning_rate": 0.0001850503406895149, "loss": 1.1304, "step": 741 }, { "epoch": 0.15084366741207564, "grad_norm": 0.1273190826177597, "learning_rate": 0.00018503000101698365, "loss": 1.0937, "step": 742 }, { "epoch": 0.151046960764383, "grad_norm": 0.1322571486234665, "learning_rate": 0.00018500966134445238, "loss": 1.1364, "step": 743 }, { "epoch": 0.15125025411669038, "grad_norm": 0.12314455956220627, "learning_rate": 0.0001849893216719211, "loss": 1.0005, "step": 744 }, { "epoch": 0.15145354746899775, "grad_norm": 0.1126449927687645, "learning_rate": 0.0001849689819993898, "loss": 1.0231, "step": 745 }, { "epoch": 0.15165684082130515, "grad_norm": 0.12586358189582825, "learning_rate": 0.00018494864232685855, "loss": 1.0816, "step": 746 }, { "epoch": 0.15186013417361252, "grad_norm": 0.09933953732252121, "learning_rate": 0.00018492830265432727, "loss": 0.8666, "step": 747 }, { "epoch": 0.1520634275259199, "grad_norm": 0.12422667443752289, "learning_rate": 0.000184907962981796, "loss": 1.0502, "step": 748 }, { "epoch": 0.1522667208782273, "grad_norm": 0.12274408340454102, "learning_rate": 0.00018488762330926472, "loss": 1.1445, "step": 749 }, { "epoch": 0.15247001423053466, "grad_norm": 0.1317015141248703, "learning_rate": 0.00018486728363673348, "loss": 1.2226, "step": 750 }, { "epoch": 0.15267330758284203, "grad_norm": 0.1201949417591095, "learning_rate": 0.0001848469439642022, "loss": 0.9285, "step": 751 }, { "epoch": 0.15287660093514943, "grad_norm": 0.11115135997533798, "learning_rate": 0.00018482660429167092, "loss": 1.1262, "step": 752 }, { "epoch": 0.1530798942874568, "grad_norm": 0.11809299886226654, "learning_rate": 0.00018480626461913965, "loss": 1.0792, "step": 753 }, { "epoch": 0.15328318763976417, "grad_norm": 0.14711928367614746, "learning_rate": 0.00018478592494660835, "loss": 1.1647, "step": 754 }, { "epoch": 0.15348648099207157, "grad_norm": 0.12082501500844955, "learning_rate": 0.0001847655852740771, "loss": 1.1866, "step": 755 }, { "epoch": 0.15368977434437894, "grad_norm": 0.1093011349439621, "learning_rate": 0.00018474524560154582, "loss": 0.9978, "step": 756 }, { "epoch": 0.15389306769668631, "grad_norm": 0.11525548994541168, "learning_rate": 0.00018472490592901455, "loss": 0.9134, "step": 757 }, { "epoch": 0.15409636104899369, "grad_norm": 0.12464176118373871, "learning_rate": 0.00018470456625648327, "loss": 1.0974, "step": 758 }, { "epoch": 0.15429965440130108, "grad_norm": 0.11930055171251297, "learning_rate": 0.00018468422658395202, "loss": 0.8953, "step": 759 }, { "epoch": 0.15450294775360846, "grad_norm": 0.12347722053527832, "learning_rate": 0.00018466388691142075, "loss": 1.0212, "step": 760 }, { "epoch": 0.15470624110591583, "grad_norm": 0.1258956342935562, "learning_rate": 0.00018464354723888947, "loss": 1.2616, "step": 761 }, { "epoch": 0.15490953445822322, "grad_norm": 0.12692275643348694, "learning_rate": 0.00018462320756635817, "loss": 1.1994, "step": 762 }, { "epoch": 0.1551128278105306, "grad_norm": 0.13774073123931885, "learning_rate": 0.00018460286789382692, "loss": 1.2109, "step": 763 }, { "epoch": 0.15531612116283797, "grad_norm": 0.12587130069732666, "learning_rate": 0.00018458252822129564, "loss": 1.1059, "step": 764 }, { "epoch": 0.15551941451514537, "grad_norm": 0.13462059199810028, "learning_rate": 0.00018456218854876437, "loss": 1.0648, "step": 765 }, { "epoch": 0.15572270786745274, "grad_norm": 0.1329740285873413, "learning_rate": 0.0001845418488762331, "loss": 1.264, "step": 766 }, { "epoch": 0.1559260012197601, "grad_norm": 0.12275559455156326, "learning_rate": 0.00018452150920370185, "loss": 0.9893, "step": 767 }, { "epoch": 0.1561292945720675, "grad_norm": 0.12821702659130096, "learning_rate": 0.00018450116953117057, "loss": 1.0681, "step": 768 }, { "epoch": 0.15633258792437488, "grad_norm": 0.11758620291948318, "learning_rate": 0.0001844808298586393, "loss": 1.0476, "step": 769 }, { "epoch": 0.15653588127668225, "grad_norm": 0.11491292715072632, "learning_rate": 0.000184460490186108, "loss": 1.1428, "step": 770 }, { "epoch": 0.15673917462898962, "grad_norm": 0.12064868956804276, "learning_rate": 0.00018444015051357674, "loss": 0.9565, "step": 771 }, { "epoch": 0.15694246798129702, "grad_norm": 0.12319160997867584, "learning_rate": 0.00018441981084104547, "loss": 1.0593, "step": 772 }, { "epoch": 0.1571457613336044, "grad_norm": 0.13514620065689087, "learning_rate": 0.0001843994711685142, "loss": 1.1908, "step": 773 }, { "epoch": 0.15734905468591176, "grad_norm": 0.1343378722667694, "learning_rate": 0.00018437913149598292, "loss": 1.2193, "step": 774 }, { "epoch": 0.15755234803821916, "grad_norm": 0.13351817429065704, "learning_rate": 0.00018435879182345167, "loss": 1.1141, "step": 775 }, { "epoch": 0.15775564139052653, "grad_norm": 0.11843458563089371, "learning_rate": 0.0001843384521509204, "loss": 1.1933, "step": 776 }, { "epoch": 0.1579589347428339, "grad_norm": 0.12293927371501923, "learning_rate": 0.00018431811247838912, "loss": 1.0682, "step": 777 }, { "epoch": 0.1581622280951413, "grad_norm": 0.11566301435232162, "learning_rate": 0.00018429777280585781, "loss": 1.1093, "step": 778 }, { "epoch": 0.15836552144744867, "grad_norm": 0.11641670763492584, "learning_rate": 0.00018427743313332657, "loss": 1.2028, "step": 779 }, { "epoch": 0.15856881479975604, "grad_norm": 0.14020314812660217, "learning_rate": 0.0001842570934607953, "loss": 1.0472, "step": 780 }, { "epoch": 0.15877210815206344, "grad_norm": 0.11766766011714935, "learning_rate": 0.00018423675378826401, "loss": 0.9908, "step": 781 }, { "epoch": 0.1589754015043708, "grad_norm": 0.14530715346336365, "learning_rate": 0.00018421641411573274, "loss": 1.2046, "step": 782 }, { "epoch": 0.15917869485667818, "grad_norm": 0.12271513789892197, "learning_rate": 0.0001841960744432015, "loss": 1.1401, "step": 783 }, { "epoch": 0.15938198820898555, "grad_norm": 0.12754741311073303, "learning_rate": 0.00018417573477067022, "loss": 1.2811, "step": 784 }, { "epoch": 0.15958528156129295, "grad_norm": 0.10751698166131973, "learning_rate": 0.00018415539509813894, "loss": 0.9566, "step": 785 }, { "epoch": 0.15978857491360032, "grad_norm": 0.12434156984090805, "learning_rate": 0.00018413505542560764, "loss": 1.2307, "step": 786 }, { "epoch": 0.1599918682659077, "grad_norm": 0.1130242571234703, "learning_rate": 0.0001841147157530764, "loss": 1.0406, "step": 787 }, { "epoch": 0.1601951616182151, "grad_norm": 0.12631991505622864, "learning_rate": 0.0001840943760805451, "loss": 1.0835, "step": 788 }, { "epoch": 0.16039845497052246, "grad_norm": 0.11642556637525558, "learning_rate": 0.00018407403640801384, "loss": 0.9743, "step": 789 }, { "epoch": 0.16060174832282983, "grad_norm": 0.1119033470749855, "learning_rate": 0.00018405369673548256, "loss": 1.1377, "step": 790 }, { "epoch": 0.16080504167513723, "grad_norm": 0.14675219357013702, "learning_rate": 0.00018403335706295131, "loss": 1.2846, "step": 791 }, { "epoch": 0.1610083350274446, "grad_norm": 0.1238279864192009, "learning_rate": 0.00018401301739042004, "loss": 1.1033, "step": 792 }, { "epoch": 0.16121162837975198, "grad_norm": 0.12538330256938934, "learning_rate": 0.00018399267771788876, "loss": 1.2344, "step": 793 }, { "epoch": 0.16141492173205937, "grad_norm": 0.11384537816047668, "learning_rate": 0.00018397233804535746, "loss": 1.0143, "step": 794 }, { "epoch": 0.16161821508436675, "grad_norm": 0.1444682627916336, "learning_rate": 0.00018395199837282618, "loss": 1.2364, "step": 795 }, { "epoch": 0.16182150843667412, "grad_norm": 0.12999016046524048, "learning_rate": 0.00018393165870029494, "loss": 1.1853, "step": 796 }, { "epoch": 0.1620248017889815, "grad_norm": 0.12258971482515335, "learning_rate": 0.00018391131902776366, "loss": 1.2673, "step": 797 }, { "epoch": 0.16222809514128889, "grad_norm": 0.13033455610275269, "learning_rate": 0.00018389097935523238, "loss": 0.8922, "step": 798 }, { "epoch": 0.16243138849359626, "grad_norm": 0.14746494591236115, "learning_rate": 0.0001838706396827011, "loss": 1.2164, "step": 799 }, { "epoch": 0.16263468184590363, "grad_norm": 0.12869805097579956, "learning_rate": 0.00018385030001016986, "loss": 1.1788, "step": 800 }, { "epoch": 0.16283797519821103, "grad_norm": 0.11467185616493225, "learning_rate": 0.00018382996033763859, "loss": 0.9527, "step": 801 }, { "epoch": 0.1630412685505184, "grad_norm": 0.129184752702713, "learning_rate": 0.00018380962066510728, "loss": 1.1758, "step": 802 }, { "epoch": 0.16324456190282577, "grad_norm": 0.11696959286928177, "learning_rate": 0.000183789280992576, "loss": 1.03, "step": 803 }, { "epoch": 0.16344785525513317, "grad_norm": 0.13689257204532623, "learning_rate": 0.00018376894132004476, "loss": 1.2516, "step": 804 }, { "epoch": 0.16365114860744054, "grad_norm": 0.11370982229709625, "learning_rate": 0.00018374860164751348, "loss": 1.0484, "step": 805 }, { "epoch": 0.1638544419597479, "grad_norm": 0.13201859593391418, "learning_rate": 0.0001837282619749822, "loss": 1.0903, "step": 806 }, { "epoch": 0.1640577353120553, "grad_norm": 0.10468725860118866, "learning_rate": 0.00018370792230245093, "loss": 0.9548, "step": 807 }, { "epoch": 0.16426102866436268, "grad_norm": 0.14737223088741302, "learning_rate": 0.00018368758262991968, "loss": 1.1607, "step": 808 }, { "epoch": 0.16446432201667005, "grad_norm": 0.11500222235918045, "learning_rate": 0.0001836672429573884, "loss": 1.1032, "step": 809 }, { "epoch": 0.16466761536897742, "grad_norm": 0.12849587202072144, "learning_rate": 0.00018364690328485713, "loss": 1.255, "step": 810 }, { "epoch": 0.16487090872128482, "grad_norm": 0.10878688842058182, "learning_rate": 0.00018362656361232583, "loss": 1.1075, "step": 811 }, { "epoch": 0.1650742020735922, "grad_norm": 0.10878860950469971, "learning_rate": 0.00018360622393979458, "loss": 1.0629, "step": 812 }, { "epoch": 0.16527749542589956, "grad_norm": 0.1280430108308792, "learning_rate": 0.0001835858842672633, "loss": 1.1377, "step": 813 }, { "epoch": 0.16548078877820696, "grad_norm": 0.11831233650445938, "learning_rate": 0.00018356554459473203, "loss": 1.0786, "step": 814 }, { "epoch": 0.16568408213051433, "grad_norm": 0.11453156918287277, "learning_rate": 0.00018354520492220075, "loss": 1.0477, "step": 815 }, { "epoch": 0.1658873754828217, "grad_norm": 0.13597573339939117, "learning_rate": 0.0001835248652496695, "loss": 1.1807, "step": 816 }, { "epoch": 0.1660906688351291, "grad_norm": 0.12008185684680939, "learning_rate": 0.00018350452557713823, "loss": 1.0676, "step": 817 }, { "epoch": 0.16629396218743647, "grad_norm": 0.1363888829946518, "learning_rate": 0.00018348418590460696, "loss": 1.1582, "step": 818 }, { "epoch": 0.16649725553974384, "grad_norm": 0.11310733109712601, "learning_rate": 0.00018346384623207565, "loss": 1.0931, "step": 819 }, { "epoch": 0.16670054889205124, "grad_norm": 0.13503344357013702, "learning_rate": 0.0001834435065595444, "loss": 1.1465, "step": 820 }, { "epoch": 0.1669038422443586, "grad_norm": 0.12744784355163574, "learning_rate": 0.00018342316688701313, "loss": 1.1662, "step": 821 }, { "epoch": 0.16710713559666598, "grad_norm": 0.13695518672466278, "learning_rate": 0.00018340282721448185, "loss": 1.1846, "step": 822 }, { "epoch": 0.16731042894897336, "grad_norm": 0.12580302357673645, "learning_rate": 0.00018338248754195058, "loss": 0.93, "step": 823 }, { "epoch": 0.16751372230128075, "grad_norm": 0.12266777455806732, "learning_rate": 0.00018336214786941933, "loss": 1.1033, "step": 824 }, { "epoch": 0.16771701565358813, "grad_norm": 0.1129806861281395, "learning_rate": 0.00018334180819688805, "loss": 1.0517, "step": 825 }, { "epoch": 0.1679203090058955, "grad_norm": 0.12590476870536804, "learning_rate": 0.00018332146852435678, "loss": 1.0374, "step": 826 }, { "epoch": 0.1681236023582029, "grad_norm": 0.12631377577781677, "learning_rate": 0.00018330112885182548, "loss": 1.1898, "step": 827 }, { "epoch": 0.16832689571051027, "grad_norm": 0.13719779253005981, "learning_rate": 0.00018328078917929423, "loss": 1.1108, "step": 828 }, { "epoch": 0.16853018906281764, "grad_norm": 0.12414206564426422, "learning_rate": 0.00018326044950676295, "loss": 1.1654, "step": 829 }, { "epoch": 0.16873348241512504, "grad_norm": 0.12075278162956238, "learning_rate": 0.00018324010983423168, "loss": 1.0255, "step": 830 }, { "epoch": 0.1689367757674324, "grad_norm": 0.11906860023736954, "learning_rate": 0.0001832197701617004, "loss": 1.0433, "step": 831 }, { "epoch": 0.16914006911973978, "grad_norm": 0.11960665136575699, "learning_rate": 0.00018319943048916915, "loss": 0.9501, "step": 832 }, { "epoch": 0.16934336247204718, "grad_norm": 0.1228812113404274, "learning_rate": 0.00018317909081663788, "loss": 1.002, "step": 833 }, { "epoch": 0.16954665582435455, "grad_norm": 0.12420972436666489, "learning_rate": 0.0001831587511441066, "loss": 1.062, "step": 834 }, { "epoch": 0.16974994917666192, "grad_norm": 0.11490360647439957, "learning_rate": 0.0001831384114715753, "loss": 0.9708, "step": 835 }, { "epoch": 0.1699532425289693, "grad_norm": 0.11945214867591858, "learning_rate": 0.00018311807179904402, "loss": 1.1042, "step": 836 }, { "epoch": 0.1701565358812767, "grad_norm": 0.1234474778175354, "learning_rate": 0.00018309773212651277, "loss": 1.0258, "step": 837 }, { "epoch": 0.17035982923358406, "grad_norm": 0.12447863817214966, "learning_rate": 0.0001830773924539815, "loss": 1.1132, "step": 838 }, { "epoch": 0.17056312258589143, "grad_norm": 0.1321963667869568, "learning_rate": 0.00018305705278145022, "loss": 1.1835, "step": 839 }, { "epoch": 0.17076641593819883, "grad_norm": 0.12708254158496857, "learning_rate": 0.00018303671310891895, "loss": 1.1787, "step": 840 }, { "epoch": 0.1709697092905062, "grad_norm": 0.11481820046901703, "learning_rate": 0.0001830163734363877, "loss": 0.8837, "step": 841 }, { "epoch": 0.17117300264281357, "grad_norm": 0.11851567029953003, "learning_rate": 0.00018299603376385642, "loss": 0.9516, "step": 842 }, { "epoch": 0.17137629599512097, "grad_norm": 0.13182471692562103, "learning_rate": 0.00018297569409132512, "loss": 1.1809, "step": 843 }, { "epoch": 0.17157958934742834, "grad_norm": 0.12840509414672852, "learning_rate": 0.00018295535441879385, "loss": 1.0557, "step": 844 }, { "epoch": 0.1717828826997357, "grad_norm": 0.11280561983585358, "learning_rate": 0.0001829350147462626, "loss": 1.0737, "step": 845 }, { "epoch": 0.1719861760520431, "grad_norm": 0.13144554197788239, "learning_rate": 0.00018291467507373132, "loss": 1.0275, "step": 846 }, { "epoch": 0.17218946940435048, "grad_norm": 0.1224883422255516, "learning_rate": 0.00018289433540120005, "loss": 1.1558, "step": 847 }, { "epoch": 0.17239276275665785, "grad_norm": 0.1263243854045868, "learning_rate": 0.00018287399572866877, "loss": 0.9381, "step": 848 }, { "epoch": 0.17259605610896522, "grad_norm": 0.13391436636447906, "learning_rate": 0.00018285365605613752, "loss": 1.2548, "step": 849 }, { "epoch": 0.17279934946127262, "grad_norm": 0.12166419625282288, "learning_rate": 0.00018283331638360625, "loss": 1.0981, "step": 850 }, { "epoch": 0.17300264281358, "grad_norm": 0.13190463185310364, "learning_rate": 0.00018281297671107494, "loss": 1.1847, "step": 851 }, { "epoch": 0.17320593616588736, "grad_norm": 0.11678186804056168, "learning_rate": 0.00018279263703854367, "loss": 1.0303, "step": 852 }, { "epoch": 0.17340922951819476, "grad_norm": 0.11716858297586441, "learning_rate": 0.00018277229736601242, "loss": 0.9274, "step": 853 }, { "epoch": 0.17361252287050213, "grad_norm": 0.1340217888355255, "learning_rate": 0.00018275195769348114, "loss": 1.0179, "step": 854 }, { "epoch": 0.1738158162228095, "grad_norm": 0.12650153040885925, "learning_rate": 0.00018273161802094987, "loss": 1.0234, "step": 855 }, { "epoch": 0.1740191095751169, "grad_norm": 0.1294967234134674, "learning_rate": 0.0001827112783484186, "loss": 1.2539, "step": 856 }, { "epoch": 0.17422240292742427, "grad_norm": 0.13714881241321564, "learning_rate": 0.00018269093867588734, "loss": 1.0106, "step": 857 }, { "epoch": 0.17442569627973165, "grad_norm": 0.12365014851093292, "learning_rate": 0.00018267059900335607, "loss": 1.1184, "step": 858 }, { "epoch": 0.17462898963203904, "grad_norm": 0.11030489951372147, "learning_rate": 0.00018265025933082477, "loss": 0.9478, "step": 859 }, { "epoch": 0.17483228298434642, "grad_norm": 0.1181483343243599, "learning_rate": 0.0001826299196582935, "loss": 1.0861, "step": 860 }, { "epoch": 0.1750355763366538, "grad_norm": 0.12873612344264984, "learning_rate": 0.00018260957998576224, "loss": 0.9811, "step": 861 }, { "epoch": 0.17523886968896116, "grad_norm": 0.11688394844532013, "learning_rate": 0.00018258924031323097, "loss": 1.1643, "step": 862 }, { "epoch": 0.17544216304126856, "grad_norm": 0.12729796767234802, "learning_rate": 0.0001825689006406997, "loss": 1.0692, "step": 863 }, { "epoch": 0.17564545639357593, "grad_norm": 0.12474660575389862, "learning_rate": 0.00018254856096816842, "loss": 1.2838, "step": 864 }, { "epoch": 0.1758487497458833, "grad_norm": 0.12324024736881256, "learning_rate": 0.00018252822129563717, "loss": 1.0029, "step": 865 }, { "epoch": 0.1760520430981907, "grad_norm": 0.13511407375335693, "learning_rate": 0.0001825078816231059, "loss": 1.1398, "step": 866 }, { "epoch": 0.17625533645049807, "grad_norm": 0.13292032480239868, "learning_rate": 0.0001824875419505746, "loss": 1.3107, "step": 867 }, { "epoch": 0.17645862980280544, "grad_norm": 0.12073294073343277, "learning_rate": 0.00018246720227804331, "loss": 1.1293, "step": 868 }, { "epoch": 0.17666192315511284, "grad_norm": 0.11789250373840332, "learning_rate": 0.00018244686260551207, "loss": 1.0462, "step": 869 }, { "epoch": 0.1768652165074202, "grad_norm": 0.1194562166929245, "learning_rate": 0.0001824265229329808, "loss": 1.0017, "step": 870 }, { "epoch": 0.17706850985972758, "grad_norm": 0.10480080544948578, "learning_rate": 0.00018240618326044951, "loss": 0.8659, "step": 871 }, { "epoch": 0.17727180321203498, "grad_norm": 0.1207701787352562, "learning_rate": 0.00018238584358791824, "loss": 0.9937, "step": 872 }, { "epoch": 0.17747509656434235, "grad_norm": 0.1190091222524643, "learning_rate": 0.000182365503915387, "loss": 1.0437, "step": 873 }, { "epoch": 0.17767838991664972, "grad_norm": 0.1277458369731903, "learning_rate": 0.00018234516424285572, "loss": 1.2392, "step": 874 }, { "epoch": 0.1778816832689571, "grad_norm": 0.12237963080406189, "learning_rate": 0.00018232482457032444, "loss": 1.1032, "step": 875 }, { "epoch": 0.1780849766212645, "grad_norm": 0.1319531798362732, "learning_rate": 0.00018230448489779314, "loss": 1.2012, "step": 876 }, { "epoch": 0.17828826997357186, "grad_norm": 0.11914216727018356, "learning_rate": 0.0001822841452252619, "loss": 1.0272, "step": 877 }, { "epoch": 0.17849156332587923, "grad_norm": 0.14588242769241333, "learning_rate": 0.0001822638055527306, "loss": 1.357, "step": 878 }, { "epoch": 0.17869485667818663, "grad_norm": 0.11982700973749161, "learning_rate": 0.00018224346588019934, "loss": 1.049, "step": 879 }, { "epoch": 0.178898150030494, "grad_norm": 0.12529560923576355, "learning_rate": 0.00018222312620766806, "loss": 1.0713, "step": 880 }, { "epoch": 0.17910144338280137, "grad_norm": 0.1316487044095993, "learning_rate": 0.00018220278653513679, "loss": 1.1749, "step": 881 }, { "epoch": 0.17930473673510877, "grad_norm": 0.12096232175827026, "learning_rate": 0.00018218244686260554, "loss": 1.2104, "step": 882 }, { "epoch": 0.17950803008741614, "grad_norm": 0.1313014030456543, "learning_rate": 0.00018216210719007426, "loss": 1.0554, "step": 883 }, { "epoch": 0.1797113234397235, "grad_norm": 0.1309378743171692, "learning_rate": 0.00018214176751754296, "loss": 1.2152, "step": 884 }, { "epoch": 0.1799146167920309, "grad_norm": 0.1286410242319107, "learning_rate": 0.00018212142784501168, "loss": 1.0922, "step": 885 }, { "epoch": 0.18011791014433828, "grad_norm": 0.12893226742744446, "learning_rate": 0.00018210108817248044, "loss": 1.1969, "step": 886 }, { "epoch": 0.18032120349664565, "grad_norm": 0.11664584279060364, "learning_rate": 0.00018208074849994916, "loss": 1.0085, "step": 887 }, { "epoch": 0.18052449684895303, "grad_norm": 0.10973158478736877, "learning_rate": 0.00018206040882741788, "loss": 0.9548, "step": 888 }, { "epoch": 0.18072779020126042, "grad_norm": 0.11281079053878784, "learning_rate": 0.0001820400691548866, "loss": 0.8521, "step": 889 }, { "epoch": 0.1809310835535678, "grad_norm": 0.12198197096586227, "learning_rate": 0.00018201972948235536, "loss": 1.0537, "step": 890 }, { "epoch": 0.18113437690587517, "grad_norm": 0.09405733644962311, "learning_rate": 0.00018199938980982409, "loss": 0.7193, "step": 891 }, { "epoch": 0.18133767025818257, "grad_norm": 0.13503974676132202, "learning_rate": 0.00018197905013729278, "loss": 1.1564, "step": 892 }, { "epoch": 0.18154096361048994, "grad_norm": 0.1322106271982193, "learning_rate": 0.0001819587104647615, "loss": 1.0733, "step": 893 }, { "epoch": 0.1817442569627973, "grad_norm": 0.12791374325752258, "learning_rate": 0.00018193837079223026, "loss": 1.0701, "step": 894 }, { "epoch": 0.1819475503151047, "grad_norm": 0.12342046946287155, "learning_rate": 0.00018191803111969898, "loss": 1.1255, "step": 895 }, { "epoch": 0.18215084366741208, "grad_norm": 0.12089495360851288, "learning_rate": 0.0001818976914471677, "loss": 1.0177, "step": 896 }, { "epoch": 0.18235413701971945, "grad_norm": 0.12383720278739929, "learning_rate": 0.00018187735177463643, "loss": 1.0188, "step": 897 }, { "epoch": 0.18255743037202685, "grad_norm": 0.12089379876852036, "learning_rate": 0.00018185701210210518, "loss": 1.1106, "step": 898 }, { "epoch": 0.18276072372433422, "grad_norm": 0.12939763069152832, "learning_rate": 0.0001818366724295739, "loss": 1.1939, "step": 899 }, { "epoch": 0.1829640170766416, "grad_norm": 0.14534543454647064, "learning_rate": 0.0001818163327570426, "loss": 1.252, "step": 900 }, { "epoch": 0.18316731042894896, "grad_norm": 0.13002236187458038, "learning_rate": 0.00018179599308451133, "loss": 0.9607, "step": 901 }, { "epoch": 0.18337060378125636, "grad_norm": 0.11892438679933548, "learning_rate": 0.00018177565341198008, "loss": 0.9641, "step": 902 }, { "epoch": 0.18357389713356373, "grad_norm": 0.11869879812002182, "learning_rate": 0.0001817553137394488, "loss": 0.886, "step": 903 }, { "epoch": 0.1837771904858711, "grad_norm": 0.11826761066913605, "learning_rate": 0.00018173497406691753, "loss": 1.2055, "step": 904 }, { "epoch": 0.1839804838381785, "grad_norm": 0.1275918185710907, "learning_rate": 0.00018171463439438625, "loss": 1.0468, "step": 905 }, { "epoch": 0.18418377719048587, "grad_norm": 0.12289033085107803, "learning_rate": 0.000181694294721855, "loss": 1.1464, "step": 906 }, { "epoch": 0.18438707054279324, "grad_norm": 0.11647521704435349, "learning_rate": 0.00018167395504932373, "loss": 0.8912, "step": 907 }, { "epoch": 0.18459036389510064, "grad_norm": 0.12756259739398956, "learning_rate": 0.00018165361537679243, "loss": 1.0672, "step": 908 }, { "epoch": 0.184793657247408, "grad_norm": 0.12525498867034912, "learning_rate": 0.00018163327570426115, "loss": 1.1493, "step": 909 }, { "epoch": 0.18499695059971538, "grad_norm": 0.11629681289196014, "learning_rate": 0.0001816129360317299, "loss": 1.0021, "step": 910 }, { "epoch": 0.18520024395202278, "grad_norm": 0.1350405514240265, "learning_rate": 0.00018159259635919863, "loss": 1.1597, "step": 911 }, { "epoch": 0.18540353730433015, "grad_norm": 0.10785862803459167, "learning_rate": 0.00018157225668666735, "loss": 0.9035, "step": 912 }, { "epoch": 0.18560683065663752, "grad_norm": 0.13618353009223938, "learning_rate": 0.00018155191701413608, "loss": 1.4084, "step": 913 }, { "epoch": 0.18581012400894492, "grad_norm": 0.12942783534526825, "learning_rate": 0.00018153157734160483, "loss": 1.0529, "step": 914 }, { "epoch": 0.1860134173612523, "grad_norm": 0.12829767167568207, "learning_rate": 0.00018151123766907355, "loss": 1.1734, "step": 915 }, { "epoch": 0.18621671071355966, "grad_norm": 0.11795412003993988, "learning_rate": 0.00018149089799654225, "loss": 1.0891, "step": 916 }, { "epoch": 0.18642000406586703, "grad_norm": 0.13184364140033722, "learning_rate": 0.00018147055832401098, "loss": 1.0332, "step": 917 }, { "epoch": 0.18662329741817443, "grad_norm": 0.13445381820201874, "learning_rate": 0.00018145021865147973, "loss": 1.1417, "step": 918 }, { "epoch": 0.1868265907704818, "grad_norm": 0.1418420672416687, "learning_rate": 0.00018142987897894845, "loss": 1.1279, "step": 919 }, { "epoch": 0.18702988412278918, "grad_norm": 0.11725430935621262, "learning_rate": 0.00018140953930641718, "loss": 0.9878, "step": 920 }, { "epoch": 0.18723317747509657, "grad_norm": 0.13889212906360626, "learning_rate": 0.0001813891996338859, "loss": 1.1153, "step": 921 }, { "epoch": 0.18743647082740394, "grad_norm": 0.12875622510910034, "learning_rate": 0.00018136885996135462, "loss": 1.0345, "step": 922 }, { "epoch": 0.18763976417971132, "grad_norm": 0.12533831596374512, "learning_rate": 0.00018134852028882338, "loss": 1.0741, "step": 923 }, { "epoch": 0.18784305753201871, "grad_norm": 0.12448123842477798, "learning_rate": 0.00018132818061629207, "loss": 1.0161, "step": 924 }, { "epoch": 0.18804635088432609, "grad_norm": 0.13820883631706238, "learning_rate": 0.0001813078409437608, "loss": 1.2834, "step": 925 }, { "epoch": 0.18824964423663346, "grad_norm": 0.1304212063550949, "learning_rate": 0.00018128750127122952, "loss": 1.0927, "step": 926 }, { "epoch": 0.18845293758894086, "grad_norm": 0.12558777630329132, "learning_rate": 0.00018126716159869827, "loss": 1.1516, "step": 927 }, { "epoch": 0.18865623094124823, "grad_norm": 0.13149550557136536, "learning_rate": 0.000181246821926167, "loss": 1.0791, "step": 928 }, { "epoch": 0.1888595242935556, "grad_norm": 0.12774059176445007, "learning_rate": 0.00018122648225363572, "loss": 1.108, "step": 929 }, { "epoch": 0.18906281764586297, "grad_norm": 0.12127216160297394, "learning_rate": 0.00018120614258110445, "loss": 1.1254, "step": 930 }, { "epoch": 0.18926611099817037, "grad_norm": 0.1251489520072937, "learning_rate": 0.0001811858029085732, "loss": 1.1306, "step": 931 }, { "epoch": 0.18946940435047774, "grad_norm": 0.12320549786090851, "learning_rate": 0.00018116546323604192, "loss": 1.082, "step": 932 }, { "epoch": 0.1896726977027851, "grad_norm": 0.12626154720783234, "learning_rate": 0.00018114512356351062, "loss": 1.176, "step": 933 }, { "epoch": 0.1898759910550925, "grad_norm": 0.12401305884122849, "learning_rate": 0.00018112478389097935, "loss": 1.107, "step": 934 }, { "epoch": 0.19007928440739988, "grad_norm": 0.13284708559513092, "learning_rate": 0.0001811044442184481, "loss": 1.1977, "step": 935 }, { "epoch": 0.19028257775970725, "grad_norm": 0.11293178796768188, "learning_rate": 0.00018108410454591682, "loss": 0.8484, "step": 936 }, { "epoch": 0.19048587111201465, "grad_norm": 0.12113649398088455, "learning_rate": 0.00018106376487338555, "loss": 1.0833, "step": 937 }, { "epoch": 0.19068916446432202, "grad_norm": 0.12353657186031342, "learning_rate": 0.00018104342520085427, "loss": 1.1154, "step": 938 }, { "epoch": 0.1908924578166294, "grad_norm": 0.13213786482810974, "learning_rate": 0.00018102308552832302, "loss": 1.0866, "step": 939 }, { "epoch": 0.1910957511689368, "grad_norm": 0.12303278595209122, "learning_rate": 0.00018100274585579175, "loss": 0.9889, "step": 940 }, { "epoch": 0.19129904452124416, "grad_norm": 0.12523289024829865, "learning_rate": 0.00018098240618326044, "loss": 0.9564, "step": 941 }, { "epoch": 0.19150233787355153, "grad_norm": 0.12457413971424103, "learning_rate": 0.00018096206651072917, "loss": 1.168, "step": 942 }, { "epoch": 0.1917056312258589, "grad_norm": 0.13440296053886414, "learning_rate": 0.00018094172683819792, "loss": 1.1655, "step": 943 }, { "epoch": 0.1919089245781663, "grad_norm": 0.11574854701757431, "learning_rate": 0.00018092138716566664, "loss": 0.9982, "step": 944 }, { "epoch": 0.19211221793047367, "grad_norm": 0.1216878592967987, "learning_rate": 0.00018090104749313537, "loss": 0.97, "step": 945 }, { "epoch": 0.19231551128278104, "grad_norm": 0.11920405179262161, "learning_rate": 0.0001808807078206041, "loss": 0.9783, "step": 946 }, { "epoch": 0.19251880463508844, "grad_norm": 0.12107307463884354, "learning_rate": 0.00018086036814807284, "loss": 1.0843, "step": 947 }, { "epoch": 0.1927220979873958, "grad_norm": 0.12287328392267227, "learning_rate": 0.00018084002847554157, "loss": 1.1068, "step": 948 }, { "epoch": 0.19292539133970318, "grad_norm": 0.12466049194335938, "learning_rate": 0.00018081968880301027, "loss": 0.9383, "step": 949 }, { "epoch": 0.19312868469201058, "grad_norm": 0.11762560158967972, "learning_rate": 0.000180799349130479, "loss": 0.9855, "step": 950 }, { "epoch": 0.19333197804431795, "grad_norm": 0.12275755405426025, "learning_rate": 0.00018077900945794774, "loss": 1.0528, "step": 951 }, { "epoch": 0.19353527139662532, "grad_norm": 0.12033812701702118, "learning_rate": 0.00018075866978541647, "loss": 1.0828, "step": 952 }, { "epoch": 0.19373856474893272, "grad_norm": 0.13380326330661774, "learning_rate": 0.0001807383301128852, "loss": 1.0634, "step": 953 }, { "epoch": 0.1939418581012401, "grad_norm": 0.13521994650363922, "learning_rate": 0.00018071799044035392, "loss": 1.1512, "step": 954 }, { "epoch": 0.19414515145354747, "grad_norm": 0.1331789344549179, "learning_rate": 0.00018069765076782267, "loss": 1.2343, "step": 955 }, { "epoch": 0.19434844480585484, "grad_norm": 0.12130323797464371, "learning_rate": 0.0001806773110952914, "loss": 1.291, "step": 956 }, { "epoch": 0.19455173815816224, "grad_norm": 0.10274801403284073, "learning_rate": 0.0001806569714227601, "loss": 0.8534, "step": 957 }, { "epoch": 0.1947550315104696, "grad_norm": 0.1255219727754593, "learning_rate": 0.00018063663175022881, "loss": 1.1804, "step": 958 }, { "epoch": 0.19495832486277698, "grad_norm": 0.13403509557247162, "learning_rate": 0.00018061629207769757, "loss": 1.1882, "step": 959 }, { "epoch": 0.19516161821508438, "grad_norm": 0.1277134269475937, "learning_rate": 0.0001805959524051663, "loss": 1.1059, "step": 960 }, { "epoch": 0.19536491156739175, "grad_norm": 0.1148851290345192, "learning_rate": 0.00018057561273263501, "loss": 1.125, "step": 961 }, { "epoch": 0.19556820491969912, "grad_norm": 0.10984671115875244, "learning_rate": 0.00018055527306010374, "loss": 1.0396, "step": 962 }, { "epoch": 0.19577149827200652, "grad_norm": 0.13988138735294342, "learning_rate": 0.00018053493338757246, "loss": 1.1672, "step": 963 }, { "epoch": 0.1959747916243139, "grad_norm": 0.12106659263372421, "learning_rate": 0.00018051459371504121, "loss": 1.0142, "step": 964 }, { "epoch": 0.19617808497662126, "grad_norm": 0.10751524567604065, "learning_rate": 0.0001804942540425099, "loss": 1.0027, "step": 965 }, { "epoch": 0.19638137832892866, "grad_norm": 0.12096796184778214, "learning_rate": 0.00018047391436997864, "loss": 1.0965, "step": 966 }, { "epoch": 0.19658467168123603, "grad_norm": 0.12069959938526154, "learning_rate": 0.00018045357469744736, "loss": 0.9869, "step": 967 }, { "epoch": 0.1967879650335434, "grad_norm": 0.13281071186065674, "learning_rate": 0.0001804332350249161, "loss": 1.0361, "step": 968 }, { "epoch": 0.19699125838585077, "grad_norm": 0.12690961360931396, "learning_rate": 0.00018041289535238484, "loss": 1.005, "step": 969 }, { "epoch": 0.19719455173815817, "grad_norm": 0.1329599916934967, "learning_rate": 0.00018039255567985356, "loss": 1.1184, "step": 970 }, { "epoch": 0.19739784509046554, "grad_norm": 0.12807321548461914, "learning_rate": 0.00018037221600732229, "loss": 1.1918, "step": 971 }, { "epoch": 0.1976011384427729, "grad_norm": 0.12155921012163162, "learning_rate": 0.00018035187633479104, "loss": 1.1934, "step": 972 }, { "epoch": 0.1978044317950803, "grad_norm": 0.11720109730958939, "learning_rate": 0.00018033153666225973, "loss": 1.2674, "step": 973 }, { "epoch": 0.19800772514738768, "grad_norm": 0.12774553894996643, "learning_rate": 0.00018031119698972846, "loss": 1.1394, "step": 974 }, { "epoch": 0.19821101849969505, "grad_norm": 0.11617007106542587, "learning_rate": 0.00018029085731719718, "loss": 1.0772, "step": 975 }, { "epoch": 0.19841431185200245, "grad_norm": 0.1182067021727562, "learning_rate": 0.00018027051764466594, "loss": 1.0433, "step": 976 }, { "epoch": 0.19861760520430982, "grad_norm": 0.128327414393425, "learning_rate": 0.00018025017797213466, "loss": 1.0616, "step": 977 }, { "epoch": 0.1988208985566172, "grad_norm": 0.12075836956501007, "learning_rate": 0.00018022983829960338, "loss": 1.2187, "step": 978 }, { "epoch": 0.1990241919089246, "grad_norm": 0.132186159491539, "learning_rate": 0.0001802094986270721, "loss": 1.0614, "step": 979 }, { "epoch": 0.19922748526123196, "grad_norm": 0.135267972946167, "learning_rate": 0.00018018915895454086, "loss": 1.3447, "step": 980 }, { "epoch": 0.19943077861353933, "grad_norm": 0.13122640550136566, "learning_rate": 0.00018016881928200956, "loss": 1.3336, "step": 981 }, { "epoch": 0.1996340719658467, "grad_norm": 0.11631322652101517, "learning_rate": 0.00018014847960947828, "loss": 1.024, "step": 982 }, { "epoch": 0.1998373653181541, "grad_norm": 0.12409427016973495, "learning_rate": 0.000180128139936947, "loss": 0.9806, "step": 983 }, { "epoch": 0.20004065867046147, "grad_norm": 0.1337365210056305, "learning_rate": 0.00018010780026441576, "loss": 1.1875, "step": 984 }, { "epoch": 0.20024395202276885, "grad_norm": 0.12941214442253113, "learning_rate": 0.00018008746059188448, "loss": 1.1962, "step": 985 }, { "epoch": 0.20044724537507624, "grad_norm": 0.12374356389045715, "learning_rate": 0.0001800671209193532, "loss": 1.1213, "step": 986 }, { "epoch": 0.20065053872738361, "grad_norm": 0.13427360355854034, "learning_rate": 0.00018004678124682193, "loss": 1.209, "step": 987 }, { "epoch": 0.20085383207969099, "grad_norm": 0.11423162370920181, "learning_rate": 0.00018002644157429068, "loss": 0.8628, "step": 988 }, { "epoch": 0.20105712543199838, "grad_norm": 0.12818945944309235, "learning_rate": 0.0001800061019017594, "loss": 1.1162, "step": 989 }, { "epoch": 0.20126041878430576, "grad_norm": 0.11825679987668991, "learning_rate": 0.0001799857622292281, "loss": 0.9222, "step": 990 }, { "epoch": 0.20146371213661313, "grad_norm": 0.11358822882175446, "learning_rate": 0.00017996542255669683, "loss": 0.992, "step": 991 }, { "epoch": 0.20166700548892053, "grad_norm": 0.12839291989803314, "learning_rate": 0.00017994508288416558, "loss": 1.1777, "step": 992 }, { "epoch": 0.2018702988412279, "grad_norm": 0.12416979670524597, "learning_rate": 0.0001799247432116343, "loss": 1.2411, "step": 993 }, { "epoch": 0.20207359219353527, "grad_norm": 0.12002628296613693, "learning_rate": 0.00017990440353910303, "loss": 1.0961, "step": 994 }, { "epoch": 0.20227688554584264, "grad_norm": 0.1268136203289032, "learning_rate": 0.00017988406386657175, "loss": 1.1405, "step": 995 }, { "epoch": 0.20248017889815004, "grad_norm": 0.12864577770233154, "learning_rate": 0.0001798637241940405, "loss": 1.136, "step": 996 }, { "epoch": 0.2026834722504574, "grad_norm": 0.11293767392635345, "learning_rate": 0.00017984338452150923, "loss": 1.0633, "step": 997 }, { "epoch": 0.20288676560276478, "grad_norm": 0.11901193857192993, "learning_rate": 0.00017982304484897793, "loss": 1.1404, "step": 998 }, { "epoch": 0.20309005895507218, "grad_norm": 0.14368772506713867, "learning_rate": 0.00017980270517644665, "loss": 1.2092, "step": 999 }, { "epoch": 0.20329335230737955, "grad_norm": 0.1403762251138687, "learning_rate": 0.0001797823655039154, "loss": 1.1212, "step": 1000 }, { "epoch": 0.20349664565968692, "grad_norm": 0.10853412747383118, "learning_rate": 0.00017976202583138413, "loss": 0.9489, "step": 1001 }, { "epoch": 0.20369993901199432, "grad_norm": 0.11670242995023727, "learning_rate": 0.00017974168615885285, "loss": 1.0397, "step": 1002 }, { "epoch": 0.2039032323643017, "grad_norm": 0.12957903742790222, "learning_rate": 0.00017972134648632158, "loss": 0.9898, "step": 1003 }, { "epoch": 0.20410652571660906, "grad_norm": 0.1174166351556778, "learning_rate": 0.0001797010068137903, "loss": 1.1126, "step": 1004 }, { "epoch": 0.20430981906891646, "grad_norm": 0.12919628620147705, "learning_rate": 0.00017968066714125905, "loss": 1.0282, "step": 1005 }, { "epoch": 0.20451311242122383, "grad_norm": 0.12586313486099243, "learning_rate": 0.00017966032746872775, "loss": 1.1674, "step": 1006 }, { "epoch": 0.2047164057735312, "grad_norm": 0.12239197641611099, "learning_rate": 0.00017963998779619647, "loss": 1.0543, "step": 1007 }, { "epoch": 0.20491969912583857, "grad_norm": 0.11404930055141449, "learning_rate": 0.0001796196481236652, "loss": 1.0193, "step": 1008 }, { "epoch": 0.20512299247814597, "grad_norm": 0.14286890625953674, "learning_rate": 0.00017959930845113395, "loss": 0.994, "step": 1009 }, { "epoch": 0.20532628583045334, "grad_norm": 0.12723585963249207, "learning_rate": 0.00017957896877860268, "loss": 1.1854, "step": 1010 }, { "epoch": 0.2055295791827607, "grad_norm": 0.13282720744609833, "learning_rate": 0.0001795586291060714, "loss": 1.0967, "step": 1011 }, { "epoch": 0.2057328725350681, "grad_norm": 0.11795739084482193, "learning_rate": 0.00017953828943354012, "loss": 1.0024, "step": 1012 }, { "epoch": 0.20593616588737548, "grad_norm": 0.123084157705307, "learning_rate": 0.00017951794976100888, "loss": 1.0015, "step": 1013 }, { "epoch": 0.20613945923968285, "grad_norm": 0.13757507503032684, "learning_rate": 0.00017949761008847757, "loss": 1.1926, "step": 1014 }, { "epoch": 0.20634275259199025, "grad_norm": 0.13981647789478302, "learning_rate": 0.0001794772704159463, "loss": 1.1296, "step": 1015 }, { "epoch": 0.20654604594429762, "grad_norm": 0.12356757372617722, "learning_rate": 0.00017945693074341502, "loss": 1.1867, "step": 1016 }, { "epoch": 0.206749339296605, "grad_norm": 0.11218491941690445, "learning_rate": 0.00017943659107088377, "loss": 0.9909, "step": 1017 }, { "epoch": 0.2069526326489124, "grad_norm": 0.11628386378288269, "learning_rate": 0.0001794162513983525, "loss": 1.1048, "step": 1018 }, { "epoch": 0.20715592600121976, "grad_norm": 0.1266728788614273, "learning_rate": 0.00017939591172582122, "loss": 1.123, "step": 1019 }, { "epoch": 0.20735921935352714, "grad_norm": 0.1243995800614357, "learning_rate": 0.00017937557205328995, "loss": 1.1161, "step": 1020 }, { "epoch": 0.2075625127058345, "grad_norm": 0.10625866800546646, "learning_rate": 0.0001793552323807587, "loss": 0.9773, "step": 1021 }, { "epoch": 0.2077658060581419, "grad_norm": 0.11653080582618713, "learning_rate": 0.0001793348927082274, "loss": 1.0115, "step": 1022 }, { "epoch": 0.20796909941044928, "grad_norm": 0.12603938579559326, "learning_rate": 0.00017931455303569612, "loss": 0.9879, "step": 1023 }, { "epoch": 0.20817239276275665, "grad_norm": 0.11850478500127792, "learning_rate": 0.00017929421336316485, "loss": 1.0412, "step": 1024 }, { "epoch": 0.20837568611506405, "grad_norm": 0.13597136735916138, "learning_rate": 0.0001792738736906336, "loss": 1.1984, "step": 1025 }, { "epoch": 0.20857897946737142, "grad_norm": 0.12899504601955414, "learning_rate": 0.00017925353401810232, "loss": 1.2007, "step": 1026 }, { "epoch": 0.2087822728196788, "grad_norm": 0.12255753576755524, "learning_rate": 0.00017923319434557105, "loss": 1.0632, "step": 1027 }, { "epoch": 0.2089855661719862, "grad_norm": 0.11182371526956558, "learning_rate": 0.00017921285467303977, "loss": 1.0428, "step": 1028 }, { "epoch": 0.20918885952429356, "grad_norm": 0.10728685557842255, "learning_rate": 0.00017919251500050852, "loss": 0.9964, "step": 1029 }, { "epoch": 0.20939215287660093, "grad_norm": 0.1301811784505844, "learning_rate": 0.00017917217532797722, "loss": 1.0812, "step": 1030 }, { "epoch": 0.20959544622890833, "grad_norm": 0.12470284849405289, "learning_rate": 0.00017915183565544594, "loss": 0.9967, "step": 1031 }, { "epoch": 0.2097987395812157, "grad_norm": 0.12017293274402618, "learning_rate": 0.00017913149598291467, "loss": 1.0986, "step": 1032 }, { "epoch": 0.21000203293352307, "grad_norm": 0.14881430566310883, "learning_rate": 0.00017911115631038342, "loss": 1.2407, "step": 1033 }, { "epoch": 0.21020532628583044, "grad_norm": 0.11730567365884781, "learning_rate": 0.00017909081663785214, "loss": 0.9414, "step": 1034 }, { "epoch": 0.21040861963813784, "grad_norm": 0.12763184309005737, "learning_rate": 0.00017907047696532087, "loss": 1.0773, "step": 1035 }, { "epoch": 0.2106119129904452, "grad_norm": 0.11463324725627899, "learning_rate": 0.0001790501372927896, "loss": 0.965, "step": 1036 }, { "epoch": 0.21081520634275258, "grad_norm": 0.13079042732715607, "learning_rate": 0.00017902979762025834, "loss": 1.0491, "step": 1037 }, { "epoch": 0.21101849969505998, "grad_norm": 0.13902175426483154, "learning_rate": 0.00017900945794772704, "loss": 0.9453, "step": 1038 }, { "epoch": 0.21122179304736735, "grad_norm": 0.12852630019187927, "learning_rate": 0.00017898911827519577, "loss": 1.221, "step": 1039 }, { "epoch": 0.21142508639967472, "grad_norm": 0.10965081304311752, "learning_rate": 0.0001789687786026645, "loss": 0.9923, "step": 1040 }, { "epoch": 0.21162837975198212, "grad_norm": 0.1155104711651802, "learning_rate": 0.00017894843893013324, "loss": 0.8918, "step": 1041 }, { "epoch": 0.2118316731042895, "grad_norm": 0.13126857578754425, "learning_rate": 0.00017892809925760197, "loss": 1.1116, "step": 1042 }, { "epoch": 0.21203496645659686, "grad_norm": 0.11619725823402405, "learning_rate": 0.0001789077595850707, "loss": 0.9448, "step": 1043 }, { "epoch": 0.21223825980890426, "grad_norm": 0.12041871249675751, "learning_rate": 0.00017888741991253942, "loss": 1.0778, "step": 1044 }, { "epoch": 0.21244155316121163, "grad_norm": 0.1230979636311531, "learning_rate": 0.00017886708024000814, "loss": 1.0807, "step": 1045 }, { "epoch": 0.212644846513519, "grad_norm": 0.1263006180524826, "learning_rate": 0.0001788467405674769, "loss": 1.1668, "step": 1046 }, { "epoch": 0.21284813986582637, "grad_norm": 0.11430171877145767, "learning_rate": 0.0001788264008949456, "loss": 1.1096, "step": 1047 }, { "epoch": 0.21305143321813377, "grad_norm": 0.1243266835808754, "learning_rate": 0.0001788060612224143, "loss": 0.9583, "step": 1048 }, { "epoch": 0.21325472657044114, "grad_norm": 0.12808263301849365, "learning_rate": 0.00017878572154988304, "loss": 1.0896, "step": 1049 }, { "epoch": 0.21345801992274852, "grad_norm": 0.13576021790504456, "learning_rate": 0.0001787653818773518, "loss": 1.0807, "step": 1050 }, { "epoch": 0.21366131327505591, "grad_norm": 0.10852668434381485, "learning_rate": 0.00017874504220482051, "loss": 0.9132, "step": 1051 }, { "epoch": 0.21386460662736329, "grad_norm": 0.13336928188800812, "learning_rate": 0.00017872470253228924, "loss": 1.1131, "step": 1052 }, { "epoch": 0.21406789997967066, "grad_norm": 0.12640543282032013, "learning_rate": 0.00017870436285975796, "loss": 1.0568, "step": 1053 }, { "epoch": 0.21427119333197805, "grad_norm": 0.12157181650400162, "learning_rate": 0.00017868402318722671, "loss": 1.033, "step": 1054 }, { "epoch": 0.21447448668428543, "grad_norm": 0.12272074073553085, "learning_rate": 0.0001786636835146954, "loss": 1.0301, "step": 1055 }, { "epoch": 0.2146777800365928, "grad_norm": 0.1594497114419937, "learning_rate": 0.00017864334384216414, "loss": 1.1618, "step": 1056 }, { "epoch": 0.2148810733889002, "grad_norm": 0.14059504866600037, "learning_rate": 0.00017862300416963286, "loss": 1.1223, "step": 1057 }, { "epoch": 0.21508436674120757, "grad_norm": 0.12746313214302063, "learning_rate": 0.0001786026644971016, "loss": 1.1294, "step": 1058 }, { "epoch": 0.21528766009351494, "grad_norm": 0.13382786512374878, "learning_rate": 0.00017858232482457034, "loss": 1.0969, "step": 1059 }, { "epoch": 0.2154909534458223, "grad_norm": 0.1192721351981163, "learning_rate": 0.00017856198515203906, "loss": 0.9751, "step": 1060 }, { "epoch": 0.2156942467981297, "grad_norm": 0.1318022906780243, "learning_rate": 0.00017854164547950779, "loss": 1.267, "step": 1061 }, { "epoch": 0.21589754015043708, "grad_norm": 0.12069433927536011, "learning_rate": 0.00017852130580697654, "loss": 1.0525, "step": 1062 }, { "epoch": 0.21610083350274445, "grad_norm": 0.12405405938625336, "learning_rate": 0.00017850096613444523, "loss": 1.1731, "step": 1063 }, { "epoch": 0.21630412685505185, "grad_norm": 0.11893291026353836, "learning_rate": 0.00017848062646191396, "loss": 1.1609, "step": 1064 }, { "epoch": 0.21650742020735922, "grad_norm": 0.11019967496395111, "learning_rate": 0.00017846028678938268, "loss": 0.9735, "step": 1065 }, { "epoch": 0.2167107135596666, "grad_norm": 0.11663123220205307, "learning_rate": 0.00017843994711685144, "loss": 1.2882, "step": 1066 }, { "epoch": 0.216914006911974, "grad_norm": 0.12803837656974792, "learning_rate": 0.00017841960744432016, "loss": 1.0365, "step": 1067 }, { "epoch": 0.21711730026428136, "grad_norm": 0.13295085728168488, "learning_rate": 0.00017839926777178888, "loss": 1.1925, "step": 1068 }, { "epoch": 0.21732059361658873, "grad_norm": 0.12314966320991516, "learning_rate": 0.0001783789280992576, "loss": 1.1064, "step": 1069 }, { "epoch": 0.21752388696889613, "grad_norm": 0.12015377730131149, "learning_rate": 0.00017835858842672636, "loss": 1.1056, "step": 1070 }, { "epoch": 0.2177271803212035, "grad_norm": 0.11665552854537964, "learning_rate": 0.00017833824875419506, "loss": 1.0592, "step": 1071 }, { "epoch": 0.21793047367351087, "grad_norm": 0.11458134651184082, "learning_rate": 0.00017831790908166378, "loss": 1.1203, "step": 1072 }, { "epoch": 0.21813376702581824, "grad_norm": 0.10290549695491791, "learning_rate": 0.0001782975694091325, "loss": 0.9427, "step": 1073 }, { "epoch": 0.21833706037812564, "grad_norm": 0.12680476903915405, "learning_rate": 0.00017827722973660126, "loss": 1.1047, "step": 1074 }, { "epoch": 0.218540353730433, "grad_norm": 0.1253194808959961, "learning_rate": 0.00017825689006406998, "loss": 1.1482, "step": 1075 }, { "epoch": 0.21874364708274038, "grad_norm": 0.1381319910287857, "learning_rate": 0.0001782365503915387, "loss": 1.3134, "step": 1076 }, { "epoch": 0.21894694043504778, "grad_norm": 0.12798373401165009, "learning_rate": 0.00017821621071900743, "loss": 1.1119, "step": 1077 }, { "epoch": 0.21915023378735515, "grad_norm": 0.1302616447210312, "learning_rate": 0.00017819587104647618, "loss": 1.1038, "step": 1078 }, { "epoch": 0.21935352713966252, "grad_norm": 0.1357065588235855, "learning_rate": 0.00017817553137394488, "loss": 1.1385, "step": 1079 }, { "epoch": 0.21955682049196992, "grad_norm": 0.1307210475206375, "learning_rate": 0.0001781551917014136, "loss": 1.1692, "step": 1080 }, { "epoch": 0.2197601138442773, "grad_norm": 0.12304160743951797, "learning_rate": 0.00017813485202888233, "loss": 1.1044, "step": 1081 }, { "epoch": 0.21996340719658466, "grad_norm": 0.12165479362010956, "learning_rate": 0.00017811451235635108, "loss": 1.0762, "step": 1082 }, { "epoch": 0.22016670054889206, "grad_norm": 0.12440644204616547, "learning_rate": 0.0001780941726838198, "loss": 1.0296, "step": 1083 }, { "epoch": 0.22036999390119943, "grad_norm": 0.14743392169475555, "learning_rate": 0.00017807383301128853, "loss": 1.1824, "step": 1084 }, { "epoch": 0.2205732872535068, "grad_norm": 0.13372984528541565, "learning_rate": 0.00017805349333875725, "loss": 1.0795, "step": 1085 }, { "epoch": 0.22077658060581418, "grad_norm": 0.11515718698501587, "learning_rate": 0.00017803315366622598, "loss": 0.9869, "step": 1086 }, { "epoch": 0.22097987395812158, "grad_norm": 0.1197754368185997, "learning_rate": 0.0001780128139936947, "loss": 1.1101, "step": 1087 }, { "epoch": 0.22118316731042895, "grad_norm": 0.121689073741436, "learning_rate": 0.00017799247432116343, "loss": 1.0624, "step": 1088 }, { "epoch": 0.22138646066273632, "grad_norm": 0.12425584346055984, "learning_rate": 0.00017797213464863215, "loss": 0.9962, "step": 1089 }, { "epoch": 0.22158975401504372, "grad_norm": 0.11786684393882751, "learning_rate": 0.00017795179497610088, "loss": 0.8943, "step": 1090 }, { "epoch": 0.2217930473673511, "grad_norm": 0.13555578887462616, "learning_rate": 0.00017793145530356963, "loss": 1.2069, "step": 1091 }, { "epoch": 0.22199634071965846, "grad_norm": 0.12431347370147705, "learning_rate": 0.00017791111563103835, "loss": 1.1376, "step": 1092 }, { "epoch": 0.22219963407196586, "grad_norm": 0.12472493946552277, "learning_rate": 0.00017789077595850708, "loss": 1.1486, "step": 1093 }, { "epoch": 0.22240292742427323, "grad_norm": 0.12927775084972382, "learning_rate": 0.0001778704362859758, "loss": 1.1581, "step": 1094 }, { "epoch": 0.2226062207765806, "grad_norm": 0.12910224497318268, "learning_rate": 0.00017785009661344453, "loss": 0.988, "step": 1095 }, { "epoch": 0.222809514128888, "grad_norm": 0.11531752347946167, "learning_rate": 0.00017782975694091325, "loss": 0.9782, "step": 1096 }, { "epoch": 0.22301280748119537, "grad_norm": 0.1250569224357605, "learning_rate": 0.00017780941726838197, "loss": 1.0796, "step": 1097 }, { "epoch": 0.22321610083350274, "grad_norm": 0.1234661191701889, "learning_rate": 0.0001777890775958507, "loss": 1.1571, "step": 1098 }, { "epoch": 0.2234193941858101, "grad_norm": 0.11324235796928406, "learning_rate": 0.00017776873792331945, "loss": 1.1156, "step": 1099 }, { "epoch": 0.2236226875381175, "grad_norm": 0.12516295909881592, "learning_rate": 0.00017774839825078818, "loss": 1.0161, "step": 1100 }, { "epoch": 0.22382598089042488, "grad_norm": 0.13084611296653748, "learning_rate": 0.0001777280585782569, "loss": 1.1408, "step": 1101 }, { "epoch": 0.22402927424273225, "grad_norm": 0.1189943253993988, "learning_rate": 0.00017770771890572562, "loss": 0.9823, "step": 1102 }, { "epoch": 0.22423256759503965, "grad_norm": 0.11955268681049347, "learning_rate": 0.00017768737923319438, "loss": 0.8977, "step": 1103 }, { "epoch": 0.22443586094734702, "grad_norm": 0.12528367340564728, "learning_rate": 0.00017766703956066307, "loss": 1.1579, "step": 1104 }, { "epoch": 0.2246391542996544, "grad_norm": 0.12829215824604034, "learning_rate": 0.0001776466998881318, "loss": 1.1892, "step": 1105 }, { "epoch": 0.2248424476519618, "grad_norm": 0.12263132631778717, "learning_rate": 0.00017762636021560052, "loss": 1.1609, "step": 1106 }, { "epoch": 0.22504574100426916, "grad_norm": 0.12810589373111725, "learning_rate": 0.00017760602054306927, "loss": 1.0712, "step": 1107 }, { "epoch": 0.22524903435657653, "grad_norm": 0.1171211376786232, "learning_rate": 0.000177585680870538, "loss": 0.9694, "step": 1108 }, { "epoch": 0.22545232770888393, "grad_norm": 0.12270856648683548, "learning_rate": 0.00017756534119800672, "loss": 1.0896, "step": 1109 }, { "epoch": 0.2256556210611913, "grad_norm": 0.13578352332115173, "learning_rate": 0.00017754500152547545, "loss": 1.2125, "step": 1110 }, { "epoch": 0.22585891441349867, "grad_norm": 0.1315973401069641, "learning_rate": 0.0001775246618529442, "loss": 1.2441, "step": 1111 }, { "epoch": 0.22606220776580604, "grad_norm": 0.1222010925412178, "learning_rate": 0.0001775043221804129, "loss": 0.9894, "step": 1112 }, { "epoch": 0.22626550111811344, "grad_norm": 0.12425290793180466, "learning_rate": 0.00017748398250788162, "loss": 1.273, "step": 1113 }, { "epoch": 0.22646879447042081, "grad_norm": 0.10960794985294342, "learning_rate": 0.00017746364283535034, "loss": 0.8637, "step": 1114 }, { "epoch": 0.22667208782272819, "grad_norm": 0.13080738484859467, "learning_rate": 0.0001774433031628191, "loss": 1.1506, "step": 1115 }, { "epoch": 0.22687538117503558, "grad_norm": 0.11546586453914642, "learning_rate": 0.00017742296349028782, "loss": 1.1043, "step": 1116 }, { "epoch": 0.22707867452734296, "grad_norm": 0.12280496209859848, "learning_rate": 0.00017740262381775655, "loss": 1.1203, "step": 1117 }, { "epoch": 0.22728196787965033, "grad_norm": 0.11661294102668762, "learning_rate": 0.00017738228414522527, "loss": 1.0486, "step": 1118 }, { "epoch": 0.22748526123195772, "grad_norm": 0.12169715762138367, "learning_rate": 0.00017736194447269402, "loss": 1.2318, "step": 1119 }, { "epoch": 0.2276885545842651, "grad_norm": 0.12962935864925385, "learning_rate": 0.00017734160480016272, "loss": 1.0912, "step": 1120 }, { "epoch": 0.22789184793657247, "grad_norm": 0.14488789439201355, "learning_rate": 0.00017732126512763144, "loss": 0.9774, "step": 1121 }, { "epoch": 0.22809514128887987, "grad_norm": 0.11455550044775009, "learning_rate": 0.00017730092545510017, "loss": 0.9454, "step": 1122 }, { "epoch": 0.22829843464118724, "grad_norm": 0.11764731258153915, "learning_rate": 0.00017728058578256892, "loss": 1.0895, "step": 1123 }, { "epoch": 0.2285017279934946, "grad_norm": 0.12537989020347595, "learning_rate": 0.00017726024611003764, "loss": 1.141, "step": 1124 }, { "epoch": 0.22870502134580198, "grad_norm": 0.11639077961444855, "learning_rate": 0.00017723990643750637, "loss": 1.1259, "step": 1125 }, { "epoch": 0.22890831469810938, "grad_norm": 0.12202929705381393, "learning_rate": 0.0001772195667649751, "loss": 1.141, "step": 1126 }, { "epoch": 0.22911160805041675, "grad_norm": 0.11307729780673981, "learning_rate": 0.00017719922709244382, "loss": 0.9076, "step": 1127 }, { "epoch": 0.22931490140272412, "grad_norm": 0.11854063719511032, "learning_rate": 0.00017717888741991254, "loss": 1.0369, "step": 1128 }, { "epoch": 0.22951819475503152, "grad_norm": 0.11729457229375839, "learning_rate": 0.00017715854774738127, "loss": 1.0503, "step": 1129 }, { "epoch": 0.2297214881073389, "grad_norm": 0.13550931215286255, "learning_rate": 0.00017713820807485, "loss": 1.1211, "step": 1130 }, { "epoch": 0.22992478145964626, "grad_norm": 0.1215146854519844, "learning_rate": 0.00017711786840231871, "loss": 1.0483, "step": 1131 }, { "epoch": 0.23012807481195366, "grad_norm": 0.12911346554756165, "learning_rate": 0.00017709752872978747, "loss": 1.1133, "step": 1132 }, { "epoch": 0.23033136816426103, "grad_norm": 0.1176094263792038, "learning_rate": 0.0001770771890572562, "loss": 0.9717, "step": 1133 }, { "epoch": 0.2305346615165684, "grad_norm": 0.1320810616016388, "learning_rate": 0.00017705684938472492, "loss": 1.0978, "step": 1134 }, { "epoch": 0.2307379548688758, "grad_norm": 0.119644396007061, "learning_rate": 0.00017703650971219364, "loss": 1.03, "step": 1135 }, { "epoch": 0.23094124822118317, "grad_norm": 0.11813725531101227, "learning_rate": 0.00017701617003966236, "loss": 0.9804, "step": 1136 }, { "epoch": 0.23114454157349054, "grad_norm": 0.12088938802480698, "learning_rate": 0.0001769958303671311, "loss": 1.052, "step": 1137 }, { "epoch": 0.2313478349257979, "grad_norm": 0.11971927434206009, "learning_rate": 0.0001769754906945998, "loss": 0.8725, "step": 1138 }, { "epoch": 0.2315511282781053, "grad_norm": 0.1300465613603592, "learning_rate": 0.00017695515102206854, "loss": 1.1638, "step": 1139 }, { "epoch": 0.23175442163041268, "grad_norm": 0.14033594727516174, "learning_rate": 0.0001769348113495373, "loss": 1.2757, "step": 1140 }, { "epoch": 0.23195771498272005, "grad_norm": 0.13062700629234314, "learning_rate": 0.000176914471677006, "loss": 1.1734, "step": 1141 }, { "epoch": 0.23216100833502745, "grad_norm": 0.11161787062883377, "learning_rate": 0.00017689413200447474, "loss": 0.9733, "step": 1142 }, { "epoch": 0.23236430168733482, "grad_norm": 0.11497635394334793, "learning_rate": 0.00017687379233194346, "loss": 0.9751, "step": 1143 }, { "epoch": 0.2325675950396422, "grad_norm": 0.12658412754535675, "learning_rate": 0.0001768534526594122, "loss": 0.9667, "step": 1144 }, { "epoch": 0.2327708883919496, "grad_norm": 0.125930517911911, "learning_rate": 0.0001768331129868809, "loss": 1.2521, "step": 1145 }, { "epoch": 0.23297418174425696, "grad_norm": 0.1267358511686325, "learning_rate": 0.00017681277331434964, "loss": 1.2034, "step": 1146 }, { "epoch": 0.23317747509656434, "grad_norm": 0.11235269904136658, "learning_rate": 0.00017679243364181836, "loss": 1.0049, "step": 1147 }, { "epoch": 0.23338076844887173, "grad_norm": 0.13258063793182373, "learning_rate": 0.0001767720939692871, "loss": 1.0893, "step": 1148 }, { "epoch": 0.2335840618011791, "grad_norm": 0.108503058552742, "learning_rate": 0.00017675175429675584, "loss": 0.9366, "step": 1149 }, { "epoch": 0.23378735515348648, "grad_norm": 0.12689101696014404, "learning_rate": 0.00017673141462422456, "loss": 1.1286, "step": 1150 }, { "epoch": 0.23399064850579385, "grad_norm": 0.12492146342992783, "learning_rate": 0.00017671107495169329, "loss": 0.9183, "step": 1151 }, { "epoch": 0.23419394185810125, "grad_norm": 0.10324962437152863, "learning_rate": 0.000176690735279162, "loss": 1.0456, "step": 1152 }, { "epoch": 0.23439723521040862, "grad_norm": 0.11633274704217911, "learning_rate": 0.00017667039560663073, "loss": 0.917, "step": 1153 }, { "epoch": 0.234600528562716, "grad_norm": 0.11803746968507767, "learning_rate": 0.00017665005593409946, "loss": 1.0733, "step": 1154 }, { "epoch": 0.2348038219150234, "grad_norm": 0.128416046500206, "learning_rate": 0.00017662971626156818, "loss": 1.1525, "step": 1155 }, { "epoch": 0.23500711526733076, "grad_norm": 0.13254918158054352, "learning_rate": 0.00017660937658903693, "loss": 1.2412, "step": 1156 }, { "epoch": 0.23521040861963813, "grad_norm": 0.13515497744083405, "learning_rate": 0.00017658903691650566, "loss": 1.0627, "step": 1157 }, { "epoch": 0.23541370197194553, "grad_norm": 0.12952685356140137, "learning_rate": 0.00017656869724397438, "loss": 1.0841, "step": 1158 }, { "epoch": 0.2356169953242529, "grad_norm": 0.14173516631126404, "learning_rate": 0.0001765483575714431, "loss": 1.1436, "step": 1159 }, { "epoch": 0.23582028867656027, "grad_norm": 0.11358428746461868, "learning_rate": 0.00017652801789891183, "loss": 1.0707, "step": 1160 }, { "epoch": 0.23602358202886767, "grad_norm": 0.11959460377693176, "learning_rate": 0.00017650767822638056, "loss": 1.0038, "step": 1161 }, { "epoch": 0.23622687538117504, "grad_norm": 0.13181112706661224, "learning_rate": 0.00017648733855384928, "loss": 1.2132, "step": 1162 }, { "epoch": 0.2364301687334824, "grad_norm": 0.12374672293663025, "learning_rate": 0.000176466998881318, "loss": 1.0432, "step": 1163 }, { "epoch": 0.23663346208578978, "grad_norm": 0.1308983564376831, "learning_rate": 0.00017644665920878676, "loss": 1.1725, "step": 1164 }, { "epoch": 0.23683675543809718, "grad_norm": 0.11602329462766647, "learning_rate": 0.00017642631953625548, "loss": 1.0136, "step": 1165 }, { "epoch": 0.23704004879040455, "grad_norm": 0.1398748755455017, "learning_rate": 0.0001764059798637242, "loss": 1.0655, "step": 1166 }, { "epoch": 0.23724334214271192, "grad_norm": 0.1302013248205185, "learning_rate": 0.00017638564019119293, "loss": 1.1052, "step": 1167 }, { "epoch": 0.23744663549501932, "grad_norm": 0.11932185292243958, "learning_rate": 0.00017636530051866166, "loss": 1.1945, "step": 1168 }, { "epoch": 0.2376499288473267, "grad_norm": 0.11323782801628113, "learning_rate": 0.00017634496084613038, "loss": 1.0412, "step": 1169 }, { "epoch": 0.23785322219963406, "grad_norm": 0.1345479041337967, "learning_rate": 0.0001763246211735991, "loss": 1.082, "step": 1170 }, { "epoch": 0.23805651555194146, "grad_norm": 0.12548640370368958, "learning_rate": 0.00017630428150106783, "loss": 1.1213, "step": 1171 }, { "epoch": 0.23825980890424883, "grad_norm": 0.12849657237529755, "learning_rate": 0.00017628394182853655, "loss": 1.13, "step": 1172 }, { "epoch": 0.2384631022565562, "grad_norm": 0.11670655757188797, "learning_rate": 0.0001762636021560053, "loss": 0.8984, "step": 1173 }, { "epoch": 0.2386663956088636, "grad_norm": 0.11539500951766968, "learning_rate": 0.00017624326248347403, "loss": 1.1453, "step": 1174 }, { "epoch": 0.23886968896117097, "grad_norm": 0.13686025142669678, "learning_rate": 0.00017622292281094275, "loss": 1.2811, "step": 1175 }, { "epoch": 0.23907298231347834, "grad_norm": 0.13845805823802948, "learning_rate": 0.00017620258313841148, "loss": 1.2939, "step": 1176 }, { "epoch": 0.23927627566578574, "grad_norm": 0.12209935486316681, "learning_rate": 0.0001761822434658802, "loss": 0.8311, "step": 1177 }, { "epoch": 0.2394795690180931, "grad_norm": 0.11880161613225937, "learning_rate": 0.00017616190379334893, "loss": 1.2844, "step": 1178 }, { "epoch": 0.23968286237040048, "grad_norm": 0.1326730102300644, "learning_rate": 0.00017614156412081765, "loss": 1.1336, "step": 1179 }, { "epoch": 0.23988615572270786, "grad_norm": 0.11547461152076721, "learning_rate": 0.00017612122444828638, "loss": 0.9757, "step": 1180 }, { "epoch": 0.24008944907501525, "grad_norm": 0.1296636462211609, "learning_rate": 0.00017610088477575513, "loss": 1.084, "step": 1181 }, { "epoch": 0.24029274242732263, "grad_norm": 0.12076129764318466, "learning_rate": 0.00017608054510322385, "loss": 1.1151, "step": 1182 }, { "epoch": 0.24049603577963, "grad_norm": 0.12159736454486847, "learning_rate": 0.00017606020543069258, "loss": 1.0461, "step": 1183 }, { "epoch": 0.2406993291319374, "grad_norm": 0.13127025961875916, "learning_rate": 0.0001760398657581613, "loss": 1.0541, "step": 1184 }, { "epoch": 0.24090262248424477, "grad_norm": 0.14702552556991577, "learning_rate": 0.00017601952608563003, "loss": 1.2109, "step": 1185 }, { "epoch": 0.24110591583655214, "grad_norm": 0.11683522909879684, "learning_rate": 0.00017599918641309875, "loss": 1.1588, "step": 1186 }, { "epoch": 0.24130920918885954, "grad_norm": 0.1130138412117958, "learning_rate": 0.00017597884674056747, "loss": 1.0268, "step": 1187 }, { "epoch": 0.2415125025411669, "grad_norm": 0.10920488089323044, "learning_rate": 0.0001759585070680362, "loss": 0.9038, "step": 1188 }, { "epoch": 0.24171579589347428, "grad_norm": 0.12897898256778717, "learning_rate": 0.00017593816739550495, "loss": 1.3032, "step": 1189 }, { "epoch": 0.24191908924578168, "grad_norm": 0.1289346069097519, "learning_rate": 0.00017591782772297367, "loss": 1.1169, "step": 1190 }, { "epoch": 0.24212238259808905, "grad_norm": 0.12478041648864746, "learning_rate": 0.0001758974880504424, "loss": 1.1723, "step": 1191 }, { "epoch": 0.24232567595039642, "grad_norm": 0.13389204442501068, "learning_rate": 0.00017587714837791112, "loss": 1.1079, "step": 1192 }, { "epoch": 0.2425289693027038, "grad_norm": 0.12659893929958344, "learning_rate": 0.00017585680870537985, "loss": 1.078, "step": 1193 }, { "epoch": 0.2427322626550112, "grad_norm": 0.13224546611309052, "learning_rate": 0.00017583646903284857, "loss": 1.0784, "step": 1194 }, { "epoch": 0.24293555600731856, "grad_norm": 0.13924521207809448, "learning_rate": 0.0001758161293603173, "loss": 1.1491, "step": 1195 }, { "epoch": 0.24313884935962593, "grad_norm": 0.10379677265882492, "learning_rate": 0.00017579578968778602, "loss": 0.8733, "step": 1196 }, { "epoch": 0.24334214271193333, "grad_norm": 0.17008356750011444, "learning_rate": 0.00017577545001525477, "loss": 1.1597, "step": 1197 }, { "epoch": 0.2435454360642407, "grad_norm": 0.11082588881254196, "learning_rate": 0.0001757551103427235, "loss": 0.966, "step": 1198 }, { "epoch": 0.24374872941654807, "grad_norm": 0.12224634736776352, "learning_rate": 0.00017573477067019222, "loss": 0.9494, "step": 1199 }, { "epoch": 0.24395202276885547, "grad_norm": 0.12597164511680603, "learning_rate": 0.00017571443099766095, "loss": 0.9376, "step": 1200 }, { "epoch": 0.24415531612116284, "grad_norm": 0.12282256036996841, "learning_rate": 0.00017569409132512967, "loss": 1.1645, "step": 1201 }, { "epoch": 0.2443586094734702, "grad_norm": 0.10933969169855118, "learning_rate": 0.0001756737516525984, "loss": 1.0338, "step": 1202 }, { "epoch": 0.2445619028257776, "grad_norm": 0.12132111936807632, "learning_rate": 0.00017565341198006712, "loss": 1.1541, "step": 1203 }, { "epoch": 0.24476519617808498, "grad_norm": 0.12675434350967407, "learning_rate": 0.00017563307230753584, "loss": 1.0506, "step": 1204 }, { "epoch": 0.24496848953039235, "grad_norm": 0.14764836430549622, "learning_rate": 0.0001756127326350046, "loss": 1.0209, "step": 1205 }, { "epoch": 0.24517178288269972, "grad_norm": 0.11838477104902267, "learning_rate": 0.00017559239296247332, "loss": 0.9789, "step": 1206 }, { "epoch": 0.24537507623500712, "grad_norm": 0.11526069790124893, "learning_rate": 0.00017557205328994204, "loss": 1.0871, "step": 1207 }, { "epoch": 0.2455783695873145, "grad_norm": 0.12997418642044067, "learning_rate": 0.00017555171361741077, "loss": 1.215, "step": 1208 }, { "epoch": 0.24578166293962186, "grad_norm": 0.1175120398402214, "learning_rate": 0.0001755313739448795, "loss": 0.8355, "step": 1209 }, { "epoch": 0.24598495629192926, "grad_norm": 0.11287759989500046, "learning_rate": 0.00017551103427234822, "loss": 0.9071, "step": 1210 }, { "epoch": 0.24618824964423663, "grad_norm": 0.11898453533649445, "learning_rate": 0.00017549069459981694, "loss": 1.0175, "step": 1211 }, { "epoch": 0.246391542996544, "grad_norm": 0.13262607157230377, "learning_rate": 0.00017547035492728567, "loss": 1.1498, "step": 1212 }, { "epoch": 0.2465948363488514, "grad_norm": 0.12178485840559006, "learning_rate": 0.0001754500152547544, "loss": 1.0405, "step": 1213 }, { "epoch": 0.24679812970115877, "grad_norm": 0.13001886010169983, "learning_rate": 0.00017542967558222314, "loss": 1.0465, "step": 1214 }, { "epoch": 0.24700142305346615, "grad_norm": 0.12525972723960876, "learning_rate": 0.00017540933590969187, "loss": 1.1144, "step": 1215 }, { "epoch": 0.24720471640577354, "grad_norm": 0.11287079751491547, "learning_rate": 0.0001753889962371606, "loss": 0.9362, "step": 1216 }, { "epoch": 0.24740800975808092, "grad_norm": 0.13626334071159363, "learning_rate": 0.00017536865656462932, "loss": 1.2352, "step": 1217 }, { "epoch": 0.2476113031103883, "grad_norm": 0.12724994122982025, "learning_rate": 0.00017534831689209804, "loss": 1.0396, "step": 1218 }, { "epoch": 0.24781459646269566, "grad_norm": 0.11603401601314545, "learning_rate": 0.00017532797721956677, "loss": 0.9778, "step": 1219 }, { "epoch": 0.24801788981500306, "grad_norm": 0.12654529511928558, "learning_rate": 0.0001753076375470355, "loss": 1.0312, "step": 1220 }, { "epoch": 0.24822118316731043, "grad_norm": 0.13385628163814545, "learning_rate": 0.00017528729787450421, "loss": 1.1297, "step": 1221 }, { "epoch": 0.2484244765196178, "grad_norm": 0.12190620601177216, "learning_rate": 0.00017526695820197297, "loss": 1.1187, "step": 1222 }, { "epoch": 0.2486277698719252, "grad_norm": 0.11775553971529007, "learning_rate": 0.0001752466185294417, "loss": 1.0193, "step": 1223 }, { "epoch": 0.24883106322423257, "grad_norm": 0.10721298307180405, "learning_rate": 0.00017522627885691041, "loss": 0.9781, "step": 1224 }, { "epoch": 0.24903435657653994, "grad_norm": 0.11292947083711624, "learning_rate": 0.00017520593918437914, "loss": 1.122, "step": 1225 }, { "epoch": 0.24923764992884734, "grad_norm": 0.11116209626197815, "learning_rate": 0.00017518559951184786, "loss": 0.9238, "step": 1226 }, { "epoch": 0.2494409432811547, "grad_norm": 0.12392593175172806, "learning_rate": 0.0001751652598393166, "loss": 1.1305, "step": 1227 }, { "epoch": 0.24964423663346208, "grad_norm": 0.124233178794384, "learning_rate": 0.0001751449201667853, "loss": 1.0218, "step": 1228 }, { "epoch": 0.24984752998576948, "grad_norm": 0.1181500032544136, "learning_rate": 0.00017512458049425404, "loss": 0.9349, "step": 1229 }, { "epoch": 0.2500508233380768, "grad_norm": 0.13005246222019196, "learning_rate": 0.0001751042408217228, "loss": 1.1636, "step": 1230 }, { "epoch": 0.2502541166903842, "grad_norm": 0.12866559624671936, "learning_rate": 0.0001750839011491915, "loss": 1.1384, "step": 1231 }, { "epoch": 0.2504574100426916, "grad_norm": 0.11397498100996017, "learning_rate": 0.00017506356147666024, "loss": 1.0519, "step": 1232 }, { "epoch": 0.25066070339499896, "grad_norm": 0.11991407722234726, "learning_rate": 0.00017504322180412896, "loss": 1.025, "step": 1233 }, { "epoch": 0.25086399674730636, "grad_norm": 0.11384415626525879, "learning_rate": 0.0001750228821315977, "loss": 0.9845, "step": 1234 }, { "epoch": 0.25106729009961376, "grad_norm": 0.12114489823579788, "learning_rate": 0.0001750025424590664, "loss": 0.9427, "step": 1235 }, { "epoch": 0.2512705834519211, "grad_norm": 0.12967409193515778, "learning_rate": 0.00017498220278653514, "loss": 1.0714, "step": 1236 }, { "epoch": 0.2514738768042285, "grad_norm": 0.13375937938690186, "learning_rate": 0.00017496186311400386, "loss": 1.0678, "step": 1237 }, { "epoch": 0.2516771701565359, "grad_norm": 0.12456507235765457, "learning_rate": 0.0001749415234414726, "loss": 1.0189, "step": 1238 }, { "epoch": 0.25188046350884324, "grad_norm": 0.1372321993112564, "learning_rate": 0.00017492118376894134, "loss": 1.2524, "step": 1239 }, { "epoch": 0.25208375686115064, "grad_norm": 0.11218629777431488, "learning_rate": 0.00017490084409641006, "loss": 1.0237, "step": 1240 }, { "epoch": 0.25228705021345804, "grad_norm": 0.12430521845817566, "learning_rate": 0.00017488050442387878, "loss": 0.9717, "step": 1241 }, { "epoch": 0.2524903435657654, "grad_norm": 0.12222771346569061, "learning_rate": 0.0001748601647513475, "loss": 1.087, "step": 1242 }, { "epoch": 0.2526936369180728, "grad_norm": 0.12341856956481934, "learning_rate": 0.00017483982507881623, "loss": 1.0789, "step": 1243 }, { "epoch": 0.2528969302703802, "grad_norm": 0.13263435661792755, "learning_rate": 0.00017481948540628496, "loss": 1.1757, "step": 1244 }, { "epoch": 0.2531002236226875, "grad_norm": 0.12904416024684906, "learning_rate": 0.00017479914573375368, "loss": 1.0812, "step": 1245 }, { "epoch": 0.2533035169749949, "grad_norm": 0.12575136125087738, "learning_rate": 0.00017477880606122243, "loss": 1.0435, "step": 1246 }, { "epoch": 0.2535068103273023, "grad_norm": 0.11990928649902344, "learning_rate": 0.00017475846638869116, "loss": 0.9689, "step": 1247 }, { "epoch": 0.25371010367960967, "grad_norm": 0.12164648622274399, "learning_rate": 0.00017473812671615988, "loss": 0.9309, "step": 1248 }, { "epoch": 0.25391339703191707, "grad_norm": 0.12410687655210495, "learning_rate": 0.0001747177870436286, "loss": 1.0254, "step": 1249 }, { "epoch": 0.2541166903842244, "grad_norm": 0.13339757919311523, "learning_rate": 0.00017469744737109733, "loss": 1.0455, "step": 1250 }, { "epoch": 0.2543199837365318, "grad_norm": 0.14127810299396515, "learning_rate": 0.00017467710769856606, "loss": 1.1633, "step": 1251 }, { "epoch": 0.2545232770888392, "grad_norm": 0.10454534739255905, "learning_rate": 0.00017465676802603478, "loss": 0.7574, "step": 1252 }, { "epoch": 0.25472657044114655, "grad_norm": 0.14605766534805298, "learning_rate": 0.0001746364283535035, "loss": 1.1348, "step": 1253 }, { "epoch": 0.25492986379345395, "grad_norm": 0.11716707050800323, "learning_rate": 0.00017461608868097223, "loss": 1.0645, "step": 1254 }, { "epoch": 0.25513315714576135, "grad_norm": 0.13623961806297302, "learning_rate": 0.00017459574900844098, "loss": 1.1175, "step": 1255 }, { "epoch": 0.2553364504980687, "grad_norm": 0.11011240631341934, "learning_rate": 0.0001745754093359097, "loss": 1.0454, "step": 1256 }, { "epoch": 0.2555397438503761, "grad_norm": 0.13665513694286346, "learning_rate": 0.00017455506966337843, "loss": 1.1112, "step": 1257 }, { "epoch": 0.2557430372026835, "grad_norm": 0.11241257190704346, "learning_rate": 0.00017453472999084715, "loss": 1.0808, "step": 1258 }, { "epoch": 0.25594633055499083, "grad_norm": 0.1247948557138443, "learning_rate": 0.00017451439031831588, "loss": 0.9803, "step": 1259 }, { "epoch": 0.25614962390729823, "grad_norm": 0.14268344640731812, "learning_rate": 0.0001744940506457846, "loss": 1.1095, "step": 1260 }, { "epoch": 0.25635291725960563, "grad_norm": 0.11472602188587189, "learning_rate": 0.00017447371097325333, "loss": 0.9911, "step": 1261 }, { "epoch": 0.25655621061191297, "grad_norm": 0.14191444218158722, "learning_rate": 0.00017445337130072205, "loss": 1.0679, "step": 1262 }, { "epoch": 0.25675950396422037, "grad_norm": 0.12657268345355988, "learning_rate": 0.0001744330316281908, "loss": 0.9211, "step": 1263 }, { "epoch": 0.25696279731652777, "grad_norm": 0.1397320032119751, "learning_rate": 0.00017441269195565953, "loss": 1.1604, "step": 1264 }, { "epoch": 0.2571660906688351, "grad_norm": 0.12176384776830673, "learning_rate": 0.00017439235228312825, "loss": 1.0919, "step": 1265 }, { "epoch": 0.2573693840211425, "grad_norm": 0.13282664120197296, "learning_rate": 0.00017437201261059698, "loss": 1.259, "step": 1266 }, { "epoch": 0.2575726773734499, "grad_norm": 0.14279745519161224, "learning_rate": 0.0001743516729380657, "loss": 1.3582, "step": 1267 }, { "epoch": 0.25777597072575725, "grad_norm": 0.11482515186071396, "learning_rate": 0.00017433133326553443, "loss": 0.9338, "step": 1268 }, { "epoch": 0.25797926407806465, "grad_norm": 0.12177598476409912, "learning_rate": 0.00017431099359300315, "loss": 1.0996, "step": 1269 }, { "epoch": 0.25818255743037205, "grad_norm": 0.12271133065223694, "learning_rate": 0.00017429065392047188, "loss": 1.2357, "step": 1270 }, { "epoch": 0.2583858507826794, "grad_norm": 0.11448093503713608, "learning_rate": 0.00017427031424794063, "loss": 1.0057, "step": 1271 }, { "epoch": 0.2585891441349868, "grad_norm": 0.11486377567052841, "learning_rate": 0.00017424997457540935, "loss": 1.0429, "step": 1272 }, { "epoch": 0.2587924374872942, "grad_norm": 0.12816710770130157, "learning_rate": 0.00017422963490287808, "loss": 1.0849, "step": 1273 }, { "epoch": 0.25899573083960153, "grad_norm": 0.13030269742012024, "learning_rate": 0.0001742092952303468, "loss": 0.9698, "step": 1274 }, { "epoch": 0.25919902419190893, "grad_norm": 0.12305210530757904, "learning_rate": 0.00017418895555781553, "loss": 1.0847, "step": 1275 }, { "epoch": 0.2594023175442163, "grad_norm": 0.11980848014354706, "learning_rate": 0.00017416861588528425, "loss": 1.1857, "step": 1276 }, { "epoch": 0.2596056108965237, "grad_norm": 0.1268121749162674, "learning_rate": 0.00017414827621275297, "loss": 1.057, "step": 1277 }, { "epoch": 0.2598089042488311, "grad_norm": 0.119362972676754, "learning_rate": 0.0001741279365402217, "loss": 0.9978, "step": 1278 }, { "epoch": 0.2600121976011384, "grad_norm": 0.11040918529033661, "learning_rate": 0.00017410759686769045, "loss": 0.8646, "step": 1279 }, { "epoch": 0.2602154909534458, "grad_norm": 0.1263931393623352, "learning_rate": 0.00017408725719515917, "loss": 0.9089, "step": 1280 }, { "epoch": 0.2604187843057532, "grad_norm": 0.1311492770910263, "learning_rate": 0.0001740669175226279, "loss": 1.2096, "step": 1281 }, { "epoch": 0.26062207765806056, "grad_norm": 0.15105241537094116, "learning_rate": 0.00017404657785009662, "loss": 1.3313, "step": 1282 }, { "epoch": 0.26082537101036796, "grad_norm": 0.14878205955028534, "learning_rate": 0.00017402623817756535, "loss": 1.1671, "step": 1283 }, { "epoch": 0.26102866436267536, "grad_norm": 0.12554128468036652, "learning_rate": 0.00017400589850503407, "loss": 0.9834, "step": 1284 }, { "epoch": 0.2612319577149827, "grad_norm": 0.12347958981990814, "learning_rate": 0.0001739855588325028, "loss": 1.1064, "step": 1285 }, { "epoch": 0.2614352510672901, "grad_norm": 0.13344120979309082, "learning_rate": 0.00017396521915997152, "loss": 1.1599, "step": 1286 }, { "epoch": 0.2616385444195975, "grad_norm": 0.11492400616407394, "learning_rate": 0.00017394487948744027, "loss": 1.0664, "step": 1287 }, { "epoch": 0.26184183777190484, "grad_norm": 0.1381841003894806, "learning_rate": 0.000173924539814909, "loss": 1.2612, "step": 1288 }, { "epoch": 0.26204513112421224, "grad_norm": 0.1126202642917633, "learning_rate": 0.00017390420014237772, "loss": 1.0606, "step": 1289 }, { "epoch": 0.26224842447651964, "grad_norm": 0.12391757220029831, "learning_rate": 0.00017388386046984645, "loss": 1.1284, "step": 1290 }, { "epoch": 0.262451717828827, "grad_norm": 0.14284935593605042, "learning_rate": 0.00017386352079731517, "loss": 1.2237, "step": 1291 }, { "epoch": 0.2626550111811344, "grad_norm": 0.11940843611955643, "learning_rate": 0.0001738431811247839, "loss": 1.0164, "step": 1292 }, { "epoch": 0.2628583045334418, "grad_norm": 0.11453817039728165, "learning_rate": 0.00017382284145225262, "loss": 0.919, "step": 1293 }, { "epoch": 0.2630615978857491, "grad_norm": 0.11902697384357452, "learning_rate": 0.00017380250177972134, "loss": 1.0669, "step": 1294 }, { "epoch": 0.2632648912380565, "grad_norm": 0.12861910462379456, "learning_rate": 0.00017378216210719007, "loss": 1.051, "step": 1295 }, { "epoch": 0.2634681845903639, "grad_norm": 0.13415683805942535, "learning_rate": 0.00017376182243465882, "loss": 1.2085, "step": 1296 }, { "epoch": 0.26367147794267126, "grad_norm": 0.11324958503246307, "learning_rate": 0.00017374148276212754, "loss": 1.0347, "step": 1297 }, { "epoch": 0.26387477129497866, "grad_norm": 0.11437279731035233, "learning_rate": 0.00017372114308959627, "loss": 1.0386, "step": 1298 }, { "epoch": 0.26407806464728606, "grad_norm": 0.1309337615966797, "learning_rate": 0.000173700803417065, "loss": 1.0251, "step": 1299 }, { "epoch": 0.2642813579995934, "grad_norm": 0.12801750004291534, "learning_rate": 0.00017368046374453372, "loss": 1.0661, "step": 1300 }, { "epoch": 0.2644846513519008, "grad_norm": 0.12607401609420776, "learning_rate": 0.00017366012407200244, "loss": 1.1156, "step": 1301 }, { "epoch": 0.2646879447042082, "grad_norm": 0.1417655348777771, "learning_rate": 0.00017363978439947117, "loss": 1.3765, "step": 1302 }, { "epoch": 0.26489123805651554, "grad_norm": 0.12621742486953735, "learning_rate": 0.0001736194447269399, "loss": 0.9138, "step": 1303 }, { "epoch": 0.26509453140882294, "grad_norm": 0.12521621584892273, "learning_rate": 0.00017359910505440864, "loss": 1.1882, "step": 1304 }, { "epoch": 0.2652978247611303, "grad_norm": 0.11669400334358215, "learning_rate": 0.00017357876538187737, "loss": 1.0113, "step": 1305 }, { "epoch": 0.2655011181134377, "grad_norm": 0.12276088446378708, "learning_rate": 0.0001735584257093461, "loss": 1.0964, "step": 1306 }, { "epoch": 0.2657044114657451, "grad_norm": 0.11636564135551453, "learning_rate": 0.00017353808603681482, "loss": 1.0141, "step": 1307 }, { "epoch": 0.2659077048180524, "grad_norm": 0.10083210468292236, "learning_rate": 0.00017351774636428354, "loss": 1.0403, "step": 1308 }, { "epoch": 0.2661109981703598, "grad_norm": 0.12461689859628677, "learning_rate": 0.00017349740669175227, "loss": 1.0841, "step": 1309 }, { "epoch": 0.2663142915226672, "grad_norm": 0.12346909195184708, "learning_rate": 0.000173477067019221, "loss": 1.0268, "step": 1310 }, { "epoch": 0.26651758487497457, "grad_norm": 0.11846248060464859, "learning_rate": 0.00017345672734668971, "loss": 1.028, "step": 1311 }, { "epoch": 0.26672087822728197, "grad_norm": 0.1329965591430664, "learning_rate": 0.00017343638767415847, "loss": 1.0805, "step": 1312 }, { "epoch": 0.26692417157958936, "grad_norm": 0.12369682639837265, "learning_rate": 0.0001734160480016272, "loss": 1.0047, "step": 1313 }, { "epoch": 0.2671274649318967, "grad_norm": 0.12594352662563324, "learning_rate": 0.00017339570832909591, "loss": 1.209, "step": 1314 }, { "epoch": 0.2673307582842041, "grad_norm": 0.1423029899597168, "learning_rate": 0.00017337536865656464, "loss": 1.0829, "step": 1315 }, { "epoch": 0.2675340516365115, "grad_norm": 0.11651685833930969, "learning_rate": 0.00017335502898403336, "loss": 1.0249, "step": 1316 }, { "epoch": 0.26773734498881885, "grad_norm": 0.10999172925949097, "learning_rate": 0.0001733346893115021, "loss": 0.8872, "step": 1317 }, { "epoch": 0.26794063834112625, "grad_norm": 0.125168576836586, "learning_rate": 0.0001733143496389708, "loss": 1.0853, "step": 1318 }, { "epoch": 0.26814393169343365, "grad_norm": 0.1307574361562729, "learning_rate": 0.00017329400996643954, "loss": 0.9643, "step": 1319 }, { "epoch": 0.268347225045741, "grad_norm": 0.136819988489151, "learning_rate": 0.0001732736702939083, "loss": 1.0952, "step": 1320 }, { "epoch": 0.2685505183980484, "grad_norm": 0.12915043532848358, "learning_rate": 0.000173253330621377, "loss": 0.9278, "step": 1321 }, { "epoch": 0.2687538117503558, "grad_norm": 0.12452216446399689, "learning_rate": 0.00017323299094884574, "loss": 1.0679, "step": 1322 }, { "epoch": 0.26895710510266313, "grad_norm": 0.1167951300740242, "learning_rate": 0.00017321265127631446, "loss": 1.009, "step": 1323 }, { "epoch": 0.26916039845497053, "grad_norm": 0.12355060130357742, "learning_rate": 0.00017319231160378319, "loss": 1.1398, "step": 1324 }, { "epoch": 0.2693636918072779, "grad_norm": 0.14160853624343872, "learning_rate": 0.0001731719719312519, "loss": 1.1444, "step": 1325 }, { "epoch": 0.26956698515958527, "grad_norm": 0.12388666719198227, "learning_rate": 0.00017315163225872064, "loss": 1.1242, "step": 1326 }, { "epoch": 0.26977027851189267, "grad_norm": 0.11084824055433273, "learning_rate": 0.00017313129258618936, "loss": 0.9006, "step": 1327 }, { "epoch": 0.26997357186420007, "grad_norm": 0.11720530688762665, "learning_rate": 0.0001731109529136581, "loss": 0.9474, "step": 1328 }, { "epoch": 0.2701768652165074, "grad_norm": 0.13025008141994476, "learning_rate": 0.00017309061324112684, "loss": 1.0815, "step": 1329 }, { "epoch": 0.2703801585688148, "grad_norm": 0.14168627560138702, "learning_rate": 0.00017307027356859556, "loss": 1.0938, "step": 1330 }, { "epoch": 0.27058345192112215, "grad_norm": 0.14329680800437927, "learning_rate": 0.00017304993389606428, "loss": 1.2552, "step": 1331 }, { "epoch": 0.27078674527342955, "grad_norm": 0.12423396855592728, "learning_rate": 0.000173029594223533, "loss": 0.8778, "step": 1332 }, { "epoch": 0.27099003862573695, "grad_norm": 0.13177728652954102, "learning_rate": 0.00017300925455100173, "loss": 1.2632, "step": 1333 }, { "epoch": 0.2711933319780443, "grad_norm": 0.12286023795604706, "learning_rate": 0.00017298891487847046, "loss": 1.078, "step": 1334 }, { "epoch": 0.2713966253303517, "grad_norm": 0.10991277545690536, "learning_rate": 0.00017296857520593918, "loss": 1.0038, "step": 1335 }, { "epoch": 0.2715999186826591, "grad_norm": 0.1368594616651535, "learning_rate": 0.0001729482355334079, "loss": 1.0416, "step": 1336 }, { "epoch": 0.27180321203496643, "grad_norm": 0.11537830531597137, "learning_rate": 0.00017292789586087666, "loss": 1.0933, "step": 1337 }, { "epoch": 0.27200650538727383, "grad_norm": 0.11709605902433395, "learning_rate": 0.00017290755618834538, "loss": 0.958, "step": 1338 }, { "epoch": 0.27220979873958123, "grad_norm": 0.1164301261305809, "learning_rate": 0.0001728872165158141, "loss": 0.8833, "step": 1339 }, { "epoch": 0.2724130920918886, "grad_norm": 0.13498760759830475, "learning_rate": 0.00017286687684328283, "loss": 1.1173, "step": 1340 }, { "epoch": 0.272616385444196, "grad_norm": 0.11391112208366394, "learning_rate": 0.00017284653717075156, "loss": 0.9329, "step": 1341 }, { "epoch": 0.2728196787965034, "grad_norm": 0.12780262529850006, "learning_rate": 0.00017282619749822028, "loss": 1.1273, "step": 1342 }, { "epoch": 0.2730229721488107, "grad_norm": 0.11829452961683273, "learning_rate": 0.000172805857825689, "loss": 0.8299, "step": 1343 }, { "epoch": 0.2732262655011181, "grad_norm": 0.12499269843101501, "learning_rate": 0.00017278551815315773, "loss": 1.1501, "step": 1344 }, { "epoch": 0.2734295588534255, "grad_norm": 0.13114666938781738, "learning_rate": 0.00017276517848062648, "loss": 1.0625, "step": 1345 }, { "epoch": 0.27363285220573286, "grad_norm": 0.1208108589053154, "learning_rate": 0.0001727448388080952, "loss": 0.965, "step": 1346 }, { "epoch": 0.27383614555804026, "grad_norm": 0.12325561046600342, "learning_rate": 0.00017272449913556393, "loss": 1.0976, "step": 1347 }, { "epoch": 0.27403943891034765, "grad_norm": 0.12004940211772919, "learning_rate": 0.00017270415946303265, "loss": 0.9958, "step": 1348 }, { "epoch": 0.274242732262655, "grad_norm": 0.1253954917192459, "learning_rate": 0.00017268381979050138, "loss": 1.1158, "step": 1349 }, { "epoch": 0.2744460256149624, "grad_norm": 0.12844887375831604, "learning_rate": 0.0001726634801179701, "loss": 1.0849, "step": 1350 }, { "epoch": 0.2746493189672698, "grad_norm": 0.1340886950492859, "learning_rate": 0.00017264314044543883, "loss": 1.2566, "step": 1351 }, { "epoch": 0.27485261231957714, "grad_norm": 0.12355068325996399, "learning_rate": 0.00017262280077290755, "loss": 0.9769, "step": 1352 }, { "epoch": 0.27505590567188454, "grad_norm": 0.10396768152713776, "learning_rate": 0.0001726024611003763, "loss": 0.9058, "step": 1353 }, { "epoch": 0.27525919902419194, "grad_norm": 0.1249571368098259, "learning_rate": 0.00017258212142784503, "loss": 1.0982, "step": 1354 }, { "epoch": 0.2754624923764993, "grad_norm": 0.13168682157993317, "learning_rate": 0.00017256178175531375, "loss": 1.077, "step": 1355 }, { "epoch": 0.2756657857288067, "grad_norm": 0.11570144444704056, "learning_rate": 0.00017254144208278248, "loss": 0.9515, "step": 1356 }, { "epoch": 0.275869079081114, "grad_norm": 0.13097792863845825, "learning_rate": 0.0001725211024102512, "loss": 1.1836, "step": 1357 }, { "epoch": 0.2760723724334214, "grad_norm": 0.13371975719928741, "learning_rate": 0.00017250076273771993, "loss": 1.1521, "step": 1358 }, { "epoch": 0.2762756657857288, "grad_norm": 0.11649662256240845, "learning_rate": 0.00017248042306518865, "loss": 0.9173, "step": 1359 }, { "epoch": 0.27647895913803616, "grad_norm": 0.1347874402999878, "learning_rate": 0.00017246008339265738, "loss": 1.2533, "step": 1360 }, { "epoch": 0.27668225249034356, "grad_norm": 0.13108506798744202, "learning_rate": 0.00017243974372012613, "loss": 1.28, "step": 1361 }, { "epoch": 0.27688554584265096, "grad_norm": 0.12440016865730286, "learning_rate": 0.00017241940404759485, "loss": 1.0599, "step": 1362 }, { "epoch": 0.2770888391949583, "grad_norm": 0.14487305283546448, "learning_rate": 0.00017239906437506358, "loss": 0.9908, "step": 1363 }, { "epoch": 0.2772921325472657, "grad_norm": 0.1289856880903244, "learning_rate": 0.0001723787247025323, "loss": 1.0855, "step": 1364 }, { "epoch": 0.2774954258995731, "grad_norm": 0.12901484966278076, "learning_rate": 0.00017235838503000102, "loss": 1.1945, "step": 1365 }, { "epoch": 0.27769871925188044, "grad_norm": 0.12738290429115295, "learning_rate": 0.00017233804535746975, "loss": 1.1233, "step": 1366 }, { "epoch": 0.27790201260418784, "grad_norm": 0.13745670020580292, "learning_rate": 0.00017231770568493847, "loss": 1.174, "step": 1367 }, { "epoch": 0.27810530595649524, "grad_norm": 0.1181466281414032, "learning_rate": 0.0001722973660124072, "loss": 1.0206, "step": 1368 }, { "epoch": 0.2783085993088026, "grad_norm": 0.11488956212997437, "learning_rate": 0.00017227702633987595, "loss": 1.0443, "step": 1369 }, { "epoch": 0.27851189266111, "grad_norm": 0.1327381134033203, "learning_rate": 0.00017225668666734467, "loss": 1.1791, "step": 1370 }, { "epoch": 0.2787151860134174, "grad_norm": 0.13029593229293823, "learning_rate": 0.0001722363469948134, "loss": 1.1022, "step": 1371 }, { "epoch": 0.2789184793657247, "grad_norm": 0.10697850584983826, "learning_rate": 0.00017221600732228212, "loss": 0.9923, "step": 1372 }, { "epoch": 0.2791217727180321, "grad_norm": 0.11224257200956345, "learning_rate": 0.00017219566764975085, "loss": 0.9701, "step": 1373 }, { "epoch": 0.2793250660703395, "grad_norm": 0.11932025849819183, "learning_rate": 0.00017217532797721957, "loss": 1.0136, "step": 1374 }, { "epoch": 0.27952835942264687, "grad_norm": 0.11104830354452133, "learning_rate": 0.0001721549883046883, "loss": 1.1131, "step": 1375 }, { "epoch": 0.27973165277495426, "grad_norm": 0.136908620595932, "learning_rate": 0.00017213464863215702, "loss": 1.2888, "step": 1376 }, { "epoch": 0.27993494612726166, "grad_norm": 0.13100826740264893, "learning_rate": 0.00017211430895962575, "loss": 1.2221, "step": 1377 }, { "epoch": 0.280138239479569, "grad_norm": 0.1406666785478592, "learning_rate": 0.0001720939692870945, "loss": 1.2672, "step": 1378 }, { "epoch": 0.2803415328318764, "grad_norm": 0.10946685820817947, "learning_rate": 0.00017207362961456322, "loss": 0.8652, "step": 1379 }, { "epoch": 0.2805448261841838, "grad_norm": 0.11411663144826889, "learning_rate": 0.00017205328994203195, "loss": 0.8172, "step": 1380 }, { "epoch": 0.28074811953649115, "grad_norm": 0.132404625415802, "learning_rate": 0.00017203295026950067, "loss": 1.12, "step": 1381 }, { "epoch": 0.28095141288879855, "grad_norm": 0.12594282627105713, "learning_rate": 0.0001720126105969694, "loss": 1.2019, "step": 1382 }, { "epoch": 0.2811547062411059, "grad_norm": 0.14421536028385162, "learning_rate": 0.00017199227092443812, "loss": 1.301, "step": 1383 }, { "epoch": 0.2813579995934133, "grad_norm": 0.118538998067379, "learning_rate": 0.00017197193125190684, "loss": 1.0952, "step": 1384 }, { "epoch": 0.2815612929457207, "grad_norm": 0.1211504191160202, "learning_rate": 0.00017195159157937557, "loss": 1.0272, "step": 1385 }, { "epoch": 0.28176458629802803, "grad_norm": 0.13460633158683777, "learning_rate": 0.00017193125190684432, "loss": 1.1372, "step": 1386 }, { "epoch": 0.28196787965033543, "grad_norm": 0.11669941991567612, "learning_rate": 0.00017191091223431304, "loss": 1.0313, "step": 1387 }, { "epoch": 0.2821711730026428, "grad_norm": 0.1414983719587326, "learning_rate": 0.00017189057256178177, "loss": 1.3215, "step": 1388 }, { "epoch": 0.28237446635495017, "grad_norm": 0.11535824090242386, "learning_rate": 0.0001718702328892505, "loss": 1.0569, "step": 1389 }, { "epoch": 0.28257775970725757, "grad_norm": 0.11279894411563873, "learning_rate": 0.00017184989321671922, "loss": 0.9706, "step": 1390 }, { "epoch": 0.28278105305956497, "grad_norm": 0.12699778378009796, "learning_rate": 0.00017182955354418794, "loss": 1.0541, "step": 1391 }, { "epoch": 0.2829843464118723, "grad_norm": 0.13677164912223816, "learning_rate": 0.00017180921387165667, "loss": 1.0118, "step": 1392 }, { "epoch": 0.2831876397641797, "grad_norm": 0.1261303573846817, "learning_rate": 0.0001717888741991254, "loss": 1.0019, "step": 1393 }, { "epoch": 0.2833909331164871, "grad_norm": 0.15269511938095093, "learning_rate": 0.00017176853452659414, "loss": 1.1414, "step": 1394 }, { "epoch": 0.28359422646879445, "grad_norm": 0.11726024746894836, "learning_rate": 0.00017174819485406287, "loss": 1.071, "step": 1395 }, { "epoch": 0.28379751982110185, "grad_norm": 0.10793468356132507, "learning_rate": 0.0001717278551815316, "loss": 1.0911, "step": 1396 }, { "epoch": 0.28400081317340925, "grad_norm": 0.13417348265647888, "learning_rate": 0.00017170751550900032, "loss": 1.182, "step": 1397 }, { "epoch": 0.2842041065257166, "grad_norm": 0.1220618337392807, "learning_rate": 0.00017168717583646904, "loss": 1.1165, "step": 1398 }, { "epoch": 0.284407399878024, "grad_norm": 0.1326867640018463, "learning_rate": 0.00017166683616393776, "loss": 1.0426, "step": 1399 }, { "epoch": 0.2846106932303314, "grad_norm": 0.12562425434589386, "learning_rate": 0.0001716464964914065, "loss": 1.1779, "step": 1400 }, { "epoch": 0.28481398658263873, "grad_norm": 0.13102425634860992, "learning_rate": 0.00017162615681887521, "loss": 1.1402, "step": 1401 }, { "epoch": 0.28501727993494613, "grad_norm": 0.12704792618751526, "learning_rate": 0.00017160581714634397, "loss": 1.18, "step": 1402 }, { "epoch": 0.28522057328725353, "grad_norm": 0.12526075541973114, "learning_rate": 0.0001715854774738127, "loss": 1.109, "step": 1403 }, { "epoch": 0.2854238666395609, "grad_norm": 0.12174190580844879, "learning_rate": 0.00017156513780128141, "loss": 1.0839, "step": 1404 }, { "epoch": 0.2856271599918683, "grad_norm": 0.13030166923999786, "learning_rate": 0.00017154479812875014, "loss": 1.0982, "step": 1405 }, { "epoch": 0.2858304533441757, "grad_norm": 0.12179411202669144, "learning_rate": 0.00017152445845621886, "loss": 1.0617, "step": 1406 }, { "epoch": 0.286033746696483, "grad_norm": 0.12964552640914917, "learning_rate": 0.0001715041187836876, "loss": 1.117, "step": 1407 }, { "epoch": 0.2862370400487904, "grad_norm": 0.12146733701229095, "learning_rate": 0.0001714837791111563, "loss": 1.1715, "step": 1408 }, { "epoch": 0.28644033340109776, "grad_norm": 0.12994210422039032, "learning_rate": 0.00017146343943862504, "loss": 1.1975, "step": 1409 }, { "epoch": 0.28664362675340516, "grad_norm": 0.12996168434619904, "learning_rate": 0.0001714430997660938, "loss": 1.1968, "step": 1410 }, { "epoch": 0.28684692010571256, "grad_norm": 0.13590598106384277, "learning_rate": 0.0001714227600935625, "loss": 1.1045, "step": 1411 }, { "epoch": 0.2870502134580199, "grad_norm": 0.12337225675582886, "learning_rate": 0.00017140242042103124, "loss": 1.1262, "step": 1412 }, { "epoch": 0.2872535068103273, "grad_norm": 0.11442485451698303, "learning_rate": 0.00017138208074849996, "loss": 1.0697, "step": 1413 }, { "epoch": 0.2874568001626347, "grad_norm": 0.1333555281162262, "learning_rate": 0.00017136174107596869, "loss": 0.9691, "step": 1414 }, { "epoch": 0.28766009351494204, "grad_norm": 0.13435356318950653, "learning_rate": 0.0001713414014034374, "loss": 1.071, "step": 1415 }, { "epoch": 0.28786338686724944, "grad_norm": 0.11869612336158752, "learning_rate": 0.00017132106173090613, "loss": 1.2081, "step": 1416 }, { "epoch": 0.28806668021955684, "grad_norm": 0.13402745127677917, "learning_rate": 0.00017130072205837486, "loss": 1.1887, "step": 1417 }, { "epoch": 0.2882699735718642, "grad_norm": 0.1282026469707489, "learning_rate": 0.00017128038238584358, "loss": 1.1802, "step": 1418 }, { "epoch": 0.2884732669241716, "grad_norm": 0.12006261944770813, "learning_rate": 0.00017126004271331234, "loss": 1.0366, "step": 1419 }, { "epoch": 0.288676560276479, "grad_norm": 0.10971211642026901, "learning_rate": 0.00017123970304078106, "loss": 1.0502, "step": 1420 }, { "epoch": 0.2888798536287863, "grad_norm": 0.12401802092790604, "learning_rate": 0.00017121936336824978, "loss": 0.9525, "step": 1421 }, { "epoch": 0.2890831469810937, "grad_norm": 0.12699580192565918, "learning_rate": 0.0001711990236957185, "loss": 1.0588, "step": 1422 }, { "epoch": 0.2892864403334011, "grad_norm": 0.10931636393070221, "learning_rate": 0.00017117868402318723, "loss": 0.9512, "step": 1423 }, { "epoch": 0.28948973368570846, "grad_norm": 0.13325555622577667, "learning_rate": 0.00017115834435065596, "loss": 1.2333, "step": 1424 }, { "epoch": 0.28969302703801586, "grad_norm": 0.1266210675239563, "learning_rate": 0.00017113800467812468, "loss": 1.018, "step": 1425 }, { "epoch": 0.28989632039032326, "grad_norm": 0.12187005579471588, "learning_rate": 0.0001711176650055934, "loss": 1.0865, "step": 1426 }, { "epoch": 0.2900996137426306, "grad_norm": 0.10819690674543381, "learning_rate": 0.00017109732533306216, "loss": 0.8355, "step": 1427 }, { "epoch": 0.290302907094938, "grad_norm": 0.11890331655740738, "learning_rate": 0.00017107698566053088, "loss": 1.0521, "step": 1428 }, { "epoch": 0.2905062004472454, "grad_norm": 0.12693597376346588, "learning_rate": 0.0001710566459879996, "loss": 1.1304, "step": 1429 }, { "epoch": 0.29070949379955274, "grad_norm": 0.12627696990966797, "learning_rate": 0.00017103630631546833, "loss": 1.0533, "step": 1430 }, { "epoch": 0.29091278715186014, "grad_norm": 0.13593046367168427, "learning_rate": 0.00017101596664293706, "loss": 1.2738, "step": 1431 }, { "epoch": 0.29111608050416754, "grad_norm": 0.10364729166030884, "learning_rate": 0.00017099562697040578, "loss": 0.9938, "step": 1432 }, { "epoch": 0.2913193738564749, "grad_norm": 0.11455982178449631, "learning_rate": 0.0001709752872978745, "loss": 0.9798, "step": 1433 }, { "epoch": 0.2915226672087823, "grad_norm": 0.12030831724405289, "learning_rate": 0.00017095494762534323, "loss": 1.1068, "step": 1434 }, { "epoch": 0.2917259605610896, "grad_norm": 0.12434829771518707, "learning_rate": 0.00017093460795281198, "loss": 0.9511, "step": 1435 }, { "epoch": 0.291929253913397, "grad_norm": 0.13269619643688202, "learning_rate": 0.0001709142682802807, "loss": 1.0469, "step": 1436 }, { "epoch": 0.2921325472657044, "grad_norm": 0.12037021666765213, "learning_rate": 0.00017089392860774943, "loss": 1.1413, "step": 1437 }, { "epoch": 0.29233584061801177, "grad_norm": 0.1290545016527176, "learning_rate": 0.00017087358893521815, "loss": 0.9745, "step": 1438 }, { "epoch": 0.29253913397031917, "grad_norm": 0.13319085538387299, "learning_rate": 0.00017085324926268688, "loss": 1.3102, "step": 1439 }, { "epoch": 0.29274242732262656, "grad_norm": 0.11888034641742706, "learning_rate": 0.0001708329095901556, "loss": 0.982, "step": 1440 }, { "epoch": 0.2929457206749339, "grad_norm": 0.10824552178382874, "learning_rate": 0.00017081256991762433, "loss": 0.922, "step": 1441 }, { "epoch": 0.2931490140272413, "grad_norm": 0.11319594085216522, "learning_rate": 0.00017079223024509305, "loss": 0.997, "step": 1442 }, { "epoch": 0.2933523073795487, "grad_norm": 0.12176964432001114, "learning_rate": 0.0001707718905725618, "loss": 0.9845, "step": 1443 }, { "epoch": 0.29355560073185605, "grad_norm": 0.13725343346595764, "learning_rate": 0.00017075155090003053, "loss": 1.3378, "step": 1444 }, { "epoch": 0.29375889408416345, "grad_norm": 0.1362079679965973, "learning_rate": 0.00017073121122749925, "loss": 1.2632, "step": 1445 }, { "epoch": 0.29396218743647085, "grad_norm": 0.12925031781196594, "learning_rate": 0.00017071087155496798, "loss": 1.0776, "step": 1446 }, { "epoch": 0.2941654807887782, "grad_norm": 0.11627811938524246, "learning_rate": 0.0001706905318824367, "loss": 0.9881, "step": 1447 }, { "epoch": 0.2943687741410856, "grad_norm": 0.13387028872966766, "learning_rate": 0.00017067019220990543, "loss": 1.0899, "step": 1448 }, { "epoch": 0.294572067493393, "grad_norm": 0.12257883697748184, "learning_rate": 0.00017064985253737415, "loss": 1.1031, "step": 1449 }, { "epoch": 0.29477536084570033, "grad_norm": 0.14938175678253174, "learning_rate": 0.00017062951286484287, "loss": 1.1557, "step": 1450 }, { "epoch": 0.29497865419800773, "grad_norm": 0.12559346854686737, "learning_rate": 0.00017060917319231163, "loss": 1.1062, "step": 1451 }, { "epoch": 0.2951819475503151, "grad_norm": 0.12475700676441193, "learning_rate": 0.00017058883351978035, "loss": 1.0834, "step": 1452 }, { "epoch": 0.29538524090262247, "grad_norm": 0.1364937126636505, "learning_rate": 0.00017056849384724908, "loss": 1.0028, "step": 1453 }, { "epoch": 0.29558853425492987, "grad_norm": 0.12429028004407883, "learning_rate": 0.0001705481541747178, "loss": 1.1087, "step": 1454 }, { "epoch": 0.29579182760723727, "grad_norm": 0.1251228153705597, "learning_rate": 0.00017052781450218652, "loss": 0.9675, "step": 1455 }, { "epoch": 0.2959951209595446, "grad_norm": 0.12485919892787933, "learning_rate": 0.00017050747482965525, "loss": 1.0045, "step": 1456 }, { "epoch": 0.296198414311852, "grad_norm": 0.12948845326900482, "learning_rate": 0.00017048713515712397, "loss": 1.1154, "step": 1457 }, { "epoch": 0.2964017076641594, "grad_norm": 0.1288408488035202, "learning_rate": 0.0001704667954845927, "loss": 1.1203, "step": 1458 }, { "epoch": 0.29660500101646675, "grad_norm": 0.13588744401931763, "learning_rate": 0.00017044645581206142, "loss": 1.1436, "step": 1459 }, { "epoch": 0.29680829436877415, "grad_norm": 0.1264243721961975, "learning_rate": 0.00017042611613953017, "loss": 1.1903, "step": 1460 }, { "epoch": 0.2970115877210815, "grad_norm": 0.12819139659404755, "learning_rate": 0.0001704057764669989, "loss": 1.1337, "step": 1461 }, { "epoch": 0.2972148810733889, "grad_norm": 0.1189684271812439, "learning_rate": 0.00017038543679446762, "loss": 1.1437, "step": 1462 }, { "epoch": 0.2974181744256963, "grad_norm": 0.1304028183221817, "learning_rate": 0.00017036509712193635, "loss": 1.1653, "step": 1463 }, { "epoch": 0.29762146777800363, "grad_norm": 0.12161426246166229, "learning_rate": 0.00017034475744940507, "loss": 1.0482, "step": 1464 }, { "epoch": 0.29782476113031103, "grad_norm": 0.1224290132522583, "learning_rate": 0.0001703244177768738, "loss": 1.0592, "step": 1465 }, { "epoch": 0.29802805448261843, "grad_norm": 0.1365649402141571, "learning_rate": 0.00017030407810434252, "loss": 1.0758, "step": 1466 }, { "epoch": 0.2982313478349258, "grad_norm": 0.12406224012374878, "learning_rate": 0.00017028373843181124, "loss": 1.0901, "step": 1467 }, { "epoch": 0.2984346411872332, "grad_norm": 0.13438360393047333, "learning_rate": 0.00017026339875928, "loss": 1.1552, "step": 1468 }, { "epoch": 0.2986379345395406, "grad_norm": 0.14297276735305786, "learning_rate": 0.00017024305908674872, "loss": 1.1271, "step": 1469 }, { "epoch": 0.2988412278918479, "grad_norm": 0.11946640908718109, "learning_rate": 0.00017022271941421745, "loss": 1.0295, "step": 1470 }, { "epoch": 0.2990445212441553, "grad_norm": 0.12182927876710892, "learning_rate": 0.00017020237974168617, "loss": 1.0381, "step": 1471 }, { "epoch": 0.2992478145964627, "grad_norm": 0.1238449215888977, "learning_rate": 0.0001701820400691549, "loss": 1.0399, "step": 1472 }, { "epoch": 0.29945110794877006, "grad_norm": 0.12575775384902954, "learning_rate": 0.00017016170039662362, "loss": 1.1552, "step": 1473 }, { "epoch": 0.29965440130107746, "grad_norm": 0.14087268710136414, "learning_rate": 0.00017014136072409234, "loss": 1.1225, "step": 1474 }, { "epoch": 0.29985769465338485, "grad_norm": 0.13070684671401978, "learning_rate": 0.00017012102105156107, "loss": 1.1097, "step": 1475 }, { "epoch": 0.3000609880056922, "grad_norm": 0.12527720630168915, "learning_rate": 0.00017010068137902982, "loss": 1.0171, "step": 1476 }, { "epoch": 0.3002642813579996, "grad_norm": 0.12080081552267075, "learning_rate": 0.00017008034170649854, "loss": 1.0934, "step": 1477 }, { "epoch": 0.300467574710307, "grad_norm": 0.13225379586219788, "learning_rate": 0.00017006000203396727, "loss": 1.1286, "step": 1478 }, { "epoch": 0.30067086806261434, "grad_norm": 0.14612498879432678, "learning_rate": 0.000170039662361436, "loss": 1.4, "step": 1479 }, { "epoch": 0.30087416141492174, "grad_norm": 0.12612837553024292, "learning_rate": 0.00017001932268890472, "loss": 0.9265, "step": 1480 }, { "epoch": 0.30107745476722914, "grad_norm": 0.11075981706380844, "learning_rate": 0.00016999898301637344, "loss": 0.992, "step": 1481 }, { "epoch": 0.3012807481195365, "grad_norm": 0.11420360207557678, "learning_rate": 0.00016997864334384217, "loss": 1.033, "step": 1482 }, { "epoch": 0.3014840414718439, "grad_norm": 0.1344219148159027, "learning_rate": 0.0001699583036713109, "loss": 1.0934, "step": 1483 }, { "epoch": 0.3016873348241513, "grad_norm": 0.13956451416015625, "learning_rate": 0.00016993796399877964, "loss": 1.2297, "step": 1484 }, { "epoch": 0.3018906281764586, "grad_norm": 0.1293005496263504, "learning_rate": 0.00016991762432624837, "loss": 1.0928, "step": 1485 }, { "epoch": 0.302093921528766, "grad_norm": 0.11039478331804276, "learning_rate": 0.0001698972846537171, "loss": 0.9906, "step": 1486 }, { "epoch": 0.30229721488107336, "grad_norm": 0.13603124022483826, "learning_rate": 0.00016987694498118582, "loss": 1.3131, "step": 1487 }, { "epoch": 0.30250050823338076, "grad_norm": 0.14525099098682404, "learning_rate": 0.00016985660530865454, "loss": 1.1205, "step": 1488 }, { "epoch": 0.30270380158568816, "grad_norm": 0.14237269759178162, "learning_rate": 0.00016983626563612326, "loss": 1.1144, "step": 1489 }, { "epoch": 0.3029070949379955, "grad_norm": 0.10434848070144653, "learning_rate": 0.000169815925963592, "loss": 0.9038, "step": 1490 }, { "epoch": 0.3031103882903029, "grad_norm": 0.11946713179349899, "learning_rate": 0.0001697955862910607, "loss": 1.0611, "step": 1491 }, { "epoch": 0.3033136816426103, "grad_norm": 0.12547194957733154, "learning_rate": 0.00016977524661852946, "loss": 0.999, "step": 1492 }, { "epoch": 0.30351697499491764, "grad_norm": 0.13156647980213165, "learning_rate": 0.0001697549069459982, "loss": 1.1174, "step": 1493 }, { "epoch": 0.30372026834722504, "grad_norm": 0.13008251786231995, "learning_rate": 0.00016973456727346691, "loss": 1.1239, "step": 1494 }, { "epoch": 0.30392356169953244, "grad_norm": 0.1194852888584137, "learning_rate": 0.00016971422760093564, "loss": 0.9974, "step": 1495 }, { "epoch": 0.3041268550518398, "grad_norm": 0.12988907098770142, "learning_rate": 0.00016969388792840436, "loss": 1.0105, "step": 1496 }, { "epoch": 0.3043301484041472, "grad_norm": 0.13736090064048767, "learning_rate": 0.0001696735482558731, "loss": 1.167, "step": 1497 }, { "epoch": 0.3045334417564546, "grad_norm": 0.12946954369544983, "learning_rate": 0.0001696532085833418, "loss": 1.1628, "step": 1498 }, { "epoch": 0.3047367351087619, "grad_norm": 0.12599951028823853, "learning_rate": 0.00016963286891081054, "loss": 1.2581, "step": 1499 }, { "epoch": 0.3049400284610693, "grad_norm": 0.12264920026063919, "learning_rate": 0.00016961252923827926, "loss": 0.966, "step": 1500 }, { "epoch": 0.3051433218133767, "grad_norm": 0.12567077577114105, "learning_rate": 0.000169592189565748, "loss": 1.0898, "step": 1501 }, { "epoch": 0.30534661516568407, "grad_norm": 0.12665922939777374, "learning_rate": 0.00016957184989321674, "loss": 1.1621, "step": 1502 }, { "epoch": 0.30554990851799146, "grad_norm": 0.10949800908565521, "learning_rate": 0.00016955151022068546, "loss": 0.9312, "step": 1503 }, { "epoch": 0.30575320187029886, "grad_norm": 0.13273455202579498, "learning_rate": 0.00016953117054815419, "loss": 1.1535, "step": 1504 }, { "epoch": 0.3059564952226062, "grad_norm": 0.13857555389404297, "learning_rate": 0.0001695108308756229, "loss": 1.1564, "step": 1505 }, { "epoch": 0.3061597885749136, "grad_norm": 0.10915102064609528, "learning_rate": 0.00016949049120309163, "loss": 0.8977, "step": 1506 }, { "epoch": 0.306363081927221, "grad_norm": 0.11122920364141464, "learning_rate": 0.00016947015153056036, "loss": 0.9389, "step": 1507 }, { "epoch": 0.30656637527952835, "grad_norm": 0.13575953245162964, "learning_rate": 0.00016944981185802908, "loss": 1.2634, "step": 1508 }, { "epoch": 0.30676966863183575, "grad_norm": 0.12309823930263519, "learning_rate": 0.00016942947218549783, "loss": 1.0562, "step": 1509 }, { "epoch": 0.30697296198414314, "grad_norm": 0.13939395546913147, "learning_rate": 0.00016940913251296656, "loss": 1.2801, "step": 1510 }, { "epoch": 0.3071762553364505, "grad_norm": 0.11922150105237961, "learning_rate": 0.00016938879284043528, "loss": 1.0694, "step": 1511 }, { "epoch": 0.3073795486887579, "grad_norm": 0.12427409738302231, "learning_rate": 0.000169368453167904, "loss": 1.0676, "step": 1512 }, { "epoch": 0.30758284204106523, "grad_norm": 0.11560991406440735, "learning_rate": 0.00016934811349537273, "loss": 0.9416, "step": 1513 }, { "epoch": 0.30778613539337263, "grad_norm": 0.12494566291570663, "learning_rate": 0.00016932777382284146, "loss": 1.0484, "step": 1514 }, { "epoch": 0.30798942874568, "grad_norm": 0.14169259369373322, "learning_rate": 0.00016930743415031018, "loss": 1.2045, "step": 1515 }, { "epoch": 0.30819272209798737, "grad_norm": 0.13265348970890045, "learning_rate": 0.0001692870944777789, "loss": 0.9964, "step": 1516 }, { "epoch": 0.30839601545029477, "grad_norm": 0.1246609166264534, "learning_rate": 0.00016926675480524766, "loss": 1.0218, "step": 1517 }, { "epoch": 0.30859930880260217, "grad_norm": 0.1305045336484909, "learning_rate": 0.00016924641513271638, "loss": 1.0899, "step": 1518 }, { "epoch": 0.3088026021549095, "grad_norm": 0.1269298940896988, "learning_rate": 0.0001692260754601851, "loss": 1.1613, "step": 1519 }, { "epoch": 0.3090058955072169, "grad_norm": 0.13356846570968628, "learning_rate": 0.00016920573578765383, "loss": 1.2171, "step": 1520 }, { "epoch": 0.3092091888595243, "grad_norm": 0.12417469173669815, "learning_rate": 0.00016918539611512256, "loss": 1.0345, "step": 1521 }, { "epoch": 0.30941248221183165, "grad_norm": 0.12965606153011322, "learning_rate": 0.00016916505644259128, "loss": 1.003, "step": 1522 }, { "epoch": 0.30961577556413905, "grad_norm": 0.13075895607471466, "learning_rate": 0.00016914471677006, "loss": 1.1521, "step": 1523 }, { "epoch": 0.30981906891644645, "grad_norm": 0.1491623818874359, "learning_rate": 0.00016912437709752873, "loss": 1.1669, "step": 1524 }, { "epoch": 0.3100223622687538, "grad_norm": 0.13368669152259827, "learning_rate": 0.00016910403742499748, "loss": 1.1996, "step": 1525 }, { "epoch": 0.3102256556210612, "grad_norm": 0.12484747171401978, "learning_rate": 0.0001690836977524662, "loss": 0.9979, "step": 1526 }, { "epoch": 0.3104289489733686, "grad_norm": 0.11716404557228088, "learning_rate": 0.00016906335807993493, "loss": 1.0686, "step": 1527 }, { "epoch": 0.31063224232567593, "grad_norm": 0.1104549840092659, "learning_rate": 0.00016904301840740365, "loss": 0.9746, "step": 1528 }, { "epoch": 0.31083553567798333, "grad_norm": 0.1288052350282669, "learning_rate": 0.00016902267873487238, "loss": 1.0593, "step": 1529 }, { "epoch": 0.31103882903029073, "grad_norm": 0.13284744322299957, "learning_rate": 0.0001690023390623411, "loss": 1.1837, "step": 1530 }, { "epoch": 0.3112421223825981, "grad_norm": 0.10993791371583939, "learning_rate": 0.00016898199938980983, "loss": 0.9076, "step": 1531 }, { "epoch": 0.3114454157349055, "grad_norm": 0.1289556920528412, "learning_rate": 0.00016896165971727855, "loss": 1.124, "step": 1532 }, { "epoch": 0.31164870908721287, "grad_norm": 0.12656551599502563, "learning_rate": 0.0001689413200447473, "loss": 1.0672, "step": 1533 }, { "epoch": 0.3118520024395202, "grad_norm": 0.12359779328107834, "learning_rate": 0.00016892098037221603, "loss": 1.1162, "step": 1534 }, { "epoch": 0.3120552957918276, "grad_norm": 0.13356052339076996, "learning_rate": 0.00016890064069968475, "loss": 1.1927, "step": 1535 }, { "epoch": 0.312258589144135, "grad_norm": 0.12397721409797668, "learning_rate": 0.00016888030102715348, "loss": 1.0744, "step": 1536 }, { "epoch": 0.31246188249644236, "grad_norm": 0.14322160184383392, "learning_rate": 0.0001688599613546222, "loss": 1.076, "step": 1537 }, { "epoch": 0.31266517584874975, "grad_norm": 0.1378001719713211, "learning_rate": 0.00016883962168209093, "loss": 1.4044, "step": 1538 }, { "epoch": 0.3128684692010571, "grad_norm": 0.12438174337148666, "learning_rate": 0.00016881928200955965, "loss": 1.1154, "step": 1539 }, { "epoch": 0.3130717625533645, "grad_norm": 0.12698177993297577, "learning_rate": 0.00016879894233702837, "loss": 1.1988, "step": 1540 }, { "epoch": 0.3132750559056719, "grad_norm": 0.12074883282184601, "learning_rate": 0.00016877860266449713, "loss": 1.0175, "step": 1541 }, { "epoch": 0.31347834925797924, "grad_norm": 0.13820214569568634, "learning_rate": 0.00016875826299196585, "loss": 1.0498, "step": 1542 }, { "epoch": 0.31368164261028664, "grad_norm": 0.14697261154651642, "learning_rate": 0.00016873792331943458, "loss": 1.2677, "step": 1543 }, { "epoch": 0.31388493596259404, "grad_norm": 0.13973405957221985, "learning_rate": 0.0001687175836469033, "loss": 1.2233, "step": 1544 }, { "epoch": 0.3140882293149014, "grad_norm": 0.1303880661725998, "learning_rate": 0.000168697243974372, "loss": 1.1023, "step": 1545 }, { "epoch": 0.3142915226672088, "grad_norm": 0.13434049487113953, "learning_rate": 0.00016867690430184075, "loss": 1.1206, "step": 1546 }, { "epoch": 0.3144948160195162, "grad_norm": 0.11447029560804367, "learning_rate": 0.00016865656462930947, "loss": 0.978, "step": 1547 }, { "epoch": 0.3146981093718235, "grad_norm": 0.12716947495937347, "learning_rate": 0.0001686362249567782, "loss": 1.1544, "step": 1548 }, { "epoch": 0.3149014027241309, "grad_norm": 0.12545545399188995, "learning_rate": 0.00016861588528424692, "loss": 0.9976, "step": 1549 }, { "epoch": 0.3151046960764383, "grad_norm": 0.13446862995624542, "learning_rate": 0.00016859554561171567, "loss": 1.1167, "step": 1550 }, { "epoch": 0.31530798942874566, "grad_norm": 0.12542487680912018, "learning_rate": 0.0001685752059391844, "loss": 1.1148, "step": 1551 }, { "epoch": 0.31551128278105306, "grad_norm": 0.12793605029582977, "learning_rate": 0.00016855486626665312, "loss": 1.1139, "step": 1552 }, { "epoch": 0.31571457613336046, "grad_norm": 0.13481125235557556, "learning_rate": 0.00016853452659412182, "loss": 1.144, "step": 1553 }, { "epoch": 0.3159178694856678, "grad_norm": 0.11555742472410202, "learning_rate": 0.00016851418692159057, "loss": 1.0276, "step": 1554 }, { "epoch": 0.3161211628379752, "grad_norm": 0.11695119738578796, "learning_rate": 0.0001684938472490593, "loss": 0.9493, "step": 1555 }, { "epoch": 0.3163244561902826, "grad_norm": 0.13503003120422363, "learning_rate": 0.00016847350757652802, "loss": 1.0556, "step": 1556 }, { "epoch": 0.31652774954258994, "grad_norm": 0.1347092092037201, "learning_rate": 0.00016845316790399674, "loss": 1.0362, "step": 1557 }, { "epoch": 0.31673104289489734, "grad_norm": 0.12576071918010712, "learning_rate": 0.0001684328282314655, "loss": 1.085, "step": 1558 }, { "epoch": 0.31693433624720474, "grad_norm": 0.1280100792646408, "learning_rate": 0.00016841248855893422, "loss": 1.2004, "step": 1559 }, { "epoch": 0.3171376295995121, "grad_norm": 0.11573471873998642, "learning_rate": 0.00016839214888640295, "loss": 0.9058, "step": 1560 }, { "epoch": 0.3173409229518195, "grad_norm": 0.12192318588495255, "learning_rate": 0.00016837180921387167, "loss": 0.9789, "step": 1561 }, { "epoch": 0.3175442163041269, "grad_norm": 0.1251290738582611, "learning_rate": 0.0001683514695413404, "loss": 0.9818, "step": 1562 }, { "epoch": 0.3177475096564342, "grad_norm": 0.12726342678070068, "learning_rate": 0.00016833112986880912, "loss": 0.9911, "step": 1563 }, { "epoch": 0.3179508030087416, "grad_norm": 0.12146829068660736, "learning_rate": 0.00016831079019627784, "loss": 1.0005, "step": 1564 }, { "epoch": 0.318154096361049, "grad_norm": 0.12948118150234222, "learning_rate": 0.00016829045052374657, "loss": 0.9286, "step": 1565 }, { "epoch": 0.31835738971335636, "grad_norm": 0.1411774903535843, "learning_rate": 0.00016827011085121532, "loss": 1.031, "step": 1566 }, { "epoch": 0.31856068306566376, "grad_norm": 0.12407765537500381, "learning_rate": 0.00016824977117868404, "loss": 1.0538, "step": 1567 }, { "epoch": 0.3187639764179711, "grad_norm": 0.1235983669757843, "learning_rate": 0.00016822943150615277, "loss": 0.9356, "step": 1568 }, { "epoch": 0.3189672697702785, "grad_norm": 0.13756640255451202, "learning_rate": 0.0001682090918336215, "loss": 1.3008, "step": 1569 }, { "epoch": 0.3191705631225859, "grad_norm": 0.14735132455825806, "learning_rate": 0.00016818875216109022, "loss": 1.1271, "step": 1570 }, { "epoch": 0.31937385647489325, "grad_norm": 0.14694719016551971, "learning_rate": 0.00016816841248855894, "loss": 1.1222, "step": 1571 }, { "epoch": 0.31957714982720065, "grad_norm": 0.10828382521867752, "learning_rate": 0.00016814807281602767, "loss": 1.0565, "step": 1572 }, { "epoch": 0.31978044317950804, "grad_norm": 0.1332756131887436, "learning_rate": 0.0001681277331434964, "loss": 1.0085, "step": 1573 }, { "epoch": 0.3199837365318154, "grad_norm": 0.12354031950235367, "learning_rate": 0.00016810739347096514, "loss": 1.1101, "step": 1574 }, { "epoch": 0.3201870298841228, "grad_norm": 0.1273805797100067, "learning_rate": 0.00016808705379843387, "loss": 1.1402, "step": 1575 }, { "epoch": 0.3203903232364302, "grad_norm": 0.1219901368021965, "learning_rate": 0.0001680667141259026, "loss": 1.1955, "step": 1576 }, { "epoch": 0.32059361658873753, "grad_norm": 0.13021346926689148, "learning_rate": 0.00016804637445337132, "loss": 1.2073, "step": 1577 }, { "epoch": 0.3207969099410449, "grad_norm": 0.11928975582122803, "learning_rate": 0.00016802603478084004, "loss": 1.0758, "step": 1578 }, { "epoch": 0.3210002032933523, "grad_norm": 0.10524530708789825, "learning_rate": 0.00016800569510830876, "loss": 0.9655, "step": 1579 }, { "epoch": 0.32120349664565967, "grad_norm": 0.13994352519512177, "learning_rate": 0.0001679853554357775, "loss": 1.1405, "step": 1580 }, { "epoch": 0.32140678999796707, "grad_norm": 0.13520392775535583, "learning_rate": 0.0001679650157632462, "loss": 1.3525, "step": 1581 }, { "epoch": 0.32161008335027447, "grad_norm": 0.13306692242622375, "learning_rate": 0.00016794467609071496, "loss": 1.3286, "step": 1582 }, { "epoch": 0.3218133767025818, "grad_norm": 0.1361495852470398, "learning_rate": 0.0001679243364181837, "loss": 1.0468, "step": 1583 }, { "epoch": 0.3220166700548892, "grad_norm": 0.1192341074347496, "learning_rate": 0.0001679039967456524, "loss": 0.7855, "step": 1584 }, { "epoch": 0.3222199634071966, "grad_norm": 0.12359831482172012, "learning_rate": 0.00016788365707312114, "loss": 1.1627, "step": 1585 }, { "epoch": 0.32242325675950395, "grad_norm": 0.1272861659526825, "learning_rate": 0.00016786331740058984, "loss": 1.0442, "step": 1586 }, { "epoch": 0.32262655011181135, "grad_norm": 0.1261843740940094, "learning_rate": 0.0001678429777280586, "loss": 1.0033, "step": 1587 }, { "epoch": 0.32282984346411875, "grad_norm": 0.11822490394115448, "learning_rate": 0.0001678226380555273, "loss": 1.0815, "step": 1588 }, { "epoch": 0.3230331368164261, "grad_norm": 0.13497643172740936, "learning_rate": 0.00016780229838299604, "loss": 1.0485, "step": 1589 }, { "epoch": 0.3232364301687335, "grad_norm": 0.12484399974346161, "learning_rate": 0.00016778195871046476, "loss": 1.244, "step": 1590 }, { "epoch": 0.3234397235210409, "grad_norm": 0.12844592332839966, "learning_rate": 0.0001677616190379335, "loss": 1.2803, "step": 1591 }, { "epoch": 0.32364301687334823, "grad_norm": 0.12499992549419403, "learning_rate": 0.00016774127936540224, "loss": 1.0049, "step": 1592 }, { "epoch": 0.32384631022565563, "grad_norm": 0.12357242405414581, "learning_rate": 0.00016772093969287096, "loss": 1.0463, "step": 1593 }, { "epoch": 0.324049603577963, "grad_norm": 0.11749047785997391, "learning_rate": 0.00016770060002033966, "loss": 0.9639, "step": 1594 }, { "epoch": 0.3242528969302704, "grad_norm": 0.1409110128879547, "learning_rate": 0.0001676802603478084, "loss": 1.0898, "step": 1595 }, { "epoch": 0.32445619028257777, "grad_norm": 0.1287623941898346, "learning_rate": 0.00016765992067527713, "loss": 1.1735, "step": 1596 }, { "epoch": 0.3246594836348851, "grad_norm": 0.1255931705236435, "learning_rate": 0.00016763958100274586, "loss": 1.0209, "step": 1597 }, { "epoch": 0.3248627769871925, "grad_norm": 0.1277484893798828, "learning_rate": 0.00016761924133021458, "loss": 1.1324, "step": 1598 }, { "epoch": 0.3250660703394999, "grad_norm": 0.14885109663009644, "learning_rate": 0.00016759890165768333, "loss": 1.1919, "step": 1599 }, { "epoch": 0.32526936369180726, "grad_norm": 0.12765826284885406, "learning_rate": 0.00016757856198515206, "loss": 1.1301, "step": 1600 }, { "epoch": 0.32547265704411465, "grad_norm": 0.12677320837974548, "learning_rate": 0.00016755822231262078, "loss": 1.1406, "step": 1601 }, { "epoch": 0.32567595039642205, "grad_norm": 0.12238804996013641, "learning_rate": 0.00016753788264008948, "loss": 0.9797, "step": 1602 }, { "epoch": 0.3258792437487294, "grad_norm": 0.13958637416362762, "learning_rate": 0.00016751754296755823, "loss": 1.1974, "step": 1603 }, { "epoch": 0.3260825371010368, "grad_norm": 0.12978553771972656, "learning_rate": 0.00016749720329502696, "loss": 0.9885, "step": 1604 }, { "epoch": 0.3262858304533442, "grad_norm": 0.12407691776752472, "learning_rate": 0.00016747686362249568, "loss": 1.1167, "step": 1605 }, { "epoch": 0.32648912380565154, "grad_norm": 0.13904057443141937, "learning_rate": 0.0001674565239499644, "loss": 1.1454, "step": 1606 }, { "epoch": 0.32669241715795894, "grad_norm": 0.1415109634399414, "learning_rate": 0.00016743618427743316, "loss": 1.2619, "step": 1607 }, { "epoch": 0.32689571051026634, "grad_norm": 0.11249466240406036, "learning_rate": 0.00016741584460490188, "loss": 0.9385, "step": 1608 }, { "epoch": 0.3270990038625737, "grad_norm": 0.11592496186494827, "learning_rate": 0.0001673955049323706, "loss": 0.9985, "step": 1609 }, { "epoch": 0.3273022972148811, "grad_norm": 0.11594976484775543, "learning_rate": 0.0001673751652598393, "loss": 0.9626, "step": 1610 }, { "epoch": 0.3275055905671885, "grad_norm": 0.12570694088935852, "learning_rate": 0.00016735482558730806, "loss": 0.9815, "step": 1611 }, { "epoch": 0.3277088839194958, "grad_norm": 0.12933030724525452, "learning_rate": 0.00016733448591477678, "loss": 1.0988, "step": 1612 }, { "epoch": 0.3279121772718032, "grad_norm": 0.14309881627559662, "learning_rate": 0.0001673141462422455, "loss": 1.1589, "step": 1613 }, { "epoch": 0.3281154706241106, "grad_norm": 0.14047057926654816, "learning_rate": 0.00016729380656971423, "loss": 1.2038, "step": 1614 }, { "epoch": 0.32831876397641796, "grad_norm": 0.1269095540046692, "learning_rate": 0.00016727346689718298, "loss": 0.9248, "step": 1615 }, { "epoch": 0.32852205732872536, "grad_norm": 0.14122694730758667, "learning_rate": 0.0001672531272246517, "loss": 1.1879, "step": 1616 }, { "epoch": 0.32872535068103276, "grad_norm": 0.133163183927536, "learning_rate": 0.00016723278755212043, "loss": 1.0718, "step": 1617 }, { "epoch": 0.3289286440333401, "grad_norm": 0.13817080855369568, "learning_rate": 0.00016721244787958915, "loss": 1.1519, "step": 1618 }, { "epoch": 0.3291319373856475, "grad_norm": 0.12117751687765121, "learning_rate": 0.00016719210820705788, "loss": 1.0104, "step": 1619 }, { "epoch": 0.32933523073795484, "grad_norm": 0.1269875317811966, "learning_rate": 0.0001671717685345266, "loss": 1.0644, "step": 1620 }, { "epoch": 0.32953852409026224, "grad_norm": 0.13901706039905548, "learning_rate": 0.00016715142886199533, "loss": 1.3002, "step": 1621 }, { "epoch": 0.32974181744256964, "grad_norm": 0.1284133940935135, "learning_rate": 0.00016713108918946405, "loss": 1.1447, "step": 1622 }, { "epoch": 0.329945110794877, "grad_norm": 0.13423141837120056, "learning_rate": 0.0001671107495169328, "loss": 1.2566, "step": 1623 }, { "epoch": 0.3301484041471844, "grad_norm": 0.12908455729484558, "learning_rate": 0.00016709040984440153, "loss": 1.1041, "step": 1624 }, { "epoch": 0.3303516974994918, "grad_norm": 0.1317860186100006, "learning_rate": 0.00016707007017187025, "loss": 1.1273, "step": 1625 }, { "epoch": 0.3305549908517991, "grad_norm": 0.1394864320755005, "learning_rate": 0.00016704973049933898, "loss": 1.0699, "step": 1626 }, { "epoch": 0.3307582842041065, "grad_norm": 0.1309152990579605, "learning_rate": 0.00016702939082680767, "loss": 0.9554, "step": 1627 }, { "epoch": 0.3309615775564139, "grad_norm": 0.11993929743766785, "learning_rate": 0.00016700905115427643, "loss": 0.9747, "step": 1628 }, { "epoch": 0.33116487090872126, "grad_norm": 0.11589863151311874, "learning_rate": 0.00016698871148174515, "loss": 0.9217, "step": 1629 }, { "epoch": 0.33136816426102866, "grad_norm": 0.12004578858613968, "learning_rate": 0.00016696837180921387, "loss": 0.9453, "step": 1630 }, { "epoch": 0.33157145761333606, "grad_norm": 0.1407518982887268, "learning_rate": 0.0001669480321366826, "loss": 1.1528, "step": 1631 }, { "epoch": 0.3317747509656434, "grad_norm": 0.1286914050579071, "learning_rate": 0.00016692769246415135, "loss": 1.038, "step": 1632 }, { "epoch": 0.3319780443179508, "grad_norm": 0.13304589688777924, "learning_rate": 0.00016690735279162007, "loss": 1.1159, "step": 1633 }, { "epoch": 0.3321813376702582, "grad_norm": 0.13245166838169098, "learning_rate": 0.0001668870131190888, "loss": 1.0368, "step": 1634 }, { "epoch": 0.33238463102256555, "grad_norm": 0.12715977430343628, "learning_rate": 0.0001668666734465575, "loss": 1.024, "step": 1635 }, { "epoch": 0.33258792437487295, "grad_norm": 0.13726472854614258, "learning_rate": 0.00016684633377402625, "loss": 1.0574, "step": 1636 }, { "epoch": 0.33279121772718034, "grad_norm": 0.10961025953292847, "learning_rate": 0.00016682599410149497, "loss": 0.9979, "step": 1637 }, { "epoch": 0.3329945110794877, "grad_norm": 0.13879232108592987, "learning_rate": 0.0001668056544289637, "loss": 1.2669, "step": 1638 }, { "epoch": 0.3331978044317951, "grad_norm": 0.12887312471866608, "learning_rate": 0.00016678531475643242, "loss": 1.1217, "step": 1639 }, { "epoch": 0.3334010977841025, "grad_norm": 0.1309410184621811, "learning_rate": 0.00016676497508390117, "loss": 1.1331, "step": 1640 }, { "epoch": 0.33360439113640983, "grad_norm": 0.12577351927757263, "learning_rate": 0.0001667446354113699, "loss": 0.9315, "step": 1641 }, { "epoch": 0.3338076844887172, "grad_norm": 0.1263495236635208, "learning_rate": 0.00016672429573883862, "loss": 0.9782, "step": 1642 }, { "epoch": 0.3340109778410246, "grad_norm": 0.12090608477592468, "learning_rate": 0.00016670395606630732, "loss": 0.9817, "step": 1643 }, { "epoch": 0.33421427119333197, "grad_norm": 0.1330811232328415, "learning_rate": 0.00016668361639377607, "loss": 1.0682, "step": 1644 }, { "epoch": 0.33441756454563937, "grad_norm": 0.13265149295330048, "learning_rate": 0.0001666632767212448, "loss": 0.8999, "step": 1645 }, { "epoch": 0.3346208578979467, "grad_norm": 0.12737800180912018, "learning_rate": 0.00016664293704871352, "loss": 1.1103, "step": 1646 }, { "epoch": 0.3348241512502541, "grad_norm": 0.13904881477355957, "learning_rate": 0.00016662259737618224, "loss": 1.1776, "step": 1647 }, { "epoch": 0.3350274446025615, "grad_norm": 0.13159041106700897, "learning_rate": 0.000166602257703651, "loss": 1.0343, "step": 1648 }, { "epoch": 0.33523073795486885, "grad_norm": 0.12564794719219208, "learning_rate": 0.00016658191803111972, "loss": 0.9786, "step": 1649 }, { "epoch": 0.33543403130717625, "grad_norm": 0.1561056673526764, "learning_rate": 0.00016656157835858844, "loss": 1.1937, "step": 1650 }, { "epoch": 0.33563732465948365, "grad_norm": 0.13286349177360535, "learning_rate": 0.00016654123868605714, "loss": 1.0153, "step": 1651 }, { "epoch": 0.335840618011791, "grad_norm": 0.12319796532392502, "learning_rate": 0.0001665208990135259, "loss": 1.0423, "step": 1652 }, { "epoch": 0.3360439113640984, "grad_norm": 0.13758210837841034, "learning_rate": 0.00016650055934099462, "loss": 1.0618, "step": 1653 }, { "epoch": 0.3362472047164058, "grad_norm": 0.11521997302770615, "learning_rate": 0.00016648021966846334, "loss": 0.9787, "step": 1654 }, { "epoch": 0.33645049806871313, "grad_norm": 0.1308450698852539, "learning_rate": 0.00016645987999593207, "loss": 1.1284, "step": 1655 }, { "epoch": 0.33665379142102053, "grad_norm": 0.13632404804229736, "learning_rate": 0.00016643954032340082, "loss": 1.1278, "step": 1656 }, { "epoch": 0.33685708477332793, "grad_norm": 0.12073387950658798, "learning_rate": 0.00016641920065086954, "loss": 1.0399, "step": 1657 }, { "epoch": 0.3370603781256353, "grad_norm": 0.12028390169143677, "learning_rate": 0.00016639886097833827, "loss": 1.0384, "step": 1658 }, { "epoch": 0.3372636714779427, "grad_norm": 0.12499553710222244, "learning_rate": 0.00016637852130580696, "loss": 1.0769, "step": 1659 }, { "epoch": 0.33746696483025007, "grad_norm": 0.16057424247264862, "learning_rate": 0.00016635818163327572, "loss": 1.3772, "step": 1660 }, { "epoch": 0.3376702581825574, "grad_norm": 0.12566526234149933, "learning_rate": 0.00016633784196074444, "loss": 1.1722, "step": 1661 }, { "epoch": 0.3378735515348648, "grad_norm": 0.11908633261919022, "learning_rate": 0.00016631750228821317, "loss": 1.0332, "step": 1662 }, { "epoch": 0.3380768448871722, "grad_norm": 0.14457720518112183, "learning_rate": 0.0001662971626156819, "loss": 1.187, "step": 1663 }, { "epoch": 0.33828013823947956, "grad_norm": 0.12620577216148376, "learning_rate": 0.00016627682294315064, "loss": 1.2064, "step": 1664 }, { "epoch": 0.33848343159178695, "grad_norm": 0.1155720204114914, "learning_rate": 0.00016625648327061937, "loss": 0.936, "step": 1665 }, { "epoch": 0.33868672494409435, "grad_norm": 0.12141234427690506, "learning_rate": 0.0001662361435980881, "loss": 1.0185, "step": 1666 }, { "epoch": 0.3388900182964017, "grad_norm": 0.11690623313188553, "learning_rate": 0.0001662158039255568, "loss": 0.9533, "step": 1667 }, { "epoch": 0.3390933116487091, "grad_norm": 0.127701997756958, "learning_rate": 0.0001661954642530255, "loss": 1.0834, "step": 1668 }, { "epoch": 0.3392966050010165, "grad_norm": 0.12167434394359589, "learning_rate": 0.00016617512458049426, "loss": 0.9166, "step": 1669 }, { "epoch": 0.33949989835332384, "grad_norm": 0.1415378600358963, "learning_rate": 0.000166154784907963, "loss": 1.0921, "step": 1670 }, { "epoch": 0.33970319170563124, "grad_norm": 0.13397271931171417, "learning_rate": 0.0001661344452354317, "loss": 1.1214, "step": 1671 }, { "epoch": 0.3399064850579386, "grad_norm": 0.1336379051208496, "learning_rate": 0.00016611410556290044, "loss": 1.1034, "step": 1672 }, { "epoch": 0.340109778410246, "grad_norm": 0.1404540240764618, "learning_rate": 0.0001660937658903692, "loss": 1.1435, "step": 1673 }, { "epoch": 0.3403130717625534, "grad_norm": 0.10813318192958832, "learning_rate": 0.0001660734262178379, "loss": 0.8935, "step": 1674 }, { "epoch": 0.3405163651148607, "grad_norm": 0.1491374522447586, "learning_rate": 0.00016605308654530664, "loss": 1.1378, "step": 1675 }, { "epoch": 0.3407196584671681, "grad_norm": 0.12213015556335449, "learning_rate": 0.00016603274687277534, "loss": 1.1236, "step": 1676 }, { "epoch": 0.3409229518194755, "grad_norm": 0.12762251496315002, "learning_rate": 0.0001660124072002441, "loss": 1.1892, "step": 1677 }, { "epoch": 0.34112624517178286, "grad_norm": 0.14703615009784698, "learning_rate": 0.0001659920675277128, "loss": 1.1918, "step": 1678 }, { "epoch": 0.34132953852409026, "grad_norm": 0.13121256232261658, "learning_rate": 0.00016597172785518154, "loss": 1.084, "step": 1679 }, { "epoch": 0.34153283187639766, "grad_norm": 0.15220001339912415, "learning_rate": 0.00016595138818265026, "loss": 1.3663, "step": 1680 }, { "epoch": 0.341736125228705, "grad_norm": 0.1325935572385788, "learning_rate": 0.000165931048510119, "loss": 1.0294, "step": 1681 }, { "epoch": 0.3419394185810124, "grad_norm": 0.10651461035013199, "learning_rate": 0.00016591070883758774, "loss": 0.8741, "step": 1682 }, { "epoch": 0.3421427119333198, "grad_norm": 0.1287640780210495, "learning_rate": 0.00016589036916505646, "loss": 1.087, "step": 1683 }, { "epoch": 0.34234600528562714, "grad_norm": 0.1286855936050415, "learning_rate": 0.00016587002949252516, "loss": 0.9785, "step": 1684 }, { "epoch": 0.34254929863793454, "grad_norm": 0.12485534697771072, "learning_rate": 0.0001658496898199939, "loss": 0.9792, "step": 1685 }, { "epoch": 0.34275259199024194, "grad_norm": 0.11311212927103043, "learning_rate": 0.00016582935014746263, "loss": 0.9772, "step": 1686 }, { "epoch": 0.3429558853425493, "grad_norm": 0.13208623230457306, "learning_rate": 0.00016580901047493136, "loss": 1.1733, "step": 1687 }, { "epoch": 0.3431591786948567, "grad_norm": 0.11595738679170609, "learning_rate": 0.00016578867080240008, "loss": 1.0305, "step": 1688 }, { "epoch": 0.3433624720471641, "grad_norm": 0.14235566556453705, "learning_rate": 0.00016576833112986883, "loss": 1.1916, "step": 1689 }, { "epoch": 0.3435657653994714, "grad_norm": 0.12602582573890686, "learning_rate": 0.00016574799145733756, "loss": 1.0003, "step": 1690 }, { "epoch": 0.3437690587517788, "grad_norm": 0.1448718011379242, "learning_rate": 0.00016572765178480628, "loss": 1.1201, "step": 1691 }, { "epoch": 0.3439723521040862, "grad_norm": 0.12688006460666656, "learning_rate": 0.00016570731211227498, "loss": 0.9846, "step": 1692 }, { "epoch": 0.34417564545639356, "grad_norm": 0.12715177237987518, "learning_rate": 0.00016568697243974373, "loss": 1.1093, "step": 1693 }, { "epoch": 0.34437893880870096, "grad_norm": 0.14105954766273499, "learning_rate": 0.00016566663276721246, "loss": 1.2714, "step": 1694 }, { "epoch": 0.34458223216100836, "grad_norm": 0.12558870017528534, "learning_rate": 0.00016564629309468118, "loss": 0.9724, "step": 1695 }, { "epoch": 0.3447855255133157, "grad_norm": 0.11886492371559143, "learning_rate": 0.0001656259534221499, "loss": 1.1076, "step": 1696 }, { "epoch": 0.3449888188656231, "grad_norm": 0.13078825175762177, "learning_rate": 0.00016560561374961866, "loss": 1.1457, "step": 1697 }, { "epoch": 0.34519211221793045, "grad_norm": 0.12331999093294144, "learning_rate": 0.00016558527407708738, "loss": 1.0065, "step": 1698 }, { "epoch": 0.34539540557023785, "grad_norm": 0.12109193205833435, "learning_rate": 0.0001655649344045561, "loss": 1.1099, "step": 1699 }, { "epoch": 0.34559869892254524, "grad_norm": 0.1176178902387619, "learning_rate": 0.0001655445947320248, "loss": 0.9692, "step": 1700 }, { "epoch": 0.3458019922748526, "grad_norm": 0.1067582294344902, "learning_rate": 0.00016552425505949355, "loss": 0.8452, "step": 1701 }, { "epoch": 0.34600528562716, "grad_norm": 0.11509659141302109, "learning_rate": 0.00016550391538696228, "loss": 0.9062, "step": 1702 }, { "epoch": 0.3462085789794674, "grad_norm": 0.12043119221925735, "learning_rate": 0.000165483575714431, "loss": 1.112, "step": 1703 }, { "epoch": 0.34641187233177473, "grad_norm": 0.12769265472888947, "learning_rate": 0.00016546323604189973, "loss": 1.1163, "step": 1704 }, { "epoch": 0.3466151656840821, "grad_norm": 0.13460403680801392, "learning_rate": 0.00016544289636936848, "loss": 1.2376, "step": 1705 }, { "epoch": 0.3468184590363895, "grad_norm": 0.11211954802274704, "learning_rate": 0.0001654225566968372, "loss": 0.8811, "step": 1706 }, { "epoch": 0.34702175238869687, "grad_norm": 0.1208495944738388, "learning_rate": 0.00016540221702430593, "loss": 0.9858, "step": 1707 }, { "epoch": 0.34722504574100427, "grad_norm": 0.13525189459323883, "learning_rate": 0.00016538187735177463, "loss": 1.0372, "step": 1708 }, { "epoch": 0.34742833909331167, "grad_norm": 0.11987826973199844, "learning_rate": 0.00016536153767924335, "loss": 1.0966, "step": 1709 }, { "epoch": 0.347631632445619, "grad_norm": 0.12538312375545502, "learning_rate": 0.0001653411980067121, "loss": 1.0718, "step": 1710 }, { "epoch": 0.3478349257979264, "grad_norm": 0.12674830853939056, "learning_rate": 0.00016532085833418083, "loss": 1.09, "step": 1711 }, { "epoch": 0.3480382191502338, "grad_norm": 0.11861549317836761, "learning_rate": 0.00016530051866164955, "loss": 0.9924, "step": 1712 }, { "epoch": 0.34824151250254115, "grad_norm": 0.12545670568943024, "learning_rate": 0.00016528017898911828, "loss": 1.1363, "step": 1713 }, { "epoch": 0.34844480585484855, "grad_norm": 0.12180805951356888, "learning_rate": 0.00016525983931658703, "loss": 0.9195, "step": 1714 }, { "epoch": 0.34864809920715595, "grad_norm": 0.14458616077899933, "learning_rate": 0.00016523949964405575, "loss": 1.0096, "step": 1715 }, { "epoch": 0.3488513925594633, "grad_norm": 0.13006000220775604, "learning_rate": 0.00016521915997152445, "loss": 1.1037, "step": 1716 }, { "epoch": 0.3490546859117707, "grad_norm": 0.11734442412853241, "learning_rate": 0.00016519882029899317, "loss": 0.9942, "step": 1717 }, { "epoch": 0.3492579792640781, "grad_norm": 0.10168986022472382, "learning_rate": 0.00016517848062646193, "loss": 0.8829, "step": 1718 }, { "epoch": 0.34946127261638543, "grad_norm": 0.13804613053798676, "learning_rate": 0.00016515814095393065, "loss": 1.021, "step": 1719 }, { "epoch": 0.34966456596869283, "grad_norm": 0.13653217256069183, "learning_rate": 0.00016513780128139937, "loss": 1.0905, "step": 1720 }, { "epoch": 0.34986785932100023, "grad_norm": 0.12326166778802872, "learning_rate": 0.0001651174616088681, "loss": 1.0843, "step": 1721 }, { "epoch": 0.3500711526733076, "grad_norm": 0.1265186071395874, "learning_rate": 0.00016509712193633685, "loss": 1.1012, "step": 1722 }, { "epoch": 0.35027444602561497, "grad_norm": 0.12159296125173569, "learning_rate": 0.00016507678226380557, "loss": 1.0491, "step": 1723 }, { "epoch": 0.3504777393779223, "grad_norm": 0.12199139595031738, "learning_rate": 0.00016505644259127427, "loss": 1.1179, "step": 1724 }, { "epoch": 0.3506810327302297, "grad_norm": 0.13243111968040466, "learning_rate": 0.000165036102918743, "loss": 1.0576, "step": 1725 }, { "epoch": 0.3508843260825371, "grad_norm": 0.1342582106590271, "learning_rate": 0.00016501576324621175, "loss": 1.1035, "step": 1726 }, { "epoch": 0.35108761943484446, "grad_norm": 0.15361081063747406, "learning_rate": 0.00016499542357368047, "loss": 1.1772, "step": 1727 }, { "epoch": 0.35129091278715185, "grad_norm": 0.1446637064218521, "learning_rate": 0.0001649750839011492, "loss": 1.0995, "step": 1728 }, { "epoch": 0.35149420613945925, "grad_norm": 0.12943841516971588, "learning_rate": 0.00016495474422861792, "loss": 1.0938, "step": 1729 }, { "epoch": 0.3516974994917666, "grad_norm": 0.11111871153116226, "learning_rate": 0.00016493440455608667, "loss": 0.8568, "step": 1730 }, { "epoch": 0.351900792844074, "grad_norm": 0.12905767560005188, "learning_rate": 0.0001649140648835554, "loss": 1.0485, "step": 1731 }, { "epoch": 0.3521040861963814, "grad_norm": 0.14140938222408295, "learning_rate": 0.00016489372521102412, "loss": 1.0996, "step": 1732 }, { "epoch": 0.35230737954868874, "grad_norm": 0.120769202709198, "learning_rate": 0.00016487338553849282, "loss": 1.0729, "step": 1733 }, { "epoch": 0.35251067290099614, "grad_norm": 0.1240081861615181, "learning_rate": 0.00016485304586596157, "loss": 1.1505, "step": 1734 }, { "epoch": 0.35271396625330353, "grad_norm": 0.128762885928154, "learning_rate": 0.0001648327061934303, "loss": 1.09, "step": 1735 }, { "epoch": 0.3529172596056109, "grad_norm": 0.13550743460655212, "learning_rate": 0.00016481236652089902, "loss": 1.1706, "step": 1736 }, { "epoch": 0.3531205529579183, "grad_norm": 0.13279037177562714, "learning_rate": 0.00016479202684836774, "loss": 1.2231, "step": 1737 }, { "epoch": 0.3533238463102257, "grad_norm": 0.11756809800863266, "learning_rate": 0.0001647716871758365, "loss": 1.0519, "step": 1738 }, { "epoch": 0.353527139662533, "grad_norm": 0.11612554639577866, "learning_rate": 0.00016475134750330522, "loss": 0.9618, "step": 1739 }, { "epoch": 0.3537304330148404, "grad_norm": 0.12984800338745117, "learning_rate": 0.00016473100783077394, "loss": 1.1865, "step": 1740 }, { "epoch": 0.3539337263671478, "grad_norm": 0.12334571778774261, "learning_rate": 0.00016471066815824264, "loss": 1.1693, "step": 1741 }, { "epoch": 0.35413701971945516, "grad_norm": 0.13324569165706635, "learning_rate": 0.0001646903284857114, "loss": 1.0449, "step": 1742 }, { "epoch": 0.35434031307176256, "grad_norm": 0.12119297683238983, "learning_rate": 0.00016466998881318012, "loss": 0.9664, "step": 1743 }, { "epoch": 0.35454360642406996, "grad_norm": 0.12139979749917984, "learning_rate": 0.00016464964914064884, "loss": 1.1399, "step": 1744 }, { "epoch": 0.3547468997763773, "grad_norm": 0.13679492473602295, "learning_rate": 0.00016462930946811757, "loss": 1.2815, "step": 1745 }, { "epoch": 0.3549501931286847, "grad_norm": 0.14377973973751068, "learning_rate": 0.00016460896979558632, "loss": 1.1322, "step": 1746 }, { "epoch": 0.3551534864809921, "grad_norm": 0.130259171128273, "learning_rate": 0.00016458863012305504, "loss": 1.0832, "step": 1747 }, { "epoch": 0.35535677983329944, "grad_norm": 0.14110639691352844, "learning_rate": 0.00016456829045052377, "loss": 1.0277, "step": 1748 }, { "epoch": 0.35556007318560684, "grad_norm": 0.1513645052909851, "learning_rate": 0.00016454795077799246, "loss": 1.1862, "step": 1749 }, { "epoch": 0.3557633665379142, "grad_norm": 0.1150139644742012, "learning_rate": 0.0001645276111054612, "loss": 1.1027, "step": 1750 }, { "epoch": 0.3559666598902216, "grad_norm": 0.11318166553974152, "learning_rate": 0.00016450727143292994, "loss": 1.0458, "step": 1751 }, { "epoch": 0.356169953242529, "grad_norm": 0.12470010668039322, "learning_rate": 0.00016448693176039867, "loss": 1.0969, "step": 1752 }, { "epoch": 0.3563732465948363, "grad_norm": 0.12987849116325378, "learning_rate": 0.0001644665920878674, "loss": 1.2132, "step": 1753 }, { "epoch": 0.3565765399471437, "grad_norm": 0.1229574978351593, "learning_rate": 0.00016444625241533611, "loss": 1.01, "step": 1754 }, { "epoch": 0.3567798332994511, "grad_norm": 0.12658950686454773, "learning_rate": 0.00016442591274280487, "loss": 1.1362, "step": 1755 }, { "epoch": 0.35698312665175846, "grad_norm": 0.13322791457176208, "learning_rate": 0.0001644055730702736, "loss": 1.1059, "step": 1756 }, { "epoch": 0.35718642000406586, "grad_norm": 0.12812237441539764, "learning_rate": 0.0001643852333977423, "loss": 0.9566, "step": 1757 }, { "epoch": 0.35738971335637326, "grad_norm": 0.12379775196313858, "learning_rate": 0.000164364893725211, "loss": 1.0166, "step": 1758 }, { "epoch": 0.3575930067086806, "grad_norm": 0.12064617872238159, "learning_rate": 0.00016434455405267976, "loss": 1.001, "step": 1759 }, { "epoch": 0.357796300060988, "grad_norm": 0.12102466076612473, "learning_rate": 0.0001643242143801485, "loss": 1.1048, "step": 1760 }, { "epoch": 0.3579995934132954, "grad_norm": 0.1230425089597702, "learning_rate": 0.0001643038747076172, "loss": 0.962, "step": 1761 }, { "epoch": 0.35820288676560275, "grad_norm": 0.12632609903812408, "learning_rate": 0.00016428353503508594, "loss": 1.1316, "step": 1762 }, { "epoch": 0.35840618011791014, "grad_norm": 0.1422523409128189, "learning_rate": 0.0001642631953625547, "loss": 1.2044, "step": 1763 }, { "epoch": 0.35860947347021754, "grad_norm": 0.1147986575961113, "learning_rate": 0.0001642428556900234, "loss": 0.9114, "step": 1764 }, { "epoch": 0.3588127668225249, "grad_norm": 0.1134926900267601, "learning_rate": 0.0001642225160174921, "loss": 1.066, "step": 1765 }, { "epoch": 0.3590160601748323, "grad_norm": 0.10886301100254059, "learning_rate": 0.00016420217634496083, "loss": 0.9999, "step": 1766 }, { "epoch": 0.3592193535271397, "grad_norm": 0.12393435835838318, "learning_rate": 0.00016418183667242959, "loss": 1.0631, "step": 1767 }, { "epoch": 0.359422646879447, "grad_norm": 0.1252308487892151, "learning_rate": 0.0001641614969998983, "loss": 1.088, "step": 1768 }, { "epoch": 0.3596259402317544, "grad_norm": 0.13078045845031738, "learning_rate": 0.00016414115732736704, "loss": 1.0634, "step": 1769 }, { "epoch": 0.3598292335840618, "grad_norm": 0.12720254063606262, "learning_rate": 0.00016412081765483576, "loss": 1.1295, "step": 1770 }, { "epoch": 0.36003252693636917, "grad_norm": 0.12251488864421844, "learning_rate": 0.0001641004779823045, "loss": 0.9843, "step": 1771 }, { "epoch": 0.36023582028867657, "grad_norm": 0.12935830652713776, "learning_rate": 0.00016408013830977324, "loss": 1.0496, "step": 1772 }, { "epoch": 0.36043911364098397, "grad_norm": 0.12900424003601074, "learning_rate": 0.00016405979863724193, "loss": 0.9895, "step": 1773 }, { "epoch": 0.3606424069932913, "grad_norm": 0.14351366460323334, "learning_rate": 0.00016403945896471066, "loss": 1.2852, "step": 1774 }, { "epoch": 0.3608457003455987, "grad_norm": 0.12761393189430237, "learning_rate": 0.0001640191192921794, "loss": 1.086, "step": 1775 }, { "epoch": 0.36104899369790605, "grad_norm": 0.1086045354604721, "learning_rate": 0.00016399877961964813, "loss": 0.8948, "step": 1776 }, { "epoch": 0.36125228705021345, "grad_norm": 0.11502155661582947, "learning_rate": 0.00016397843994711686, "loss": 0.8168, "step": 1777 }, { "epoch": 0.36145558040252085, "grad_norm": 0.12591351568698883, "learning_rate": 0.00016395810027458558, "loss": 0.9193, "step": 1778 }, { "epoch": 0.3616588737548282, "grad_norm": 0.1310427188873291, "learning_rate": 0.00016393776060205433, "loss": 0.8922, "step": 1779 }, { "epoch": 0.3618621671071356, "grad_norm": 0.12844674289226532, "learning_rate": 0.00016391742092952306, "loss": 1.213, "step": 1780 }, { "epoch": 0.362065460459443, "grad_norm": 0.12577317655086517, "learning_rate": 0.00016389708125699176, "loss": 0.8967, "step": 1781 }, { "epoch": 0.36226875381175033, "grad_norm": 0.12694710493087769, "learning_rate": 0.00016387674158446048, "loss": 1.0951, "step": 1782 }, { "epoch": 0.36247204716405773, "grad_norm": 0.14658670127391815, "learning_rate": 0.00016385640191192923, "loss": 1.2866, "step": 1783 }, { "epoch": 0.36267534051636513, "grad_norm": 0.11917047947645187, "learning_rate": 0.00016383606223939796, "loss": 1.1441, "step": 1784 }, { "epoch": 0.3628786338686725, "grad_norm": 0.11192582547664642, "learning_rate": 0.00016381572256686668, "loss": 1.0195, "step": 1785 }, { "epoch": 0.36308192722097987, "grad_norm": 0.12563778460025787, "learning_rate": 0.0001637953828943354, "loss": 1.0646, "step": 1786 }, { "epoch": 0.36328522057328727, "grad_norm": 0.11741344630718231, "learning_rate": 0.00016377504322180416, "loss": 1.0123, "step": 1787 }, { "epoch": 0.3634885139255946, "grad_norm": 0.12519432604312897, "learning_rate": 0.00016375470354927288, "loss": 0.9654, "step": 1788 }, { "epoch": 0.363691807277902, "grad_norm": 0.13249295949935913, "learning_rate": 0.0001637343638767416, "loss": 1.1636, "step": 1789 }, { "epoch": 0.3638951006302094, "grad_norm": 0.14184780418872833, "learning_rate": 0.0001637140242042103, "loss": 1.2066, "step": 1790 }, { "epoch": 0.36409839398251675, "grad_norm": 0.13372722268104553, "learning_rate": 0.00016369368453167903, "loss": 1.0466, "step": 1791 }, { "epoch": 0.36430168733482415, "grad_norm": 0.14696893095970154, "learning_rate": 0.00016367334485914778, "loss": 1.0983, "step": 1792 }, { "epoch": 0.36450498068713155, "grad_norm": 0.1386573314666748, "learning_rate": 0.0001636530051866165, "loss": 1.1763, "step": 1793 }, { "epoch": 0.3647082740394389, "grad_norm": 0.1271977722644806, "learning_rate": 0.00016363266551408523, "loss": 1.0824, "step": 1794 }, { "epoch": 0.3649115673917463, "grad_norm": 0.13254235684871674, "learning_rate": 0.00016361232584155395, "loss": 0.9628, "step": 1795 }, { "epoch": 0.3651148607440537, "grad_norm": 0.1489454209804535, "learning_rate": 0.0001635919861690227, "loss": 1.256, "step": 1796 }, { "epoch": 0.36531815409636104, "grad_norm": 0.11988960951566696, "learning_rate": 0.00016357164649649143, "loss": 1.0667, "step": 1797 }, { "epoch": 0.36552144744866844, "grad_norm": 0.11505492776632309, "learning_rate": 0.00016355130682396013, "loss": 1.037, "step": 1798 }, { "epoch": 0.36572474080097583, "grad_norm": 0.1133279800415039, "learning_rate": 0.00016353096715142885, "loss": 1.0006, "step": 1799 }, { "epoch": 0.3659280341532832, "grad_norm": 0.14962686598300934, "learning_rate": 0.0001635106274788976, "loss": 1.3546, "step": 1800 }, { "epoch": 0.3661313275055906, "grad_norm": 0.13253025710582733, "learning_rate": 0.00016349028780636633, "loss": 1.1064, "step": 1801 }, { "epoch": 0.3663346208578979, "grad_norm": 0.11647074669599533, "learning_rate": 0.00016346994813383505, "loss": 0.9201, "step": 1802 }, { "epoch": 0.3665379142102053, "grad_norm": 0.12080147862434387, "learning_rate": 0.00016344960846130378, "loss": 1.0346, "step": 1803 }, { "epoch": 0.3667412075625127, "grad_norm": 0.12051571905612946, "learning_rate": 0.00016342926878877253, "loss": 1.0125, "step": 1804 }, { "epoch": 0.36694450091482006, "grad_norm": 0.11931899935007095, "learning_rate": 0.00016340892911624125, "loss": 1.0123, "step": 1805 }, { "epoch": 0.36714779426712746, "grad_norm": 0.12983456254005432, "learning_rate": 0.00016338858944370995, "loss": 0.9814, "step": 1806 }, { "epoch": 0.36735108761943486, "grad_norm": 0.14519883692264557, "learning_rate": 0.00016336824977117867, "loss": 1.1524, "step": 1807 }, { "epoch": 0.3675543809717422, "grad_norm": 0.11531595140695572, "learning_rate": 0.00016334791009864742, "loss": 0.9979, "step": 1808 }, { "epoch": 0.3677576743240496, "grad_norm": 0.13013306260108948, "learning_rate": 0.00016332757042611615, "loss": 0.9104, "step": 1809 }, { "epoch": 0.367960967676357, "grad_norm": 0.12455404549837112, "learning_rate": 0.00016330723075358487, "loss": 1.1263, "step": 1810 }, { "epoch": 0.36816426102866434, "grad_norm": 0.1386694461107254, "learning_rate": 0.0001632868910810536, "loss": 1.1143, "step": 1811 }, { "epoch": 0.36836755438097174, "grad_norm": 0.12970969080924988, "learning_rate": 0.00016326655140852235, "loss": 0.9942, "step": 1812 }, { "epoch": 0.36857084773327914, "grad_norm": 0.13323652744293213, "learning_rate": 0.00016324621173599107, "loss": 1.1296, "step": 1813 }, { "epoch": 0.3687741410855865, "grad_norm": 0.12079238891601562, "learning_rate": 0.00016322587206345977, "loss": 0.9846, "step": 1814 }, { "epoch": 0.3689774344378939, "grad_norm": 0.11877255141735077, "learning_rate": 0.0001632055323909285, "loss": 1.0053, "step": 1815 }, { "epoch": 0.3691807277902013, "grad_norm": 0.12340681999921799, "learning_rate": 0.00016318519271839725, "loss": 0.9803, "step": 1816 }, { "epoch": 0.3693840211425086, "grad_norm": 0.11937633156776428, "learning_rate": 0.00016316485304586597, "loss": 1.0217, "step": 1817 }, { "epoch": 0.369587314494816, "grad_norm": 0.12306183576583862, "learning_rate": 0.0001631445133733347, "loss": 1.005, "step": 1818 }, { "epoch": 0.3697906078471234, "grad_norm": 0.1307355761528015, "learning_rate": 0.00016312417370080342, "loss": 1.0241, "step": 1819 }, { "epoch": 0.36999390119943076, "grad_norm": 0.12387688457965851, "learning_rate": 0.00016310383402827217, "loss": 1.0489, "step": 1820 }, { "epoch": 0.37019719455173816, "grad_norm": 0.13056257367134094, "learning_rate": 0.0001630834943557409, "loss": 1.0639, "step": 1821 }, { "epoch": 0.37040048790404556, "grad_norm": 0.1329268217086792, "learning_rate": 0.0001630631546832096, "loss": 1.0949, "step": 1822 }, { "epoch": 0.3706037812563529, "grad_norm": 0.1100173369050026, "learning_rate": 0.00016304281501067832, "loss": 0.9644, "step": 1823 }, { "epoch": 0.3708070746086603, "grad_norm": 0.13045302033424377, "learning_rate": 0.00016302247533814707, "loss": 1.0773, "step": 1824 }, { "epoch": 0.3710103679609677, "grad_norm": 0.12959614396095276, "learning_rate": 0.0001630021356656158, "loss": 1.0633, "step": 1825 }, { "epoch": 0.37121366131327505, "grad_norm": 0.1272924393415451, "learning_rate": 0.00016298179599308452, "loss": 0.9854, "step": 1826 }, { "epoch": 0.37141695466558244, "grad_norm": 0.13959196209907532, "learning_rate": 0.00016296145632055324, "loss": 1.1186, "step": 1827 }, { "epoch": 0.37162024801788984, "grad_norm": 0.120680071413517, "learning_rate": 0.000162941116648022, "loss": 0.9871, "step": 1828 }, { "epoch": 0.3718235413701972, "grad_norm": 0.11955247074365616, "learning_rate": 0.00016292077697549072, "loss": 1.0019, "step": 1829 }, { "epoch": 0.3720268347225046, "grad_norm": 0.13293783366680145, "learning_rate": 0.00016290043730295942, "loss": 1.1245, "step": 1830 }, { "epoch": 0.37223012807481193, "grad_norm": 0.13701294362545013, "learning_rate": 0.00016288009763042814, "loss": 0.9353, "step": 1831 }, { "epoch": 0.3724334214271193, "grad_norm": 0.12601931393146515, "learning_rate": 0.00016285975795789687, "loss": 1.0166, "step": 1832 }, { "epoch": 0.3726367147794267, "grad_norm": 0.12148377299308777, "learning_rate": 0.00016283941828536562, "loss": 1.0335, "step": 1833 }, { "epoch": 0.37284000813173407, "grad_norm": 0.1322852075099945, "learning_rate": 0.00016281907861283434, "loss": 0.9585, "step": 1834 }, { "epoch": 0.37304330148404147, "grad_norm": 0.13737133145332336, "learning_rate": 0.00016279873894030307, "loss": 1.0907, "step": 1835 }, { "epoch": 0.37324659483634887, "grad_norm": 0.12207762897014618, "learning_rate": 0.0001627783992677718, "loss": 1.1334, "step": 1836 }, { "epoch": 0.3734498881886562, "grad_norm": 0.13265001773834229, "learning_rate": 0.00016275805959524054, "loss": 1.1315, "step": 1837 }, { "epoch": 0.3736531815409636, "grad_norm": 0.1349770426750183, "learning_rate": 0.00016273771992270924, "loss": 1.0977, "step": 1838 }, { "epoch": 0.373856474893271, "grad_norm": 0.1335778385400772, "learning_rate": 0.00016271738025017796, "loss": 1.038, "step": 1839 }, { "epoch": 0.37405976824557835, "grad_norm": 0.13259084522724152, "learning_rate": 0.0001626970405776467, "loss": 1.0422, "step": 1840 }, { "epoch": 0.37426306159788575, "grad_norm": 0.13083282113075256, "learning_rate": 0.00016267670090511544, "loss": 1.0672, "step": 1841 }, { "epoch": 0.37446635495019315, "grad_norm": 0.12019068002700806, "learning_rate": 0.00016265636123258416, "loss": 0.8895, "step": 1842 }, { "epoch": 0.3746696483025005, "grad_norm": 0.12882567942142487, "learning_rate": 0.0001626360215600529, "loss": 1.071, "step": 1843 }, { "epoch": 0.3748729416548079, "grad_norm": 0.12891016900539398, "learning_rate": 0.00016261568188752161, "loss": 0.9553, "step": 1844 }, { "epoch": 0.3750762350071153, "grad_norm": 0.12769286334514618, "learning_rate": 0.00016259534221499037, "loss": 1.0616, "step": 1845 }, { "epoch": 0.37527952835942263, "grad_norm": 0.14067451655864716, "learning_rate": 0.00016257500254245906, "loss": 1.2434, "step": 1846 }, { "epoch": 0.37548282171173003, "grad_norm": 0.14272430539131165, "learning_rate": 0.0001625546628699278, "loss": 1.1185, "step": 1847 }, { "epoch": 0.37568611506403743, "grad_norm": 0.1343206763267517, "learning_rate": 0.0001625343231973965, "loss": 1.1766, "step": 1848 }, { "epoch": 0.3758894084163448, "grad_norm": 0.11800689250230789, "learning_rate": 0.00016251398352486526, "loss": 1.0509, "step": 1849 }, { "epoch": 0.37609270176865217, "grad_norm": 0.126071959733963, "learning_rate": 0.000162493643852334, "loss": 1.0304, "step": 1850 }, { "epoch": 0.37629599512095957, "grad_norm": 0.1479204297065735, "learning_rate": 0.0001624733041798027, "loss": 1.2349, "step": 1851 }, { "epoch": 0.3764992884732669, "grad_norm": 0.1391003429889679, "learning_rate": 0.00016245296450727144, "loss": 0.9928, "step": 1852 }, { "epoch": 0.3767025818255743, "grad_norm": 0.14163215458393097, "learning_rate": 0.0001624326248347402, "loss": 1.2806, "step": 1853 }, { "epoch": 0.3769058751778817, "grad_norm": 0.113652303814888, "learning_rate": 0.0001624122851622089, "loss": 0.9222, "step": 1854 }, { "epoch": 0.37710916853018905, "grad_norm": 0.13163653016090393, "learning_rate": 0.0001623919454896776, "loss": 0.9817, "step": 1855 }, { "epoch": 0.37731246188249645, "grad_norm": 0.12150076776742935, "learning_rate": 0.00016237160581714633, "loss": 1.0522, "step": 1856 }, { "epoch": 0.3775157552348038, "grad_norm": 0.12493383884429932, "learning_rate": 0.00016235126614461509, "loss": 1.1715, "step": 1857 }, { "epoch": 0.3777190485871112, "grad_norm": 0.12059423327445984, "learning_rate": 0.0001623309264720838, "loss": 1.0005, "step": 1858 }, { "epoch": 0.3779223419394186, "grad_norm": 0.13585112988948822, "learning_rate": 0.00016231058679955253, "loss": 1.0747, "step": 1859 }, { "epoch": 0.37812563529172594, "grad_norm": 0.13678506016731262, "learning_rate": 0.00016229024712702126, "loss": 1.2324, "step": 1860 }, { "epoch": 0.37832892864403334, "grad_norm": 0.13325399160385132, "learning_rate": 0.00016226990745449, "loss": 1.0719, "step": 1861 }, { "epoch": 0.37853222199634073, "grad_norm": 0.13250133395195007, "learning_rate": 0.00016224956778195874, "loss": 1.2482, "step": 1862 }, { "epoch": 0.3787355153486481, "grad_norm": 0.13788394629955292, "learning_rate": 0.00016222922810942743, "loss": 1.209, "step": 1863 }, { "epoch": 0.3789388087009555, "grad_norm": 0.13350476324558258, "learning_rate": 0.00016220888843689616, "loss": 1.133, "step": 1864 }, { "epoch": 0.3791421020532629, "grad_norm": 0.13107924163341522, "learning_rate": 0.0001621885487643649, "loss": 1.0644, "step": 1865 }, { "epoch": 0.3793453954055702, "grad_norm": 0.12230812013149261, "learning_rate": 0.00016216820909183363, "loss": 0.8282, "step": 1866 }, { "epoch": 0.3795486887578776, "grad_norm": 0.11637227237224579, "learning_rate": 0.00016214786941930236, "loss": 0.9427, "step": 1867 }, { "epoch": 0.379751982110185, "grad_norm": 0.12177541106939316, "learning_rate": 0.00016212752974677108, "loss": 1.1639, "step": 1868 }, { "epoch": 0.37995527546249236, "grad_norm": 0.149050772190094, "learning_rate": 0.00016210719007423983, "loss": 1.225, "step": 1869 }, { "epoch": 0.38015856881479976, "grad_norm": 0.12929648160934448, "learning_rate": 0.00016208685040170856, "loss": 0.9552, "step": 1870 }, { "epoch": 0.38036186216710716, "grad_norm": 0.13179321587085724, "learning_rate": 0.00016206651072917726, "loss": 1.1057, "step": 1871 }, { "epoch": 0.3805651555194145, "grad_norm": 0.15877602994441986, "learning_rate": 0.00016204617105664598, "loss": 1.2518, "step": 1872 }, { "epoch": 0.3807684488717219, "grad_norm": 0.12015218287706375, "learning_rate": 0.00016202583138411473, "loss": 0.9414, "step": 1873 }, { "epoch": 0.3809717422240293, "grad_norm": 0.11854024976491928, "learning_rate": 0.00016200549171158346, "loss": 0.8964, "step": 1874 }, { "epoch": 0.38117503557633664, "grad_norm": 0.1399824023246765, "learning_rate": 0.00016198515203905218, "loss": 1.1772, "step": 1875 }, { "epoch": 0.38137832892864404, "grad_norm": 0.13480430841445923, "learning_rate": 0.0001619648123665209, "loss": 1.1182, "step": 1876 }, { "epoch": 0.38158162228095144, "grad_norm": 0.13443569839000702, "learning_rate": 0.00016194447269398963, "loss": 1.1376, "step": 1877 }, { "epoch": 0.3817849156332588, "grad_norm": 0.12492494285106659, "learning_rate": 0.00016192413302145838, "loss": 1.0976, "step": 1878 }, { "epoch": 0.3819882089855662, "grad_norm": 0.1359935700893402, "learning_rate": 0.00016190379334892708, "loss": 1.2954, "step": 1879 }, { "epoch": 0.3821915023378736, "grad_norm": 0.11808416247367859, "learning_rate": 0.0001618834536763958, "loss": 0.9972, "step": 1880 }, { "epoch": 0.3823947956901809, "grad_norm": 0.10978496074676514, "learning_rate": 0.00016186311400386453, "loss": 1.0534, "step": 1881 }, { "epoch": 0.3825980890424883, "grad_norm": 0.13503976166248322, "learning_rate": 0.00016184277433133328, "loss": 1.1353, "step": 1882 }, { "epoch": 0.38280138239479566, "grad_norm": 0.1396964192390442, "learning_rate": 0.000161822434658802, "loss": 1.1936, "step": 1883 }, { "epoch": 0.38300467574710306, "grad_norm": 0.1180117055773735, "learning_rate": 0.00016180209498627073, "loss": 1.1091, "step": 1884 }, { "epoch": 0.38320796909941046, "grad_norm": 0.13506156206130981, "learning_rate": 0.00016178175531373945, "loss": 1.12, "step": 1885 }, { "epoch": 0.3834112624517178, "grad_norm": 0.12191524356603622, "learning_rate": 0.0001617614156412082, "loss": 1.1521, "step": 1886 }, { "epoch": 0.3836145558040252, "grad_norm": 0.13350510597229004, "learning_rate": 0.0001617410759686769, "loss": 1.1081, "step": 1887 }, { "epoch": 0.3838178491563326, "grad_norm": 0.1178809106349945, "learning_rate": 0.00016172073629614563, "loss": 1.0511, "step": 1888 }, { "epoch": 0.38402114250863995, "grad_norm": 0.13383956253528595, "learning_rate": 0.00016170039662361435, "loss": 1.0467, "step": 1889 }, { "epoch": 0.38422443586094734, "grad_norm": 0.12543490529060364, "learning_rate": 0.0001616800569510831, "loss": 1.038, "step": 1890 }, { "epoch": 0.38442772921325474, "grad_norm": 0.12253366410732269, "learning_rate": 0.00016165971727855183, "loss": 1.0494, "step": 1891 }, { "epoch": 0.3846310225655621, "grad_norm": 0.10339358448982239, "learning_rate": 0.00016163937760602055, "loss": 0.8586, "step": 1892 }, { "epoch": 0.3848343159178695, "grad_norm": 0.14473773539066315, "learning_rate": 0.00016161903793348927, "loss": 1.0651, "step": 1893 }, { "epoch": 0.3850376092701769, "grad_norm": 0.12131127715110779, "learning_rate": 0.00016159869826095803, "loss": 1.0674, "step": 1894 }, { "epoch": 0.3852409026224842, "grad_norm": 0.1297827512025833, "learning_rate": 0.00016157835858842672, "loss": 1.1171, "step": 1895 }, { "epoch": 0.3854441959747916, "grad_norm": 0.14175108075141907, "learning_rate": 0.00016155801891589545, "loss": 1.151, "step": 1896 }, { "epoch": 0.385647489327099, "grad_norm": 0.12038639187812805, "learning_rate": 0.00016153767924336417, "loss": 1.0481, "step": 1897 }, { "epoch": 0.38585078267940637, "grad_norm": 0.11626328527927399, "learning_rate": 0.00016151733957083292, "loss": 0.9606, "step": 1898 }, { "epoch": 0.38605407603171377, "grad_norm": 0.127833291888237, "learning_rate": 0.00016149699989830165, "loss": 1.0139, "step": 1899 }, { "epoch": 0.38625736938402117, "grad_norm": 0.13045917451381683, "learning_rate": 0.00016147666022577037, "loss": 1.035, "step": 1900 }, { "epoch": 0.3864606627363285, "grad_norm": 0.1294708251953125, "learning_rate": 0.0001614563205532391, "loss": 1.1797, "step": 1901 }, { "epoch": 0.3866639560886359, "grad_norm": 0.12301066517829895, "learning_rate": 0.00016143598088070785, "loss": 1.0155, "step": 1902 }, { "epoch": 0.3868672494409433, "grad_norm": 0.12555493414402008, "learning_rate": 0.00016141564120817655, "loss": 1.0664, "step": 1903 }, { "epoch": 0.38707054279325065, "grad_norm": 0.1144699901342392, "learning_rate": 0.00016139530153564527, "loss": 0.9436, "step": 1904 }, { "epoch": 0.38727383614555805, "grad_norm": 0.12643945217132568, "learning_rate": 0.000161374961863114, "loss": 1.0495, "step": 1905 }, { "epoch": 0.38747712949786545, "grad_norm": 0.13180217146873474, "learning_rate": 0.00016135462219058275, "loss": 1.0142, "step": 1906 }, { "epoch": 0.3876804228501728, "grad_norm": 0.12776418030261993, "learning_rate": 0.00016133428251805147, "loss": 1.0508, "step": 1907 }, { "epoch": 0.3878837162024802, "grad_norm": 0.12063184380531311, "learning_rate": 0.0001613139428455202, "loss": 1.0565, "step": 1908 }, { "epoch": 0.38808700955478753, "grad_norm": 0.12819765508174896, "learning_rate": 0.00016129360317298892, "loss": 1.0611, "step": 1909 }, { "epoch": 0.38829030290709493, "grad_norm": 0.12989814579486847, "learning_rate": 0.00016127326350045767, "loss": 0.9904, "step": 1910 }, { "epoch": 0.38849359625940233, "grad_norm": 0.11423414200544357, "learning_rate": 0.0001612529238279264, "loss": 0.9869, "step": 1911 }, { "epoch": 0.3886968896117097, "grad_norm": 0.14000189304351807, "learning_rate": 0.0001612325841553951, "loss": 1.0534, "step": 1912 }, { "epoch": 0.38890018296401707, "grad_norm": 0.13626928627490997, "learning_rate": 0.00016121224448286382, "loss": 1.2318, "step": 1913 }, { "epoch": 0.38910347631632447, "grad_norm": 0.15019413828849792, "learning_rate": 0.00016119190481033257, "loss": 1.2027, "step": 1914 }, { "epoch": 0.3893067696686318, "grad_norm": 0.1226695328950882, "learning_rate": 0.0001611715651378013, "loss": 0.9538, "step": 1915 }, { "epoch": 0.3895100630209392, "grad_norm": 0.12817354500293732, "learning_rate": 0.00016115122546527002, "loss": 1.0928, "step": 1916 }, { "epoch": 0.3897133563732466, "grad_norm": 0.12198452651500702, "learning_rate": 0.00016113088579273874, "loss": 0.9358, "step": 1917 }, { "epoch": 0.38991664972555395, "grad_norm": 0.11782688647508621, "learning_rate": 0.00016111054612020747, "loss": 0.8752, "step": 1918 }, { "epoch": 0.39011994307786135, "grad_norm": 0.10750327259302139, "learning_rate": 0.00016109020644767622, "loss": 0.897, "step": 1919 }, { "epoch": 0.39032323643016875, "grad_norm": 0.12854933738708496, "learning_rate": 0.00016106986677514492, "loss": 1.1868, "step": 1920 }, { "epoch": 0.3905265297824761, "grad_norm": 0.12874183058738708, "learning_rate": 0.00016104952710261364, "loss": 1.1017, "step": 1921 }, { "epoch": 0.3907298231347835, "grad_norm": 0.11504833400249481, "learning_rate": 0.00016102918743008237, "loss": 1.0192, "step": 1922 }, { "epoch": 0.3909331164870909, "grad_norm": 0.13493189215660095, "learning_rate": 0.00016100884775755112, "loss": 1.1141, "step": 1923 }, { "epoch": 0.39113640983939824, "grad_norm": 0.1267276406288147, "learning_rate": 0.00016098850808501984, "loss": 1.1147, "step": 1924 }, { "epoch": 0.39133970319170563, "grad_norm": 0.1132739931344986, "learning_rate": 0.00016096816841248857, "loss": 0.9775, "step": 1925 }, { "epoch": 0.39154299654401303, "grad_norm": 0.1274060159921646, "learning_rate": 0.0001609478287399573, "loss": 1.1937, "step": 1926 }, { "epoch": 0.3917462898963204, "grad_norm": 0.11706088483333588, "learning_rate": 0.00016092748906742604, "loss": 0.9774, "step": 1927 }, { "epoch": 0.3919495832486278, "grad_norm": 0.12287326157093048, "learning_rate": 0.00016090714939489474, "loss": 1.0348, "step": 1928 }, { "epoch": 0.3921528766009352, "grad_norm": 0.12456396222114563, "learning_rate": 0.00016088680972236346, "loss": 0.9097, "step": 1929 }, { "epoch": 0.3923561699532425, "grad_norm": 0.11585330218076706, "learning_rate": 0.0001608664700498322, "loss": 1.0469, "step": 1930 }, { "epoch": 0.3925594633055499, "grad_norm": 0.1536455601453781, "learning_rate": 0.00016084613037730094, "loss": 1.1005, "step": 1931 }, { "epoch": 0.3927627566578573, "grad_norm": 0.12221349030733109, "learning_rate": 0.00016082579070476966, "loss": 0.9721, "step": 1932 }, { "epoch": 0.39296605001016466, "grad_norm": 0.12621140480041504, "learning_rate": 0.0001608054510322384, "loss": 1.0791, "step": 1933 }, { "epoch": 0.39316934336247206, "grad_norm": 0.13487426936626434, "learning_rate": 0.0001607851113597071, "loss": 0.9798, "step": 1934 }, { "epoch": 0.3933726367147794, "grad_norm": 0.13655559718608856, "learning_rate": 0.00016076477168717586, "loss": 1.1909, "step": 1935 }, { "epoch": 0.3935759300670868, "grad_norm": 0.12217934429645538, "learning_rate": 0.00016074443201464456, "loss": 0.9678, "step": 1936 }, { "epoch": 0.3937792234193942, "grad_norm": 0.13106785714626312, "learning_rate": 0.0001607240923421133, "loss": 1.1034, "step": 1937 }, { "epoch": 0.39398251677170154, "grad_norm": 0.13911622762680054, "learning_rate": 0.000160703752669582, "loss": 1.136, "step": 1938 }, { "epoch": 0.39418581012400894, "grad_norm": 0.12952151894569397, "learning_rate": 0.00016068341299705076, "loss": 1.0292, "step": 1939 }, { "epoch": 0.39438910347631634, "grad_norm": 0.12866811454296112, "learning_rate": 0.0001606630733245195, "loss": 1.1424, "step": 1940 }, { "epoch": 0.3945923968286237, "grad_norm": 0.12720058858394623, "learning_rate": 0.0001606427336519882, "loss": 1.1688, "step": 1941 }, { "epoch": 0.3947956901809311, "grad_norm": 0.13742884993553162, "learning_rate": 0.00016062239397945694, "loss": 1.0547, "step": 1942 }, { "epoch": 0.3949989835332385, "grad_norm": 0.12812934815883636, "learning_rate": 0.0001606020543069257, "loss": 1.0, "step": 1943 }, { "epoch": 0.3952022768855458, "grad_norm": 0.1265181452035904, "learning_rate": 0.00016058171463439439, "loss": 1.2084, "step": 1944 }, { "epoch": 0.3954055702378532, "grad_norm": 0.11929038912057877, "learning_rate": 0.0001605613749618631, "loss": 1.0126, "step": 1945 }, { "epoch": 0.3956088635901606, "grad_norm": 0.128428652882576, "learning_rate": 0.00016054103528933183, "loss": 1.1729, "step": 1946 }, { "epoch": 0.39581215694246796, "grad_norm": 0.14802579581737518, "learning_rate": 0.00016052069561680059, "loss": 1.1445, "step": 1947 }, { "epoch": 0.39601545029477536, "grad_norm": 0.1259651482105255, "learning_rate": 0.0001605003559442693, "loss": 1.0906, "step": 1948 }, { "epoch": 0.39621874364708276, "grad_norm": 0.12911193072795868, "learning_rate": 0.00016048001627173803, "loss": 1.0483, "step": 1949 }, { "epoch": 0.3964220369993901, "grad_norm": 0.1306496411561966, "learning_rate": 0.00016045967659920676, "loss": 1.105, "step": 1950 }, { "epoch": 0.3966253303516975, "grad_norm": 0.12834158539772034, "learning_rate": 0.0001604393369266755, "loss": 1.0814, "step": 1951 }, { "epoch": 0.3968286237040049, "grad_norm": 0.11149043589830399, "learning_rate": 0.0001604189972541442, "loss": 0.9313, "step": 1952 }, { "epoch": 0.39703191705631224, "grad_norm": 0.13247650861740112, "learning_rate": 0.00016039865758161293, "loss": 1.0523, "step": 1953 }, { "epoch": 0.39723521040861964, "grad_norm": 0.13839392364025116, "learning_rate": 0.00016037831790908166, "loss": 1.1873, "step": 1954 }, { "epoch": 0.39743850376092704, "grad_norm": 0.13359107077121735, "learning_rate": 0.0001603579782365504, "loss": 1.1791, "step": 1955 }, { "epoch": 0.3976417971132344, "grad_norm": 0.13618066906929016, "learning_rate": 0.00016033763856401913, "loss": 1.1673, "step": 1956 }, { "epoch": 0.3978450904655418, "grad_norm": 0.13163338601589203, "learning_rate": 0.00016031729889148786, "loss": 1.1715, "step": 1957 }, { "epoch": 0.3980483838178492, "grad_norm": 0.12153584510087967, "learning_rate": 0.00016029695921895658, "loss": 1.0582, "step": 1958 }, { "epoch": 0.3982516771701565, "grad_norm": 0.13559706509113312, "learning_rate": 0.0001602766195464253, "loss": 1.1622, "step": 1959 }, { "epoch": 0.3984549705224639, "grad_norm": 0.12290627509355545, "learning_rate": 0.00016025627987389403, "loss": 0.9632, "step": 1960 }, { "epoch": 0.39865826387477127, "grad_norm": 0.1298772245645523, "learning_rate": 0.00016023594020136276, "loss": 1.1786, "step": 1961 }, { "epoch": 0.39886155722707867, "grad_norm": 0.13533517718315125, "learning_rate": 0.00016021560052883148, "loss": 0.9701, "step": 1962 }, { "epoch": 0.39906485057938607, "grad_norm": 0.12389865517616272, "learning_rate": 0.0001601952608563002, "loss": 0.9966, "step": 1963 }, { "epoch": 0.3992681439316934, "grad_norm": 0.11722499132156372, "learning_rate": 0.00016017492118376896, "loss": 0.7836, "step": 1964 }, { "epoch": 0.3994714372840008, "grad_norm": 0.142978236079216, "learning_rate": 0.00016015458151123768, "loss": 1.1237, "step": 1965 }, { "epoch": 0.3996747306363082, "grad_norm": 0.12385619431734085, "learning_rate": 0.0001601342418387064, "loss": 1.0396, "step": 1966 }, { "epoch": 0.39987802398861555, "grad_norm": 0.12524884939193726, "learning_rate": 0.00016011390216617513, "loss": 1.1242, "step": 1967 }, { "epoch": 0.40008131734092295, "grad_norm": 0.13346579670906067, "learning_rate": 0.00016009356249364388, "loss": 1.1125, "step": 1968 }, { "epoch": 0.40028461069323035, "grad_norm": 0.12904873490333557, "learning_rate": 0.00016007322282111258, "loss": 1.0506, "step": 1969 }, { "epoch": 0.4004879040455377, "grad_norm": 0.12711603939533234, "learning_rate": 0.0001600528831485813, "loss": 1.0445, "step": 1970 }, { "epoch": 0.4006911973978451, "grad_norm": 0.13451595604419708, "learning_rate": 0.00016003254347605003, "loss": 1.0552, "step": 1971 }, { "epoch": 0.4008944907501525, "grad_norm": 0.146467387676239, "learning_rate": 0.00016001220380351878, "loss": 1.0093, "step": 1972 }, { "epoch": 0.40109778410245983, "grad_norm": 0.12398801743984222, "learning_rate": 0.0001599918641309875, "loss": 0.9897, "step": 1973 }, { "epoch": 0.40130107745476723, "grad_norm": 0.10917028784751892, "learning_rate": 0.00015997152445845623, "loss": 0.8371, "step": 1974 }, { "epoch": 0.40150437080707463, "grad_norm": 0.14363138377666473, "learning_rate": 0.00015995118478592495, "loss": 1.0559, "step": 1975 }, { "epoch": 0.40170766415938197, "grad_norm": 0.12954387068748474, "learning_rate": 0.0001599308451133937, "loss": 1.1048, "step": 1976 }, { "epoch": 0.40191095751168937, "grad_norm": 0.12329546362161636, "learning_rate": 0.0001599105054408624, "loss": 1.0156, "step": 1977 }, { "epoch": 0.40211425086399677, "grad_norm": 0.11060404032468796, "learning_rate": 0.00015989016576833113, "loss": 0.8623, "step": 1978 }, { "epoch": 0.4023175442163041, "grad_norm": 0.1489768773317337, "learning_rate": 0.00015986982609579985, "loss": 1.0878, "step": 1979 }, { "epoch": 0.4025208375686115, "grad_norm": 0.12081994861364365, "learning_rate": 0.0001598494864232686, "loss": 0.9822, "step": 1980 }, { "epoch": 0.4027241309209189, "grad_norm": 0.14777058362960815, "learning_rate": 0.00015982914675073733, "loss": 1.2367, "step": 1981 }, { "epoch": 0.40292742427322625, "grad_norm": 0.12689609825611115, "learning_rate": 0.00015980880707820605, "loss": 1.1029, "step": 1982 }, { "epoch": 0.40313071762553365, "grad_norm": 0.12985149025917053, "learning_rate": 0.00015978846740567477, "loss": 1.1604, "step": 1983 }, { "epoch": 0.40333401097784105, "grad_norm": 0.11619044095277786, "learning_rate": 0.00015976812773314353, "loss": 0.9736, "step": 1984 }, { "epoch": 0.4035373043301484, "grad_norm": 0.13137032091617584, "learning_rate": 0.00015974778806061222, "loss": 1.0877, "step": 1985 }, { "epoch": 0.4037405976824558, "grad_norm": 0.13488256931304932, "learning_rate": 0.00015972744838808095, "loss": 1.2048, "step": 1986 }, { "epoch": 0.40394389103476314, "grad_norm": 0.2605299949645996, "learning_rate": 0.00015970710871554967, "loss": 0.9133, "step": 1987 }, { "epoch": 0.40414718438707053, "grad_norm": 0.13198648393154144, "learning_rate": 0.00015968676904301842, "loss": 0.9512, "step": 1988 }, { "epoch": 0.40435047773937793, "grad_norm": 0.1144537478685379, "learning_rate": 0.00015966642937048715, "loss": 1.0429, "step": 1989 }, { "epoch": 0.4045537710916853, "grad_norm": 0.1217435896396637, "learning_rate": 0.00015964608969795587, "loss": 1.0744, "step": 1990 }, { "epoch": 0.4047570644439927, "grad_norm": 0.12606003880500793, "learning_rate": 0.0001596257500254246, "loss": 1.0945, "step": 1991 }, { "epoch": 0.4049603577963001, "grad_norm": 0.13098153471946716, "learning_rate": 0.00015960541035289335, "loss": 1.2815, "step": 1992 }, { "epoch": 0.4051636511486074, "grad_norm": 0.11636700481176376, "learning_rate": 0.00015958507068036205, "loss": 0.9849, "step": 1993 }, { "epoch": 0.4053669445009148, "grad_norm": 0.12562847137451172, "learning_rate": 0.00015956473100783077, "loss": 1.0519, "step": 1994 }, { "epoch": 0.4055702378532222, "grad_norm": 0.13366295397281647, "learning_rate": 0.0001595443913352995, "loss": 1.2831, "step": 1995 }, { "epoch": 0.40577353120552956, "grad_norm": 0.12537652254104614, "learning_rate": 0.00015952405166276825, "loss": 0.9735, "step": 1996 }, { "epoch": 0.40597682455783696, "grad_norm": 0.12336364388465881, "learning_rate": 0.00015950371199023697, "loss": 1.0355, "step": 1997 }, { "epoch": 0.40618011791014436, "grad_norm": 0.13342751562595367, "learning_rate": 0.0001594833723177057, "loss": 1.1506, "step": 1998 }, { "epoch": 0.4063834112624517, "grad_norm": 0.13104167580604553, "learning_rate": 0.00015946303264517442, "loss": 1.2302, "step": 1999 }, { "epoch": 0.4065867046147591, "grad_norm": 0.1287468671798706, "learning_rate": 0.00015944269297264314, "loss": 1.1503, "step": 2000 }, { "epoch": 0.4067899979670665, "grad_norm": 0.1316487044095993, "learning_rate": 0.00015942235330011187, "loss": 1.0732, "step": 2001 }, { "epoch": 0.40699329131937384, "grad_norm": 0.12111165374517441, "learning_rate": 0.0001594020136275806, "loss": 0.9384, "step": 2002 }, { "epoch": 0.40719658467168124, "grad_norm": 0.11894603073596954, "learning_rate": 0.00015938167395504932, "loss": 1.0157, "step": 2003 }, { "epoch": 0.40739987802398864, "grad_norm": 0.13228829205036163, "learning_rate": 0.00015936133428251804, "loss": 1.11, "step": 2004 }, { "epoch": 0.407603171376296, "grad_norm": 0.12275683134794235, "learning_rate": 0.0001593409946099868, "loss": 1.0183, "step": 2005 }, { "epoch": 0.4078064647286034, "grad_norm": 0.1273687779903412, "learning_rate": 0.00015932065493745552, "loss": 1.024, "step": 2006 }, { "epoch": 0.4080097580809108, "grad_norm": 0.13763071596622467, "learning_rate": 0.00015930031526492424, "loss": 1.1465, "step": 2007 }, { "epoch": 0.4082130514332181, "grad_norm": 0.13428914546966553, "learning_rate": 0.00015927997559239297, "loss": 1.1119, "step": 2008 }, { "epoch": 0.4084163447855255, "grad_norm": 0.12992502748966217, "learning_rate": 0.0001592596359198617, "loss": 1.0341, "step": 2009 }, { "epoch": 0.4086196381378329, "grad_norm": 0.11928235739469528, "learning_rate": 0.00015923929624733042, "loss": 0.9454, "step": 2010 }, { "epoch": 0.40882293149014026, "grad_norm": 0.13093991577625275, "learning_rate": 0.00015921895657479914, "loss": 0.9528, "step": 2011 }, { "epoch": 0.40902622484244766, "grad_norm": 0.1252833604812622, "learning_rate": 0.00015919861690226787, "loss": 1.0849, "step": 2012 }, { "epoch": 0.409229518194755, "grad_norm": 0.13304093480110168, "learning_rate": 0.00015917827722973662, "loss": 1.1173, "step": 2013 }, { "epoch": 0.4094328115470624, "grad_norm": 0.11735294759273529, "learning_rate": 0.00015915793755720534, "loss": 0.9961, "step": 2014 }, { "epoch": 0.4096361048993698, "grad_norm": 0.133205845952034, "learning_rate": 0.00015913759788467407, "loss": 1.0818, "step": 2015 }, { "epoch": 0.40983939825167714, "grad_norm": 0.12949281930923462, "learning_rate": 0.0001591172582121428, "loss": 1.0151, "step": 2016 }, { "epoch": 0.41004269160398454, "grad_norm": 0.12819421291351318, "learning_rate": 0.00015909691853961151, "loss": 1.0449, "step": 2017 }, { "epoch": 0.41024598495629194, "grad_norm": 0.12694479525089264, "learning_rate": 0.00015907657886708024, "loss": 0.9756, "step": 2018 }, { "epoch": 0.4104492783085993, "grad_norm": 0.11785703897476196, "learning_rate": 0.00015905623919454896, "loss": 0.8295, "step": 2019 }, { "epoch": 0.4106525716609067, "grad_norm": 0.12432985007762909, "learning_rate": 0.0001590358995220177, "loss": 1.0375, "step": 2020 }, { "epoch": 0.4108558650132141, "grad_norm": 0.12006914615631104, "learning_rate": 0.00015901555984948644, "loss": 0.9818, "step": 2021 }, { "epoch": 0.4110591583655214, "grad_norm": 0.13180270791053772, "learning_rate": 0.00015899522017695516, "loss": 1.165, "step": 2022 }, { "epoch": 0.4112624517178288, "grad_norm": 0.13662603497505188, "learning_rate": 0.0001589748805044239, "loss": 1.0978, "step": 2023 }, { "epoch": 0.4114657450701362, "grad_norm": 0.137676402926445, "learning_rate": 0.0001589545408318926, "loss": 1.0695, "step": 2024 }, { "epoch": 0.41166903842244357, "grad_norm": 0.13281960785388947, "learning_rate": 0.00015893420115936136, "loss": 0.9717, "step": 2025 }, { "epoch": 0.41187233177475097, "grad_norm": 0.1159568652510643, "learning_rate": 0.00015891386148683006, "loss": 0.9863, "step": 2026 }, { "epoch": 0.41207562512705836, "grad_norm": 0.12235623598098755, "learning_rate": 0.00015889352181429879, "loss": 1.167, "step": 2027 }, { "epoch": 0.4122789184793657, "grad_norm": 0.11709940433502197, "learning_rate": 0.0001588731821417675, "loss": 1.0618, "step": 2028 }, { "epoch": 0.4124822118316731, "grad_norm": 0.13078409433364868, "learning_rate": 0.00015885284246923626, "loss": 1.1512, "step": 2029 }, { "epoch": 0.4126855051839805, "grad_norm": 0.12789343297481537, "learning_rate": 0.000158832502796705, "loss": 1.2586, "step": 2030 }, { "epoch": 0.41288879853628785, "grad_norm": 0.12031058967113495, "learning_rate": 0.0001588121631241737, "loss": 0.9543, "step": 2031 }, { "epoch": 0.41309209188859525, "grad_norm": 0.1303958296775818, "learning_rate": 0.00015879182345164244, "loss": 1.1823, "step": 2032 }, { "epoch": 0.41329538524090265, "grad_norm": 0.12915648519992828, "learning_rate": 0.0001587714837791112, "loss": 1.1199, "step": 2033 }, { "epoch": 0.41349867859321, "grad_norm": 0.13749873638153076, "learning_rate": 0.00015875114410657988, "loss": 1.1359, "step": 2034 }, { "epoch": 0.4137019719455174, "grad_norm": 0.120378777384758, "learning_rate": 0.0001587308044340486, "loss": 1.0311, "step": 2035 }, { "epoch": 0.4139052652978248, "grad_norm": 0.13083983957767487, "learning_rate": 0.00015871046476151733, "loss": 1.1491, "step": 2036 }, { "epoch": 0.41410855865013213, "grad_norm": 0.1264946162700653, "learning_rate": 0.00015869012508898609, "loss": 1.0575, "step": 2037 }, { "epoch": 0.41431185200243953, "grad_norm": 0.13813161849975586, "learning_rate": 0.0001586697854164548, "loss": 1.1596, "step": 2038 }, { "epoch": 0.4145151453547469, "grad_norm": 0.1298746019601822, "learning_rate": 0.00015864944574392353, "loss": 1.0107, "step": 2039 }, { "epoch": 0.41471843870705427, "grad_norm": 0.13159529864788055, "learning_rate": 0.00015862910607139226, "loss": 0.9777, "step": 2040 }, { "epoch": 0.41492173205936167, "grad_norm": 0.13329805433750153, "learning_rate": 0.00015860876639886098, "loss": 1.1749, "step": 2041 }, { "epoch": 0.415125025411669, "grad_norm": 0.12069873511791229, "learning_rate": 0.0001585884267263297, "loss": 1.1532, "step": 2042 }, { "epoch": 0.4153283187639764, "grad_norm": 0.11439201235771179, "learning_rate": 0.00015856808705379843, "loss": 0.8484, "step": 2043 }, { "epoch": 0.4155316121162838, "grad_norm": 0.14021088182926178, "learning_rate": 0.00015854774738126716, "loss": 0.9592, "step": 2044 }, { "epoch": 0.41573490546859115, "grad_norm": 0.12401128560304642, "learning_rate": 0.00015852740770873588, "loss": 0.9658, "step": 2045 }, { "epoch": 0.41593819882089855, "grad_norm": 0.1366535723209381, "learning_rate": 0.00015850706803620463, "loss": 1.1259, "step": 2046 }, { "epoch": 0.41614149217320595, "grad_norm": 0.11328650265932083, "learning_rate": 0.00015848672836367336, "loss": 0.9499, "step": 2047 }, { "epoch": 0.4163447855255133, "grad_norm": 0.13193942606449127, "learning_rate": 0.00015846638869114208, "loss": 1.0833, "step": 2048 }, { "epoch": 0.4165480788778207, "grad_norm": 0.1413910835981369, "learning_rate": 0.0001584460490186108, "loss": 1.0693, "step": 2049 }, { "epoch": 0.4167513722301281, "grad_norm": 0.11957409977912903, "learning_rate": 0.00015842570934607953, "loss": 1.0102, "step": 2050 }, { "epoch": 0.41695466558243544, "grad_norm": 0.12360769510269165, "learning_rate": 0.00015840536967354825, "loss": 1.1312, "step": 2051 }, { "epoch": 0.41715795893474283, "grad_norm": 0.12471318989992142, "learning_rate": 0.00015838503000101698, "loss": 1.1255, "step": 2052 }, { "epoch": 0.41736125228705023, "grad_norm": 0.129171222448349, "learning_rate": 0.0001583646903284857, "loss": 1.103, "step": 2053 }, { "epoch": 0.4175645456393576, "grad_norm": 0.14544697105884552, "learning_rate": 0.00015834435065595446, "loss": 1.0373, "step": 2054 }, { "epoch": 0.417767838991665, "grad_norm": 0.12571415305137634, "learning_rate": 0.00015832401098342318, "loss": 1.0675, "step": 2055 }, { "epoch": 0.4179711323439724, "grad_norm": 0.12805119156837463, "learning_rate": 0.0001583036713108919, "loss": 1.0713, "step": 2056 }, { "epoch": 0.4181744256962797, "grad_norm": 0.1386822909116745, "learning_rate": 0.00015828333163836063, "loss": 1.1084, "step": 2057 }, { "epoch": 0.4183777190485871, "grad_norm": 0.14066076278686523, "learning_rate": 0.00015826299196582935, "loss": 1.2577, "step": 2058 }, { "epoch": 0.4185810124008945, "grad_norm": 0.1231965720653534, "learning_rate": 0.00015824265229329808, "loss": 0.9684, "step": 2059 }, { "epoch": 0.41878430575320186, "grad_norm": 0.11889393627643585, "learning_rate": 0.0001582223126207668, "loss": 1.0393, "step": 2060 }, { "epoch": 0.41898759910550926, "grad_norm": 0.13274893164634705, "learning_rate": 0.00015820197294823553, "loss": 1.1742, "step": 2061 }, { "epoch": 0.41919089245781666, "grad_norm": 0.14034253358840942, "learning_rate": 0.00015818163327570428, "loss": 0.9673, "step": 2062 }, { "epoch": 0.419394185810124, "grad_norm": 0.14197202026844025, "learning_rate": 0.000158161293603173, "loss": 1.0931, "step": 2063 }, { "epoch": 0.4195974791624314, "grad_norm": 0.12458556890487671, "learning_rate": 0.00015814095393064173, "loss": 0.9713, "step": 2064 }, { "epoch": 0.41980077251473874, "grad_norm": 0.1311383694410324, "learning_rate": 0.00015812061425811045, "loss": 1.2641, "step": 2065 }, { "epoch": 0.42000406586704614, "grad_norm": 0.13218726217746735, "learning_rate": 0.00015810027458557918, "loss": 1.2132, "step": 2066 }, { "epoch": 0.42020735921935354, "grad_norm": 0.14619286358356476, "learning_rate": 0.0001580799349130479, "loss": 1.1031, "step": 2067 }, { "epoch": 0.4204106525716609, "grad_norm": 0.13700971007347107, "learning_rate": 0.00015805959524051662, "loss": 1.0364, "step": 2068 }, { "epoch": 0.4206139459239683, "grad_norm": 0.12797488272190094, "learning_rate": 0.00015803925556798535, "loss": 1.1025, "step": 2069 }, { "epoch": 0.4208172392762757, "grad_norm": 0.1373629868030548, "learning_rate": 0.0001580189158954541, "loss": 1.1924, "step": 2070 }, { "epoch": 0.421020532628583, "grad_norm": 0.11743365973234177, "learning_rate": 0.00015799857622292283, "loss": 0.9271, "step": 2071 }, { "epoch": 0.4212238259808904, "grad_norm": 0.12701068818569183, "learning_rate": 0.00015797823655039155, "loss": 1.0388, "step": 2072 }, { "epoch": 0.4214271193331978, "grad_norm": 0.14013634622097015, "learning_rate": 0.00015795789687786027, "loss": 1.1594, "step": 2073 }, { "epoch": 0.42163041268550516, "grad_norm": 0.12524859607219696, "learning_rate": 0.000157937557205329, "loss": 1.0191, "step": 2074 }, { "epoch": 0.42183370603781256, "grad_norm": 0.11082011461257935, "learning_rate": 0.00015791721753279772, "loss": 0.9211, "step": 2075 }, { "epoch": 0.42203699939011996, "grad_norm": 0.12579189240932465, "learning_rate": 0.00015789687786026645, "loss": 1.0269, "step": 2076 }, { "epoch": 0.4222402927424273, "grad_norm": 0.13665060698986053, "learning_rate": 0.00015787653818773517, "loss": 1.0662, "step": 2077 }, { "epoch": 0.4224435860947347, "grad_norm": 0.12775637209415436, "learning_rate": 0.00015785619851520392, "loss": 1.2206, "step": 2078 }, { "epoch": 0.4226468794470421, "grad_norm": 0.13883183896541595, "learning_rate": 0.00015783585884267265, "loss": 1.1484, "step": 2079 }, { "epoch": 0.42285017279934944, "grad_norm": 0.12899038195610046, "learning_rate": 0.00015781551917014137, "loss": 1.128, "step": 2080 }, { "epoch": 0.42305346615165684, "grad_norm": 0.12957079708576202, "learning_rate": 0.0001577951794976101, "loss": 1.1362, "step": 2081 }, { "epoch": 0.42325675950396424, "grad_norm": 0.12619063258171082, "learning_rate": 0.00015777483982507882, "loss": 1.1022, "step": 2082 }, { "epoch": 0.4234600528562716, "grad_norm": 0.12516093254089355, "learning_rate": 0.00015775450015254755, "loss": 1.1149, "step": 2083 }, { "epoch": 0.423663346208579, "grad_norm": 0.12650391459465027, "learning_rate": 0.00015773416048001627, "loss": 0.9535, "step": 2084 }, { "epoch": 0.4238666395608864, "grad_norm": 0.14746572077274323, "learning_rate": 0.000157713820807485, "loss": 1.222, "step": 2085 }, { "epoch": 0.4240699329131937, "grad_norm": 0.10933158546686172, "learning_rate": 0.00015769348113495372, "loss": 0.8832, "step": 2086 }, { "epoch": 0.4242732262655011, "grad_norm": 0.13668397068977356, "learning_rate": 0.00015767314146242247, "loss": 1.1808, "step": 2087 }, { "epoch": 0.4244765196178085, "grad_norm": 0.13031315803527832, "learning_rate": 0.0001576528017898912, "loss": 0.9752, "step": 2088 }, { "epoch": 0.42467981297011587, "grad_norm": 0.14403130114078522, "learning_rate": 0.00015763246211735992, "loss": 1.1372, "step": 2089 }, { "epoch": 0.42488310632242327, "grad_norm": 0.13902884721755981, "learning_rate": 0.00015761212244482864, "loss": 1.1492, "step": 2090 }, { "epoch": 0.42508639967473066, "grad_norm": 0.11546601355075836, "learning_rate": 0.00015759178277229737, "loss": 1.0091, "step": 2091 }, { "epoch": 0.425289693027038, "grad_norm": 0.12731419503688812, "learning_rate": 0.0001575714430997661, "loss": 1.0627, "step": 2092 }, { "epoch": 0.4254929863793454, "grad_norm": 0.11023043841123581, "learning_rate": 0.00015755110342723482, "loss": 0.8111, "step": 2093 }, { "epoch": 0.42569627973165275, "grad_norm": 0.14930586516857147, "learning_rate": 0.00015753076375470354, "loss": 1.206, "step": 2094 }, { "epoch": 0.42589957308396015, "grad_norm": 0.1300898790359497, "learning_rate": 0.0001575104240821723, "loss": 1.0915, "step": 2095 }, { "epoch": 0.42610286643626755, "grad_norm": 0.13914939761161804, "learning_rate": 0.00015749008440964102, "loss": 1.0512, "step": 2096 }, { "epoch": 0.4263061597885749, "grad_norm": 0.15199615061283112, "learning_rate": 0.00015746974473710974, "loss": 1.2611, "step": 2097 }, { "epoch": 0.4265094531408823, "grad_norm": 0.13422483205795288, "learning_rate": 0.00015744940506457847, "loss": 1.1213, "step": 2098 }, { "epoch": 0.4267127464931897, "grad_norm": 0.1285259872674942, "learning_rate": 0.0001574290653920472, "loss": 1.1144, "step": 2099 }, { "epoch": 0.42691603984549703, "grad_norm": 0.11812227219343185, "learning_rate": 0.00015740872571951592, "loss": 1.0033, "step": 2100 }, { "epoch": 0.42711933319780443, "grad_norm": 0.13837237656116486, "learning_rate": 0.00015738838604698464, "loss": 1.111, "step": 2101 }, { "epoch": 0.42732262655011183, "grad_norm": 0.1241428554058075, "learning_rate": 0.00015736804637445336, "loss": 1.0571, "step": 2102 }, { "epoch": 0.42752591990241917, "grad_norm": 0.1367318034172058, "learning_rate": 0.00015734770670192212, "loss": 1.1312, "step": 2103 }, { "epoch": 0.42772921325472657, "grad_norm": 0.12532354891300201, "learning_rate": 0.00015732736702939084, "loss": 1.0407, "step": 2104 }, { "epoch": 0.42793250660703397, "grad_norm": 0.12487448751926422, "learning_rate": 0.00015730702735685957, "loss": 1.072, "step": 2105 }, { "epoch": 0.4281357999593413, "grad_norm": 0.1195039302110672, "learning_rate": 0.0001572866876843283, "loss": 0.9995, "step": 2106 }, { "epoch": 0.4283390933116487, "grad_norm": 0.16366197168827057, "learning_rate": 0.00015726634801179701, "loss": 1.4101, "step": 2107 }, { "epoch": 0.4285423866639561, "grad_norm": 0.1324339210987091, "learning_rate": 0.00015724600833926574, "loss": 1.1455, "step": 2108 }, { "epoch": 0.42874568001626345, "grad_norm": 0.1581498682498932, "learning_rate": 0.00015722566866673446, "loss": 1.3092, "step": 2109 }, { "epoch": 0.42894897336857085, "grad_norm": 0.13514567911624908, "learning_rate": 0.0001572053289942032, "loss": 1.1998, "step": 2110 }, { "epoch": 0.42915226672087825, "grad_norm": 0.1304248571395874, "learning_rate": 0.00015718498932167194, "loss": 1.1488, "step": 2111 }, { "epoch": 0.4293555600731856, "grad_norm": 0.1261410117149353, "learning_rate": 0.00015716464964914066, "loss": 1.128, "step": 2112 }, { "epoch": 0.429558853425493, "grad_norm": 0.12761110067367554, "learning_rate": 0.0001571443099766094, "loss": 1.1816, "step": 2113 }, { "epoch": 0.4297621467778004, "grad_norm": 0.1307440996170044, "learning_rate": 0.0001571239703040781, "loss": 1.1362, "step": 2114 }, { "epoch": 0.42996544013010773, "grad_norm": 0.12130671739578247, "learning_rate": 0.00015710363063154684, "loss": 0.903, "step": 2115 }, { "epoch": 0.43016873348241513, "grad_norm": 0.12277641892433167, "learning_rate": 0.00015708329095901556, "loss": 0.9964, "step": 2116 }, { "epoch": 0.43037202683472253, "grad_norm": 0.13223405182361603, "learning_rate": 0.00015706295128648429, "loss": 0.9409, "step": 2117 }, { "epoch": 0.4305753201870299, "grad_norm": 0.12146423757076263, "learning_rate": 0.000157042611613953, "loss": 1.0116, "step": 2118 }, { "epoch": 0.4307786135393373, "grad_norm": 0.1319243311882019, "learning_rate": 0.00015702227194142176, "loss": 1.1354, "step": 2119 }, { "epoch": 0.4309819068916446, "grad_norm": 0.10945885628461838, "learning_rate": 0.0001570019322688905, "loss": 0.9314, "step": 2120 }, { "epoch": 0.431185200243952, "grad_norm": 0.12822148203849792, "learning_rate": 0.0001569815925963592, "loss": 0.9862, "step": 2121 }, { "epoch": 0.4313884935962594, "grad_norm": 0.13050609827041626, "learning_rate": 0.00015696125292382794, "loss": 1.1618, "step": 2122 }, { "epoch": 0.43159178694856676, "grad_norm": 0.1293252408504486, "learning_rate": 0.00015694091325129666, "loss": 1.0692, "step": 2123 }, { "epoch": 0.43179508030087416, "grad_norm": 0.15059755742549896, "learning_rate": 0.00015692057357876538, "loss": 0.9984, "step": 2124 }, { "epoch": 0.43199837365318156, "grad_norm": 0.13384853303432465, "learning_rate": 0.0001569002339062341, "loss": 1.1272, "step": 2125 }, { "epoch": 0.4322016670054889, "grad_norm": 0.1330154538154602, "learning_rate": 0.00015687989423370283, "loss": 0.958, "step": 2126 }, { "epoch": 0.4324049603577963, "grad_norm": 0.13418689370155334, "learning_rate": 0.00015685955456117156, "loss": 1.0939, "step": 2127 }, { "epoch": 0.4326082537101037, "grad_norm": 0.1272914856672287, "learning_rate": 0.0001568392148886403, "loss": 1.0839, "step": 2128 }, { "epoch": 0.43281154706241104, "grad_norm": 0.13827690482139587, "learning_rate": 0.00015681887521610903, "loss": 1.2304, "step": 2129 }, { "epoch": 0.43301484041471844, "grad_norm": 0.13577015697956085, "learning_rate": 0.00015679853554357776, "loss": 1.0749, "step": 2130 }, { "epoch": 0.43321813376702584, "grad_norm": 0.1302756816148758, "learning_rate": 0.00015677819587104648, "loss": 1.1381, "step": 2131 }, { "epoch": 0.4334214271193332, "grad_norm": 0.13045424222946167, "learning_rate": 0.0001567578561985152, "loss": 1.0538, "step": 2132 }, { "epoch": 0.4336247204716406, "grad_norm": 0.12351250648498535, "learning_rate": 0.00015673751652598393, "loss": 1.0061, "step": 2133 }, { "epoch": 0.433828013823948, "grad_norm": 0.1451653242111206, "learning_rate": 0.00015671717685345266, "loss": 1.1764, "step": 2134 }, { "epoch": 0.4340313071762553, "grad_norm": 0.12439122051000595, "learning_rate": 0.00015669683718092138, "loss": 0.9121, "step": 2135 }, { "epoch": 0.4342346005285627, "grad_norm": 0.1257990002632141, "learning_rate": 0.00015667649750839013, "loss": 1.0822, "step": 2136 }, { "epoch": 0.4344378938808701, "grad_norm": 0.13994207978248596, "learning_rate": 0.00015665615783585886, "loss": 1.2759, "step": 2137 }, { "epoch": 0.43464118723317746, "grad_norm": 0.14246414601802826, "learning_rate": 0.00015663581816332758, "loss": 1.2934, "step": 2138 }, { "epoch": 0.43484448058548486, "grad_norm": 0.1359516978263855, "learning_rate": 0.0001566154784907963, "loss": 1.1651, "step": 2139 }, { "epoch": 0.43504777393779226, "grad_norm": 0.12099796533584595, "learning_rate": 0.00015659513881826503, "loss": 0.9028, "step": 2140 }, { "epoch": 0.4352510672900996, "grad_norm": 0.12675108015537262, "learning_rate": 0.00015657479914573375, "loss": 1.026, "step": 2141 }, { "epoch": 0.435454360642407, "grad_norm": 0.13229331374168396, "learning_rate": 0.00015655445947320248, "loss": 1.0562, "step": 2142 }, { "epoch": 0.4356576539947144, "grad_norm": 0.12163117527961731, "learning_rate": 0.0001565341198006712, "loss": 1.1147, "step": 2143 }, { "epoch": 0.43586094734702174, "grad_norm": 0.13495270907878876, "learning_rate": 0.00015651378012813995, "loss": 1.0544, "step": 2144 }, { "epoch": 0.43606424069932914, "grad_norm": 0.11769222468137741, "learning_rate": 0.00015649344045560868, "loss": 1.0549, "step": 2145 }, { "epoch": 0.4362675340516365, "grad_norm": 0.14370734989643097, "learning_rate": 0.0001564731007830774, "loss": 1.1513, "step": 2146 }, { "epoch": 0.4364708274039439, "grad_norm": 0.1351398378610611, "learning_rate": 0.00015645276111054613, "loss": 1.0814, "step": 2147 }, { "epoch": 0.4366741207562513, "grad_norm": 0.13881100714206696, "learning_rate": 0.00015643242143801485, "loss": 1.1576, "step": 2148 }, { "epoch": 0.4368774141085586, "grad_norm": 0.11548882722854614, "learning_rate": 0.00015641208176548358, "loss": 1.0153, "step": 2149 }, { "epoch": 0.437080707460866, "grad_norm": 0.1512657254934311, "learning_rate": 0.0001563917420929523, "loss": 1.3081, "step": 2150 }, { "epoch": 0.4372840008131734, "grad_norm": 0.14576168358325958, "learning_rate": 0.00015637140242042103, "loss": 1.244, "step": 2151 }, { "epoch": 0.43748729416548077, "grad_norm": 0.14067471027374268, "learning_rate": 0.00015635106274788978, "loss": 1.023, "step": 2152 }, { "epoch": 0.43769058751778817, "grad_norm": 0.12753300368785858, "learning_rate": 0.0001563307230753585, "loss": 1.0292, "step": 2153 }, { "epoch": 0.43789388087009556, "grad_norm": 0.13246901333332062, "learning_rate": 0.00015631038340282723, "loss": 1.1607, "step": 2154 }, { "epoch": 0.4380971742224029, "grad_norm": 0.13709375262260437, "learning_rate": 0.00015629004373029595, "loss": 0.9848, "step": 2155 }, { "epoch": 0.4383004675747103, "grad_norm": 0.14666500687599182, "learning_rate": 0.00015626970405776468, "loss": 1.1349, "step": 2156 }, { "epoch": 0.4385037609270177, "grad_norm": 0.13992977142333984, "learning_rate": 0.0001562493643852334, "loss": 1.2692, "step": 2157 }, { "epoch": 0.43870705427932505, "grad_norm": 0.1444278359413147, "learning_rate": 0.00015622902471270212, "loss": 1.0354, "step": 2158 }, { "epoch": 0.43891034763163245, "grad_norm": 0.12587270140647888, "learning_rate": 0.00015620868504017085, "loss": 0.9783, "step": 2159 }, { "epoch": 0.43911364098393985, "grad_norm": 0.1311299055814743, "learning_rate": 0.0001561883453676396, "loss": 1.0541, "step": 2160 }, { "epoch": 0.4393169343362472, "grad_norm": 0.1246386170387268, "learning_rate": 0.00015616800569510832, "loss": 1.0384, "step": 2161 }, { "epoch": 0.4395202276885546, "grad_norm": 0.1286439597606659, "learning_rate": 0.00015614766602257705, "loss": 1.0909, "step": 2162 }, { "epoch": 0.439723521040862, "grad_norm": 0.12758703529834747, "learning_rate": 0.00015612732635004577, "loss": 1.0835, "step": 2163 }, { "epoch": 0.43992681439316933, "grad_norm": 0.1129162386059761, "learning_rate": 0.0001561069866775145, "loss": 0.9967, "step": 2164 }, { "epoch": 0.44013010774547673, "grad_norm": 0.12939536571502686, "learning_rate": 0.00015608664700498322, "loss": 1.133, "step": 2165 }, { "epoch": 0.4403334010977841, "grad_norm": 0.1393767148256302, "learning_rate": 0.00015606630733245195, "loss": 1.2379, "step": 2166 }, { "epoch": 0.44053669445009147, "grad_norm": 0.11744683235883713, "learning_rate": 0.00015604596765992067, "loss": 0.9686, "step": 2167 }, { "epoch": 0.44073998780239887, "grad_norm": 0.12241906672716141, "learning_rate": 0.0001560256279873894, "loss": 1.0188, "step": 2168 }, { "epoch": 0.44094328115470627, "grad_norm": 0.1316422075033188, "learning_rate": 0.00015600528831485815, "loss": 1.0292, "step": 2169 }, { "epoch": 0.4411465745070136, "grad_norm": 0.13489259779453278, "learning_rate": 0.00015598494864232687, "loss": 1.1332, "step": 2170 }, { "epoch": 0.441349867859321, "grad_norm": 0.13207697868347168, "learning_rate": 0.0001559646089697956, "loss": 1.0482, "step": 2171 }, { "epoch": 0.44155316121162835, "grad_norm": 0.13211561739444733, "learning_rate": 0.00015594426929726432, "loss": 1.064, "step": 2172 }, { "epoch": 0.44175645456393575, "grad_norm": 0.14821046590805054, "learning_rate": 0.00015592392962473305, "loss": 1.1058, "step": 2173 }, { "epoch": 0.44195974791624315, "grad_norm": 0.11314582824707031, "learning_rate": 0.00015590358995220177, "loss": 0.9351, "step": 2174 }, { "epoch": 0.4421630412685505, "grad_norm": 0.12001941353082657, "learning_rate": 0.0001558832502796705, "loss": 0.9875, "step": 2175 }, { "epoch": 0.4423663346208579, "grad_norm": 0.1433805674314499, "learning_rate": 0.00015586291060713922, "loss": 1.1552, "step": 2176 }, { "epoch": 0.4425696279731653, "grad_norm": 0.11265136301517487, "learning_rate": 0.00015584257093460797, "loss": 0.9431, "step": 2177 }, { "epoch": 0.44277292132547263, "grad_norm": 0.12973473966121674, "learning_rate": 0.0001558222312620767, "loss": 1.0591, "step": 2178 }, { "epoch": 0.44297621467778003, "grad_norm": 0.13749995827674866, "learning_rate": 0.00015580189158954542, "loss": 1.161, "step": 2179 }, { "epoch": 0.44317950803008743, "grad_norm": 0.12814348936080933, "learning_rate": 0.00015578155191701414, "loss": 1.1314, "step": 2180 }, { "epoch": 0.4433828013823948, "grad_norm": 0.1292744129896164, "learning_rate": 0.00015576121224448287, "loss": 0.9885, "step": 2181 }, { "epoch": 0.4435860947347022, "grad_norm": 0.11668647080659866, "learning_rate": 0.0001557408725719516, "loss": 0.9546, "step": 2182 }, { "epoch": 0.4437893880870096, "grad_norm": 0.13656672835350037, "learning_rate": 0.00015572053289942032, "loss": 1.1105, "step": 2183 }, { "epoch": 0.4439926814393169, "grad_norm": 0.13544489443302155, "learning_rate": 0.00015570019322688904, "loss": 1.0632, "step": 2184 }, { "epoch": 0.4441959747916243, "grad_norm": 0.14713092148303986, "learning_rate": 0.0001556798535543578, "loss": 1.0508, "step": 2185 }, { "epoch": 0.4443992681439317, "grad_norm": 0.1292864978313446, "learning_rate": 0.00015565951388182652, "loss": 0.8771, "step": 2186 }, { "epoch": 0.44460256149623906, "grad_norm": 0.12545311450958252, "learning_rate": 0.00015563917420929524, "loss": 1.1117, "step": 2187 }, { "epoch": 0.44480585484854646, "grad_norm": 0.1412542760372162, "learning_rate": 0.00015561883453676397, "loss": 1.3364, "step": 2188 }, { "epoch": 0.44500914820085385, "grad_norm": 0.11404701322317123, "learning_rate": 0.0001555984948642327, "loss": 0.9512, "step": 2189 }, { "epoch": 0.4452124415531612, "grad_norm": 0.11949559301137924, "learning_rate": 0.00015557815519170142, "loss": 1.0406, "step": 2190 }, { "epoch": 0.4454157349054686, "grad_norm": 0.10985735058784485, "learning_rate": 0.00015555781551917014, "loss": 0.9292, "step": 2191 }, { "epoch": 0.445619028257776, "grad_norm": 0.14047123491764069, "learning_rate": 0.00015553747584663886, "loss": 1.0895, "step": 2192 }, { "epoch": 0.44582232161008334, "grad_norm": 0.1520707756280899, "learning_rate": 0.00015551713617410762, "loss": 1.118, "step": 2193 }, { "epoch": 0.44602561496239074, "grad_norm": 0.13202513754367828, "learning_rate": 0.00015549679650157634, "loss": 1.0031, "step": 2194 }, { "epoch": 0.44622890831469814, "grad_norm": 0.13803257048130035, "learning_rate": 0.00015547645682904507, "loss": 1.106, "step": 2195 }, { "epoch": 0.4464322016670055, "grad_norm": 0.13089017570018768, "learning_rate": 0.0001554561171565138, "loss": 1.0706, "step": 2196 }, { "epoch": 0.4466354950193129, "grad_norm": 0.12646476924419403, "learning_rate": 0.00015543577748398251, "loss": 1.0548, "step": 2197 }, { "epoch": 0.4468387883716202, "grad_norm": 0.14208228886127472, "learning_rate": 0.00015541543781145124, "loss": 1.1952, "step": 2198 }, { "epoch": 0.4470420817239276, "grad_norm": 0.1471976786851883, "learning_rate": 0.00015539509813891996, "loss": 1.1987, "step": 2199 }, { "epoch": 0.447245375076235, "grad_norm": 0.11970525234937668, "learning_rate": 0.0001553747584663887, "loss": 0.9523, "step": 2200 }, { "epoch": 0.44744866842854236, "grad_norm": 0.1178225502371788, "learning_rate": 0.00015535441879385744, "loss": 0.9105, "step": 2201 }, { "epoch": 0.44765196178084976, "grad_norm": 0.11376915872097015, "learning_rate": 0.00015533407912132616, "loss": 0.9041, "step": 2202 }, { "epoch": 0.44785525513315716, "grad_norm": 0.12055668234825134, "learning_rate": 0.0001553137394487949, "loss": 0.9057, "step": 2203 }, { "epoch": 0.4480585484854645, "grad_norm": 0.1293669193983078, "learning_rate": 0.0001552933997762636, "loss": 1.047, "step": 2204 }, { "epoch": 0.4482618418377719, "grad_norm": 0.14985014498233795, "learning_rate": 0.00015527306010373234, "loss": 1.2782, "step": 2205 }, { "epoch": 0.4484651351900793, "grad_norm": 0.12716402113437653, "learning_rate": 0.00015525272043120106, "loss": 1.0629, "step": 2206 }, { "epoch": 0.44866842854238664, "grad_norm": 0.13868549466133118, "learning_rate": 0.00015523238075866979, "loss": 1.248, "step": 2207 }, { "epoch": 0.44887172189469404, "grad_norm": 0.12867020070552826, "learning_rate": 0.0001552120410861385, "loss": 1.0579, "step": 2208 }, { "epoch": 0.44907501524700144, "grad_norm": 0.14104703068733215, "learning_rate": 0.00015519170141360723, "loss": 1.2559, "step": 2209 }, { "epoch": 0.4492783085993088, "grad_norm": 0.13124023377895355, "learning_rate": 0.00015517136174107599, "loss": 0.9115, "step": 2210 }, { "epoch": 0.4494816019516162, "grad_norm": 0.1268378347158432, "learning_rate": 0.0001551510220685447, "loss": 1.0753, "step": 2211 }, { "epoch": 0.4496848953039236, "grad_norm": 0.12500889599323273, "learning_rate": 0.00015513068239601344, "loss": 1.1563, "step": 2212 }, { "epoch": 0.4498881886562309, "grad_norm": 0.15399597585201263, "learning_rate": 0.00015511034272348216, "loss": 1.3699, "step": 2213 }, { "epoch": 0.4500914820085383, "grad_norm": 0.13191649317741394, "learning_rate": 0.00015509000305095088, "loss": 1.0768, "step": 2214 }, { "epoch": 0.4502947753608457, "grad_norm": 0.13257142901420593, "learning_rate": 0.0001550696633784196, "loss": 1.2508, "step": 2215 }, { "epoch": 0.45049806871315307, "grad_norm": 0.1298341304063797, "learning_rate": 0.00015504932370588833, "loss": 1.0288, "step": 2216 }, { "epoch": 0.45070136206546046, "grad_norm": 0.12427882850170135, "learning_rate": 0.00015502898403335706, "loss": 1.0185, "step": 2217 }, { "epoch": 0.45090465541776786, "grad_norm": 0.13115955889225006, "learning_rate": 0.0001550086443608258, "loss": 1.0496, "step": 2218 }, { "epoch": 0.4511079487700752, "grad_norm": 0.12085919827222824, "learning_rate": 0.00015498830468829453, "loss": 1.0642, "step": 2219 }, { "epoch": 0.4513112421223826, "grad_norm": 0.14256790280342102, "learning_rate": 0.00015496796501576326, "loss": 1.199, "step": 2220 }, { "epoch": 0.45151453547469, "grad_norm": 0.14703063666820526, "learning_rate": 0.00015494762534323198, "loss": 1.232, "step": 2221 }, { "epoch": 0.45171782882699735, "grad_norm": 0.13513730466365814, "learning_rate": 0.0001549272856707007, "loss": 1.0598, "step": 2222 }, { "epoch": 0.45192112217930475, "grad_norm": 0.12173596769571304, "learning_rate": 0.00015490694599816943, "loss": 0.9541, "step": 2223 }, { "epoch": 0.4521244155316121, "grad_norm": 0.13201670348644257, "learning_rate": 0.00015488660632563816, "loss": 1.0898, "step": 2224 }, { "epoch": 0.4523277088839195, "grad_norm": 0.1306207776069641, "learning_rate": 0.00015486626665310688, "loss": 1.1851, "step": 2225 }, { "epoch": 0.4525310022362269, "grad_norm": 0.14152634143829346, "learning_rate": 0.00015484592698057563, "loss": 1.1348, "step": 2226 }, { "epoch": 0.45273429558853423, "grad_norm": 0.12412508577108383, "learning_rate": 0.00015482558730804436, "loss": 0.943, "step": 2227 }, { "epoch": 0.45293758894084163, "grad_norm": 0.1367032378911972, "learning_rate": 0.00015480524763551308, "loss": 1.1014, "step": 2228 }, { "epoch": 0.453140882293149, "grad_norm": 0.13346408307552338, "learning_rate": 0.0001547849079629818, "loss": 1.0675, "step": 2229 }, { "epoch": 0.45334417564545637, "grad_norm": 0.1253054141998291, "learning_rate": 0.00015476456829045053, "loss": 1.0117, "step": 2230 }, { "epoch": 0.45354746899776377, "grad_norm": 0.12263582646846771, "learning_rate": 0.00015474422861791925, "loss": 1.0491, "step": 2231 }, { "epoch": 0.45375076235007117, "grad_norm": 0.11762181669473648, "learning_rate": 0.00015472388894538798, "loss": 0.9007, "step": 2232 }, { "epoch": 0.4539540557023785, "grad_norm": 0.1264612227678299, "learning_rate": 0.0001547035492728567, "loss": 1.0378, "step": 2233 }, { "epoch": 0.4541573490546859, "grad_norm": 0.13340885937213898, "learning_rate": 0.00015468320960032545, "loss": 1.0248, "step": 2234 }, { "epoch": 0.4543606424069933, "grad_norm": 0.13584084808826447, "learning_rate": 0.00015466286992779418, "loss": 1.1549, "step": 2235 }, { "epoch": 0.45456393575930065, "grad_norm": 0.13265646994113922, "learning_rate": 0.0001546425302552629, "loss": 1.1589, "step": 2236 }, { "epoch": 0.45476722911160805, "grad_norm": 0.13352257013320923, "learning_rate": 0.00015462219058273163, "loss": 1.3275, "step": 2237 }, { "epoch": 0.45497052246391545, "grad_norm": 0.13025180995464325, "learning_rate": 0.00015460185091020035, "loss": 1.1053, "step": 2238 }, { "epoch": 0.4551738158162228, "grad_norm": 0.13518528640270233, "learning_rate": 0.00015458151123766908, "loss": 1.043, "step": 2239 }, { "epoch": 0.4553771091685302, "grad_norm": 0.13065437972545624, "learning_rate": 0.0001545611715651378, "loss": 1.1238, "step": 2240 }, { "epoch": 0.4555804025208376, "grad_norm": 0.12943416833877563, "learning_rate": 0.00015454083189260653, "loss": 1.2424, "step": 2241 }, { "epoch": 0.45578369587314493, "grad_norm": 0.1298946738243103, "learning_rate": 0.00015452049222007528, "loss": 1.0909, "step": 2242 }, { "epoch": 0.45598698922545233, "grad_norm": 0.1344904750585556, "learning_rate": 0.000154500152547544, "loss": 1.2078, "step": 2243 }, { "epoch": 0.45619028257775973, "grad_norm": 0.11595308780670166, "learning_rate": 0.00015447981287501273, "loss": 0.9557, "step": 2244 }, { "epoch": 0.4563935759300671, "grad_norm": 0.11699800193309784, "learning_rate": 0.00015445947320248145, "loss": 0.969, "step": 2245 }, { "epoch": 0.4565968692823745, "grad_norm": 0.13354718685150146, "learning_rate": 0.00015443913352995018, "loss": 1.2055, "step": 2246 }, { "epoch": 0.45680016263468187, "grad_norm": 0.12961523234844208, "learning_rate": 0.0001544187938574189, "loss": 1.1531, "step": 2247 }, { "epoch": 0.4570034559869892, "grad_norm": 0.12993821501731873, "learning_rate": 0.00015439845418488762, "loss": 1.0975, "step": 2248 }, { "epoch": 0.4572067493392966, "grad_norm": 0.12171147763729095, "learning_rate": 0.00015437811451235635, "loss": 1.1262, "step": 2249 }, { "epoch": 0.45741004269160396, "grad_norm": 0.1307455450296402, "learning_rate": 0.00015435777483982507, "loss": 1.0757, "step": 2250 }, { "epoch": 0.45761333604391136, "grad_norm": 0.12793178856372833, "learning_rate": 0.00015433743516729382, "loss": 1.1616, "step": 2251 }, { "epoch": 0.45781662939621875, "grad_norm": 0.13869251310825348, "learning_rate": 0.00015431709549476255, "loss": 1.0674, "step": 2252 }, { "epoch": 0.4580199227485261, "grad_norm": 0.13465169072151184, "learning_rate": 0.00015429675582223127, "loss": 1.0685, "step": 2253 }, { "epoch": 0.4582232161008335, "grad_norm": 0.12114840000867844, "learning_rate": 0.0001542764161497, "loss": 1.0562, "step": 2254 }, { "epoch": 0.4584265094531409, "grad_norm": 0.11819116771221161, "learning_rate": 0.00015425607647716872, "loss": 0.9489, "step": 2255 }, { "epoch": 0.45862980280544824, "grad_norm": 0.1262710839509964, "learning_rate": 0.00015423573680463745, "loss": 1.014, "step": 2256 }, { "epoch": 0.45883309615775564, "grad_norm": 0.1145327240228653, "learning_rate": 0.00015421539713210617, "loss": 1.0435, "step": 2257 }, { "epoch": 0.45903638951006304, "grad_norm": 0.13413353264331818, "learning_rate": 0.0001541950574595749, "loss": 1.2098, "step": 2258 }, { "epoch": 0.4592396828623704, "grad_norm": 0.14301779866218567, "learning_rate": 0.00015417471778704365, "loss": 1.2555, "step": 2259 }, { "epoch": 0.4594429762146778, "grad_norm": 0.12918636202812195, "learning_rate": 0.00015415437811451237, "loss": 1.2728, "step": 2260 }, { "epoch": 0.4596462695669852, "grad_norm": 0.13419827818870544, "learning_rate": 0.0001541340384419811, "loss": 1.1594, "step": 2261 }, { "epoch": 0.4598495629192925, "grad_norm": 0.132028728723526, "learning_rate": 0.00015411369876944982, "loss": 1.0049, "step": 2262 }, { "epoch": 0.4600528562715999, "grad_norm": 0.12330999970436096, "learning_rate": 0.00015409335909691855, "loss": 1.0211, "step": 2263 }, { "epoch": 0.4602561496239073, "grad_norm": 0.12041660398244858, "learning_rate": 0.00015407301942438727, "loss": 0.9111, "step": 2264 }, { "epoch": 0.46045944297621466, "grad_norm": 0.13959679007530212, "learning_rate": 0.000154052679751856, "loss": 1.2186, "step": 2265 }, { "epoch": 0.46066273632852206, "grad_norm": 0.12078391015529633, "learning_rate": 0.00015403234007932472, "loss": 0.9896, "step": 2266 }, { "epoch": 0.46086602968082946, "grad_norm": 0.13155217468738556, "learning_rate": 0.00015401200040679347, "loss": 1.1405, "step": 2267 }, { "epoch": 0.4610693230331368, "grad_norm": 0.13416320085525513, "learning_rate": 0.0001539916607342622, "loss": 1.1094, "step": 2268 }, { "epoch": 0.4612726163854442, "grad_norm": 0.13319726288318634, "learning_rate": 0.00015397132106173092, "loss": 1.0477, "step": 2269 }, { "epoch": 0.4614759097377516, "grad_norm": 0.1303132325410843, "learning_rate": 0.00015395098138919964, "loss": 1.1049, "step": 2270 }, { "epoch": 0.46167920309005894, "grad_norm": 0.1119418814778328, "learning_rate": 0.00015393064171666837, "loss": 0.8764, "step": 2271 }, { "epoch": 0.46188249644236634, "grad_norm": 0.13639549911022186, "learning_rate": 0.0001539103020441371, "loss": 1.2873, "step": 2272 }, { "epoch": 0.46208578979467374, "grad_norm": 0.1421010047197342, "learning_rate": 0.00015388996237160582, "loss": 1.2249, "step": 2273 }, { "epoch": 0.4622890831469811, "grad_norm": 0.12574367225170135, "learning_rate": 0.00015386962269907454, "loss": 1.063, "step": 2274 }, { "epoch": 0.4624923764992885, "grad_norm": 0.1510375589132309, "learning_rate": 0.0001538492830265433, "loss": 1.2542, "step": 2275 }, { "epoch": 0.4626956698515958, "grad_norm": 0.13016802072525024, "learning_rate": 0.00015382894335401202, "loss": 1.0646, "step": 2276 }, { "epoch": 0.4628989632039032, "grad_norm": 0.11884848028421402, "learning_rate": 0.00015380860368148074, "loss": 1.0169, "step": 2277 }, { "epoch": 0.4631022565562106, "grad_norm": 0.12734943628311157, "learning_rate": 0.00015378826400894947, "loss": 1.0399, "step": 2278 }, { "epoch": 0.46330554990851797, "grad_norm": 0.11856262385845184, "learning_rate": 0.0001537679243364182, "loss": 0.9773, "step": 2279 }, { "epoch": 0.46350884326082537, "grad_norm": 0.12701541185379028, "learning_rate": 0.00015374758466388692, "loss": 0.9503, "step": 2280 }, { "epoch": 0.46371213661313276, "grad_norm": 0.12200977653265, "learning_rate": 0.00015372724499135564, "loss": 0.9685, "step": 2281 }, { "epoch": 0.4639154299654401, "grad_norm": 0.1607646942138672, "learning_rate": 0.00015370690531882436, "loss": 1.2651, "step": 2282 }, { "epoch": 0.4641187233177475, "grad_norm": 0.1287887990474701, "learning_rate": 0.00015368656564629312, "loss": 1.0041, "step": 2283 }, { "epoch": 0.4643220166700549, "grad_norm": 0.11581754684448242, "learning_rate": 0.00015366622597376184, "loss": 0.8802, "step": 2284 }, { "epoch": 0.46452531002236225, "grad_norm": 0.12691698968410492, "learning_rate": 0.00015364588630123056, "loss": 0.9865, "step": 2285 }, { "epoch": 0.46472860337466965, "grad_norm": 0.15262743830680847, "learning_rate": 0.0001536255466286993, "loss": 1.2384, "step": 2286 }, { "epoch": 0.46493189672697705, "grad_norm": 0.12902504205703735, "learning_rate": 0.00015360520695616801, "loss": 1.0832, "step": 2287 }, { "epoch": 0.4651351900792844, "grad_norm": 0.13777056336402893, "learning_rate": 0.00015358486728363674, "loss": 1.2671, "step": 2288 }, { "epoch": 0.4653384834315918, "grad_norm": 0.12391048669815063, "learning_rate": 0.00015356452761110546, "loss": 1.1296, "step": 2289 }, { "epoch": 0.4655417767838992, "grad_norm": 0.13558468222618103, "learning_rate": 0.0001535441879385742, "loss": 1.3, "step": 2290 }, { "epoch": 0.46574507013620653, "grad_norm": 0.13611246645450592, "learning_rate": 0.0001535238482660429, "loss": 1.2885, "step": 2291 }, { "epoch": 0.46594836348851393, "grad_norm": 0.11027907580137253, "learning_rate": 0.00015350350859351166, "loss": 0.8807, "step": 2292 }, { "epoch": 0.4661516568408213, "grad_norm": 0.1303076446056366, "learning_rate": 0.0001534831689209804, "loss": 0.9479, "step": 2293 }, { "epoch": 0.46635495019312867, "grad_norm": 0.12296570837497711, "learning_rate": 0.0001534628292484491, "loss": 0.9483, "step": 2294 }, { "epoch": 0.46655824354543607, "grad_norm": 0.13646475970745087, "learning_rate": 0.00015344248957591784, "loss": 1.0576, "step": 2295 }, { "epoch": 0.46676153689774347, "grad_norm": 0.12281665205955505, "learning_rate": 0.00015342214990338656, "loss": 1.0582, "step": 2296 }, { "epoch": 0.4669648302500508, "grad_norm": 0.12840229272842407, "learning_rate": 0.00015340181023085529, "loss": 1.0531, "step": 2297 }, { "epoch": 0.4671681236023582, "grad_norm": 0.13027642667293549, "learning_rate": 0.000153381470558324, "loss": 1.1658, "step": 2298 }, { "epoch": 0.4673714169546656, "grad_norm": 0.13270190358161926, "learning_rate": 0.00015336113088579273, "loss": 1.2343, "step": 2299 }, { "epoch": 0.46757471030697295, "grad_norm": 0.12298402190208435, "learning_rate": 0.00015334079121326149, "loss": 1.0148, "step": 2300 }, { "epoch": 0.46777800365928035, "grad_norm": 0.11776307225227356, "learning_rate": 0.0001533204515407302, "loss": 1.0423, "step": 2301 }, { "epoch": 0.4679812970115877, "grad_norm": 0.1274150162935257, "learning_rate": 0.00015330011186819893, "loss": 1.0485, "step": 2302 }, { "epoch": 0.4681845903638951, "grad_norm": 0.12356690317392349, "learning_rate": 0.00015327977219566766, "loss": 1.0259, "step": 2303 }, { "epoch": 0.4683878837162025, "grad_norm": 0.11949564516544342, "learning_rate": 0.00015325943252313638, "loss": 1.0544, "step": 2304 }, { "epoch": 0.46859117706850983, "grad_norm": 0.11649688333272934, "learning_rate": 0.0001532390928506051, "loss": 1.0703, "step": 2305 }, { "epoch": 0.46879447042081723, "grad_norm": 0.12657220661640167, "learning_rate": 0.00015321875317807383, "loss": 0.9195, "step": 2306 }, { "epoch": 0.46899776377312463, "grad_norm": 0.11678668856620789, "learning_rate": 0.00015319841350554256, "loss": 1.0412, "step": 2307 }, { "epoch": 0.469201057125432, "grad_norm": 0.1137353926897049, "learning_rate": 0.0001531780738330113, "loss": 0.9843, "step": 2308 }, { "epoch": 0.4694043504777394, "grad_norm": 0.11690492928028107, "learning_rate": 0.00015315773416048003, "loss": 1.0313, "step": 2309 }, { "epoch": 0.4696076438300468, "grad_norm": 0.14086581766605377, "learning_rate": 0.00015313739448794876, "loss": 1.2184, "step": 2310 }, { "epoch": 0.4698109371823541, "grad_norm": 0.13605134189128876, "learning_rate": 0.00015311705481541748, "loss": 1.0273, "step": 2311 }, { "epoch": 0.4700142305346615, "grad_norm": 0.12567712366580963, "learning_rate": 0.0001530967151428862, "loss": 1.0822, "step": 2312 }, { "epoch": 0.4702175238869689, "grad_norm": 0.12103762477636337, "learning_rate": 0.00015307637547035493, "loss": 0.9511, "step": 2313 }, { "epoch": 0.47042081723927626, "grad_norm": 0.13223135471343994, "learning_rate": 0.00015305603579782366, "loss": 1.116, "step": 2314 }, { "epoch": 0.47062411059158366, "grad_norm": 0.12696783244609833, "learning_rate": 0.00015303569612529238, "loss": 1.046, "step": 2315 }, { "epoch": 0.47082740394389105, "grad_norm": 0.13583315908908844, "learning_rate": 0.00015301535645276113, "loss": 1.2365, "step": 2316 }, { "epoch": 0.4710306972961984, "grad_norm": 0.1245473176240921, "learning_rate": 0.00015299501678022986, "loss": 1.1478, "step": 2317 }, { "epoch": 0.4712339906485058, "grad_norm": 0.1365327388048172, "learning_rate": 0.00015297467710769858, "loss": 1.1697, "step": 2318 }, { "epoch": 0.4714372840008132, "grad_norm": 0.13741904497146606, "learning_rate": 0.0001529543374351673, "loss": 1.1585, "step": 2319 }, { "epoch": 0.47164057735312054, "grad_norm": 0.13385626673698425, "learning_rate": 0.00015293399776263603, "loss": 1.1195, "step": 2320 }, { "epoch": 0.47184387070542794, "grad_norm": 0.12970289587974548, "learning_rate": 0.00015291365809010475, "loss": 1.1, "step": 2321 }, { "epoch": 0.47204716405773534, "grad_norm": 0.13030849397182465, "learning_rate": 0.00015289331841757348, "loss": 1.1754, "step": 2322 }, { "epoch": 0.4722504574100427, "grad_norm": 0.1363505721092224, "learning_rate": 0.0001528729787450422, "loss": 1.2234, "step": 2323 }, { "epoch": 0.4724537507623501, "grad_norm": 0.1340765804052353, "learning_rate": 0.00015285263907251095, "loss": 0.9473, "step": 2324 }, { "epoch": 0.4726570441146575, "grad_norm": 0.12515921890735626, "learning_rate": 0.00015283229939997968, "loss": 1.0829, "step": 2325 }, { "epoch": 0.4728603374669648, "grad_norm": 0.1202256977558136, "learning_rate": 0.0001528119597274484, "loss": 0.9616, "step": 2326 }, { "epoch": 0.4730636308192722, "grad_norm": 0.10012631863355637, "learning_rate": 0.00015279162005491713, "loss": 0.7739, "step": 2327 }, { "epoch": 0.47326692417157956, "grad_norm": 0.12161195278167725, "learning_rate": 0.00015277128038238585, "loss": 1.0245, "step": 2328 }, { "epoch": 0.47347021752388696, "grad_norm": 0.12597283720970154, "learning_rate": 0.00015275094070985458, "loss": 1.1309, "step": 2329 }, { "epoch": 0.47367351087619436, "grad_norm": 0.12898840010166168, "learning_rate": 0.0001527306010373233, "loss": 1.0073, "step": 2330 }, { "epoch": 0.4738768042285017, "grad_norm": 0.11734145879745483, "learning_rate": 0.00015271026136479203, "loss": 0.8884, "step": 2331 }, { "epoch": 0.4740800975808091, "grad_norm": 0.11760027706623077, "learning_rate": 0.00015268992169226075, "loss": 1.0386, "step": 2332 }, { "epoch": 0.4742833909331165, "grad_norm": 0.13076893985271454, "learning_rate": 0.0001526695820197295, "loss": 1.1217, "step": 2333 }, { "epoch": 0.47448668428542384, "grad_norm": 0.12086467444896698, "learning_rate": 0.00015264924234719823, "loss": 1.1314, "step": 2334 }, { "epoch": 0.47468997763773124, "grad_norm": 0.1257351189851761, "learning_rate": 0.00015262890267466695, "loss": 1.0988, "step": 2335 }, { "epoch": 0.47489327099003864, "grad_norm": 0.13056614995002747, "learning_rate": 0.00015260856300213567, "loss": 1.0929, "step": 2336 }, { "epoch": 0.475096564342346, "grad_norm": 0.1115044355392456, "learning_rate": 0.0001525882233296044, "loss": 0.9365, "step": 2337 }, { "epoch": 0.4752998576946534, "grad_norm": 0.11613184213638306, "learning_rate": 0.00015256788365707312, "loss": 0.9492, "step": 2338 }, { "epoch": 0.4755031510469608, "grad_norm": 0.13431620597839355, "learning_rate": 0.00015254754398454185, "loss": 1.0483, "step": 2339 }, { "epoch": 0.4757064443992681, "grad_norm": 0.13704031705856323, "learning_rate": 0.00015252720431201057, "loss": 1.324, "step": 2340 }, { "epoch": 0.4759097377515755, "grad_norm": 0.14616814255714417, "learning_rate": 0.00015250686463947932, "loss": 1.2488, "step": 2341 }, { "epoch": 0.4761130311038829, "grad_norm": 0.14007219672203064, "learning_rate": 0.00015248652496694805, "loss": 1.1427, "step": 2342 }, { "epoch": 0.47631632445619027, "grad_norm": 0.14786280691623688, "learning_rate": 0.00015246618529441677, "loss": 1.3224, "step": 2343 }, { "epoch": 0.47651961780849766, "grad_norm": 0.13280178606510162, "learning_rate": 0.0001524458456218855, "loss": 1.2878, "step": 2344 }, { "epoch": 0.47672291116080506, "grad_norm": 0.13446266949176788, "learning_rate": 0.00015242550594935422, "loss": 0.9997, "step": 2345 }, { "epoch": 0.4769262045131124, "grad_norm": 0.1296195685863495, "learning_rate": 0.00015240516627682295, "loss": 1.196, "step": 2346 }, { "epoch": 0.4771294978654198, "grad_norm": 0.13888056576251984, "learning_rate": 0.00015238482660429167, "loss": 1.1782, "step": 2347 }, { "epoch": 0.4773327912177272, "grad_norm": 0.14144721627235413, "learning_rate": 0.0001523644869317604, "loss": 1.0023, "step": 2348 }, { "epoch": 0.47753608457003455, "grad_norm": 0.1382543295621872, "learning_rate": 0.00015234414725922915, "loss": 1.128, "step": 2349 }, { "epoch": 0.47773937792234195, "grad_norm": 0.14320622384548187, "learning_rate": 0.00015232380758669787, "loss": 1.1825, "step": 2350 }, { "epoch": 0.47794267127464934, "grad_norm": 0.13087749481201172, "learning_rate": 0.0001523034679141666, "loss": 1.1676, "step": 2351 }, { "epoch": 0.4781459646269567, "grad_norm": 0.12107618153095245, "learning_rate": 0.00015228312824163532, "loss": 1.0275, "step": 2352 }, { "epoch": 0.4783492579792641, "grad_norm": 0.12728255987167358, "learning_rate": 0.00015226278856910404, "loss": 0.9566, "step": 2353 }, { "epoch": 0.4785525513315715, "grad_norm": 0.13032306730747223, "learning_rate": 0.00015224244889657277, "loss": 0.9879, "step": 2354 }, { "epoch": 0.47875584468387883, "grad_norm": 0.13414493203163147, "learning_rate": 0.0001522221092240415, "loss": 1.1314, "step": 2355 }, { "epoch": 0.4789591380361862, "grad_norm": 0.13473325967788696, "learning_rate": 0.00015220176955151022, "loss": 1.0828, "step": 2356 }, { "epoch": 0.47916243138849357, "grad_norm": 0.13013584911823273, "learning_rate": 0.00015218142987897897, "loss": 0.9822, "step": 2357 }, { "epoch": 0.47936572474080097, "grad_norm": 0.13635900616645813, "learning_rate": 0.0001521610902064477, "loss": 1.1549, "step": 2358 }, { "epoch": 0.47956901809310837, "grad_norm": 0.14560578763484955, "learning_rate": 0.00015214075053391642, "loss": 1.1529, "step": 2359 }, { "epoch": 0.4797723114454157, "grad_norm": 0.13965454697608948, "learning_rate": 0.00015212041086138514, "loss": 1.1511, "step": 2360 }, { "epoch": 0.4799756047977231, "grad_norm": 0.14002491533756256, "learning_rate": 0.00015210007118885387, "loss": 1.1332, "step": 2361 }, { "epoch": 0.4801788981500305, "grad_norm": 0.14013326168060303, "learning_rate": 0.0001520797315163226, "loss": 1.1585, "step": 2362 }, { "epoch": 0.48038219150233785, "grad_norm": 0.114499032497406, "learning_rate": 0.00015205939184379132, "loss": 0.8636, "step": 2363 }, { "epoch": 0.48058548485464525, "grad_norm": 0.14330022037029266, "learning_rate": 0.00015203905217126004, "loss": 1.0499, "step": 2364 }, { "epoch": 0.48078877820695265, "grad_norm": 0.13167035579681396, "learning_rate": 0.0001520187124987288, "loss": 1.047, "step": 2365 }, { "epoch": 0.48099207155926, "grad_norm": 0.12093020975589752, "learning_rate": 0.00015199837282619752, "loss": 1.0635, "step": 2366 }, { "epoch": 0.4811953649115674, "grad_norm": 0.13088001310825348, "learning_rate": 0.00015197803315366624, "loss": 1.1499, "step": 2367 }, { "epoch": 0.4813986582638748, "grad_norm": 0.13969479501247406, "learning_rate": 0.00015195769348113497, "loss": 1.2346, "step": 2368 }, { "epoch": 0.48160195161618213, "grad_norm": 0.129147008061409, "learning_rate": 0.0001519373538086037, "loss": 0.9817, "step": 2369 }, { "epoch": 0.48180524496848953, "grad_norm": 0.13874943554401398, "learning_rate": 0.00015191701413607242, "loss": 1.0194, "step": 2370 }, { "epoch": 0.48200853832079693, "grad_norm": 0.13884292542934418, "learning_rate": 0.00015189667446354114, "loss": 1.1071, "step": 2371 }, { "epoch": 0.4822118316731043, "grad_norm": 0.13045528531074524, "learning_rate": 0.00015187633479100986, "loss": 1.1242, "step": 2372 }, { "epoch": 0.4824151250254117, "grad_norm": 0.15773905813694, "learning_rate": 0.0001518559951184786, "loss": 1.2639, "step": 2373 }, { "epoch": 0.48261841837771907, "grad_norm": 0.11095882952213287, "learning_rate": 0.00015183565544594734, "loss": 1.0023, "step": 2374 }, { "epoch": 0.4828217117300264, "grad_norm": 0.1181846410036087, "learning_rate": 0.00015181531577341606, "loss": 0.9518, "step": 2375 }, { "epoch": 0.4830250050823338, "grad_norm": 0.11797620356082916, "learning_rate": 0.0001517949761008848, "loss": 0.9792, "step": 2376 }, { "epoch": 0.4832282984346412, "grad_norm": 0.11560335010290146, "learning_rate": 0.0001517746364283535, "loss": 0.9539, "step": 2377 }, { "epoch": 0.48343159178694856, "grad_norm": 0.1399577260017395, "learning_rate": 0.00015175429675582224, "loss": 1.1419, "step": 2378 }, { "epoch": 0.48363488513925595, "grad_norm": 0.12643292546272278, "learning_rate": 0.00015173395708329096, "loss": 1.07, "step": 2379 }, { "epoch": 0.48383817849156335, "grad_norm": 0.11252279579639435, "learning_rate": 0.0001517136174107597, "loss": 0.9261, "step": 2380 }, { "epoch": 0.4840414718438707, "grad_norm": 0.12694686651229858, "learning_rate": 0.0001516932777382284, "loss": 1.0992, "step": 2381 }, { "epoch": 0.4842447651961781, "grad_norm": 0.11446068435907364, "learning_rate": 0.00015167293806569716, "loss": 0.9807, "step": 2382 }, { "epoch": 0.48444805854848544, "grad_norm": 0.12001042813062668, "learning_rate": 0.0001516525983931659, "loss": 0.9139, "step": 2383 }, { "epoch": 0.48465135190079284, "grad_norm": 0.12721174955368042, "learning_rate": 0.0001516322587206346, "loss": 1.0866, "step": 2384 }, { "epoch": 0.48485464525310024, "grad_norm": 0.12574180960655212, "learning_rate": 0.00015161191904810334, "loss": 1.1573, "step": 2385 }, { "epoch": 0.4850579386054076, "grad_norm": 0.12667550146579742, "learning_rate": 0.00015159157937557206, "loss": 1.0796, "step": 2386 }, { "epoch": 0.485261231957715, "grad_norm": 0.13312119245529175, "learning_rate": 0.00015157123970304079, "loss": 1.261, "step": 2387 }, { "epoch": 0.4854645253100224, "grad_norm": 0.13041463494300842, "learning_rate": 0.0001515509000305095, "loss": 1.0956, "step": 2388 }, { "epoch": 0.4856678186623297, "grad_norm": 0.12114804238080978, "learning_rate": 0.00015153056035797823, "loss": 0.887, "step": 2389 }, { "epoch": 0.4858711120146371, "grad_norm": 0.144356831908226, "learning_rate": 0.00015151022068544699, "loss": 1.1402, "step": 2390 }, { "epoch": 0.4860744053669445, "grad_norm": 0.12829992175102234, "learning_rate": 0.0001514898810129157, "loss": 0.9359, "step": 2391 }, { "epoch": 0.48627769871925186, "grad_norm": 0.12318047136068344, "learning_rate": 0.00015146954134038443, "loss": 1.0306, "step": 2392 }, { "epoch": 0.48648099207155926, "grad_norm": 0.12492537498474121, "learning_rate": 0.00015144920166785316, "loss": 1.0563, "step": 2393 }, { "epoch": 0.48668428542386666, "grad_norm": 0.130072683095932, "learning_rate": 0.00015142886199532188, "loss": 1.1365, "step": 2394 }, { "epoch": 0.486887578776174, "grad_norm": 0.11817184090614319, "learning_rate": 0.0001514085223227906, "loss": 1.0596, "step": 2395 }, { "epoch": 0.4870908721284814, "grad_norm": 0.1323062777519226, "learning_rate": 0.00015138818265025933, "loss": 1.0337, "step": 2396 }, { "epoch": 0.4872941654807888, "grad_norm": 0.13455109298229218, "learning_rate": 0.00015136784297772806, "loss": 1.1477, "step": 2397 }, { "epoch": 0.48749745883309614, "grad_norm": 0.11852074414491653, "learning_rate": 0.0001513475033051968, "loss": 1.1026, "step": 2398 }, { "epoch": 0.48770075218540354, "grad_norm": 0.11237514764070511, "learning_rate": 0.00015132716363266553, "loss": 0.9384, "step": 2399 }, { "epoch": 0.48790404553771094, "grad_norm": 0.10929456353187561, "learning_rate": 0.00015130682396013426, "loss": 0.9844, "step": 2400 }, { "epoch": 0.4881073388900183, "grad_norm": 0.13844764232635498, "learning_rate": 0.00015128648428760298, "loss": 1.1771, "step": 2401 }, { "epoch": 0.4883106322423257, "grad_norm": 0.13155733048915863, "learning_rate": 0.0001512661446150717, "loss": 1.085, "step": 2402 }, { "epoch": 0.4885139255946331, "grad_norm": 0.13567966222763062, "learning_rate": 0.00015124580494254043, "loss": 1.089, "step": 2403 }, { "epoch": 0.4887172189469404, "grad_norm": 0.1349712312221527, "learning_rate": 0.00015122546527000916, "loss": 1.1356, "step": 2404 }, { "epoch": 0.4889205122992478, "grad_norm": 0.11694735288619995, "learning_rate": 0.00015120512559747788, "loss": 0.8803, "step": 2405 }, { "epoch": 0.4891238056515552, "grad_norm": 0.11674166470766068, "learning_rate": 0.00015118478592494663, "loss": 0.9611, "step": 2406 }, { "epoch": 0.48932709900386256, "grad_norm": 0.1268279105424881, "learning_rate": 0.00015116444625241536, "loss": 1.0746, "step": 2407 }, { "epoch": 0.48953039235616996, "grad_norm": 0.1330219954252243, "learning_rate": 0.00015114410657988408, "loss": 1.1476, "step": 2408 }, { "epoch": 0.4897336857084773, "grad_norm": 0.13246414065361023, "learning_rate": 0.0001511237669073528, "loss": 1.0918, "step": 2409 }, { "epoch": 0.4899369790607847, "grad_norm": 0.12214238941669464, "learning_rate": 0.00015110342723482153, "loss": 1.0092, "step": 2410 }, { "epoch": 0.4901402724130921, "grad_norm": 0.1193271204829216, "learning_rate": 0.00015108308756229025, "loss": 0.8615, "step": 2411 }, { "epoch": 0.49034356576539945, "grad_norm": 0.12478460371494293, "learning_rate": 0.00015106274788975898, "loss": 1.0334, "step": 2412 }, { "epoch": 0.49054685911770685, "grad_norm": 0.14054545760154724, "learning_rate": 0.0001510424082172277, "loss": 1.1424, "step": 2413 }, { "epoch": 0.49075015247001424, "grad_norm": 0.15053215622901917, "learning_rate": 0.00015102206854469643, "loss": 1.0335, "step": 2414 }, { "epoch": 0.4909534458223216, "grad_norm": 0.13923850655555725, "learning_rate": 0.00015100172887216518, "loss": 1.1668, "step": 2415 }, { "epoch": 0.491156739174629, "grad_norm": 0.13445380330085754, "learning_rate": 0.0001509813891996339, "loss": 1.1683, "step": 2416 }, { "epoch": 0.4913600325269364, "grad_norm": 0.14007751643657684, "learning_rate": 0.00015096104952710263, "loss": 1.1405, "step": 2417 }, { "epoch": 0.49156332587924373, "grad_norm": 0.1334713101387024, "learning_rate": 0.00015094070985457135, "loss": 1.1468, "step": 2418 }, { "epoch": 0.4917666192315511, "grad_norm": 0.12781627476215363, "learning_rate": 0.00015092037018204008, "loss": 1.1051, "step": 2419 }, { "epoch": 0.4919699125838585, "grad_norm": 0.1371796876192093, "learning_rate": 0.0001509000305095088, "loss": 1.1274, "step": 2420 }, { "epoch": 0.49217320593616587, "grad_norm": 0.15052980184555054, "learning_rate": 0.00015087969083697753, "loss": 1.1741, "step": 2421 }, { "epoch": 0.49237649928847327, "grad_norm": 0.12332694232463837, "learning_rate": 0.00015085935116444625, "loss": 1.0382, "step": 2422 }, { "epoch": 0.49257979264078067, "grad_norm": 0.1108141764998436, "learning_rate": 0.000150839011491915, "loss": 1.0011, "step": 2423 }, { "epoch": 0.492783085993088, "grad_norm": 0.13298697769641876, "learning_rate": 0.00015081867181938373, "loss": 1.1736, "step": 2424 }, { "epoch": 0.4929863793453954, "grad_norm": 0.1383012980222702, "learning_rate": 0.00015079833214685245, "loss": 1.2248, "step": 2425 }, { "epoch": 0.4931896726977028, "grad_norm": 0.13049232959747314, "learning_rate": 0.00015077799247432117, "loss": 1.0214, "step": 2426 }, { "epoch": 0.49339296605001015, "grad_norm": 0.14081017673015594, "learning_rate": 0.0001507576528017899, "loss": 1.1836, "step": 2427 }, { "epoch": 0.49359625940231755, "grad_norm": 0.14135879278182983, "learning_rate": 0.00015073731312925862, "loss": 1.0908, "step": 2428 }, { "epoch": 0.49379955275462495, "grad_norm": 0.12276162207126617, "learning_rate": 0.00015071697345672735, "loss": 1.0615, "step": 2429 }, { "epoch": 0.4940028461069323, "grad_norm": 0.13314439356327057, "learning_rate": 0.00015069663378419607, "loss": 1.0126, "step": 2430 }, { "epoch": 0.4942061394592397, "grad_norm": 0.13110828399658203, "learning_rate": 0.00015067629411166482, "loss": 1.0914, "step": 2431 }, { "epoch": 0.4944094328115471, "grad_norm": 0.14637964963912964, "learning_rate": 0.00015065595443913355, "loss": 1.1895, "step": 2432 }, { "epoch": 0.49461272616385443, "grad_norm": 0.13631272315979004, "learning_rate": 0.00015063561476660227, "loss": 1.0877, "step": 2433 }, { "epoch": 0.49481601951616183, "grad_norm": 0.12627999484539032, "learning_rate": 0.000150615275094071, "loss": 1.0902, "step": 2434 }, { "epoch": 0.4950193128684692, "grad_norm": 0.1452523022890091, "learning_rate": 0.00015059493542153972, "loss": 1.2531, "step": 2435 }, { "epoch": 0.4952226062207766, "grad_norm": 0.12937428057193756, "learning_rate": 0.00015057459574900845, "loss": 1.149, "step": 2436 }, { "epoch": 0.49542589957308397, "grad_norm": 0.1313169300556183, "learning_rate": 0.00015055425607647717, "loss": 1.1124, "step": 2437 }, { "epoch": 0.4956291929253913, "grad_norm": 0.13300736248493195, "learning_rate": 0.0001505339164039459, "loss": 1.2259, "step": 2438 }, { "epoch": 0.4958324862776987, "grad_norm": 0.12567725777626038, "learning_rate": 0.00015051357673141465, "loss": 1.1431, "step": 2439 }, { "epoch": 0.4960357796300061, "grad_norm": 0.12322575598955154, "learning_rate": 0.00015049323705888337, "loss": 1.0824, "step": 2440 }, { "epoch": 0.49623907298231346, "grad_norm": 0.11976869404315948, "learning_rate": 0.0001504728973863521, "loss": 1.057, "step": 2441 }, { "epoch": 0.49644236633462085, "grad_norm": 0.13577309250831604, "learning_rate": 0.00015045255771382082, "loss": 1.1027, "step": 2442 }, { "epoch": 0.49664565968692825, "grad_norm": 0.13949300348758698, "learning_rate": 0.00015043221804128954, "loss": 1.1102, "step": 2443 }, { "epoch": 0.4968489530392356, "grad_norm": 0.1493709832429886, "learning_rate": 0.00015041187836875827, "loss": 1.2468, "step": 2444 }, { "epoch": 0.497052246391543, "grad_norm": 0.13680393993854523, "learning_rate": 0.000150391538696227, "loss": 1.0607, "step": 2445 }, { "epoch": 0.4972555397438504, "grad_norm": 0.12200003862380981, "learning_rate": 0.00015037119902369572, "loss": 0.931, "step": 2446 }, { "epoch": 0.49745883309615774, "grad_norm": 0.12486010044813156, "learning_rate": 0.00015035085935116447, "loss": 1.0529, "step": 2447 }, { "epoch": 0.49766212644846514, "grad_norm": 0.14312241971492767, "learning_rate": 0.0001503305196786332, "loss": 1.1921, "step": 2448 }, { "epoch": 0.49786541980077254, "grad_norm": 0.1357506513595581, "learning_rate": 0.00015031018000610192, "loss": 1.0561, "step": 2449 }, { "epoch": 0.4980687131530799, "grad_norm": 0.14294788241386414, "learning_rate": 0.00015028984033357064, "loss": 0.9931, "step": 2450 }, { "epoch": 0.4982720065053873, "grad_norm": 0.1087241843342781, "learning_rate": 0.00015026950066103937, "loss": 1.0038, "step": 2451 }, { "epoch": 0.4984752998576947, "grad_norm": 0.11965546011924744, "learning_rate": 0.0001502491609885081, "loss": 1.0055, "step": 2452 }, { "epoch": 0.498678593210002, "grad_norm": 0.13440768420696259, "learning_rate": 0.00015022882131597682, "loss": 0.9883, "step": 2453 }, { "epoch": 0.4988818865623094, "grad_norm": 0.12496986985206604, "learning_rate": 0.00015020848164344554, "loss": 1.1175, "step": 2454 }, { "epoch": 0.4990851799146168, "grad_norm": 0.1410161703824997, "learning_rate": 0.00015018814197091427, "loss": 1.0976, "step": 2455 }, { "epoch": 0.49928847326692416, "grad_norm": 0.12262056767940521, "learning_rate": 0.00015016780229838302, "loss": 0.9429, "step": 2456 }, { "epoch": 0.49949176661923156, "grad_norm": 0.12424588203430176, "learning_rate": 0.00015014746262585174, "loss": 1.01, "step": 2457 }, { "epoch": 0.49969505997153896, "grad_norm": 0.12014136463403702, "learning_rate": 0.00015012712295332047, "loss": 1.0769, "step": 2458 }, { "epoch": 0.4998983533238463, "grad_norm": 0.12330099940299988, "learning_rate": 0.00015010678328078916, "loss": 0.9982, "step": 2459 }, { "epoch": 0.5001016466761536, "grad_norm": 0.12830835580825806, "learning_rate": 0.00015008644360825791, "loss": 1.1893, "step": 2460 }, { "epoch": 0.500304940028461, "grad_norm": 0.12516823410987854, "learning_rate": 0.00015006610393572664, "loss": 0.9871, "step": 2461 }, { "epoch": 0.5005082333807684, "grad_norm": 0.13631972670555115, "learning_rate": 0.00015004576426319536, "loss": 1.1454, "step": 2462 }, { "epoch": 0.5007115267330758, "grad_norm": 0.1340373456478119, "learning_rate": 0.0001500254245906641, "loss": 1.0478, "step": 2463 }, { "epoch": 0.5009148200853832, "grad_norm": 0.13719302415847778, "learning_rate": 0.00015000508491813284, "loss": 0.9423, "step": 2464 }, { "epoch": 0.5011181134376906, "grad_norm": 0.12467597424983978, "learning_rate": 0.00014998474524560156, "loss": 1.0493, "step": 2465 }, { "epoch": 0.5013214067899979, "grad_norm": 0.11926814168691635, "learning_rate": 0.0001499644055730703, "loss": 0.9844, "step": 2466 }, { "epoch": 0.5015247001423053, "grad_norm": 0.12327981740236282, "learning_rate": 0.00014994406590053899, "loss": 1.0336, "step": 2467 }, { "epoch": 0.5017279934946127, "grad_norm": 0.126510351896286, "learning_rate": 0.00014992372622800774, "loss": 1.0061, "step": 2468 }, { "epoch": 0.5019312868469201, "grad_norm": 0.12146785855293274, "learning_rate": 0.00014990338655547646, "loss": 1.011, "step": 2469 }, { "epoch": 0.5021345801992275, "grad_norm": 0.12402217090129852, "learning_rate": 0.00014988304688294519, "loss": 1.0177, "step": 2470 }, { "epoch": 0.5023378735515349, "grad_norm": 0.13122454285621643, "learning_rate": 0.0001498627072104139, "loss": 0.9743, "step": 2471 }, { "epoch": 0.5025411669038422, "grad_norm": 0.13217094540596008, "learning_rate": 0.00014984236753788266, "loss": 1.0227, "step": 2472 }, { "epoch": 0.5027444602561496, "grad_norm": 0.14987598359584808, "learning_rate": 0.0001498220278653514, "loss": 1.1522, "step": 2473 }, { "epoch": 0.502947753608457, "grad_norm": 0.13689711689949036, "learning_rate": 0.0001498016881928201, "loss": 1.0014, "step": 2474 }, { "epoch": 0.5031510469607644, "grad_norm": 0.11815892159938812, "learning_rate": 0.00014978134852028884, "loss": 0.8749, "step": 2475 }, { "epoch": 0.5033543403130718, "grad_norm": 0.11772647500038147, "learning_rate": 0.00014976100884775756, "loss": 0.9894, "step": 2476 }, { "epoch": 0.5035576336653791, "grad_norm": 0.13443076610565186, "learning_rate": 0.00014974066917522628, "loss": 1.1191, "step": 2477 }, { "epoch": 0.5037609270176865, "grad_norm": 0.13787920773029327, "learning_rate": 0.000149720329502695, "loss": 1.0513, "step": 2478 }, { "epoch": 0.5039642203699939, "grad_norm": 0.13152827322483063, "learning_rate": 0.00014969998983016373, "loss": 1.1012, "step": 2479 }, { "epoch": 0.5041675137223013, "grad_norm": 0.12392322719097137, "learning_rate": 0.00014967965015763249, "loss": 1.0068, "step": 2480 }, { "epoch": 0.5043708070746087, "grad_norm": 0.13253094255924225, "learning_rate": 0.0001496593104851012, "loss": 1.0683, "step": 2481 }, { "epoch": 0.5045741004269161, "grad_norm": 0.12664328515529633, "learning_rate": 0.00014963897081256993, "loss": 1.0333, "step": 2482 }, { "epoch": 0.5047773937792234, "grad_norm": 0.13020643591880798, "learning_rate": 0.00014961863114003866, "loss": 1.0567, "step": 2483 }, { "epoch": 0.5049806871315308, "grad_norm": 0.1261332482099533, "learning_rate": 0.00014959829146750738, "loss": 0.9574, "step": 2484 }, { "epoch": 0.5051839804838382, "grad_norm": 0.13825035095214844, "learning_rate": 0.0001495779517949761, "loss": 1.2168, "step": 2485 }, { "epoch": 0.5053872738361456, "grad_norm": 0.1333974152803421, "learning_rate": 0.00014955761212244483, "loss": 1.0775, "step": 2486 }, { "epoch": 0.505590567188453, "grad_norm": 0.12436322122812271, "learning_rate": 0.00014953727244991356, "loss": 1.0043, "step": 2487 }, { "epoch": 0.5057938605407604, "grad_norm": 0.13626371324062347, "learning_rate": 0.0001495169327773823, "loss": 1.1736, "step": 2488 }, { "epoch": 0.5059971538930677, "grad_norm": 0.13061967492103577, "learning_rate": 0.00014949659310485103, "loss": 0.9288, "step": 2489 }, { "epoch": 0.506200447245375, "grad_norm": 0.12033544480800629, "learning_rate": 0.00014947625343231976, "loss": 1.0222, "step": 2490 }, { "epoch": 0.5064037405976825, "grad_norm": 0.14046040177345276, "learning_rate": 0.00014945591375978848, "loss": 1.1382, "step": 2491 }, { "epoch": 0.5066070339499898, "grad_norm": 0.1174360066652298, "learning_rate": 0.0001494355740872572, "loss": 0.8663, "step": 2492 }, { "epoch": 0.5068103273022972, "grad_norm": 0.13645724952220917, "learning_rate": 0.00014941523441472593, "loss": 1.1139, "step": 2493 }, { "epoch": 0.5070136206546046, "grad_norm": 0.1309158205986023, "learning_rate": 0.00014939489474219465, "loss": 1.1784, "step": 2494 }, { "epoch": 0.5072169140069119, "grad_norm": 0.12230408936738968, "learning_rate": 0.00014937455506966338, "loss": 1.1231, "step": 2495 }, { "epoch": 0.5074202073592193, "grad_norm": 0.1440531313419342, "learning_rate": 0.0001493542153971321, "loss": 1.2055, "step": 2496 }, { "epoch": 0.5076235007115267, "grad_norm": 0.13199447095394135, "learning_rate": 0.00014933387572460086, "loss": 1.1044, "step": 2497 }, { "epoch": 0.5078267940638341, "grad_norm": 0.12693634629249573, "learning_rate": 0.00014931353605206958, "loss": 1.0495, "step": 2498 }, { "epoch": 0.5080300874161415, "grad_norm": 0.1196681559085846, "learning_rate": 0.0001492931963795383, "loss": 0.9505, "step": 2499 }, { "epoch": 0.5082333807684488, "grad_norm": 0.1331620216369629, "learning_rate": 0.000149272856707007, "loss": 1.0419, "step": 2500 }, { "epoch": 0.5084366741207562, "grad_norm": 0.12307044863700867, "learning_rate": 0.00014925251703447575, "loss": 1.0216, "step": 2501 }, { "epoch": 0.5086399674730636, "grad_norm": 0.14936399459838867, "learning_rate": 0.00014923217736194448, "loss": 1.2894, "step": 2502 }, { "epoch": 0.508843260825371, "grad_norm": 0.1165819764137268, "learning_rate": 0.0001492118376894132, "loss": 1.0173, "step": 2503 }, { "epoch": 0.5090465541776784, "grad_norm": 0.13525764644145966, "learning_rate": 0.00014919149801688193, "loss": 1.0883, "step": 2504 }, { "epoch": 0.5092498475299858, "grad_norm": 0.13654504716396332, "learning_rate": 0.00014917115834435068, "loss": 0.9356, "step": 2505 }, { "epoch": 0.5094531408822931, "grad_norm": 0.12151267379522324, "learning_rate": 0.0001491508186718194, "loss": 0.9508, "step": 2506 }, { "epoch": 0.5096564342346005, "grad_norm": 0.13334833085536957, "learning_rate": 0.00014913047899928813, "loss": 1.2175, "step": 2507 }, { "epoch": 0.5098597275869079, "grad_norm": 0.13975641131401062, "learning_rate": 0.00014911013932675682, "loss": 1.0501, "step": 2508 }, { "epoch": 0.5100630209392153, "grad_norm": 0.13203707337379456, "learning_rate": 0.00014908979965422558, "loss": 0.9835, "step": 2509 }, { "epoch": 0.5102663142915227, "grad_norm": 0.154182568192482, "learning_rate": 0.0001490694599816943, "loss": 1.1632, "step": 2510 }, { "epoch": 0.5104696076438301, "grad_norm": 0.13297821581363678, "learning_rate": 0.00014904912030916302, "loss": 0.9965, "step": 2511 }, { "epoch": 0.5106729009961374, "grad_norm": 0.123105987906456, "learning_rate": 0.00014902878063663175, "loss": 0.9264, "step": 2512 }, { "epoch": 0.5108761943484448, "grad_norm": 0.1457197219133377, "learning_rate": 0.0001490084409641005, "loss": 1.1452, "step": 2513 }, { "epoch": 0.5110794877007522, "grad_norm": 0.12882955372333527, "learning_rate": 0.00014898810129156923, "loss": 1.0295, "step": 2514 }, { "epoch": 0.5112827810530596, "grad_norm": 0.1381346881389618, "learning_rate": 0.00014896776161903795, "loss": 1.1416, "step": 2515 }, { "epoch": 0.511486074405367, "grad_norm": 0.12074743956327438, "learning_rate": 0.00014894742194650665, "loss": 0.9549, "step": 2516 }, { "epoch": 0.5116893677576744, "grad_norm": 0.12559756636619568, "learning_rate": 0.0001489270822739754, "loss": 1.014, "step": 2517 }, { "epoch": 0.5118926611099817, "grad_norm": 0.13586939871311188, "learning_rate": 0.00014890674260144412, "loss": 1.1621, "step": 2518 }, { "epoch": 0.5120959544622891, "grad_norm": 0.1177433580160141, "learning_rate": 0.00014888640292891285, "loss": 0.9666, "step": 2519 }, { "epoch": 0.5122992478145965, "grad_norm": 0.12881316244602203, "learning_rate": 0.00014886606325638157, "loss": 1.1345, "step": 2520 }, { "epoch": 0.5125025411669039, "grad_norm": 0.1258634775876999, "learning_rate": 0.00014884572358385032, "loss": 1.05, "step": 2521 }, { "epoch": 0.5127058345192113, "grad_norm": 0.12486784160137177, "learning_rate": 0.00014882538391131905, "loss": 1.144, "step": 2522 }, { "epoch": 0.5129091278715187, "grad_norm": 0.13641564548015594, "learning_rate": 0.00014880504423878777, "loss": 1.2183, "step": 2523 }, { "epoch": 0.5131124212238259, "grad_norm": 0.13277971744537354, "learning_rate": 0.00014878470456625647, "loss": 1.1815, "step": 2524 }, { "epoch": 0.5133157145761333, "grad_norm": 0.14261163771152496, "learning_rate": 0.00014876436489372522, "loss": 1.217, "step": 2525 }, { "epoch": 0.5135190079284407, "grad_norm": 0.13848505914211273, "learning_rate": 0.00014874402522119395, "loss": 1.2031, "step": 2526 }, { "epoch": 0.5137223012807481, "grad_norm": 0.10906849801540375, "learning_rate": 0.00014872368554866267, "loss": 0.9407, "step": 2527 }, { "epoch": 0.5139255946330555, "grad_norm": 0.13533109426498413, "learning_rate": 0.0001487033458761314, "loss": 1.1576, "step": 2528 }, { "epoch": 0.5141288879853628, "grad_norm": 0.13062264025211334, "learning_rate": 0.00014868300620360015, "loss": 1.1019, "step": 2529 }, { "epoch": 0.5143321813376702, "grad_norm": 0.1373278796672821, "learning_rate": 0.00014866266653106887, "loss": 0.9672, "step": 2530 }, { "epoch": 0.5145354746899776, "grad_norm": 0.15875272452831268, "learning_rate": 0.0001486423268585376, "loss": 1.3291, "step": 2531 }, { "epoch": 0.514738768042285, "grad_norm": 0.1146063432097435, "learning_rate": 0.0001486219871860063, "loss": 1.0362, "step": 2532 }, { "epoch": 0.5149420613945924, "grad_norm": 0.13759560883045197, "learning_rate": 0.00014860164751347504, "loss": 1.0423, "step": 2533 }, { "epoch": 0.5151453547468998, "grad_norm": 0.1348053216934204, "learning_rate": 0.00014858130784094377, "loss": 1.0733, "step": 2534 }, { "epoch": 0.5153486480992071, "grad_norm": 0.12033452838659286, "learning_rate": 0.0001485609681684125, "loss": 0.9471, "step": 2535 }, { "epoch": 0.5155519414515145, "grad_norm": 0.12116893380880356, "learning_rate": 0.00014854062849588122, "loss": 0.8554, "step": 2536 }, { "epoch": 0.5157552348038219, "grad_norm": 0.13480456173419952, "learning_rate": 0.00014852028882334994, "loss": 1.0257, "step": 2537 }, { "epoch": 0.5159585281561293, "grad_norm": 0.1279120147228241, "learning_rate": 0.0001484999491508187, "loss": 1.1841, "step": 2538 }, { "epoch": 0.5161618215084367, "grad_norm": 0.12960465252399445, "learning_rate": 0.00014847960947828742, "loss": 1.022, "step": 2539 }, { "epoch": 0.5163651148607441, "grad_norm": 0.12386467307806015, "learning_rate": 0.00014845926980575614, "loss": 0.9364, "step": 2540 }, { "epoch": 0.5165684082130514, "grad_norm": 0.1340230405330658, "learning_rate": 0.00014843893013322484, "loss": 1.1693, "step": 2541 }, { "epoch": 0.5167717015653588, "grad_norm": 0.13475503027439117, "learning_rate": 0.0001484185904606936, "loss": 1.1208, "step": 2542 }, { "epoch": 0.5169749949176662, "grad_norm": 0.13605645298957825, "learning_rate": 0.00014839825078816232, "loss": 1.0327, "step": 2543 }, { "epoch": 0.5171782882699736, "grad_norm": 0.11159854382276535, "learning_rate": 0.00014837791111563104, "loss": 0.9095, "step": 2544 }, { "epoch": 0.517381581622281, "grad_norm": 0.12562917172908783, "learning_rate": 0.00014835757144309976, "loss": 1.0037, "step": 2545 }, { "epoch": 0.5175848749745884, "grad_norm": 0.12805363535881042, "learning_rate": 0.00014833723177056852, "loss": 1.053, "step": 2546 }, { "epoch": 0.5177881683268957, "grad_norm": 0.1303015947341919, "learning_rate": 0.00014831689209803724, "loss": 1.0277, "step": 2547 }, { "epoch": 0.5179914616792031, "grad_norm": 0.13903219997882843, "learning_rate": 0.00014829655242550597, "loss": 1.1639, "step": 2548 }, { "epoch": 0.5181947550315105, "grad_norm": 0.13119028508663177, "learning_rate": 0.00014827621275297466, "loss": 0.9134, "step": 2549 }, { "epoch": 0.5183980483838179, "grad_norm": 0.12713825702667236, "learning_rate": 0.00014825587308044341, "loss": 1.0313, "step": 2550 }, { "epoch": 0.5186013417361253, "grad_norm": 0.13641834259033203, "learning_rate": 0.00014823553340791214, "loss": 1.0787, "step": 2551 }, { "epoch": 0.5188046350884326, "grad_norm": 0.1124555915594101, "learning_rate": 0.00014821519373538086, "loss": 0.9135, "step": 2552 }, { "epoch": 0.51900792844074, "grad_norm": 0.10946158319711685, "learning_rate": 0.0001481948540628496, "loss": 0.8105, "step": 2553 }, { "epoch": 0.5192112217930474, "grad_norm": 0.12753844261169434, "learning_rate": 0.00014817451439031834, "loss": 1.0308, "step": 2554 }, { "epoch": 0.5194145151453547, "grad_norm": 0.14424805343151093, "learning_rate": 0.00014815417471778706, "loss": 1.0104, "step": 2555 }, { "epoch": 0.5196178084976621, "grad_norm": 0.13107620179653168, "learning_rate": 0.0001481338350452558, "loss": 1.1754, "step": 2556 }, { "epoch": 0.5198211018499695, "grad_norm": 0.11977977305650711, "learning_rate": 0.00014811349537272449, "loss": 1.0019, "step": 2557 }, { "epoch": 0.5200243952022768, "grad_norm": 0.11917620897293091, "learning_rate": 0.00014809315570019324, "loss": 0.9636, "step": 2558 }, { "epoch": 0.5202276885545842, "grad_norm": 0.12576279044151306, "learning_rate": 0.00014807281602766196, "loss": 1.1341, "step": 2559 }, { "epoch": 0.5204309819068916, "grad_norm": 0.1402411311864853, "learning_rate": 0.00014805247635513069, "loss": 1.1186, "step": 2560 }, { "epoch": 0.520634275259199, "grad_norm": 0.15055212378501892, "learning_rate": 0.0001480321366825994, "loss": 1.1878, "step": 2561 }, { "epoch": 0.5208375686115064, "grad_norm": 0.11402598023414612, "learning_rate": 0.00014801179701006816, "loss": 0.7465, "step": 2562 }, { "epoch": 0.5210408619638138, "grad_norm": 0.12650637328624725, "learning_rate": 0.0001479914573375369, "loss": 1.0605, "step": 2563 }, { "epoch": 0.5212441553161211, "grad_norm": 0.13538390398025513, "learning_rate": 0.0001479711176650056, "loss": 0.9924, "step": 2564 }, { "epoch": 0.5214474486684285, "grad_norm": 0.12981672585010529, "learning_rate": 0.0001479507779924743, "loss": 1.0908, "step": 2565 }, { "epoch": 0.5216507420207359, "grad_norm": 0.13389542698860168, "learning_rate": 0.00014793043831994306, "loss": 1.0369, "step": 2566 }, { "epoch": 0.5218540353730433, "grad_norm": 0.1256348341703415, "learning_rate": 0.00014791009864741178, "loss": 1.1209, "step": 2567 }, { "epoch": 0.5220573287253507, "grad_norm": 0.13984240591526031, "learning_rate": 0.0001478897589748805, "loss": 1.2199, "step": 2568 }, { "epoch": 0.5222606220776581, "grad_norm": 0.12872397899627686, "learning_rate": 0.00014786941930234923, "loss": 1.0793, "step": 2569 }, { "epoch": 0.5224639154299654, "grad_norm": 0.12694962322711945, "learning_rate": 0.00014784907962981798, "loss": 0.9623, "step": 2570 }, { "epoch": 0.5226672087822728, "grad_norm": 0.13034392893314362, "learning_rate": 0.0001478287399572867, "loss": 1.2404, "step": 2571 }, { "epoch": 0.5228705021345802, "grad_norm": 0.1416521519422531, "learning_rate": 0.00014780840028475543, "loss": 1.2426, "step": 2572 }, { "epoch": 0.5230737954868876, "grad_norm": 0.12421387434005737, "learning_rate": 0.00014778806061222413, "loss": 1.0685, "step": 2573 }, { "epoch": 0.523277088839195, "grad_norm": 0.1387767344713211, "learning_rate": 0.00014776772093969288, "loss": 1.202, "step": 2574 }, { "epoch": 0.5234803821915024, "grad_norm": 0.13308827579021454, "learning_rate": 0.0001477473812671616, "loss": 1.2395, "step": 2575 }, { "epoch": 0.5236836755438097, "grad_norm": 0.15293751657009125, "learning_rate": 0.00014772704159463033, "loss": 1.1062, "step": 2576 }, { "epoch": 0.5238869688961171, "grad_norm": 0.1332782655954361, "learning_rate": 0.00014770670192209906, "loss": 1.1205, "step": 2577 }, { "epoch": 0.5240902622484245, "grad_norm": 0.11857607960700989, "learning_rate": 0.0001476863622495678, "loss": 1.1111, "step": 2578 }, { "epoch": 0.5242935556007319, "grad_norm": 0.13509269058704376, "learning_rate": 0.00014766602257703653, "loss": 1.0806, "step": 2579 }, { "epoch": 0.5244968489530393, "grad_norm": 0.12904144823551178, "learning_rate": 0.00014764568290450526, "loss": 1.021, "step": 2580 }, { "epoch": 0.5247001423053466, "grad_norm": 0.1381101906299591, "learning_rate": 0.00014762534323197395, "loss": 1.2025, "step": 2581 }, { "epoch": 0.524903435657654, "grad_norm": 0.13160142302513123, "learning_rate": 0.00014760500355944268, "loss": 1.0126, "step": 2582 }, { "epoch": 0.5251067290099614, "grad_norm": 0.14287696778774261, "learning_rate": 0.00014758466388691143, "loss": 1.1466, "step": 2583 }, { "epoch": 0.5253100223622688, "grad_norm": 0.13337363302707672, "learning_rate": 0.00014756432421438015, "loss": 1.0036, "step": 2584 }, { "epoch": 0.5255133157145762, "grad_norm": 0.14575807750225067, "learning_rate": 0.00014754398454184888, "loss": 1.1033, "step": 2585 }, { "epoch": 0.5257166090668836, "grad_norm": 0.12519006431102753, "learning_rate": 0.0001475236448693176, "loss": 1.004, "step": 2586 }, { "epoch": 0.5259199024191908, "grad_norm": 0.12951436638832092, "learning_rate": 0.00014750330519678635, "loss": 0.9679, "step": 2587 }, { "epoch": 0.5261231957714982, "grad_norm": 0.1465519517660141, "learning_rate": 0.00014748296552425508, "loss": 1.19, "step": 2588 }, { "epoch": 0.5263264891238056, "grad_norm": 0.12192967534065247, "learning_rate": 0.00014746262585172378, "loss": 1.0086, "step": 2589 }, { "epoch": 0.526529782476113, "grad_norm": 0.13444490730762482, "learning_rate": 0.0001474422861791925, "loss": 0.9185, "step": 2590 }, { "epoch": 0.5267330758284204, "grad_norm": 0.13128428161144257, "learning_rate": 0.00014742194650666125, "loss": 0.8979, "step": 2591 }, { "epoch": 0.5269363691807278, "grad_norm": 0.14445891976356506, "learning_rate": 0.00014740160683412998, "loss": 1.1736, "step": 2592 }, { "epoch": 0.5271396625330351, "grad_norm": 0.13069060444831848, "learning_rate": 0.0001473812671615987, "loss": 1.0791, "step": 2593 }, { "epoch": 0.5273429558853425, "grad_norm": 0.11903716623783112, "learning_rate": 0.00014736092748906743, "loss": 1.0983, "step": 2594 }, { "epoch": 0.5275462492376499, "grad_norm": 0.14502301812171936, "learning_rate": 0.00014734058781653618, "loss": 1.1266, "step": 2595 }, { "epoch": 0.5277495425899573, "grad_norm": 0.12276476621627808, "learning_rate": 0.0001473202481440049, "loss": 0.9658, "step": 2596 }, { "epoch": 0.5279528359422647, "grad_norm": 0.1322438269853592, "learning_rate": 0.00014729990847147363, "loss": 1.068, "step": 2597 }, { "epoch": 0.5281561292945721, "grad_norm": 0.12933704257011414, "learning_rate": 0.00014727956879894232, "loss": 0.9955, "step": 2598 }, { "epoch": 0.5283594226468794, "grad_norm": 0.13503174483776093, "learning_rate": 0.00014725922912641108, "loss": 1.117, "step": 2599 }, { "epoch": 0.5285627159991868, "grad_norm": 0.13893373310565948, "learning_rate": 0.0001472388894538798, "loss": 1.1355, "step": 2600 }, { "epoch": 0.5287660093514942, "grad_norm": 0.13064657151699066, "learning_rate": 0.00014721854978134852, "loss": 0.9111, "step": 2601 }, { "epoch": 0.5289693027038016, "grad_norm": 0.13640174269676208, "learning_rate": 0.00014719821010881725, "loss": 1.1903, "step": 2602 }, { "epoch": 0.529172596056109, "grad_norm": 0.13113752007484436, "learning_rate": 0.000147177870436286, "loss": 0.926, "step": 2603 }, { "epoch": 0.5293758894084164, "grad_norm": 0.15011656284332275, "learning_rate": 0.00014715753076375472, "loss": 1.0861, "step": 2604 }, { "epoch": 0.5295791827607237, "grad_norm": 0.1330660730600357, "learning_rate": 0.00014713719109122345, "loss": 1.1027, "step": 2605 }, { "epoch": 0.5297824761130311, "grad_norm": 0.1252673864364624, "learning_rate": 0.00014711685141869215, "loss": 0.9304, "step": 2606 }, { "epoch": 0.5299857694653385, "grad_norm": 0.12724831700325012, "learning_rate": 0.0001470965117461609, "loss": 1.0286, "step": 2607 }, { "epoch": 0.5301890628176459, "grad_norm": 0.12352915853261948, "learning_rate": 0.00014707617207362962, "loss": 1.0159, "step": 2608 }, { "epoch": 0.5303923561699533, "grad_norm": 0.1302500218153, "learning_rate": 0.00014705583240109835, "loss": 1.0642, "step": 2609 }, { "epoch": 0.5305956495222606, "grad_norm": 0.12427016347646713, "learning_rate": 0.00014703549272856707, "loss": 0.9496, "step": 2610 }, { "epoch": 0.530798942874568, "grad_norm": 0.13810168206691742, "learning_rate": 0.00014701515305603582, "loss": 1.0421, "step": 2611 }, { "epoch": 0.5310022362268754, "grad_norm": 0.1359987109899521, "learning_rate": 0.00014699481338350455, "loss": 0.9605, "step": 2612 }, { "epoch": 0.5312055295791828, "grad_norm": 0.1282379925251007, "learning_rate": 0.00014697447371097327, "loss": 1.0654, "step": 2613 }, { "epoch": 0.5314088229314902, "grad_norm": 0.1283995509147644, "learning_rate": 0.00014695413403844197, "loss": 1.0312, "step": 2614 }, { "epoch": 0.5316121162837976, "grad_norm": 0.12052475661039352, "learning_rate": 0.00014693379436591072, "loss": 1.1057, "step": 2615 }, { "epoch": 0.5318154096361049, "grad_norm": 0.13645312190055847, "learning_rate": 0.00014691345469337945, "loss": 1.0701, "step": 2616 }, { "epoch": 0.5320187029884123, "grad_norm": 0.13875778019428253, "learning_rate": 0.00014689311502084817, "loss": 1.2868, "step": 2617 }, { "epoch": 0.5322219963407196, "grad_norm": 0.12762780487537384, "learning_rate": 0.0001468727753483169, "loss": 0.9696, "step": 2618 }, { "epoch": 0.532425289693027, "grad_norm": 0.14250846207141876, "learning_rate": 0.00014685243567578565, "loss": 1.1762, "step": 2619 }, { "epoch": 0.5326285830453344, "grad_norm": 0.10621387511491776, "learning_rate": 0.00014683209600325437, "loss": 0.8195, "step": 2620 }, { "epoch": 0.5328318763976418, "grad_norm": 0.14604990184307098, "learning_rate": 0.0001468117563307231, "loss": 1.0794, "step": 2621 }, { "epoch": 0.5330351697499491, "grad_norm": 0.13326723873615265, "learning_rate": 0.0001467914166581918, "loss": 1.0018, "step": 2622 }, { "epoch": 0.5332384631022565, "grad_norm": 0.12089519202709198, "learning_rate": 0.00014677107698566052, "loss": 1.0615, "step": 2623 }, { "epoch": 0.5334417564545639, "grad_norm": 0.1269814819097519, "learning_rate": 0.00014675073731312927, "loss": 0.9591, "step": 2624 }, { "epoch": 0.5336450498068713, "grad_norm": 0.13674674928188324, "learning_rate": 0.000146730397640598, "loss": 1.0315, "step": 2625 }, { "epoch": 0.5338483431591787, "grad_norm": 0.1372392177581787, "learning_rate": 0.00014671005796806672, "loss": 1.0483, "step": 2626 }, { "epoch": 0.5340516365114861, "grad_norm": 0.12088494002819061, "learning_rate": 0.00014668971829553544, "loss": 0.9172, "step": 2627 }, { "epoch": 0.5342549298637934, "grad_norm": 0.1240740641951561, "learning_rate": 0.0001466693786230042, "loss": 1.0149, "step": 2628 }, { "epoch": 0.5344582232161008, "grad_norm": 0.13450276851654053, "learning_rate": 0.00014664903895047292, "loss": 1.0719, "step": 2629 }, { "epoch": 0.5346615165684082, "grad_norm": 0.12809321284294128, "learning_rate": 0.00014662869927794162, "loss": 0.9452, "step": 2630 }, { "epoch": 0.5348648099207156, "grad_norm": 0.1411091536283493, "learning_rate": 0.00014660835960541034, "loss": 1.2103, "step": 2631 }, { "epoch": 0.535068103273023, "grad_norm": 0.12086781114339828, "learning_rate": 0.0001465880199328791, "loss": 0.8857, "step": 2632 }, { "epoch": 0.5352713966253303, "grad_norm": 0.13093651831150055, "learning_rate": 0.00014656768026034782, "loss": 1.1842, "step": 2633 }, { "epoch": 0.5354746899776377, "grad_norm": 0.11652904748916626, "learning_rate": 0.00014654734058781654, "loss": 1.0096, "step": 2634 }, { "epoch": 0.5356779833299451, "grad_norm": 0.13243702054023743, "learning_rate": 0.00014652700091528526, "loss": 1.1378, "step": 2635 }, { "epoch": 0.5358812766822525, "grad_norm": 0.14085280895233154, "learning_rate": 0.00014650666124275402, "loss": 1.0759, "step": 2636 }, { "epoch": 0.5360845700345599, "grad_norm": 0.126717671751976, "learning_rate": 0.00014648632157022274, "loss": 1.0012, "step": 2637 }, { "epoch": 0.5362878633868673, "grad_norm": 0.12660568952560425, "learning_rate": 0.00014646598189769144, "loss": 0.9195, "step": 2638 }, { "epoch": 0.5364911567391746, "grad_norm": 0.12521329522132874, "learning_rate": 0.00014644564222516016, "loss": 1.1181, "step": 2639 }, { "epoch": 0.536694450091482, "grad_norm": 0.1392340511083603, "learning_rate": 0.00014642530255262891, "loss": 1.1255, "step": 2640 }, { "epoch": 0.5368977434437894, "grad_norm": 0.1406872570514679, "learning_rate": 0.00014640496288009764, "loss": 1.0753, "step": 2641 }, { "epoch": 0.5371010367960968, "grad_norm": 0.12615209817886353, "learning_rate": 0.00014638462320756636, "loss": 0.9859, "step": 2642 }, { "epoch": 0.5373043301484042, "grad_norm": 0.12144862115383148, "learning_rate": 0.0001463642835350351, "loss": 0.9186, "step": 2643 }, { "epoch": 0.5375076235007116, "grad_norm": 0.12902086973190308, "learning_rate": 0.00014634394386250384, "loss": 1.1739, "step": 2644 }, { "epoch": 0.5377109168530189, "grad_norm": 0.12960048019886017, "learning_rate": 0.00014632360418997256, "loss": 1.0439, "step": 2645 }, { "epoch": 0.5379142102053263, "grad_norm": 0.12488772720098495, "learning_rate": 0.00014630326451744126, "loss": 1.0363, "step": 2646 }, { "epoch": 0.5381175035576337, "grad_norm": 0.14255747199058533, "learning_rate": 0.00014628292484490999, "loss": 1.1972, "step": 2647 }, { "epoch": 0.5383207969099411, "grad_norm": 0.11950040608644485, "learning_rate": 0.00014626258517237874, "loss": 0.931, "step": 2648 }, { "epoch": 0.5385240902622485, "grad_norm": 0.1382722705602646, "learning_rate": 0.00014624224549984746, "loss": 1.1622, "step": 2649 }, { "epoch": 0.5387273836145559, "grad_norm": 0.13348785042762756, "learning_rate": 0.00014622190582731619, "loss": 1.1186, "step": 2650 }, { "epoch": 0.5389306769668631, "grad_norm": 0.1255137175321579, "learning_rate": 0.0001462015661547849, "loss": 1.1545, "step": 2651 }, { "epoch": 0.5391339703191705, "grad_norm": 0.12063666433095932, "learning_rate": 0.00014618122648225366, "loss": 0.9628, "step": 2652 }, { "epoch": 0.5393372636714779, "grad_norm": 0.1361551582813263, "learning_rate": 0.00014616088680972239, "loss": 1.1738, "step": 2653 }, { "epoch": 0.5395405570237853, "grad_norm": 0.14640627801418304, "learning_rate": 0.0001461405471371911, "loss": 1.2436, "step": 2654 }, { "epoch": 0.5397438503760927, "grad_norm": 0.13391757011413574, "learning_rate": 0.0001461202074646598, "loss": 1.0063, "step": 2655 }, { "epoch": 0.5399471437284001, "grad_norm": 0.13022476434707642, "learning_rate": 0.00014609986779212856, "loss": 0.9757, "step": 2656 }, { "epoch": 0.5401504370807074, "grad_norm": 0.12605974078178406, "learning_rate": 0.00014607952811959728, "loss": 1.0168, "step": 2657 }, { "epoch": 0.5403537304330148, "grad_norm": 0.12972256541252136, "learning_rate": 0.000146059188447066, "loss": 1.1375, "step": 2658 }, { "epoch": 0.5405570237853222, "grad_norm": 0.12093812972307205, "learning_rate": 0.00014603884877453473, "loss": 1.0279, "step": 2659 }, { "epoch": 0.5407603171376296, "grad_norm": 0.13197238743305206, "learning_rate": 0.00014601850910200348, "loss": 1.1084, "step": 2660 }, { "epoch": 0.540963610489937, "grad_norm": 0.14289307594299316, "learning_rate": 0.0001459981694294722, "loss": 0.9985, "step": 2661 }, { "epoch": 0.5411669038422443, "grad_norm": 0.12929311394691467, "learning_rate": 0.00014597782975694093, "loss": 1.1129, "step": 2662 }, { "epoch": 0.5413701971945517, "grad_norm": 0.12893937528133392, "learning_rate": 0.00014595749008440963, "loss": 0.9588, "step": 2663 }, { "epoch": 0.5415734905468591, "grad_norm": 0.1215519979596138, "learning_rate": 0.00014593715041187836, "loss": 1.0356, "step": 2664 }, { "epoch": 0.5417767838991665, "grad_norm": 0.12775017321109772, "learning_rate": 0.0001459168107393471, "loss": 0.9293, "step": 2665 }, { "epoch": 0.5419800772514739, "grad_norm": 0.13559330999851227, "learning_rate": 0.00014589647106681583, "loss": 1.0579, "step": 2666 }, { "epoch": 0.5421833706037813, "grad_norm": 0.13883750140666962, "learning_rate": 0.00014587613139428456, "loss": 1.1288, "step": 2667 }, { "epoch": 0.5423866639560886, "grad_norm": 0.12956243753433228, "learning_rate": 0.00014585579172175328, "loss": 1.0156, "step": 2668 }, { "epoch": 0.542589957308396, "grad_norm": 0.12133780866861343, "learning_rate": 0.00014583545204922203, "loss": 1.0124, "step": 2669 }, { "epoch": 0.5427932506607034, "grad_norm": 0.13446684181690216, "learning_rate": 0.00014581511237669076, "loss": 1.1288, "step": 2670 }, { "epoch": 0.5429965440130108, "grad_norm": 0.1329856663942337, "learning_rate": 0.00014579477270415945, "loss": 1.009, "step": 2671 }, { "epoch": 0.5431998373653182, "grad_norm": 0.1257416158914566, "learning_rate": 0.00014577443303162818, "loss": 0.9678, "step": 2672 }, { "epoch": 0.5434031307176256, "grad_norm": 0.118684783577919, "learning_rate": 0.00014575409335909693, "loss": 0.9732, "step": 2673 }, { "epoch": 0.5436064240699329, "grad_norm": 0.12146252393722534, "learning_rate": 0.00014573375368656565, "loss": 1.0225, "step": 2674 }, { "epoch": 0.5438097174222403, "grad_norm": 0.13205134868621826, "learning_rate": 0.00014571341401403438, "loss": 1.1626, "step": 2675 }, { "epoch": 0.5440130107745477, "grad_norm": 0.1180446445941925, "learning_rate": 0.0001456930743415031, "loss": 0.9787, "step": 2676 }, { "epoch": 0.5442163041268551, "grad_norm": 0.12436480075120926, "learning_rate": 0.00014567273466897185, "loss": 0.9211, "step": 2677 }, { "epoch": 0.5444195974791625, "grad_norm": 0.13441622257232666, "learning_rate": 0.00014565239499644058, "loss": 1.082, "step": 2678 }, { "epoch": 0.5446228908314699, "grad_norm": 0.13546496629714966, "learning_rate": 0.00014563205532390928, "loss": 0.9564, "step": 2679 }, { "epoch": 0.5448261841837772, "grad_norm": 0.13210104405879974, "learning_rate": 0.000145611715651378, "loss": 1.1043, "step": 2680 }, { "epoch": 0.5450294775360846, "grad_norm": 0.12021714448928833, "learning_rate": 0.00014559137597884675, "loss": 1.0093, "step": 2681 }, { "epoch": 0.545232770888392, "grad_norm": 0.14060239493846893, "learning_rate": 0.00014557103630631548, "loss": 0.8708, "step": 2682 }, { "epoch": 0.5454360642406993, "grad_norm": 0.10503846406936646, "learning_rate": 0.0001455506966337842, "loss": 0.8049, "step": 2683 }, { "epoch": 0.5456393575930067, "grad_norm": 0.1391855627298355, "learning_rate": 0.00014553035696125293, "loss": 1.1862, "step": 2684 }, { "epoch": 0.545842650945314, "grad_norm": 0.13078033924102783, "learning_rate": 0.00014551001728872168, "loss": 1.0238, "step": 2685 }, { "epoch": 0.5460459442976214, "grad_norm": 0.12442688643932343, "learning_rate": 0.0001454896776161904, "loss": 0.9966, "step": 2686 }, { "epoch": 0.5462492376499288, "grad_norm": 0.11848010122776031, "learning_rate": 0.0001454693379436591, "loss": 0.9754, "step": 2687 }, { "epoch": 0.5464525310022362, "grad_norm": 0.13601583242416382, "learning_rate": 0.00014544899827112782, "loss": 1.0994, "step": 2688 }, { "epoch": 0.5466558243545436, "grad_norm": 0.13946221768856049, "learning_rate": 0.00014542865859859658, "loss": 1.1127, "step": 2689 }, { "epoch": 0.546859117706851, "grad_norm": 0.1452294886112213, "learning_rate": 0.0001454083189260653, "loss": 1.146, "step": 2690 }, { "epoch": 0.5470624110591583, "grad_norm": 0.14542357623577118, "learning_rate": 0.00014538797925353402, "loss": 1.1916, "step": 2691 }, { "epoch": 0.5472657044114657, "grad_norm": 0.11541703343391418, "learning_rate": 0.00014536763958100275, "loss": 0.9437, "step": 2692 }, { "epoch": 0.5474689977637731, "grad_norm": 0.12495800107717514, "learning_rate": 0.0001453472999084715, "loss": 1.0014, "step": 2693 }, { "epoch": 0.5476722911160805, "grad_norm": 0.13895189762115479, "learning_rate": 0.00014532696023594022, "loss": 1.1139, "step": 2694 }, { "epoch": 0.5478755844683879, "grad_norm": 0.12779201567173004, "learning_rate": 0.00014530662056340892, "loss": 1.0702, "step": 2695 }, { "epoch": 0.5480788778206953, "grad_norm": 0.14240634441375732, "learning_rate": 0.00014528628089087765, "loss": 1.1233, "step": 2696 }, { "epoch": 0.5482821711730026, "grad_norm": 0.12415528297424316, "learning_rate": 0.0001452659412183464, "loss": 1.0954, "step": 2697 }, { "epoch": 0.54848546452531, "grad_norm": 0.13816578686237335, "learning_rate": 0.00014524560154581512, "loss": 1.2772, "step": 2698 }, { "epoch": 0.5486887578776174, "grad_norm": 0.12729184329509735, "learning_rate": 0.00014522526187328385, "loss": 1.0519, "step": 2699 }, { "epoch": 0.5488920512299248, "grad_norm": 0.12732116878032684, "learning_rate": 0.00014520492220075257, "loss": 1.0562, "step": 2700 }, { "epoch": 0.5490953445822322, "grad_norm": 0.12312710285186768, "learning_rate": 0.00014518458252822132, "loss": 1.0082, "step": 2701 }, { "epoch": 0.5492986379345396, "grad_norm": 0.1302732676267624, "learning_rate": 0.00014516424285569005, "loss": 1.1847, "step": 2702 }, { "epoch": 0.5495019312868469, "grad_norm": 0.13683298230171204, "learning_rate": 0.00014514390318315874, "loss": 1.1784, "step": 2703 }, { "epoch": 0.5497052246391543, "grad_norm": 0.1429167538881302, "learning_rate": 0.00014512356351062747, "loss": 1.2792, "step": 2704 }, { "epoch": 0.5499085179914617, "grad_norm": 0.136098712682724, "learning_rate": 0.0001451032238380962, "loss": 1.2149, "step": 2705 }, { "epoch": 0.5501118113437691, "grad_norm": 0.1201593205332756, "learning_rate": 0.00014508288416556495, "loss": 0.9673, "step": 2706 }, { "epoch": 0.5503151046960765, "grad_norm": 0.10153687000274658, "learning_rate": 0.00014506254449303367, "loss": 0.8142, "step": 2707 }, { "epoch": 0.5505183980483839, "grad_norm": 0.11609897762537003, "learning_rate": 0.0001450422048205024, "loss": 1.0168, "step": 2708 }, { "epoch": 0.5507216914006912, "grad_norm": 0.12856177985668182, "learning_rate": 0.00014502186514797112, "loss": 1.1785, "step": 2709 }, { "epoch": 0.5509249847529986, "grad_norm": 0.11091580241918564, "learning_rate": 0.00014500152547543987, "loss": 0.9282, "step": 2710 }, { "epoch": 0.551128278105306, "grad_norm": 0.13458651304244995, "learning_rate": 0.0001449811858029086, "loss": 1.1647, "step": 2711 }, { "epoch": 0.5513315714576134, "grad_norm": 0.12265376001596451, "learning_rate": 0.0001449608461303773, "loss": 1.0149, "step": 2712 }, { "epoch": 0.5515348648099208, "grad_norm": 0.12033109366893768, "learning_rate": 0.00014494050645784602, "loss": 0.9313, "step": 2713 }, { "epoch": 0.551738158162228, "grad_norm": 0.13308046758174896, "learning_rate": 0.00014492016678531477, "loss": 1.044, "step": 2714 }, { "epoch": 0.5519414515145354, "grad_norm": 0.12852205336093903, "learning_rate": 0.0001448998271127835, "loss": 1.0578, "step": 2715 }, { "epoch": 0.5521447448668428, "grad_norm": 0.13972130417823792, "learning_rate": 0.00014487948744025222, "loss": 1.0903, "step": 2716 }, { "epoch": 0.5523480382191502, "grad_norm": 0.14152394235134125, "learning_rate": 0.00014485914776772094, "loss": 1.2561, "step": 2717 }, { "epoch": 0.5525513315714576, "grad_norm": 0.1381314992904663, "learning_rate": 0.0001448388080951897, "loss": 1.1794, "step": 2718 }, { "epoch": 0.552754624923765, "grad_norm": 0.11829142272472382, "learning_rate": 0.00014481846842265842, "loss": 0.9385, "step": 2719 }, { "epoch": 0.5529579182760723, "grad_norm": 0.13279980421066284, "learning_rate": 0.00014479812875012711, "loss": 1.0034, "step": 2720 }, { "epoch": 0.5531612116283797, "grad_norm": 0.1229550689458847, "learning_rate": 0.00014477778907759584, "loss": 0.9929, "step": 2721 }, { "epoch": 0.5533645049806871, "grad_norm": 0.12663327157497406, "learning_rate": 0.0001447574494050646, "loss": 1.0095, "step": 2722 }, { "epoch": 0.5535677983329945, "grad_norm": 0.14191538095474243, "learning_rate": 0.00014473710973253332, "loss": 1.0165, "step": 2723 }, { "epoch": 0.5537710916853019, "grad_norm": 0.12460799515247345, "learning_rate": 0.00014471677006000204, "loss": 0.8327, "step": 2724 }, { "epoch": 0.5539743850376093, "grad_norm": 0.11980767548084259, "learning_rate": 0.00014469643038747076, "loss": 0.9807, "step": 2725 }, { "epoch": 0.5541776783899166, "grad_norm": 0.12429416179656982, "learning_rate": 0.00014467609071493952, "loss": 1.0604, "step": 2726 }, { "epoch": 0.554380971742224, "grad_norm": 0.14179259538650513, "learning_rate": 0.00014465575104240824, "loss": 1.1217, "step": 2727 }, { "epoch": 0.5545842650945314, "grad_norm": 0.12223639339208603, "learning_rate": 0.00014463541136987694, "loss": 0.9083, "step": 2728 }, { "epoch": 0.5547875584468388, "grad_norm": 0.13745662569999695, "learning_rate": 0.00014461507169734566, "loss": 1.0864, "step": 2729 }, { "epoch": 0.5549908517991462, "grad_norm": 0.12111254036426544, "learning_rate": 0.00014459473202481441, "loss": 0.995, "step": 2730 }, { "epoch": 0.5551941451514536, "grad_norm": 0.14073847234249115, "learning_rate": 0.00014457439235228314, "loss": 1.0003, "step": 2731 }, { "epoch": 0.5553974385037609, "grad_norm": 0.13188788294792175, "learning_rate": 0.00014455405267975186, "loss": 1.1628, "step": 2732 }, { "epoch": 0.5556007318560683, "grad_norm": 0.10727431625127792, "learning_rate": 0.0001445337130072206, "loss": 0.9385, "step": 2733 }, { "epoch": 0.5558040252083757, "grad_norm": 0.12671469151973724, "learning_rate": 0.00014451337333468934, "loss": 0.984, "step": 2734 }, { "epoch": 0.5560073185606831, "grad_norm": 0.12647178769111633, "learning_rate": 0.00014449303366215806, "loss": 1.0865, "step": 2735 }, { "epoch": 0.5562106119129905, "grad_norm": 0.1198342889547348, "learning_rate": 0.00014447269398962676, "loss": 1.0589, "step": 2736 }, { "epoch": 0.5564139052652978, "grad_norm": 0.13245652616024017, "learning_rate": 0.00014445235431709548, "loss": 0.9953, "step": 2737 }, { "epoch": 0.5566171986176052, "grad_norm": 0.11206847429275513, "learning_rate": 0.00014443201464456424, "loss": 0.8762, "step": 2738 }, { "epoch": 0.5568204919699126, "grad_norm": 0.16584132611751556, "learning_rate": 0.00014441167497203296, "loss": 1.2808, "step": 2739 }, { "epoch": 0.55702378532222, "grad_norm": 0.1278923898935318, "learning_rate": 0.00014439133529950169, "loss": 1.1515, "step": 2740 }, { "epoch": 0.5572270786745274, "grad_norm": 0.1336185187101364, "learning_rate": 0.0001443709956269704, "loss": 1.0372, "step": 2741 }, { "epoch": 0.5574303720268348, "grad_norm": 0.13731592893600464, "learning_rate": 0.00014435065595443916, "loss": 1.0837, "step": 2742 }, { "epoch": 0.557633665379142, "grad_norm": 0.13053496181964874, "learning_rate": 0.00014433031628190789, "loss": 0.9402, "step": 2743 }, { "epoch": 0.5578369587314495, "grad_norm": 0.14074589312076569, "learning_rate": 0.00014430997660937658, "loss": 1.1168, "step": 2744 }, { "epoch": 0.5580402520837568, "grad_norm": 0.1500421017408371, "learning_rate": 0.0001442896369368453, "loss": 1.0726, "step": 2745 }, { "epoch": 0.5582435454360642, "grad_norm": 0.14489975571632385, "learning_rate": 0.00014426929726431403, "loss": 1.263, "step": 2746 }, { "epoch": 0.5584468387883716, "grad_norm": 0.14446121454238892, "learning_rate": 0.00014424895759178278, "loss": 1.2142, "step": 2747 }, { "epoch": 0.558650132140679, "grad_norm": 0.13410677015781403, "learning_rate": 0.0001442286179192515, "loss": 1.0715, "step": 2748 }, { "epoch": 0.5588534254929863, "grad_norm": 0.1425483077764511, "learning_rate": 0.00014420827824672023, "loss": 0.9858, "step": 2749 }, { "epoch": 0.5590567188452937, "grad_norm": 0.13073715567588806, "learning_rate": 0.00014418793857418896, "loss": 1.0657, "step": 2750 }, { "epoch": 0.5592600121976011, "grad_norm": 0.1257767379283905, "learning_rate": 0.0001441675989016577, "loss": 1.0749, "step": 2751 }, { "epoch": 0.5594633055499085, "grad_norm": 0.1408379077911377, "learning_rate": 0.0001441472592291264, "loss": 1.1656, "step": 2752 }, { "epoch": 0.5596665989022159, "grad_norm": 0.13191954791545868, "learning_rate": 0.00014412691955659513, "loss": 1.0291, "step": 2753 }, { "epoch": 0.5598698922545233, "grad_norm": 0.12902916967868805, "learning_rate": 0.00014410657988406385, "loss": 1.075, "step": 2754 }, { "epoch": 0.5600731856068306, "grad_norm": 0.13078373670578003, "learning_rate": 0.0001440862402115326, "loss": 1.1693, "step": 2755 }, { "epoch": 0.560276478959138, "grad_norm": 0.1379525512456894, "learning_rate": 0.00014406590053900133, "loss": 1.1614, "step": 2756 }, { "epoch": 0.5604797723114454, "grad_norm": 0.12570443749427795, "learning_rate": 0.00014404556086647006, "loss": 1.0245, "step": 2757 }, { "epoch": 0.5606830656637528, "grad_norm": 0.13668902218341827, "learning_rate": 0.00014402522119393878, "loss": 1.1636, "step": 2758 }, { "epoch": 0.5608863590160602, "grad_norm": 0.13914015889167786, "learning_rate": 0.00014400488152140753, "loss": 1.0136, "step": 2759 }, { "epoch": 0.5610896523683676, "grad_norm": 0.13811741769313812, "learning_rate": 0.00014398454184887623, "loss": 1.2955, "step": 2760 }, { "epoch": 0.5612929457206749, "grad_norm": 0.14095258712768555, "learning_rate": 0.00014396420217634495, "loss": 1.1373, "step": 2761 }, { "epoch": 0.5614962390729823, "grad_norm": 0.11365115642547607, "learning_rate": 0.00014394386250381368, "loss": 0.827, "step": 2762 }, { "epoch": 0.5616995324252897, "grad_norm": 0.1321718692779541, "learning_rate": 0.00014392352283128243, "loss": 1.0739, "step": 2763 }, { "epoch": 0.5619028257775971, "grad_norm": 0.13008981943130493, "learning_rate": 0.00014390318315875115, "loss": 1.0253, "step": 2764 }, { "epoch": 0.5621061191299045, "grad_norm": 0.11360891908407211, "learning_rate": 0.00014388284348621988, "loss": 0.8921, "step": 2765 }, { "epoch": 0.5623094124822118, "grad_norm": 0.1246936097741127, "learning_rate": 0.0001438625038136886, "loss": 0.8497, "step": 2766 }, { "epoch": 0.5625127058345192, "grad_norm": 0.14330574870109558, "learning_rate": 0.00014384216414115735, "loss": 1.1843, "step": 2767 }, { "epoch": 0.5627159991868266, "grad_norm": 0.1149834543466568, "learning_rate": 0.00014382182446862608, "loss": 0.8757, "step": 2768 }, { "epoch": 0.562919292539134, "grad_norm": 0.13841336965560913, "learning_rate": 0.00014380148479609478, "loss": 1.0849, "step": 2769 }, { "epoch": 0.5631225858914414, "grad_norm": 0.12189842760562897, "learning_rate": 0.0001437811451235635, "loss": 1.0182, "step": 2770 }, { "epoch": 0.5633258792437488, "grad_norm": 0.14273017644882202, "learning_rate": 0.00014376080545103225, "loss": 1.1301, "step": 2771 }, { "epoch": 0.5635291725960561, "grad_norm": 0.13799621164798737, "learning_rate": 0.00014374046577850098, "loss": 1.0078, "step": 2772 }, { "epoch": 0.5637324659483635, "grad_norm": 0.1299772560596466, "learning_rate": 0.0001437201261059697, "loss": 0.9765, "step": 2773 }, { "epoch": 0.5639357593006709, "grad_norm": 0.13939563930034637, "learning_rate": 0.00014369978643343843, "loss": 1.1519, "step": 2774 }, { "epoch": 0.5641390526529783, "grad_norm": 0.14570674300193787, "learning_rate": 0.00014367944676090718, "loss": 1.0858, "step": 2775 }, { "epoch": 0.5643423460052857, "grad_norm": 0.12805186212062836, "learning_rate": 0.0001436591070883759, "loss": 0.916, "step": 2776 }, { "epoch": 0.564545639357593, "grad_norm": 0.12251273542642593, "learning_rate": 0.0001436387674158446, "loss": 1.0465, "step": 2777 }, { "epoch": 0.5647489327099003, "grad_norm": 0.1256076544523239, "learning_rate": 0.00014361842774331332, "loss": 0.9972, "step": 2778 }, { "epoch": 0.5649522260622077, "grad_norm": 0.12593501806259155, "learning_rate": 0.00014359808807078207, "loss": 0.961, "step": 2779 }, { "epoch": 0.5651555194145151, "grad_norm": 0.1273297369480133, "learning_rate": 0.0001435777483982508, "loss": 0.9951, "step": 2780 }, { "epoch": 0.5653588127668225, "grad_norm": 0.1263994574546814, "learning_rate": 0.00014355740872571952, "loss": 1.0616, "step": 2781 }, { "epoch": 0.5655621061191299, "grad_norm": 0.11736489087343216, "learning_rate": 0.00014353706905318825, "loss": 0.9839, "step": 2782 }, { "epoch": 0.5657653994714373, "grad_norm": 0.12970155477523804, "learning_rate": 0.000143516729380657, "loss": 0.9299, "step": 2783 }, { "epoch": 0.5659686928237446, "grad_norm": 0.13361741602420807, "learning_rate": 0.00014349638970812572, "loss": 1.0209, "step": 2784 }, { "epoch": 0.566171986176052, "grad_norm": 0.13938020169734955, "learning_rate": 0.00014347605003559442, "loss": 1.0303, "step": 2785 }, { "epoch": 0.5663752795283594, "grad_norm": 0.13315965235233307, "learning_rate": 0.00014345571036306315, "loss": 1.1152, "step": 2786 }, { "epoch": 0.5665785728806668, "grad_norm": 0.14047378301620483, "learning_rate": 0.00014343537069053187, "loss": 1.2173, "step": 2787 }, { "epoch": 0.5667818662329742, "grad_norm": 0.1367003172636032, "learning_rate": 0.00014341503101800062, "loss": 1.0284, "step": 2788 }, { "epoch": 0.5669851595852815, "grad_norm": 0.1463545858860016, "learning_rate": 0.00014339469134546935, "loss": 1.0506, "step": 2789 }, { "epoch": 0.5671884529375889, "grad_norm": 0.12741826474666595, "learning_rate": 0.00014337435167293807, "loss": 1.0226, "step": 2790 }, { "epoch": 0.5673917462898963, "grad_norm": 0.1232975423336029, "learning_rate": 0.0001433540120004068, "loss": 0.9259, "step": 2791 }, { "epoch": 0.5675950396422037, "grad_norm": 0.13350965082645416, "learning_rate": 0.00014333367232787555, "loss": 1.0739, "step": 2792 }, { "epoch": 0.5677983329945111, "grad_norm": 0.1262935996055603, "learning_rate": 0.00014331333265534424, "loss": 1.1412, "step": 2793 }, { "epoch": 0.5680016263468185, "grad_norm": 0.1304781287908554, "learning_rate": 0.00014329299298281297, "loss": 1.0605, "step": 2794 }, { "epoch": 0.5682049196991258, "grad_norm": 0.13018850982189178, "learning_rate": 0.0001432726533102817, "loss": 1.048, "step": 2795 }, { "epoch": 0.5684082130514332, "grad_norm": 0.13948385417461395, "learning_rate": 0.00014325231363775044, "loss": 1.2018, "step": 2796 }, { "epoch": 0.5686115064037406, "grad_norm": 0.1164885088801384, "learning_rate": 0.00014323197396521917, "loss": 0.9532, "step": 2797 }, { "epoch": 0.568814799756048, "grad_norm": 0.1407950073480606, "learning_rate": 0.0001432116342926879, "loss": 1.0816, "step": 2798 }, { "epoch": 0.5690180931083554, "grad_norm": 0.12568843364715576, "learning_rate": 0.00014319129462015662, "loss": 0.9222, "step": 2799 }, { "epoch": 0.5692213864606628, "grad_norm": 0.14112015068531036, "learning_rate": 0.00014317095494762537, "loss": 1.057, "step": 2800 }, { "epoch": 0.5694246798129701, "grad_norm": 0.1322345733642578, "learning_rate": 0.00014315061527509407, "loss": 1.0804, "step": 2801 }, { "epoch": 0.5696279731652775, "grad_norm": 0.13166458904743195, "learning_rate": 0.0001431302756025628, "loss": 0.9637, "step": 2802 }, { "epoch": 0.5698312665175849, "grad_norm": 0.13725675642490387, "learning_rate": 0.00014310993593003152, "loss": 0.9894, "step": 2803 }, { "epoch": 0.5700345598698923, "grad_norm": 0.1358625739812851, "learning_rate": 0.00014308959625750027, "loss": 1.1097, "step": 2804 }, { "epoch": 0.5702378532221997, "grad_norm": 0.14208373427391052, "learning_rate": 0.000143069256584969, "loss": 1.1789, "step": 2805 }, { "epoch": 0.5704411465745071, "grad_norm": 0.12727318704128265, "learning_rate": 0.00014304891691243772, "loss": 0.9598, "step": 2806 }, { "epoch": 0.5706444399268144, "grad_norm": 0.12927868962287903, "learning_rate": 0.00014302857723990644, "loss": 1.0194, "step": 2807 }, { "epoch": 0.5708477332791217, "grad_norm": 0.14685644209384918, "learning_rate": 0.0001430082375673752, "loss": 1.1379, "step": 2808 }, { "epoch": 0.5710510266314291, "grad_norm": 0.14648008346557617, "learning_rate": 0.0001429878978948439, "loss": 1.2296, "step": 2809 }, { "epoch": 0.5712543199837365, "grad_norm": 0.12980784475803375, "learning_rate": 0.00014296755822231261, "loss": 1.0982, "step": 2810 }, { "epoch": 0.571457613336044, "grad_norm": 0.11192413419485092, "learning_rate": 0.00014294721854978134, "loss": 0.9545, "step": 2811 }, { "epoch": 0.5716609066883513, "grad_norm": 0.15568038821220398, "learning_rate": 0.0001429268788772501, "loss": 1.1671, "step": 2812 }, { "epoch": 0.5718642000406586, "grad_norm": 0.14970743656158447, "learning_rate": 0.00014290653920471881, "loss": 1.0711, "step": 2813 }, { "epoch": 0.572067493392966, "grad_norm": 0.13441245257854462, "learning_rate": 0.00014288619953218754, "loss": 1.0231, "step": 2814 }, { "epoch": 0.5722707867452734, "grad_norm": 0.12407507002353668, "learning_rate": 0.00014286585985965626, "loss": 1.0276, "step": 2815 }, { "epoch": 0.5724740800975808, "grad_norm": 0.13431482017040253, "learning_rate": 0.00014284552018712502, "loss": 1.1361, "step": 2816 }, { "epoch": 0.5726773734498882, "grad_norm": 0.132259339094162, "learning_rate": 0.0001428251805145937, "loss": 1.2343, "step": 2817 }, { "epoch": 0.5728806668021955, "grad_norm": 0.1342546045780182, "learning_rate": 0.00014280484084206244, "loss": 1.0906, "step": 2818 }, { "epoch": 0.5730839601545029, "grad_norm": 0.12521067261695862, "learning_rate": 0.00014278450116953116, "loss": 1.0881, "step": 2819 }, { "epoch": 0.5732872535068103, "grad_norm": 0.12174705415964127, "learning_rate": 0.0001427641614969999, "loss": 0.8563, "step": 2820 }, { "epoch": 0.5734905468591177, "grad_norm": 0.14310669898986816, "learning_rate": 0.00014274382182446864, "loss": 1.2119, "step": 2821 }, { "epoch": 0.5736938402114251, "grad_norm": 0.11739708483219147, "learning_rate": 0.00014272348215193736, "loss": 0.8849, "step": 2822 }, { "epoch": 0.5738971335637325, "grad_norm": 0.14041262865066528, "learning_rate": 0.0001427031424794061, "loss": 1.3593, "step": 2823 }, { "epoch": 0.5741004269160398, "grad_norm": 0.13473278284072876, "learning_rate": 0.00014268280280687484, "loss": 1.0379, "step": 2824 }, { "epoch": 0.5743037202683472, "grad_norm": 0.12364168465137482, "learning_rate": 0.00014266246313434354, "loss": 1.0167, "step": 2825 }, { "epoch": 0.5745070136206546, "grad_norm": 0.1333821415901184, "learning_rate": 0.00014264212346181226, "loss": 1.0472, "step": 2826 }, { "epoch": 0.574710306972962, "grad_norm": 0.11603229492902756, "learning_rate": 0.00014262178378928098, "loss": 0.8045, "step": 2827 }, { "epoch": 0.5749136003252694, "grad_norm": 0.13383187353610992, "learning_rate": 0.0001426014441167497, "loss": 1.1617, "step": 2828 }, { "epoch": 0.5751168936775768, "grad_norm": 0.1249544620513916, "learning_rate": 0.00014258110444421846, "loss": 1.0211, "step": 2829 }, { "epoch": 0.5753201870298841, "grad_norm": 0.12109317630529404, "learning_rate": 0.00014256076477168719, "loss": 0.9672, "step": 2830 }, { "epoch": 0.5755234803821915, "grad_norm": 0.1185065507888794, "learning_rate": 0.0001425404250991559, "loss": 0.857, "step": 2831 }, { "epoch": 0.5757267737344989, "grad_norm": 0.162327840924263, "learning_rate": 0.00014252008542662463, "loss": 1.2834, "step": 2832 }, { "epoch": 0.5759300670868063, "grad_norm": 0.12928487360477448, "learning_rate": 0.00014249974575409339, "loss": 1.1067, "step": 2833 }, { "epoch": 0.5761333604391137, "grad_norm": 0.12098827958106995, "learning_rate": 0.00014247940608156208, "loss": 0.8984, "step": 2834 }, { "epoch": 0.5763366537914211, "grad_norm": 0.12587502598762512, "learning_rate": 0.0001424590664090308, "loss": 1.0488, "step": 2835 }, { "epoch": 0.5765399471437284, "grad_norm": 0.12398620694875717, "learning_rate": 0.00014243872673649953, "loss": 0.9838, "step": 2836 }, { "epoch": 0.5767432404960358, "grad_norm": 0.12822575867176056, "learning_rate": 0.00014241838706396828, "loss": 1.0103, "step": 2837 }, { "epoch": 0.5769465338483432, "grad_norm": 0.13499167561531067, "learning_rate": 0.000142398047391437, "loss": 1.1553, "step": 2838 }, { "epoch": 0.5771498272006506, "grad_norm": 0.12537875771522522, "learning_rate": 0.00014237770771890573, "loss": 0.9383, "step": 2839 }, { "epoch": 0.577353120552958, "grad_norm": 0.13840174674987793, "learning_rate": 0.00014235736804637446, "loss": 1.0803, "step": 2840 }, { "epoch": 0.5775564139052652, "grad_norm": 0.11736918240785599, "learning_rate": 0.0001423370283738432, "loss": 0.908, "step": 2841 }, { "epoch": 0.5777597072575726, "grad_norm": 0.12442715466022491, "learning_rate": 0.0001423166887013119, "loss": 0.9844, "step": 2842 }, { "epoch": 0.57796300060988, "grad_norm": 0.13206282258033752, "learning_rate": 0.00014229634902878063, "loss": 1.063, "step": 2843 }, { "epoch": 0.5781662939621874, "grad_norm": 0.1393408477306366, "learning_rate": 0.00014227600935624935, "loss": 1.0633, "step": 2844 }, { "epoch": 0.5783695873144948, "grad_norm": 0.140583336353302, "learning_rate": 0.0001422556696837181, "loss": 1.1749, "step": 2845 }, { "epoch": 0.5785728806668022, "grad_norm": 0.1310548186302185, "learning_rate": 0.00014223533001118683, "loss": 1.1205, "step": 2846 }, { "epoch": 0.5787761740191095, "grad_norm": 0.1283491551876068, "learning_rate": 0.00014221499033865556, "loss": 1.0956, "step": 2847 }, { "epoch": 0.5789794673714169, "grad_norm": 0.12449255585670471, "learning_rate": 0.00014219465066612428, "loss": 1.0153, "step": 2848 }, { "epoch": 0.5791827607237243, "grad_norm": 0.13952034711837769, "learning_rate": 0.00014217431099359303, "loss": 1.0639, "step": 2849 }, { "epoch": 0.5793860540760317, "grad_norm": 0.1438504010438919, "learning_rate": 0.00014215397132106173, "loss": 1.1237, "step": 2850 }, { "epoch": 0.5795893474283391, "grad_norm": 0.13687646389007568, "learning_rate": 0.00014213363164853045, "loss": 1.0719, "step": 2851 }, { "epoch": 0.5797926407806465, "grad_norm": 0.14046727120876312, "learning_rate": 0.00014211329197599918, "loss": 1.2391, "step": 2852 }, { "epoch": 0.5799959341329538, "grad_norm": 0.1313040554523468, "learning_rate": 0.00014209295230346793, "loss": 1.0689, "step": 2853 }, { "epoch": 0.5801992274852612, "grad_norm": 0.1264270395040512, "learning_rate": 0.00014207261263093665, "loss": 0.9856, "step": 2854 }, { "epoch": 0.5804025208375686, "grad_norm": 0.12176066637039185, "learning_rate": 0.00014205227295840538, "loss": 1.0131, "step": 2855 }, { "epoch": 0.580605814189876, "grad_norm": 0.13929857313632965, "learning_rate": 0.0001420319332858741, "loss": 1.134, "step": 2856 }, { "epoch": 0.5808091075421834, "grad_norm": 0.12523682415485382, "learning_rate": 0.00014201159361334285, "loss": 1.0868, "step": 2857 }, { "epoch": 0.5810124008944908, "grad_norm": 0.13270434737205505, "learning_rate": 0.00014199125394081155, "loss": 1.183, "step": 2858 }, { "epoch": 0.5812156942467981, "grad_norm": 0.1330588310956955, "learning_rate": 0.00014197091426828028, "loss": 1.2487, "step": 2859 }, { "epoch": 0.5814189875991055, "grad_norm": 0.130279541015625, "learning_rate": 0.000141950574595749, "loss": 1.0885, "step": 2860 }, { "epoch": 0.5816222809514129, "grad_norm": 0.1529773771762848, "learning_rate": 0.00014193023492321775, "loss": 1.098, "step": 2861 }, { "epoch": 0.5818255743037203, "grad_norm": 0.14715005457401276, "learning_rate": 0.00014190989525068648, "loss": 1.086, "step": 2862 }, { "epoch": 0.5820288676560277, "grad_norm": 0.12468834221363068, "learning_rate": 0.0001418895555781552, "loss": 1.0349, "step": 2863 }, { "epoch": 0.5822321610083351, "grad_norm": 0.1332579404115677, "learning_rate": 0.00014186921590562393, "loss": 1.0514, "step": 2864 }, { "epoch": 0.5824354543606424, "grad_norm": 0.13424143195152283, "learning_rate": 0.00014184887623309268, "loss": 1.0859, "step": 2865 }, { "epoch": 0.5826387477129498, "grad_norm": 0.11994919180870056, "learning_rate": 0.00014182853656056137, "loss": 0.9416, "step": 2866 }, { "epoch": 0.5828420410652572, "grad_norm": 0.13324035704135895, "learning_rate": 0.0001418081968880301, "loss": 1.112, "step": 2867 }, { "epoch": 0.5830453344175646, "grad_norm": 0.14520680904388428, "learning_rate": 0.00014178785721549882, "loss": 1.0231, "step": 2868 }, { "epoch": 0.583248627769872, "grad_norm": 0.14066869020462036, "learning_rate": 0.00014176751754296755, "loss": 1.281, "step": 2869 }, { "epoch": 0.5834519211221793, "grad_norm": 0.1384185403585434, "learning_rate": 0.0001417471778704363, "loss": 1.0393, "step": 2870 }, { "epoch": 0.5836552144744866, "grad_norm": 0.1287851184606552, "learning_rate": 0.00014172683819790502, "loss": 1.0967, "step": 2871 }, { "epoch": 0.583858507826794, "grad_norm": 0.11896179616451263, "learning_rate": 0.00014170649852537375, "loss": 0.844, "step": 2872 }, { "epoch": 0.5840618011791014, "grad_norm": 0.1319238543510437, "learning_rate": 0.00014168615885284247, "loss": 1.0279, "step": 2873 }, { "epoch": 0.5842650945314088, "grad_norm": 0.1428615152835846, "learning_rate": 0.0001416658191803112, "loss": 1.0823, "step": 2874 }, { "epoch": 0.5844683878837162, "grad_norm": 0.11939448863267899, "learning_rate": 0.00014164547950777992, "loss": 1.0417, "step": 2875 }, { "epoch": 0.5846716812360235, "grad_norm": 0.13555167615413666, "learning_rate": 0.00014162513983524865, "loss": 1.0476, "step": 2876 }, { "epoch": 0.5848749745883309, "grad_norm": 0.12872137129306793, "learning_rate": 0.00014160480016271737, "loss": 0.9306, "step": 2877 }, { "epoch": 0.5850782679406383, "grad_norm": 0.12111514061689377, "learning_rate": 0.00014158446049018612, "loss": 0.9841, "step": 2878 }, { "epoch": 0.5852815612929457, "grad_norm": 0.12589818239212036, "learning_rate": 0.00014156412081765485, "loss": 1.0237, "step": 2879 }, { "epoch": 0.5854848546452531, "grad_norm": 0.12264888733625412, "learning_rate": 0.00014154378114512357, "loss": 0.9417, "step": 2880 }, { "epoch": 0.5856881479975605, "grad_norm": 0.14193598926067352, "learning_rate": 0.0001415234414725923, "loss": 1.2845, "step": 2881 }, { "epoch": 0.5858914413498678, "grad_norm": 0.14116251468658447, "learning_rate": 0.00014150310180006102, "loss": 1.001, "step": 2882 }, { "epoch": 0.5860947347021752, "grad_norm": 0.14120200276374817, "learning_rate": 0.00014148276212752974, "loss": 1.2458, "step": 2883 }, { "epoch": 0.5862980280544826, "grad_norm": 0.13560935854911804, "learning_rate": 0.00014146242245499847, "loss": 1.1217, "step": 2884 }, { "epoch": 0.58650132140679, "grad_norm": 0.14672443270683289, "learning_rate": 0.0001414420827824672, "loss": 1.188, "step": 2885 }, { "epoch": 0.5867046147590974, "grad_norm": 0.12481992691755295, "learning_rate": 0.00014142174310993594, "loss": 1.0063, "step": 2886 }, { "epoch": 0.5869079081114048, "grad_norm": 0.13482870161533356, "learning_rate": 0.00014140140343740467, "loss": 1.0498, "step": 2887 }, { "epoch": 0.5871112014637121, "grad_norm": 0.16956381499767303, "learning_rate": 0.0001413810637648734, "loss": 1.1202, "step": 2888 }, { "epoch": 0.5873144948160195, "grad_norm": 0.1285228431224823, "learning_rate": 0.00014136072409234212, "loss": 0.9175, "step": 2889 }, { "epoch": 0.5875177881683269, "grad_norm": 0.12045499682426453, "learning_rate": 0.00014134038441981087, "loss": 1.0062, "step": 2890 }, { "epoch": 0.5877210815206343, "grad_norm": 0.11959182471036911, "learning_rate": 0.00014132004474727957, "loss": 0.9041, "step": 2891 }, { "epoch": 0.5879243748729417, "grad_norm": 0.12867799401283264, "learning_rate": 0.0001412997050747483, "loss": 0.9985, "step": 2892 }, { "epoch": 0.5881276682252491, "grad_norm": 0.12349910289049149, "learning_rate": 0.00014127936540221702, "loss": 0.9357, "step": 2893 }, { "epoch": 0.5883309615775564, "grad_norm": 0.12842735648155212, "learning_rate": 0.00014125902572968577, "loss": 1.0862, "step": 2894 }, { "epoch": 0.5885342549298638, "grad_norm": 0.1375754177570343, "learning_rate": 0.0001412386860571545, "loss": 1.1911, "step": 2895 }, { "epoch": 0.5887375482821712, "grad_norm": 0.13770340383052826, "learning_rate": 0.00014121834638462322, "loss": 1.0785, "step": 2896 }, { "epoch": 0.5889408416344786, "grad_norm": 0.13841983675956726, "learning_rate": 0.00014119800671209194, "loss": 1.044, "step": 2897 }, { "epoch": 0.589144134986786, "grad_norm": 0.12044288218021393, "learning_rate": 0.0001411776670395607, "loss": 1.0482, "step": 2898 }, { "epoch": 0.5893474283390933, "grad_norm": 0.11521141231060028, "learning_rate": 0.0001411573273670294, "loss": 0.8782, "step": 2899 }, { "epoch": 0.5895507216914007, "grad_norm": 0.13133427500724792, "learning_rate": 0.00014113698769449811, "loss": 1.0602, "step": 2900 }, { "epoch": 0.5897540150437081, "grad_norm": 0.12748554348945618, "learning_rate": 0.00014111664802196684, "loss": 1.0442, "step": 2901 }, { "epoch": 0.5899573083960155, "grad_norm": 0.13325203955173492, "learning_rate": 0.0001410963083494356, "loss": 1.0469, "step": 2902 }, { "epoch": 0.5901606017483229, "grad_norm": 0.12706689536571503, "learning_rate": 0.00014107596867690431, "loss": 1.0692, "step": 2903 }, { "epoch": 0.5903638951006303, "grad_norm": 0.12228814512491226, "learning_rate": 0.00014105562900437304, "loss": 0.8914, "step": 2904 }, { "epoch": 0.5905671884529375, "grad_norm": 0.1334328055381775, "learning_rate": 0.00014103528933184176, "loss": 1.028, "step": 2905 }, { "epoch": 0.5907704818052449, "grad_norm": 0.11916909366846085, "learning_rate": 0.00014101494965931052, "loss": 0.9827, "step": 2906 }, { "epoch": 0.5909737751575523, "grad_norm": 0.12943509221076965, "learning_rate": 0.0001409946099867792, "loss": 1.1302, "step": 2907 }, { "epoch": 0.5911770685098597, "grad_norm": 0.1251513957977295, "learning_rate": 0.00014097427031424794, "loss": 0.9508, "step": 2908 }, { "epoch": 0.5913803618621671, "grad_norm": 0.13130627572536469, "learning_rate": 0.00014095393064171666, "loss": 1.0461, "step": 2909 }, { "epoch": 0.5915836552144745, "grad_norm": 0.13331666588783264, "learning_rate": 0.0001409335909691854, "loss": 1.1964, "step": 2910 }, { "epoch": 0.5917869485667818, "grad_norm": 0.12930695712566376, "learning_rate": 0.00014091325129665414, "loss": 1.0012, "step": 2911 }, { "epoch": 0.5919902419190892, "grad_norm": 0.1442381590604782, "learning_rate": 0.00014089291162412286, "loss": 1.2208, "step": 2912 }, { "epoch": 0.5921935352713966, "grad_norm": 0.12667718529701233, "learning_rate": 0.00014087257195159159, "loss": 0.9417, "step": 2913 }, { "epoch": 0.592396828623704, "grad_norm": 0.12730923295021057, "learning_rate": 0.0001408522322790603, "loss": 1.0481, "step": 2914 }, { "epoch": 0.5926001219760114, "grad_norm": 0.11554036289453506, "learning_rate": 0.00014083189260652904, "loss": 0.8343, "step": 2915 }, { "epoch": 0.5928034153283188, "grad_norm": 0.13052915036678314, "learning_rate": 0.00014081155293399776, "loss": 1.0713, "step": 2916 }, { "epoch": 0.5930067086806261, "grad_norm": 0.12292870879173279, "learning_rate": 0.00014079121326146648, "loss": 1.0347, "step": 2917 }, { "epoch": 0.5932100020329335, "grad_norm": 0.13543544709682465, "learning_rate": 0.0001407708735889352, "loss": 1.1291, "step": 2918 }, { "epoch": 0.5934132953852409, "grad_norm": 0.13335563242435455, "learning_rate": 0.00014075053391640396, "loss": 1.0373, "step": 2919 }, { "epoch": 0.5936165887375483, "grad_norm": 0.12110266089439392, "learning_rate": 0.00014073019424387268, "loss": 0.9317, "step": 2920 }, { "epoch": 0.5938198820898557, "grad_norm": 0.11466968059539795, "learning_rate": 0.0001407098545713414, "loss": 0.922, "step": 2921 }, { "epoch": 0.594023175442163, "grad_norm": 0.13369932770729065, "learning_rate": 0.00014068951489881013, "loss": 1.0871, "step": 2922 }, { "epoch": 0.5942264687944704, "grad_norm": 0.12968046963214874, "learning_rate": 0.00014066917522627886, "loss": 0.971, "step": 2923 }, { "epoch": 0.5944297621467778, "grad_norm": 0.12824739515781403, "learning_rate": 0.00014064883555374758, "loss": 1.1196, "step": 2924 }, { "epoch": 0.5946330554990852, "grad_norm": 0.12155873328447342, "learning_rate": 0.0001406284958812163, "loss": 0.886, "step": 2925 }, { "epoch": 0.5948363488513926, "grad_norm": 0.12435124814510345, "learning_rate": 0.00014060815620868503, "loss": 1.0395, "step": 2926 }, { "epoch": 0.5950396422037, "grad_norm": 0.1359453648328781, "learning_rate": 0.00014058781653615378, "loss": 1.1477, "step": 2927 }, { "epoch": 0.5952429355560073, "grad_norm": 0.10797560214996338, "learning_rate": 0.0001405674768636225, "loss": 0.8632, "step": 2928 }, { "epoch": 0.5954462289083147, "grad_norm": 0.12806884944438934, "learning_rate": 0.00014054713719109123, "loss": 0.9678, "step": 2929 }, { "epoch": 0.5956495222606221, "grad_norm": 0.13405455648899078, "learning_rate": 0.00014052679751855996, "loss": 1.1721, "step": 2930 }, { "epoch": 0.5958528156129295, "grad_norm": 0.11106649786233902, "learning_rate": 0.00014050645784602868, "loss": 0.8613, "step": 2931 }, { "epoch": 0.5960561089652369, "grad_norm": 0.1545085906982422, "learning_rate": 0.0001404861181734974, "loss": 1.2441, "step": 2932 }, { "epoch": 0.5962594023175443, "grad_norm": 0.1290442794561386, "learning_rate": 0.00014046577850096613, "loss": 1.0544, "step": 2933 }, { "epoch": 0.5964626956698516, "grad_norm": 0.11824672669172287, "learning_rate": 0.00014044543882843485, "loss": 0.953, "step": 2934 }, { "epoch": 0.596665989022159, "grad_norm": 0.13066919147968292, "learning_rate": 0.0001404250991559036, "loss": 1.1097, "step": 2935 }, { "epoch": 0.5968692823744663, "grad_norm": 0.13196654617786407, "learning_rate": 0.00014040475948337233, "loss": 0.9026, "step": 2936 }, { "epoch": 0.5970725757267737, "grad_norm": 0.1255139410495758, "learning_rate": 0.00014038441981084105, "loss": 0.9072, "step": 2937 }, { "epoch": 0.5972758690790811, "grad_norm": 0.12293802946805954, "learning_rate": 0.00014036408013830978, "loss": 0.9046, "step": 2938 }, { "epoch": 0.5974791624313885, "grad_norm": 0.15161147713661194, "learning_rate": 0.0001403437404657785, "loss": 1.1338, "step": 2939 }, { "epoch": 0.5976824557836958, "grad_norm": 0.1161181703209877, "learning_rate": 0.00014032340079324723, "loss": 0.8716, "step": 2940 }, { "epoch": 0.5978857491360032, "grad_norm": 0.14142772555351257, "learning_rate": 0.00014030306112071595, "loss": 1.0611, "step": 2941 }, { "epoch": 0.5980890424883106, "grad_norm": 0.13781876862049103, "learning_rate": 0.00014028272144818468, "loss": 1.149, "step": 2942 }, { "epoch": 0.598292335840618, "grad_norm": 0.12525886297225952, "learning_rate": 0.00014026238177565343, "loss": 0.9765, "step": 2943 }, { "epoch": 0.5984956291929254, "grad_norm": 0.11980410665273666, "learning_rate": 0.00014024204210312215, "loss": 0.979, "step": 2944 }, { "epoch": 0.5986989225452328, "grad_norm": 0.14083100855350494, "learning_rate": 0.00014022170243059088, "loss": 1.1407, "step": 2945 }, { "epoch": 0.5989022158975401, "grad_norm": 0.12020063400268555, "learning_rate": 0.0001402013627580596, "loss": 0.9946, "step": 2946 }, { "epoch": 0.5991055092498475, "grad_norm": 0.13902409374713898, "learning_rate": 0.00014018102308552835, "loss": 1.0152, "step": 2947 }, { "epoch": 0.5993088026021549, "grad_norm": 0.12778332829475403, "learning_rate": 0.00014016068341299705, "loss": 1.0196, "step": 2948 }, { "epoch": 0.5995120959544623, "grad_norm": 0.12210957705974579, "learning_rate": 0.00014014034374046578, "loss": 1.136, "step": 2949 }, { "epoch": 0.5997153893067697, "grad_norm": 0.1324332356452942, "learning_rate": 0.0001401200040679345, "loss": 1.055, "step": 2950 }, { "epoch": 0.599918682659077, "grad_norm": 0.14248095452785492, "learning_rate": 0.00014009966439540325, "loss": 1.2809, "step": 2951 }, { "epoch": 0.6001219760113844, "grad_norm": 0.12518227100372314, "learning_rate": 0.00014007932472287198, "loss": 1.0029, "step": 2952 }, { "epoch": 0.6003252693636918, "grad_norm": 0.14796386659145355, "learning_rate": 0.0001400589850503407, "loss": 1.2305, "step": 2953 }, { "epoch": 0.6005285627159992, "grad_norm": 0.1290920078754425, "learning_rate": 0.00014003864537780942, "loss": 0.9874, "step": 2954 }, { "epoch": 0.6007318560683066, "grad_norm": 0.12988100945949554, "learning_rate": 0.00014001830570527815, "loss": 0.9438, "step": 2955 }, { "epoch": 0.600935149420614, "grad_norm": 0.12497319281101227, "learning_rate": 0.00013999796603274687, "loss": 0.8797, "step": 2956 }, { "epoch": 0.6011384427729213, "grad_norm": 0.1346983015537262, "learning_rate": 0.0001399776263602156, "loss": 1.0419, "step": 2957 }, { "epoch": 0.6013417361252287, "grad_norm": 0.11253220587968826, "learning_rate": 0.00013995728668768432, "loss": 0.9803, "step": 2958 }, { "epoch": 0.6015450294775361, "grad_norm": 0.1398647278547287, "learning_rate": 0.00013993694701515305, "loss": 1.1748, "step": 2959 }, { "epoch": 0.6017483228298435, "grad_norm": 0.14113448560237885, "learning_rate": 0.0001399166073426218, "loss": 1.1243, "step": 2960 }, { "epoch": 0.6019516161821509, "grad_norm": 0.1343860626220703, "learning_rate": 0.00013989626767009052, "loss": 1.1181, "step": 2961 }, { "epoch": 0.6021549095344583, "grad_norm": 0.13300351798534393, "learning_rate": 0.00013987592799755925, "loss": 1.108, "step": 2962 }, { "epoch": 0.6023582028867656, "grad_norm": 0.1379079967737198, "learning_rate": 0.00013985558832502797, "loss": 1.1145, "step": 2963 }, { "epoch": 0.602561496239073, "grad_norm": 0.13258612155914307, "learning_rate": 0.0001398352486524967, "loss": 1.0888, "step": 2964 }, { "epoch": 0.6027647895913804, "grad_norm": 0.1152709499001503, "learning_rate": 0.00013981490897996542, "loss": 0.8821, "step": 2965 }, { "epoch": 0.6029680829436878, "grad_norm": 0.14803390204906464, "learning_rate": 0.00013979456930743415, "loss": 1.1625, "step": 2966 }, { "epoch": 0.6031713762959952, "grad_norm": 0.12902309000492096, "learning_rate": 0.00013977422963490287, "loss": 1.0304, "step": 2967 }, { "epoch": 0.6033746696483026, "grad_norm": 0.1235414445400238, "learning_rate": 0.00013975388996237162, "loss": 1.0143, "step": 2968 }, { "epoch": 0.6035779630006098, "grad_norm": 0.1427546590566635, "learning_rate": 0.00013973355028984035, "loss": 1.2047, "step": 2969 }, { "epoch": 0.6037812563529172, "grad_norm": 0.1456848382949829, "learning_rate": 0.00013971321061730907, "loss": 1.0673, "step": 2970 }, { "epoch": 0.6039845497052246, "grad_norm": 0.13823378086090088, "learning_rate": 0.0001396928709447778, "loss": 0.9845, "step": 2971 }, { "epoch": 0.604187843057532, "grad_norm": 0.15567836165428162, "learning_rate": 0.00013967253127224652, "loss": 1.1844, "step": 2972 }, { "epoch": 0.6043911364098394, "grad_norm": 0.12227654457092285, "learning_rate": 0.00013965219159971524, "loss": 1.0443, "step": 2973 }, { "epoch": 0.6045944297621467, "grad_norm": 0.14952129125595093, "learning_rate": 0.00013963185192718397, "loss": 1.1285, "step": 2974 }, { "epoch": 0.6047977231144541, "grad_norm": 0.1279451698064804, "learning_rate": 0.0001396115122546527, "loss": 0.9304, "step": 2975 }, { "epoch": 0.6050010164667615, "grad_norm": 0.13317649066448212, "learning_rate": 0.00013959117258212144, "loss": 1.0864, "step": 2976 }, { "epoch": 0.6052043098190689, "grad_norm": 0.13362491130828857, "learning_rate": 0.00013957083290959017, "loss": 1.0547, "step": 2977 }, { "epoch": 0.6054076031713763, "grad_norm": 0.13469024002552032, "learning_rate": 0.0001395504932370589, "loss": 1.0941, "step": 2978 }, { "epoch": 0.6056108965236837, "grad_norm": 0.1265508234500885, "learning_rate": 0.00013953015356452762, "loss": 0.9379, "step": 2979 }, { "epoch": 0.605814189875991, "grad_norm": 0.1344381868839264, "learning_rate": 0.00013950981389199634, "loss": 1.1047, "step": 2980 }, { "epoch": 0.6060174832282984, "grad_norm": 0.13309423625469208, "learning_rate": 0.00013948947421946507, "loss": 1.0916, "step": 2981 }, { "epoch": 0.6062207765806058, "grad_norm": 0.1394202560186386, "learning_rate": 0.0001394691345469338, "loss": 1.0521, "step": 2982 }, { "epoch": 0.6064240699329132, "grad_norm": 0.12950794398784637, "learning_rate": 0.00013944879487440252, "loss": 1.0414, "step": 2983 }, { "epoch": 0.6066273632852206, "grad_norm": 0.15191194415092468, "learning_rate": 0.00013942845520187127, "loss": 1.1255, "step": 2984 }, { "epoch": 0.606830656637528, "grad_norm": 0.142736554145813, "learning_rate": 0.00013940811552934, "loss": 1.1004, "step": 2985 }, { "epoch": 0.6070339499898353, "grad_norm": 0.13812166452407837, "learning_rate": 0.00013938777585680872, "loss": 1.0688, "step": 2986 }, { "epoch": 0.6072372433421427, "grad_norm": 0.1332339346408844, "learning_rate": 0.00013936743618427744, "loss": 1.0214, "step": 2987 }, { "epoch": 0.6074405366944501, "grad_norm": 0.11382775753736496, "learning_rate": 0.00013934709651174616, "loss": 0.9555, "step": 2988 }, { "epoch": 0.6076438300467575, "grad_norm": 0.1476142406463623, "learning_rate": 0.0001393267568392149, "loss": 1.1355, "step": 2989 }, { "epoch": 0.6078471233990649, "grad_norm": 0.15201976895332336, "learning_rate": 0.00013930641716668361, "loss": 1.2253, "step": 2990 }, { "epoch": 0.6080504167513723, "grad_norm": 0.13920465111732483, "learning_rate": 0.00013928607749415234, "loss": 1.1671, "step": 2991 }, { "epoch": 0.6082537101036796, "grad_norm": 0.11285021156072617, "learning_rate": 0.0001392657378216211, "loss": 0.8908, "step": 2992 }, { "epoch": 0.608457003455987, "grad_norm": 0.14005322754383087, "learning_rate": 0.00013924539814908981, "loss": 1.0991, "step": 2993 }, { "epoch": 0.6086602968082944, "grad_norm": 0.14553718268871307, "learning_rate": 0.00013922505847655854, "loss": 1.1135, "step": 2994 }, { "epoch": 0.6088635901606018, "grad_norm": 0.1322544664144516, "learning_rate": 0.00013920471880402726, "loss": 1.0209, "step": 2995 }, { "epoch": 0.6090668835129092, "grad_norm": 0.12357106804847717, "learning_rate": 0.000139184379131496, "loss": 0.9475, "step": 2996 }, { "epoch": 0.6092701768652166, "grad_norm": 0.1173151507973671, "learning_rate": 0.0001391640394589647, "loss": 0.9806, "step": 2997 }, { "epoch": 0.6094734702175238, "grad_norm": 0.12091773003339767, "learning_rate": 0.00013914369978643344, "loss": 0.9308, "step": 2998 }, { "epoch": 0.6096767635698312, "grad_norm": 0.12371361255645752, "learning_rate": 0.00013912336011390216, "loss": 1.001, "step": 2999 }, { "epoch": 0.6098800569221386, "grad_norm": 0.11926256865262985, "learning_rate": 0.00013910302044137089, "loss": 0.9655, "step": 3000 }, { "epoch": 0.610083350274446, "grad_norm": 0.12953068315982819, "learning_rate": 0.00013908268076883964, "loss": 1.0736, "step": 3001 }, { "epoch": 0.6102866436267534, "grad_norm": 0.12367159873247147, "learning_rate": 0.00013906234109630836, "loss": 1.0428, "step": 3002 }, { "epoch": 0.6104899369790607, "grad_norm": 0.12180911749601364, "learning_rate": 0.00013904200142377709, "loss": 0.9661, "step": 3003 }, { "epoch": 0.6106932303313681, "grad_norm": 0.13220947980880737, "learning_rate": 0.0001390216617512458, "loss": 0.8966, "step": 3004 }, { "epoch": 0.6108965236836755, "grad_norm": 0.15283820033073425, "learning_rate": 0.00013900132207871453, "loss": 1.2692, "step": 3005 }, { "epoch": 0.6110998170359829, "grad_norm": 0.1325535923242569, "learning_rate": 0.00013898098240618326, "loss": 0.9849, "step": 3006 }, { "epoch": 0.6113031103882903, "grad_norm": 0.14417356252670288, "learning_rate": 0.00013896064273365198, "loss": 1.2099, "step": 3007 }, { "epoch": 0.6115064037405977, "grad_norm": 0.1250670701265335, "learning_rate": 0.0001389403030611207, "loss": 0.9963, "step": 3008 }, { "epoch": 0.611709697092905, "grad_norm": 0.1299847513437271, "learning_rate": 0.00013891996338858946, "loss": 1.0131, "step": 3009 }, { "epoch": 0.6119129904452124, "grad_norm": 0.13631494343280792, "learning_rate": 0.00013889962371605818, "loss": 1.106, "step": 3010 }, { "epoch": 0.6121162837975198, "grad_norm": 0.12008505314588547, "learning_rate": 0.0001388792840435269, "loss": 0.9617, "step": 3011 }, { "epoch": 0.6123195771498272, "grad_norm": 0.13346195220947266, "learning_rate": 0.00013885894437099563, "loss": 1.1796, "step": 3012 }, { "epoch": 0.6125228705021346, "grad_norm": 0.12372852861881256, "learning_rate": 0.00013883860469846436, "loss": 0.8494, "step": 3013 }, { "epoch": 0.612726163854442, "grad_norm": 0.14432121813297272, "learning_rate": 0.00013881826502593308, "loss": 1.1595, "step": 3014 }, { "epoch": 0.6129294572067493, "grad_norm": 0.13419228792190552, "learning_rate": 0.0001387979253534018, "loss": 1.0886, "step": 3015 }, { "epoch": 0.6131327505590567, "grad_norm": 0.1301155984401703, "learning_rate": 0.00013877758568087053, "loss": 0.9456, "step": 3016 }, { "epoch": 0.6133360439113641, "grad_norm": 0.11167372018098831, "learning_rate": 0.00013875724600833928, "loss": 0.9077, "step": 3017 }, { "epoch": 0.6135393372636715, "grad_norm": 0.11222781985998154, "learning_rate": 0.000138736906335808, "loss": 0.8517, "step": 3018 }, { "epoch": 0.6137426306159789, "grad_norm": 0.1456783413887024, "learning_rate": 0.00013871656666327673, "loss": 1.1327, "step": 3019 }, { "epoch": 0.6139459239682863, "grad_norm": 0.12238568812608719, "learning_rate": 0.00013869622699074546, "loss": 1.0502, "step": 3020 }, { "epoch": 0.6141492173205936, "grad_norm": 0.1362997442483902, "learning_rate": 0.00013867588731821418, "loss": 0.9795, "step": 3021 }, { "epoch": 0.614352510672901, "grad_norm": 0.12421485036611557, "learning_rate": 0.0001386555476456829, "loss": 0.9604, "step": 3022 }, { "epoch": 0.6145558040252084, "grad_norm": 0.11413677036762238, "learning_rate": 0.00013863520797315163, "loss": 1.0068, "step": 3023 }, { "epoch": 0.6147590973775158, "grad_norm": 0.1455029845237732, "learning_rate": 0.00013861486830062035, "loss": 1.2662, "step": 3024 }, { "epoch": 0.6149623907298232, "grad_norm": 0.12818849086761475, "learning_rate": 0.0001385945286280891, "loss": 0.9195, "step": 3025 }, { "epoch": 0.6151656840821305, "grad_norm": 0.1426313817501068, "learning_rate": 0.00013857418895555783, "loss": 1.2388, "step": 3026 }, { "epoch": 0.6153689774344379, "grad_norm": 0.14491280913352966, "learning_rate": 0.00013855384928302655, "loss": 1.1537, "step": 3027 }, { "epoch": 0.6155722707867453, "grad_norm": 0.11689125746488571, "learning_rate": 0.00013853350961049528, "loss": 0.9963, "step": 3028 }, { "epoch": 0.6157755641390527, "grad_norm": 0.1245650127530098, "learning_rate": 0.000138513169937964, "loss": 0.9193, "step": 3029 }, { "epoch": 0.61597885749136, "grad_norm": 0.14654415845870972, "learning_rate": 0.00013849283026543273, "loss": 1.271, "step": 3030 }, { "epoch": 0.6161821508436675, "grad_norm": 0.13708455860614777, "learning_rate": 0.00013847249059290145, "loss": 1.1169, "step": 3031 }, { "epoch": 0.6163854441959747, "grad_norm": 0.13598188757896423, "learning_rate": 0.00013845215092037018, "loss": 1.1565, "step": 3032 }, { "epoch": 0.6165887375482821, "grad_norm": 0.13055184483528137, "learning_rate": 0.00013843181124783893, "loss": 0.9636, "step": 3033 }, { "epoch": 0.6167920309005895, "grad_norm": 0.12075616419315338, "learning_rate": 0.00013841147157530765, "loss": 1.0404, "step": 3034 }, { "epoch": 0.6169953242528969, "grad_norm": 0.12068097293376923, "learning_rate": 0.00013839113190277638, "loss": 0.9913, "step": 3035 }, { "epoch": 0.6171986176052043, "grad_norm": 0.13300339877605438, "learning_rate": 0.0001383707922302451, "loss": 1.1589, "step": 3036 }, { "epoch": 0.6174019109575117, "grad_norm": 0.13343989849090576, "learning_rate": 0.00013835045255771383, "loss": 1.1602, "step": 3037 }, { "epoch": 0.617605204309819, "grad_norm": 0.13028277456760406, "learning_rate": 0.00013833011288518255, "loss": 1.0985, "step": 3038 }, { "epoch": 0.6178084976621264, "grad_norm": 0.1125851422548294, "learning_rate": 0.00013830977321265128, "loss": 1.0002, "step": 3039 }, { "epoch": 0.6180117910144338, "grad_norm": 0.12342289090156555, "learning_rate": 0.00013828943354012, "loss": 1.0013, "step": 3040 }, { "epoch": 0.6182150843667412, "grad_norm": 0.12776073813438416, "learning_rate": 0.00013826909386758872, "loss": 0.9747, "step": 3041 }, { "epoch": 0.6184183777190486, "grad_norm": 0.12842942774295807, "learning_rate": 0.00013824875419505748, "loss": 0.9877, "step": 3042 }, { "epoch": 0.618621671071356, "grad_norm": 0.13102072477340698, "learning_rate": 0.0001382284145225262, "loss": 0.9636, "step": 3043 }, { "epoch": 0.6188249644236633, "grad_norm": 0.12905801832675934, "learning_rate": 0.00013820807484999492, "loss": 1.1595, "step": 3044 }, { "epoch": 0.6190282577759707, "grad_norm": 0.1274825781583786, "learning_rate": 0.00013818773517746365, "loss": 0.9346, "step": 3045 }, { "epoch": 0.6192315511282781, "grad_norm": 0.1235279068350792, "learning_rate": 0.00013816739550493237, "loss": 0.9774, "step": 3046 }, { "epoch": 0.6194348444805855, "grad_norm": 0.13355652987957, "learning_rate": 0.0001381470558324011, "loss": 1.1034, "step": 3047 }, { "epoch": 0.6196381378328929, "grad_norm": 0.12585759162902832, "learning_rate": 0.00013812671615986982, "loss": 1.0536, "step": 3048 }, { "epoch": 0.6198414311852003, "grad_norm": 0.12993231415748596, "learning_rate": 0.00013810637648733855, "loss": 1.2062, "step": 3049 }, { "epoch": 0.6200447245375076, "grad_norm": 0.1431044191122055, "learning_rate": 0.0001380860368148073, "loss": 1.1605, "step": 3050 }, { "epoch": 0.620248017889815, "grad_norm": 0.134634330868721, "learning_rate": 0.00013806569714227602, "loss": 1.101, "step": 3051 }, { "epoch": 0.6204513112421224, "grad_norm": 0.126140758395195, "learning_rate": 0.00013804535746974475, "loss": 1.0472, "step": 3052 }, { "epoch": 0.6206546045944298, "grad_norm": 0.1231079027056694, "learning_rate": 0.00013802501779721347, "loss": 0.9879, "step": 3053 }, { "epoch": 0.6208578979467372, "grad_norm": 0.12733492255210876, "learning_rate": 0.0001380046781246822, "loss": 1.0918, "step": 3054 }, { "epoch": 0.6210611912990445, "grad_norm": 0.14148791134357452, "learning_rate": 0.00013798433845215092, "loss": 1.145, "step": 3055 }, { "epoch": 0.6212644846513519, "grad_norm": 0.13087992370128632, "learning_rate": 0.00013796399877961965, "loss": 1.1101, "step": 3056 }, { "epoch": 0.6214677780036593, "grad_norm": 0.14443303644657135, "learning_rate": 0.00013794365910708837, "loss": 1.0992, "step": 3057 }, { "epoch": 0.6216710713559667, "grad_norm": 0.13422155380249023, "learning_rate": 0.00013792331943455712, "loss": 1.2044, "step": 3058 }, { "epoch": 0.6218743647082741, "grad_norm": 0.13146667182445526, "learning_rate": 0.00013790297976202585, "loss": 1.0769, "step": 3059 }, { "epoch": 0.6220776580605815, "grad_norm": 0.12982682883739471, "learning_rate": 0.00013788264008949457, "loss": 1.1232, "step": 3060 }, { "epoch": 0.6222809514128887, "grad_norm": 0.13256913423538208, "learning_rate": 0.0001378623004169633, "loss": 0.9969, "step": 3061 }, { "epoch": 0.6224842447651961, "grad_norm": 0.11935515701770782, "learning_rate": 0.00013784196074443202, "loss": 1.0282, "step": 3062 }, { "epoch": 0.6226875381175035, "grad_norm": 0.14199033379554749, "learning_rate": 0.00013782162107190074, "loss": 1.1328, "step": 3063 }, { "epoch": 0.622890831469811, "grad_norm": 0.12896639108657837, "learning_rate": 0.00013780128139936947, "loss": 1.1422, "step": 3064 }, { "epoch": 0.6230941248221183, "grad_norm": 0.12972599267959595, "learning_rate": 0.0001377809417268382, "loss": 1.0686, "step": 3065 }, { "epoch": 0.6232974181744257, "grad_norm": 0.14466549456119537, "learning_rate": 0.00013776060205430694, "loss": 1.3486, "step": 3066 }, { "epoch": 0.623500711526733, "grad_norm": 0.129892960190773, "learning_rate": 0.00013774026238177567, "loss": 0.9945, "step": 3067 }, { "epoch": 0.6237040048790404, "grad_norm": 0.1326766312122345, "learning_rate": 0.0001377199227092444, "loss": 1.0583, "step": 3068 }, { "epoch": 0.6239072982313478, "grad_norm": 0.14068090915679932, "learning_rate": 0.00013769958303671312, "loss": 1.1597, "step": 3069 }, { "epoch": 0.6241105915836552, "grad_norm": 0.12544094026088715, "learning_rate": 0.00013767924336418184, "loss": 0.9624, "step": 3070 }, { "epoch": 0.6243138849359626, "grad_norm": 0.13259856402873993, "learning_rate": 0.00013765890369165057, "loss": 1.0218, "step": 3071 }, { "epoch": 0.62451717828827, "grad_norm": 0.13529850542545319, "learning_rate": 0.0001376385640191193, "loss": 1.1063, "step": 3072 }, { "epoch": 0.6247204716405773, "grad_norm": 0.1389310508966446, "learning_rate": 0.00013761822434658802, "loss": 1.067, "step": 3073 }, { "epoch": 0.6249237649928847, "grad_norm": 0.1326620876789093, "learning_rate": 0.00013759788467405677, "loss": 1.1228, "step": 3074 }, { "epoch": 0.6251270583451921, "grad_norm": 0.1371268332004547, "learning_rate": 0.0001375775450015255, "loss": 1.056, "step": 3075 }, { "epoch": 0.6253303516974995, "grad_norm": 0.15050175786018372, "learning_rate": 0.00013755720532899422, "loss": 1.1679, "step": 3076 }, { "epoch": 0.6255336450498069, "grad_norm": 0.14462800323963165, "learning_rate": 0.00013753686565646294, "loss": 1.1155, "step": 3077 }, { "epoch": 0.6257369384021142, "grad_norm": 0.12994062900543213, "learning_rate": 0.00013751652598393166, "loss": 1.0954, "step": 3078 }, { "epoch": 0.6259402317544216, "grad_norm": 0.12979595363140106, "learning_rate": 0.0001374961863114004, "loss": 1.0912, "step": 3079 }, { "epoch": 0.626143525106729, "grad_norm": 0.12296707928180695, "learning_rate": 0.0001374758466388691, "loss": 0.9646, "step": 3080 }, { "epoch": 0.6263468184590364, "grad_norm": 0.14658544957637787, "learning_rate": 0.00013745550696633784, "loss": 1.1149, "step": 3081 }, { "epoch": 0.6265501118113438, "grad_norm": 0.12885436415672302, "learning_rate": 0.00013743516729380656, "loss": 1.0285, "step": 3082 }, { "epoch": 0.6267534051636512, "grad_norm": 0.13237449526786804, "learning_rate": 0.00013741482762127531, "loss": 1.1388, "step": 3083 }, { "epoch": 0.6269566985159585, "grad_norm": 0.11667048186063766, "learning_rate": 0.00013739448794874404, "loss": 0.9625, "step": 3084 }, { "epoch": 0.6271599918682659, "grad_norm": 0.09962797164916992, "learning_rate": 0.00013737414827621276, "loss": 0.8283, "step": 3085 }, { "epoch": 0.6273632852205733, "grad_norm": 0.11563806235790253, "learning_rate": 0.0001373538086036815, "loss": 0.9379, "step": 3086 }, { "epoch": 0.6275665785728807, "grad_norm": 0.14020705223083496, "learning_rate": 0.0001373334689311502, "loss": 1.0649, "step": 3087 }, { "epoch": 0.6277698719251881, "grad_norm": 0.1255711168050766, "learning_rate": 0.00013731312925861894, "loss": 1.0555, "step": 3088 }, { "epoch": 0.6279731652774955, "grad_norm": 0.1265256702899933, "learning_rate": 0.00013729278958608766, "loss": 0.9377, "step": 3089 }, { "epoch": 0.6281764586298028, "grad_norm": 0.13861151039600372, "learning_rate": 0.00013727244991355639, "loss": 1.2492, "step": 3090 }, { "epoch": 0.6283797519821102, "grad_norm": 0.1353643387556076, "learning_rate": 0.00013725211024102514, "loss": 1.0395, "step": 3091 }, { "epoch": 0.6285830453344176, "grad_norm": 0.14273463189601898, "learning_rate": 0.00013723177056849386, "loss": 1.2017, "step": 3092 }, { "epoch": 0.628786338686725, "grad_norm": 0.12992137670516968, "learning_rate": 0.00013721143089596259, "loss": 1.1135, "step": 3093 }, { "epoch": 0.6289896320390324, "grad_norm": 0.13525742292404175, "learning_rate": 0.0001371910912234313, "loss": 1.1695, "step": 3094 }, { "epoch": 0.6291929253913398, "grad_norm": 0.12449081242084503, "learning_rate": 0.00013717075155090003, "loss": 1.0187, "step": 3095 }, { "epoch": 0.629396218743647, "grad_norm": 0.12699362635612488, "learning_rate": 0.00013715041187836876, "loss": 1.0876, "step": 3096 }, { "epoch": 0.6295995120959544, "grad_norm": 0.12526580691337585, "learning_rate": 0.00013713007220583748, "loss": 1.0352, "step": 3097 }, { "epoch": 0.6298028054482618, "grad_norm": 0.1089174896478653, "learning_rate": 0.0001371097325333062, "loss": 0.9695, "step": 3098 }, { "epoch": 0.6300060988005692, "grad_norm": 0.1343061774969101, "learning_rate": 0.00013708939286077496, "loss": 1.0601, "step": 3099 }, { "epoch": 0.6302093921528766, "grad_norm": 0.14272217452526093, "learning_rate": 0.00013706905318824368, "loss": 1.1642, "step": 3100 }, { "epoch": 0.630412685505184, "grad_norm": 0.14062613248825073, "learning_rate": 0.0001370487135157124, "loss": 1.1965, "step": 3101 }, { "epoch": 0.6306159788574913, "grad_norm": 0.12888343632221222, "learning_rate": 0.00013702837384318113, "loss": 0.9552, "step": 3102 }, { "epoch": 0.6308192722097987, "grad_norm": 0.1350019872188568, "learning_rate": 0.00013700803417064986, "loss": 1.1513, "step": 3103 }, { "epoch": 0.6310225655621061, "grad_norm": 0.12076770514249802, "learning_rate": 0.00013698769449811858, "loss": 0.9807, "step": 3104 }, { "epoch": 0.6312258589144135, "grad_norm": 0.12005645036697388, "learning_rate": 0.0001369673548255873, "loss": 0.9309, "step": 3105 }, { "epoch": 0.6314291522667209, "grad_norm": 0.13432009518146515, "learning_rate": 0.00013694701515305603, "loss": 1.0728, "step": 3106 }, { "epoch": 0.6316324456190282, "grad_norm": 0.14083653688430786, "learning_rate": 0.00013692667548052478, "loss": 0.9339, "step": 3107 }, { "epoch": 0.6318357389713356, "grad_norm": 0.12383510172367096, "learning_rate": 0.0001369063358079935, "loss": 0.9525, "step": 3108 }, { "epoch": 0.632039032323643, "grad_norm": 0.12858064472675323, "learning_rate": 0.00013688599613546223, "loss": 1.1277, "step": 3109 }, { "epoch": 0.6322423256759504, "grad_norm": 0.1366434544324875, "learning_rate": 0.00013686565646293096, "loss": 1.0272, "step": 3110 }, { "epoch": 0.6324456190282578, "grad_norm": 0.12631452083587646, "learning_rate": 0.00013684531679039968, "loss": 1.0147, "step": 3111 }, { "epoch": 0.6326489123805652, "grad_norm": 0.1388847827911377, "learning_rate": 0.0001368249771178684, "loss": 1.1982, "step": 3112 }, { "epoch": 0.6328522057328725, "grad_norm": 0.1357526183128357, "learning_rate": 0.00013680463744533713, "loss": 1.1804, "step": 3113 }, { "epoch": 0.6330554990851799, "grad_norm": 0.1273118555545807, "learning_rate": 0.00013678429777280585, "loss": 1.0268, "step": 3114 }, { "epoch": 0.6332587924374873, "grad_norm": 0.13186684250831604, "learning_rate": 0.0001367639581002746, "loss": 1.135, "step": 3115 }, { "epoch": 0.6334620857897947, "grad_norm": 0.1217605397105217, "learning_rate": 0.00013674361842774333, "loss": 1.0364, "step": 3116 }, { "epoch": 0.6336653791421021, "grad_norm": 0.11785151809453964, "learning_rate": 0.00013672327875521205, "loss": 0.9248, "step": 3117 }, { "epoch": 0.6338686724944095, "grad_norm": 0.12986084818840027, "learning_rate": 0.00013670293908268078, "loss": 1.0988, "step": 3118 }, { "epoch": 0.6340719658467168, "grad_norm": 0.14195957779884338, "learning_rate": 0.0001366825994101495, "loss": 1.2182, "step": 3119 }, { "epoch": 0.6342752591990242, "grad_norm": 0.12939682602882385, "learning_rate": 0.00013666225973761823, "loss": 1.0573, "step": 3120 }, { "epoch": 0.6344785525513316, "grad_norm": 0.12343540787696838, "learning_rate": 0.00013664192006508695, "loss": 1.0057, "step": 3121 }, { "epoch": 0.634681845903639, "grad_norm": 0.12308801710605621, "learning_rate": 0.00013662158039255568, "loss": 0.9345, "step": 3122 }, { "epoch": 0.6348851392559464, "grad_norm": 0.13453471660614014, "learning_rate": 0.0001366012407200244, "loss": 1.1593, "step": 3123 }, { "epoch": 0.6350884326082538, "grad_norm": 0.14599518477916718, "learning_rate": 0.00013658090104749315, "loss": 1.19, "step": 3124 }, { "epoch": 0.635291725960561, "grad_norm": 0.13644537329673767, "learning_rate": 0.00013656056137496188, "loss": 1.1541, "step": 3125 }, { "epoch": 0.6354950193128684, "grad_norm": 0.1313880980014801, "learning_rate": 0.0001365402217024306, "loss": 1.0607, "step": 3126 }, { "epoch": 0.6356983126651758, "grad_norm": 0.12381511926651001, "learning_rate": 0.00013651988202989933, "loss": 1.0003, "step": 3127 }, { "epoch": 0.6359016060174832, "grad_norm": 0.1361168473958969, "learning_rate": 0.00013649954235736805, "loss": 0.9445, "step": 3128 }, { "epoch": 0.6361048993697906, "grad_norm": 0.13545829057693481, "learning_rate": 0.00013647920268483677, "loss": 1.046, "step": 3129 }, { "epoch": 0.636308192722098, "grad_norm": 0.1335272639989853, "learning_rate": 0.0001364588630123055, "loss": 1.0611, "step": 3130 }, { "epoch": 0.6365114860744053, "grad_norm": 0.13092759251594543, "learning_rate": 0.00013643852333977422, "loss": 0.991, "step": 3131 }, { "epoch": 0.6367147794267127, "grad_norm": 0.1328737437725067, "learning_rate": 0.00013641818366724298, "loss": 1.2151, "step": 3132 }, { "epoch": 0.6369180727790201, "grad_norm": 0.13247033953666687, "learning_rate": 0.0001363978439947117, "loss": 1.0918, "step": 3133 }, { "epoch": 0.6371213661313275, "grad_norm": 0.1463424563407898, "learning_rate": 0.00013637750432218042, "loss": 1.0753, "step": 3134 }, { "epoch": 0.6373246594836349, "grad_norm": 0.13314956426620483, "learning_rate": 0.00013635716464964915, "loss": 1.1224, "step": 3135 }, { "epoch": 0.6375279528359422, "grad_norm": 0.12841732800006866, "learning_rate": 0.00013633682497711787, "loss": 0.9049, "step": 3136 }, { "epoch": 0.6377312461882496, "grad_norm": 0.1303834468126297, "learning_rate": 0.0001363164853045866, "loss": 1.1208, "step": 3137 }, { "epoch": 0.637934539540557, "grad_norm": 0.1288985162973404, "learning_rate": 0.00013629614563205532, "loss": 1.0416, "step": 3138 }, { "epoch": 0.6381378328928644, "grad_norm": 0.13632969558238983, "learning_rate": 0.00013627580595952405, "loss": 1.1293, "step": 3139 }, { "epoch": 0.6383411262451718, "grad_norm": 0.12471256405115128, "learning_rate": 0.0001362554662869928, "loss": 1.1095, "step": 3140 }, { "epoch": 0.6385444195974792, "grad_norm": 0.13156485557556152, "learning_rate": 0.00013623512661446152, "loss": 1.0952, "step": 3141 }, { "epoch": 0.6387477129497865, "grad_norm": 0.13472090661525726, "learning_rate": 0.00013621478694193025, "loss": 1.0665, "step": 3142 }, { "epoch": 0.6389510063020939, "grad_norm": 0.1464674472808838, "learning_rate": 0.00013619444726939897, "loss": 1.3084, "step": 3143 }, { "epoch": 0.6391542996544013, "grad_norm": 0.13103194534778595, "learning_rate": 0.0001361741075968677, "loss": 1.0512, "step": 3144 }, { "epoch": 0.6393575930067087, "grad_norm": 0.13378995656967163, "learning_rate": 0.00013615376792433642, "loss": 1.0625, "step": 3145 }, { "epoch": 0.6395608863590161, "grad_norm": 0.13924111425876617, "learning_rate": 0.00013613342825180514, "loss": 1.1813, "step": 3146 }, { "epoch": 0.6397641797113235, "grad_norm": 0.13989883661270142, "learning_rate": 0.00013611308857927387, "loss": 1.0973, "step": 3147 }, { "epoch": 0.6399674730636308, "grad_norm": 0.12374843657016754, "learning_rate": 0.00013609274890674262, "loss": 0.9683, "step": 3148 }, { "epoch": 0.6401707664159382, "grad_norm": 0.14824433624744415, "learning_rate": 0.00013607240923421135, "loss": 1.1632, "step": 3149 }, { "epoch": 0.6403740597682456, "grad_norm": 0.13298064470291138, "learning_rate": 0.00013605206956168007, "loss": 1.0621, "step": 3150 }, { "epoch": 0.640577353120553, "grad_norm": 0.13271810114383698, "learning_rate": 0.0001360317298891488, "loss": 1.1239, "step": 3151 }, { "epoch": 0.6407806464728604, "grad_norm": 0.12920920550823212, "learning_rate": 0.00013601139021661752, "loss": 1.0332, "step": 3152 }, { "epoch": 0.6409839398251678, "grad_norm": 0.12078989297151566, "learning_rate": 0.00013599105054408624, "loss": 0.9747, "step": 3153 }, { "epoch": 0.6411872331774751, "grad_norm": 0.1309296190738678, "learning_rate": 0.00013597071087155497, "loss": 1.0871, "step": 3154 }, { "epoch": 0.6413905265297825, "grad_norm": 0.13290594518184662, "learning_rate": 0.0001359503711990237, "loss": 0.9874, "step": 3155 }, { "epoch": 0.6415938198820899, "grad_norm": 0.12248789519071579, "learning_rate": 0.00013593003152649244, "loss": 1.0674, "step": 3156 }, { "epoch": 0.6417971132343973, "grad_norm": 0.13262233138084412, "learning_rate": 0.00013590969185396117, "loss": 1.0918, "step": 3157 }, { "epoch": 0.6420004065867047, "grad_norm": 0.11638560891151428, "learning_rate": 0.0001358893521814299, "loss": 0.94, "step": 3158 }, { "epoch": 0.6422036999390119, "grad_norm": 0.13623739778995514, "learning_rate": 0.00013586901250889862, "loss": 0.9925, "step": 3159 }, { "epoch": 0.6424069932913193, "grad_norm": 0.13399013876914978, "learning_rate": 0.00013584867283636734, "loss": 0.9553, "step": 3160 }, { "epoch": 0.6426102866436267, "grad_norm": 0.12274351716041565, "learning_rate": 0.00013582833316383607, "loss": 1.0406, "step": 3161 }, { "epoch": 0.6428135799959341, "grad_norm": 0.13038837909698486, "learning_rate": 0.0001358079934913048, "loss": 1.1596, "step": 3162 }, { "epoch": 0.6430168733482415, "grad_norm": 0.13271398842334747, "learning_rate": 0.00013578765381877351, "loss": 1.0792, "step": 3163 }, { "epoch": 0.6432201667005489, "grad_norm": 0.1319563090801239, "learning_rate": 0.00013576731414624224, "loss": 1.1321, "step": 3164 }, { "epoch": 0.6434234600528562, "grad_norm": 0.13448521494865417, "learning_rate": 0.000135746974473711, "loss": 1.0278, "step": 3165 }, { "epoch": 0.6436267534051636, "grad_norm": 0.1246679350733757, "learning_rate": 0.00013572663480117972, "loss": 0.9818, "step": 3166 }, { "epoch": 0.643830046757471, "grad_norm": 0.11772032827138901, "learning_rate": 0.00013570629512864844, "loss": 0.9398, "step": 3167 }, { "epoch": 0.6440333401097784, "grad_norm": 0.12182223796844482, "learning_rate": 0.00013568595545611716, "loss": 0.9794, "step": 3168 }, { "epoch": 0.6442366334620858, "grad_norm": 0.11464784294366837, "learning_rate": 0.0001356656157835859, "loss": 0.9508, "step": 3169 }, { "epoch": 0.6444399268143932, "grad_norm": 0.12462913244962692, "learning_rate": 0.0001356452761110546, "loss": 1.0784, "step": 3170 }, { "epoch": 0.6446432201667005, "grad_norm": 0.14886057376861572, "learning_rate": 0.00013562493643852334, "loss": 1.1937, "step": 3171 }, { "epoch": 0.6448465135190079, "grad_norm": 0.12092513591051102, "learning_rate": 0.00013560459676599206, "loss": 0.9333, "step": 3172 }, { "epoch": 0.6450498068713153, "grad_norm": 0.13768193125724792, "learning_rate": 0.00013558425709346081, "loss": 1.0512, "step": 3173 }, { "epoch": 0.6452531002236227, "grad_norm": 0.13496732711791992, "learning_rate": 0.00013556391742092954, "loss": 1.0672, "step": 3174 }, { "epoch": 0.6454563935759301, "grad_norm": 0.1316104531288147, "learning_rate": 0.00013554357774839826, "loss": 1.0944, "step": 3175 }, { "epoch": 0.6456596869282375, "grad_norm": 0.12093289196491241, "learning_rate": 0.000135523238075867, "loss": 0.8777, "step": 3176 }, { "epoch": 0.6458629802805448, "grad_norm": 0.12371384352445602, "learning_rate": 0.0001355028984033357, "loss": 0.8997, "step": 3177 }, { "epoch": 0.6460662736328522, "grad_norm": 0.13598783314228058, "learning_rate": 0.00013548255873080444, "loss": 1.1167, "step": 3178 }, { "epoch": 0.6462695669851596, "grad_norm": 0.1385606974363327, "learning_rate": 0.00013546221905827316, "loss": 1.032, "step": 3179 }, { "epoch": 0.646472860337467, "grad_norm": 0.12756818532943726, "learning_rate": 0.00013544187938574188, "loss": 1.0912, "step": 3180 }, { "epoch": 0.6466761536897744, "grad_norm": 0.12240833789110184, "learning_rate": 0.00013542153971321064, "loss": 1.0065, "step": 3181 }, { "epoch": 0.6468794470420818, "grad_norm": 0.14103402197360992, "learning_rate": 0.00013540120004067936, "loss": 1.1357, "step": 3182 }, { "epoch": 0.6470827403943891, "grad_norm": 0.12278808653354645, "learning_rate": 0.00013538086036814809, "loss": 1.0462, "step": 3183 }, { "epoch": 0.6472860337466965, "grad_norm": 0.13968375325202942, "learning_rate": 0.0001353605206956168, "loss": 1.1164, "step": 3184 }, { "epoch": 0.6474893270990039, "grad_norm": 0.12311102449893951, "learning_rate": 0.00013534018102308553, "loss": 1.0793, "step": 3185 }, { "epoch": 0.6476926204513113, "grad_norm": 0.1307074874639511, "learning_rate": 0.00013531984135055426, "loss": 1.0874, "step": 3186 }, { "epoch": 0.6478959138036187, "grad_norm": 0.1303715705871582, "learning_rate": 0.00013529950167802298, "loss": 1.1111, "step": 3187 }, { "epoch": 0.648099207155926, "grad_norm": 0.13313518464565277, "learning_rate": 0.0001352791620054917, "loss": 0.9861, "step": 3188 }, { "epoch": 0.6483025005082333, "grad_norm": 0.13007265329360962, "learning_rate": 0.00013525882233296046, "loss": 0.9644, "step": 3189 }, { "epoch": 0.6485057938605407, "grad_norm": 0.14151926338672638, "learning_rate": 0.00013523848266042918, "loss": 1.1387, "step": 3190 }, { "epoch": 0.6487090872128481, "grad_norm": 0.13587616384029388, "learning_rate": 0.0001352181429878979, "loss": 1.0802, "step": 3191 }, { "epoch": 0.6489123805651555, "grad_norm": 0.14267796277999878, "learning_rate": 0.00013519780331536663, "loss": 1.1885, "step": 3192 }, { "epoch": 0.6491156739174629, "grad_norm": 0.11519461125135422, "learning_rate": 0.00013517746364283536, "loss": 0.9333, "step": 3193 }, { "epoch": 0.6493189672697702, "grad_norm": 0.14246360957622528, "learning_rate": 0.00013515712397030408, "loss": 1.1757, "step": 3194 }, { "epoch": 0.6495222606220776, "grad_norm": 0.14482155442237854, "learning_rate": 0.0001351367842977728, "loss": 1.097, "step": 3195 }, { "epoch": 0.649725553974385, "grad_norm": 0.1291578710079193, "learning_rate": 0.00013511644462524153, "loss": 0.9938, "step": 3196 }, { "epoch": 0.6499288473266924, "grad_norm": 0.13155002892017365, "learning_rate": 0.00013509610495271028, "loss": 1.0634, "step": 3197 }, { "epoch": 0.6501321406789998, "grad_norm": 0.1477162092924118, "learning_rate": 0.000135075765280179, "loss": 1.0995, "step": 3198 }, { "epoch": 0.6503354340313072, "grad_norm": 0.12841352820396423, "learning_rate": 0.00013505542560764773, "loss": 1.1185, "step": 3199 }, { "epoch": 0.6505387273836145, "grad_norm": 0.13000524044036865, "learning_rate": 0.00013503508593511646, "loss": 1.2428, "step": 3200 }, { "epoch": 0.6507420207359219, "grad_norm": 0.1218332052230835, "learning_rate": 0.00013501474626258518, "loss": 1.026, "step": 3201 }, { "epoch": 0.6509453140882293, "grad_norm": 0.12599121034145355, "learning_rate": 0.0001349944065900539, "loss": 1.0013, "step": 3202 }, { "epoch": 0.6511486074405367, "grad_norm": 0.15027253329753876, "learning_rate": 0.00013497406691752263, "loss": 1.2587, "step": 3203 }, { "epoch": 0.6513519007928441, "grad_norm": 0.12841476500034332, "learning_rate": 0.00013495372724499135, "loss": 0.9977, "step": 3204 }, { "epoch": 0.6515551941451515, "grad_norm": 0.13236485421657562, "learning_rate": 0.00013493338757246008, "loss": 1.0202, "step": 3205 }, { "epoch": 0.6517584874974588, "grad_norm": 0.1356945037841797, "learning_rate": 0.00013491304789992883, "loss": 1.1599, "step": 3206 }, { "epoch": 0.6519617808497662, "grad_norm": 0.13879364728927612, "learning_rate": 0.00013489270822739755, "loss": 1.0116, "step": 3207 }, { "epoch": 0.6521650742020736, "grad_norm": 0.15575814247131348, "learning_rate": 0.00013487236855486628, "loss": 1.0991, "step": 3208 }, { "epoch": 0.652368367554381, "grad_norm": 0.11463279277086258, "learning_rate": 0.000134852028882335, "loss": 0.8655, "step": 3209 }, { "epoch": 0.6525716609066884, "grad_norm": 0.1258864849805832, "learning_rate": 0.00013483168920980373, "loss": 0.9616, "step": 3210 }, { "epoch": 0.6527749542589957, "grad_norm": 0.13992567360401154, "learning_rate": 0.00013481134953727245, "loss": 1.078, "step": 3211 }, { "epoch": 0.6529782476113031, "grad_norm": 0.14613211154937744, "learning_rate": 0.00013479100986474118, "loss": 1.0828, "step": 3212 }, { "epoch": 0.6531815409636105, "grad_norm": 0.11749006807804108, "learning_rate": 0.0001347706701922099, "loss": 0.8586, "step": 3213 }, { "epoch": 0.6533848343159179, "grad_norm": 0.13639944791793823, "learning_rate": 0.00013475033051967865, "loss": 1.0437, "step": 3214 }, { "epoch": 0.6535881276682253, "grad_norm": 0.12744362652301788, "learning_rate": 0.00013472999084714738, "loss": 1.1059, "step": 3215 }, { "epoch": 0.6537914210205327, "grad_norm": 0.12434601038694382, "learning_rate": 0.0001347096511746161, "loss": 1.0041, "step": 3216 }, { "epoch": 0.65399471437284, "grad_norm": 0.12143322080373764, "learning_rate": 0.00013468931150208483, "loss": 0.9148, "step": 3217 }, { "epoch": 0.6541980077251474, "grad_norm": 0.142898291349411, "learning_rate": 0.00013466897182955355, "loss": 1.1715, "step": 3218 }, { "epoch": 0.6544013010774548, "grad_norm": 0.12720847129821777, "learning_rate": 0.00013464863215702227, "loss": 1.013, "step": 3219 }, { "epoch": 0.6546045944297622, "grad_norm": 0.1172272264957428, "learning_rate": 0.000134628292484491, "loss": 0.9205, "step": 3220 }, { "epoch": 0.6548078877820696, "grad_norm": 0.15361227095127106, "learning_rate": 0.00013460795281195972, "loss": 1.2636, "step": 3221 }, { "epoch": 0.655011181134377, "grad_norm": 0.1317681223154068, "learning_rate": 0.00013458761313942847, "loss": 1.1478, "step": 3222 }, { "epoch": 0.6552144744866842, "grad_norm": 0.1296282410621643, "learning_rate": 0.0001345672734668972, "loss": 1.0402, "step": 3223 }, { "epoch": 0.6554177678389916, "grad_norm": 0.1406709998846054, "learning_rate": 0.00013454693379436592, "loss": 1.1656, "step": 3224 }, { "epoch": 0.655621061191299, "grad_norm": 0.13919825851917267, "learning_rate": 0.00013452659412183465, "loss": 1.0382, "step": 3225 }, { "epoch": 0.6558243545436064, "grad_norm": 0.14981389045715332, "learning_rate": 0.00013450625444930337, "loss": 1.1494, "step": 3226 }, { "epoch": 0.6560276478959138, "grad_norm": 0.13149550557136536, "learning_rate": 0.0001344859147767721, "loss": 1.2005, "step": 3227 }, { "epoch": 0.6562309412482212, "grad_norm": 0.11929726600646973, "learning_rate": 0.00013446557510424082, "loss": 0.8001, "step": 3228 }, { "epoch": 0.6564342346005285, "grad_norm": 0.1304064244031906, "learning_rate": 0.00013444523543170955, "loss": 0.9621, "step": 3229 }, { "epoch": 0.6566375279528359, "grad_norm": 0.1286899745464325, "learning_rate": 0.0001344248957591783, "loss": 0.9954, "step": 3230 }, { "epoch": 0.6568408213051433, "grad_norm": 0.13308082520961761, "learning_rate": 0.00013440455608664702, "loss": 0.9092, "step": 3231 }, { "epoch": 0.6570441146574507, "grad_norm": 0.14997734129428864, "learning_rate": 0.00013438421641411575, "loss": 1.1089, "step": 3232 }, { "epoch": 0.6572474080097581, "grad_norm": 0.14065352082252502, "learning_rate": 0.00013436387674158447, "loss": 1.0664, "step": 3233 }, { "epoch": 0.6574507013620655, "grad_norm": 0.12980201840400696, "learning_rate": 0.0001343435370690532, "loss": 1.1631, "step": 3234 }, { "epoch": 0.6576539947143728, "grad_norm": 0.11543235182762146, "learning_rate": 0.00013432319739652192, "loss": 0.9523, "step": 3235 }, { "epoch": 0.6578572880666802, "grad_norm": 0.14717644453048706, "learning_rate": 0.00013430285772399064, "loss": 1.0466, "step": 3236 }, { "epoch": 0.6580605814189876, "grad_norm": 0.12715165317058563, "learning_rate": 0.00013428251805145937, "loss": 0.988, "step": 3237 }, { "epoch": 0.658263874771295, "grad_norm": 0.14531929790973663, "learning_rate": 0.00013426217837892812, "loss": 1.1871, "step": 3238 }, { "epoch": 0.6584671681236024, "grad_norm": 0.139459490776062, "learning_rate": 0.00013424183870639684, "loss": 1.1572, "step": 3239 }, { "epoch": 0.6586704614759097, "grad_norm": 0.11804230511188507, "learning_rate": 0.00013422149903386557, "loss": 0.9844, "step": 3240 }, { "epoch": 0.6588737548282171, "grad_norm": 0.14333584904670715, "learning_rate": 0.0001342011593613343, "loss": 1.1719, "step": 3241 }, { "epoch": 0.6590770481805245, "grad_norm": 0.14224494993686676, "learning_rate": 0.00013418081968880302, "loss": 1.1448, "step": 3242 }, { "epoch": 0.6592803415328319, "grad_norm": 0.11388222128152847, "learning_rate": 0.00013416048001627174, "loss": 0.9178, "step": 3243 }, { "epoch": 0.6594836348851393, "grad_norm": 0.12758168578147888, "learning_rate": 0.00013414014034374047, "loss": 0.9446, "step": 3244 }, { "epoch": 0.6596869282374467, "grad_norm": 0.159623384475708, "learning_rate": 0.0001341198006712092, "loss": 1.3143, "step": 3245 }, { "epoch": 0.659890221589754, "grad_norm": 0.13925635814666748, "learning_rate": 0.00013409946099867792, "loss": 1.131, "step": 3246 }, { "epoch": 0.6600935149420614, "grad_norm": 0.12121693789958954, "learning_rate": 0.00013407912132614667, "loss": 1.0076, "step": 3247 }, { "epoch": 0.6602968082943688, "grad_norm": 0.12954868376255035, "learning_rate": 0.0001340587816536154, "loss": 0.9662, "step": 3248 }, { "epoch": 0.6605001016466762, "grad_norm": 0.13503266870975494, "learning_rate": 0.00013403844198108412, "loss": 1.0233, "step": 3249 }, { "epoch": 0.6607033949989836, "grad_norm": 0.13549566268920898, "learning_rate": 0.00013401810230855284, "loss": 1.0545, "step": 3250 }, { "epoch": 0.660906688351291, "grad_norm": 0.13881300389766693, "learning_rate": 0.00013399776263602157, "loss": 1.0844, "step": 3251 }, { "epoch": 0.6611099817035982, "grad_norm": 0.13221535086631775, "learning_rate": 0.0001339774229634903, "loss": 1.0159, "step": 3252 }, { "epoch": 0.6613132750559056, "grad_norm": 0.1378117799758911, "learning_rate": 0.00013395708329095901, "loss": 1.2071, "step": 3253 }, { "epoch": 0.661516568408213, "grad_norm": 0.1307571530342102, "learning_rate": 0.00013393674361842774, "loss": 1.0874, "step": 3254 }, { "epoch": 0.6617198617605204, "grad_norm": 0.1532752364873886, "learning_rate": 0.0001339164039458965, "loss": 1.263, "step": 3255 }, { "epoch": 0.6619231551128278, "grad_norm": 0.14829877018928528, "learning_rate": 0.00013389606427336521, "loss": 1.1919, "step": 3256 }, { "epoch": 0.6621264484651352, "grad_norm": 0.12832298874855042, "learning_rate": 0.00013387572460083394, "loss": 1.0, "step": 3257 }, { "epoch": 0.6623297418174425, "grad_norm": 0.12127513438463211, "learning_rate": 0.00013385538492830266, "loss": 1.03, "step": 3258 }, { "epoch": 0.6625330351697499, "grad_norm": 0.1351458579301834, "learning_rate": 0.0001338350452557714, "loss": 1.0167, "step": 3259 }, { "epoch": 0.6627363285220573, "grad_norm": 0.11357429623603821, "learning_rate": 0.0001338147055832401, "loss": 0.9135, "step": 3260 }, { "epoch": 0.6629396218743647, "grad_norm": 0.14391832053661346, "learning_rate": 0.00013379436591070884, "loss": 1.1626, "step": 3261 }, { "epoch": 0.6631429152266721, "grad_norm": 0.1359371840953827, "learning_rate": 0.00013377402623817756, "loss": 1.1155, "step": 3262 }, { "epoch": 0.6633462085789794, "grad_norm": 0.14570018649101257, "learning_rate": 0.0001337536865656463, "loss": 1.0961, "step": 3263 }, { "epoch": 0.6635495019312868, "grad_norm": 0.12299071252346039, "learning_rate": 0.00013373334689311504, "loss": 0.9879, "step": 3264 }, { "epoch": 0.6637527952835942, "grad_norm": 0.1427142471075058, "learning_rate": 0.00013371300722058376, "loss": 1.2364, "step": 3265 }, { "epoch": 0.6639560886359016, "grad_norm": 0.1400018036365509, "learning_rate": 0.0001336926675480525, "loss": 1.1366, "step": 3266 }, { "epoch": 0.664159381988209, "grad_norm": 0.14757339656352997, "learning_rate": 0.0001336723278755212, "loss": 1.2415, "step": 3267 }, { "epoch": 0.6643626753405164, "grad_norm": 0.1404561698436737, "learning_rate": 0.00013365198820298994, "loss": 1.0536, "step": 3268 }, { "epoch": 0.6645659686928237, "grad_norm": 0.13608767092227936, "learning_rate": 0.00013363164853045866, "loss": 1.0977, "step": 3269 }, { "epoch": 0.6647692620451311, "grad_norm": 0.14513832330703735, "learning_rate": 0.00013361130885792738, "loss": 1.1665, "step": 3270 }, { "epoch": 0.6649725553974385, "grad_norm": 0.12752074003219604, "learning_rate": 0.00013359096918539614, "loss": 1.0611, "step": 3271 }, { "epoch": 0.6651758487497459, "grad_norm": 0.1297471970319748, "learning_rate": 0.00013357062951286486, "loss": 1.0336, "step": 3272 }, { "epoch": 0.6653791421020533, "grad_norm": 0.13528691232204437, "learning_rate": 0.00013355028984033359, "loss": 1.1205, "step": 3273 }, { "epoch": 0.6655824354543607, "grad_norm": 0.13278824090957642, "learning_rate": 0.0001335299501678023, "loss": 1.1608, "step": 3274 }, { "epoch": 0.665785728806668, "grad_norm": 0.1527799665927887, "learning_rate": 0.00013350961049527103, "loss": 1.1947, "step": 3275 }, { "epoch": 0.6659890221589754, "grad_norm": 0.11764834076166153, "learning_rate": 0.00013348927082273976, "loss": 0.8985, "step": 3276 }, { "epoch": 0.6661923155112828, "grad_norm": 0.12094051390886307, "learning_rate": 0.00013346893115020848, "loss": 0.8954, "step": 3277 }, { "epoch": 0.6663956088635902, "grad_norm": 0.1273156702518463, "learning_rate": 0.0001334485914776772, "loss": 1.0629, "step": 3278 }, { "epoch": 0.6665989022158976, "grad_norm": 0.12444844841957092, "learning_rate": 0.00013342825180514596, "loss": 1.0771, "step": 3279 }, { "epoch": 0.666802195568205, "grad_norm": 0.13100309669971466, "learning_rate": 0.00013340791213261468, "loss": 1.0665, "step": 3280 }, { "epoch": 0.6670054889205123, "grad_norm": 0.14003531634807587, "learning_rate": 0.0001333875724600834, "loss": 1.1218, "step": 3281 }, { "epoch": 0.6672087822728197, "grad_norm": 0.13837094604969025, "learning_rate": 0.00013336723278755213, "loss": 1.1864, "step": 3282 }, { "epoch": 0.667412075625127, "grad_norm": 0.1185075119137764, "learning_rate": 0.00013334689311502086, "loss": 0.9776, "step": 3283 }, { "epoch": 0.6676153689774345, "grad_norm": 0.1384880095720291, "learning_rate": 0.00013332655344248958, "loss": 1.117, "step": 3284 }, { "epoch": 0.6678186623297419, "grad_norm": 0.1331661343574524, "learning_rate": 0.0001333062137699583, "loss": 1.1523, "step": 3285 }, { "epoch": 0.6680219556820493, "grad_norm": 0.12203952670097351, "learning_rate": 0.00013328587409742703, "loss": 1.0236, "step": 3286 }, { "epoch": 0.6682252490343565, "grad_norm": 0.1446705311536789, "learning_rate": 0.00013326553442489575, "loss": 1.2011, "step": 3287 }, { "epoch": 0.6684285423866639, "grad_norm": 0.15075799822807312, "learning_rate": 0.0001332451947523645, "loss": 1.1135, "step": 3288 }, { "epoch": 0.6686318357389713, "grad_norm": 0.13888481259346008, "learning_rate": 0.00013322485507983323, "loss": 1.1357, "step": 3289 }, { "epoch": 0.6688351290912787, "grad_norm": 0.13847656548023224, "learning_rate": 0.00013320451540730196, "loss": 1.1679, "step": 3290 }, { "epoch": 0.6690384224435861, "grad_norm": 0.14227357506752014, "learning_rate": 0.00013318417573477068, "loss": 1.184, "step": 3291 }, { "epoch": 0.6692417157958934, "grad_norm": 0.13490445911884308, "learning_rate": 0.0001331638360622394, "loss": 1.026, "step": 3292 }, { "epoch": 0.6694450091482008, "grad_norm": 0.14282800257205963, "learning_rate": 0.00013314349638970813, "loss": 1.1685, "step": 3293 }, { "epoch": 0.6696483025005082, "grad_norm": 0.1398768126964569, "learning_rate": 0.00013312315671717685, "loss": 1.1653, "step": 3294 }, { "epoch": 0.6698515958528156, "grad_norm": 0.1359616070985794, "learning_rate": 0.00013310281704464558, "loss": 0.9866, "step": 3295 }, { "epoch": 0.670054889205123, "grad_norm": 0.14484332501888275, "learning_rate": 0.00013308247737211433, "loss": 1.119, "step": 3296 }, { "epoch": 0.6702581825574304, "grad_norm": 0.12202159315347672, "learning_rate": 0.00013306213769958305, "loss": 1.0366, "step": 3297 }, { "epoch": 0.6704614759097377, "grad_norm": 0.1427534818649292, "learning_rate": 0.00013304179802705178, "loss": 1.1238, "step": 3298 }, { "epoch": 0.6706647692620451, "grad_norm": 0.12576861679553986, "learning_rate": 0.0001330214583545205, "loss": 1.1628, "step": 3299 }, { "epoch": 0.6708680626143525, "grad_norm": 0.11372304707765579, "learning_rate": 0.00013300111868198923, "loss": 1.0396, "step": 3300 }, { "epoch": 0.6710713559666599, "grad_norm": 0.12820537388324738, "learning_rate": 0.00013298077900945795, "loss": 0.9094, "step": 3301 }, { "epoch": 0.6712746493189673, "grad_norm": 0.1097426563501358, "learning_rate": 0.00013296043933692668, "loss": 0.887, "step": 3302 }, { "epoch": 0.6714779426712747, "grad_norm": 0.13616250455379486, "learning_rate": 0.0001329400996643954, "loss": 1.0729, "step": 3303 }, { "epoch": 0.671681236023582, "grad_norm": 0.14476965367794037, "learning_rate": 0.00013291975999186415, "loss": 1.0224, "step": 3304 }, { "epoch": 0.6718845293758894, "grad_norm": 0.13365976512432098, "learning_rate": 0.00013289942031933288, "loss": 1.1, "step": 3305 }, { "epoch": 0.6720878227281968, "grad_norm": 0.12170373648405075, "learning_rate": 0.0001328790806468016, "loss": 0.9986, "step": 3306 }, { "epoch": 0.6722911160805042, "grad_norm": 0.1351754069328308, "learning_rate": 0.00013285874097427033, "loss": 1.0089, "step": 3307 }, { "epoch": 0.6724944094328116, "grad_norm": 0.13269051909446716, "learning_rate": 0.00013283840130173905, "loss": 0.9508, "step": 3308 }, { "epoch": 0.672697702785119, "grad_norm": 0.12628872692584991, "learning_rate": 0.00013281806162920777, "loss": 0.96, "step": 3309 }, { "epoch": 0.6729009961374263, "grad_norm": 0.13434316217899323, "learning_rate": 0.0001327977219566765, "loss": 1.0935, "step": 3310 }, { "epoch": 0.6731042894897337, "grad_norm": 0.137080579996109, "learning_rate": 0.00013277738228414522, "loss": 1.151, "step": 3311 }, { "epoch": 0.6733075828420411, "grad_norm": 0.1294548362493515, "learning_rate": 0.00013275704261161397, "loss": 1.0094, "step": 3312 }, { "epoch": 0.6735108761943485, "grad_norm": 0.14055456221103668, "learning_rate": 0.0001327367029390827, "loss": 1.056, "step": 3313 }, { "epoch": 0.6737141695466559, "grad_norm": 0.12785248458385468, "learning_rate": 0.00013271636326655142, "loss": 1.0964, "step": 3314 }, { "epoch": 0.6739174628989631, "grad_norm": 0.14090466499328613, "learning_rate": 0.00013269602359402015, "loss": 1.1419, "step": 3315 }, { "epoch": 0.6741207562512705, "grad_norm": 0.12105811387300491, "learning_rate": 0.00013267568392148887, "loss": 0.929, "step": 3316 }, { "epoch": 0.674324049603578, "grad_norm": 0.1410580724477768, "learning_rate": 0.0001326553442489576, "loss": 1.0849, "step": 3317 }, { "epoch": 0.6745273429558853, "grad_norm": 0.13689137995243073, "learning_rate": 0.00013263500457642632, "loss": 1.0011, "step": 3318 }, { "epoch": 0.6747306363081927, "grad_norm": 0.12887214124202728, "learning_rate": 0.00013261466490389505, "loss": 1.0231, "step": 3319 }, { "epoch": 0.6749339296605001, "grad_norm": 0.12463674694299698, "learning_rate": 0.0001325943252313638, "loss": 0.864, "step": 3320 }, { "epoch": 0.6751372230128074, "grad_norm": 0.13897714018821716, "learning_rate": 0.00013257398555883252, "loss": 1.1127, "step": 3321 }, { "epoch": 0.6753405163651148, "grad_norm": 0.1311863511800766, "learning_rate": 0.00013255364588630125, "loss": 1.0822, "step": 3322 }, { "epoch": 0.6755438097174222, "grad_norm": 0.1215839833021164, "learning_rate": 0.00013253330621376997, "loss": 0.9599, "step": 3323 }, { "epoch": 0.6757471030697296, "grad_norm": 0.12233379483222961, "learning_rate": 0.0001325129665412387, "loss": 0.8926, "step": 3324 }, { "epoch": 0.675950396422037, "grad_norm": 0.1159176304936409, "learning_rate": 0.00013249262686870742, "loss": 0.79, "step": 3325 }, { "epoch": 0.6761536897743444, "grad_norm": 0.1344752311706543, "learning_rate": 0.00013247228719617614, "loss": 1.0011, "step": 3326 }, { "epoch": 0.6763569831266517, "grad_norm": 0.14110898971557617, "learning_rate": 0.00013245194752364487, "loss": 1.1465, "step": 3327 }, { "epoch": 0.6765602764789591, "grad_norm": 0.12130746990442276, "learning_rate": 0.0001324316078511136, "loss": 0.9631, "step": 3328 }, { "epoch": 0.6767635698312665, "grad_norm": 0.12850743532180786, "learning_rate": 0.00013241126817858234, "loss": 1.0909, "step": 3329 }, { "epoch": 0.6769668631835739, "grad_norm": 0.14836134016513824, "learning_rate": 0.00013239092850605107, "loss": 1.3898, "step": 3330 }, { "epoch": 0.6771701565358813, "grad_norm": 0.1397714763879776, "learning_rate": 0.0001323705888335198, "loss": 1.1767, "step": 3331 }, { "epoch": 0.6773734498881887, "grad_norm": 0.13022536039352417, "learning_rate": 0.0001323502491609885, "loss": 1.0389, "step": 3332 }, { "epoch": 0.677576743240496, "grad_norm": 0.12649066746234894, "learning_rate": 0.00013232990948845724, "loss": 0.8931, "step": 3333 }, { "epoch": 0.6777800365928034, "grad_norm": 0.1422676295042038, "learning_rate": 0.00013230956981592597, "loss": 1.1758, "step": 3334 }, { "epoch": 0.6779833299451108, "grad_norm": 0.12162751704454422, "learning_rate": 0.0001322892301433947, "loss": 1.0629, "step": 3335 }, { "epoch": 0.6781866232974182, "grad_norm": 0.14175549149513245, "learning_rate": 0.00013226889047086342, "loss": 1.2327, "step": 3336 }, { "epoch": 0.6783899166497256, "grad_norm": 0.13854654133319855, "learning_rate": 0.00013224855079833217, "loss": 1.1758, "step": 3337 }, { "epoch": 0.678593210002033, "grad_norm": 0.11496133357286453, "learning_rate": 0.0001322282111258009, "loss": 0.8925, "step": 3338 }, { "epoch": 0.6787965033543403, "grad_norm": 0.1376158595085144, "learning_rate": 0.00013220787145326962, "loss": 1.2326, "step": 3339 }, { "epoch": 0.6789997967066477, "grad_norm": 0.12731988728046417, "learning_rate": 0.00013218753178073834, "loss": 1.0195, "step": 3340 }, { "epoch": 0.6792030900589551, "grad_norm": 0.1400342583656311, "learning_rate": 0.00013216719210820707, "loss": 1.1155, "step": 3341 }, { "epoch": 0.6794063834112625, "grad_norm": 0.11408770084381104, "learning_rate": 0.0001321468524356758, "loss": 0.8986, "step": 3342 }, { "epoch": 0.6796096767635699, "grad_norm": 0.13925215601921082, "learning_rate": 0.00013212651276314451, "loss": 1.0844, "step": 3343 }, { "epoch": 0.6798129701158772, "grad_norm": 0.13174065947532654, "learning_rate": 0.00013210617309061324, "loss": 0.9927, "step": 3344 }, { "epoch": 0.6800162634681846, "grad_norm": 0.12421359866857529, "learning_rate": 0.000132085833418082, "loss": 0.8822, "step": 3345 }, { "epoch": 0.680219556820492, "grad_norm": 0.14170731604099274, "learning_rate": 0.00013206549374555071, "loss": 1.0215, "step": 3346 }, { "epoch": 0.6804228501727994, "grad_norm": 0.13698481023311615, "learning_rate": 0.00013204515407301944, "loss": 1.1608, "step": 3347 }, { "epoch": 0.6806261435251068, "grad_norm": 0.12675851583480835, "learning_rate": 0.00013202481440048816, "loss": 1.0425, "step": 3348 }, { "epoch": 0.6808294368774142, "grad_norm": 0.13038714230060577, "learning_rate": 0.0001320044747279569, "loss": 1.0598, "step": 3349 }, { "epoch": 0.6810327302297214, "grad_norm": 0.1283421814441681, "learning_rate": 0.0001319841350554256, "loss": 0.9638, "step": 3350 }, { "epoch": 0.6812360235820288, "grad_norm": 0.1362680047750473, "learning_rate": 0.00013196379538289434, "loss": 1.1957, "step": 3351 }, { "epoch": 0.6814393169343362, "grad_norm": 0.12494239211082458, "learning_rate": 0.00013194345571036306, "loss": 1.0474, "step": 3352 }, { "epoch": 0.6816426102866436, "grad_norm": 0.11277607828378677, "learning_rate": 0.0001319231160378318, "loss": 0.9065, "step": 3353 }, { "epoch": 0.681845903638951, "grad_norm": 0.13010768592357635, "learning_rate": 0.00013190277636530054, "loss": 1.0045, "step": 3354 }, { "epoch": 0.6820491969912584, "grad_norm": 0.13375157117843628, "learning_rate": 0.00013188243669276926, "loss": 0.9506, "step": 3355 }, { "epoch": 0.6822524903435657, "grad_norm": 0.13150712847709656, "learning_rate": 0.00013186209702023799, "loss": 1.0485, "step": 3356 }, { "epoch": 0.6824557836958731, "grad_norm": 0.13057585060596466, "learning_rate": 0.0001318417573477067, "loss": 1.0687, "step": 3357 }, { "epoch": 0.6826590770481805, "grad_norm": 0.13433004915714264, "learning_rate": 0.00013182141767517544, "loss": 0.985, "step": 3358 }, { "epoch": 0.6828623704004879, "grad_norm": 0.1338491439819336, "learning_rate": 0.00013180107800264416, "loss": 1.1384, "step": 3359 }, { "epoch": 0.6830656637527953, "grad_norm": 0.13416750729084015, "learning_rate": 0.00013178073833011288, "loss": 1.0868, "step": 3360 }, { "epoch": 0.6832689571051027, "grad_norm": 0.13917329907417297, "learning_rate": 0.00013176039865758164, "loss": 1.1072, "step": 3361 }, { "epoch": 0.68347225045741, "grad_norm": 0.1197846531867981, "learning_rate": 0.00013174005898505036, "loss": 0.9208, "step": 3362 }, { "epoch": 0.6836755438097174, "grad_norm": 0.1425098180770874, "learning_rate": 0.00013171971931251908, "loss": 1.2693, "step": 3363 }, { "epoch": 0.6838788371620248, "grad_norm": 0.13614432513713837, "learning_rate": 0.0001316993796399878, "loss": 0.9667, "step": 3364 }, { "epoch": 0.6840821305143322, "grad_norm": 0.1563062071800232, "learning_rate": 0.00013167903996745653, "loss": 1.1234, "step": 3365 }, { "epoch": 0.6842854238666396, "grad_norm": 0.1402071714401245, "learning_rate": 0.00013165870029492526, "loss": 1.0228, "step": 3366 }, { "epoch": 0.684488717218947, "grad_norm": 0.14747624099254608, "learning_rate": 0.00013163836062239398, "loss": 1.2746, "step": 3367 }, { "epoch": 0.6846920105712543, "grad_norm": 0.11560353636741638, "learning_rate": 0.0001316180209498627, "loss": 0.9313, "step": 3368 }, { "epoch": 0.6848953039235617, "grad_norm": 0.12440039217472076, "learning_rate": 0.00013159768127733143, "loss": 0.986, "step": 3369 }, { "epoch": 0.6850985972758691, "grad_norm": 0.13954605162143707, "learning_rate": 0.00013157734160480018, "loss": 1.2206, "step": 3370 }, { "epoch": 0.6853018906281765, "grad_norm": 0.139942929148674, "learning_rate": 0.0001315570019322689, "loss": 0.9075, "step": 3371 }, { "epoch": 0.6855051839804839, "grad_norm": 0.13854482769966125, "learning_rate": 0.00013153666225973763, "loss": 1.1007, "step": 3372 }, { "epoch": 0.6857084773327912, "grad_norm": 0.12603192031383514, "learning_rate": 0.00013151632258720633, "loss": 1.1533, "step": 3373 }, { "epoch": 0.6859117706850986, "grad_norm": 0.12680287659168243, "learning_rate": 0.00013149598291467508, "loss": 1.0463, "step": 3374 }, { "epoch": 0.686115064037406, "grad_norm": 0.12043260782957077, "learning_rate": 0.0001314756432421438, "loss": 0.8653, "step": 3375 }, { "epoch": 0.6863183573897134, "grad_norm": 0.15314915776252747, "learning_rate": 0.00013145530356961253, "loss": 1.1384, "step": 3376 }, { "epoch": 0.6865216507420208, "grad_norm": 0.12305079400539398, "learning_rate": 0.00013143496389708125, "loss": 0.9134, "step": 3377 }, { "epoch": 0.6867249440943282, "grad_norm": 0.12972278892993927, "learning_rate": 0.00013141462422455, "loss": 1.1011, "step": 3378 }, { "epoch": 0.6869282374466354, "grad_norm": 0.12650032341480255, "learning_rate": 0.00013139428455201873, "loss": 1.0518, "step": 3379 }, { "epoch": 0.6871315307989428, "grad_norm": 0.13137362897396088, "learning_rate": 0.00013137394487948745, "loss": 1.0093, "step": 3380 }, { "epoch": 0.6873348241512502, "grad_norm": 0.1400621086359024, "learning_rate": 0.00013135360520695615, "loss": 1.2115, "step": 3381 }, { "epoch": 0.6875381175035576, "grad_norm": 0.1252133697271347, "learning_rate": 0.0001313332655344249, "loss": 1.0329, "step": 3382 }, { "epoch": 0.687741410855865, "grad_norm": 0.13961845636367798, "learning_rate": 0.00013131292586189363, "loss": 1.0093, "step": 3383 }, { "epoch": 0.6879447042081724, "grad_norm": 0.1432250738143921, "learning_rate": 0.00013129258618936235, "loss": 1.2186, "step": 3384 }, { "epoch": 0.6881479975604797, "grad_norm": 0.1433638036251068, "learning_rate": 0.00013127224651683108, "loss": 1.2933, "step": 3385 }, { "epoch": 0.6883512909127871, "grad_norm": 0.13323669135570526, "learning_rate": 0.00013125190684429983, "loss": 1.0775, "step": 3386 }, { "epoch": 0.6885545842650945, "grad_norm": 0.15013840794563293, "learning_rate": 0.00013123156717176855, "loss": 1.0446, "step": 3387 }, { "epoch": 0.6887578776174019, "grad_norm": 0.13675931096076965, "learning_rate": 0.00013121122749923728, "loss": 1.1743, "step": 3388 }, { "epoch": 0.6889611709697093, "grad_norm": 0.13321883976459503, "learning_rate": 0.00013119088782670597, "loss": 1.2053, "step": 3389 }, { "epoch": 0.6891644643220167, "grad_norm": 0.14458970725536346, "learning_rate": 0.00013117054815417473, "loss": 1.0808, "step": 3390 }, { "epoch": 0.689367757674324, "grad_norm": 0.12558375298976898, "learning_rate": 0.00013115020848164345, "loss": 0.9879, "step": 3391 }, { "epoch": 0.6895710510266314, "grad_norm": 0.13324345648288727, "learning_rate": 0.00013112986880911218, "loss": 1.0561, "step": 3392 }, { "epoch": 0.6897743443789388, "grad_norm": 0.1250324845314026, "learning_rate": 0.0001311095291365809, "loss": 1.0982, "step": 3393 }, { "epoch": 0.6899776377312462, "grad_norm": 0.13437926769256592, "learning_rate": 0.00013108918946404965, "loss": 1.0323, "step": 3394 }, { "epoch": 0.6901809310835536, "grad_norm": 0.1360880434513092, "learning_rate": 0.00013106884979151838, "loss": 1.079, "step": 3395 }, { "epoch": 0.6903842244358609, "grad_norm": 0.14753840863704681, "learning_rate": 0.0001310485101189871, "loss": 1.1558, "step": 3396 }, { "epoch": 0.6905875177881683, "grad_norm": 0.1305796205997467, "learning_rate": 0.00013102817044645582, "loss": 1.093, "step": 3397 }, { "epoch": 0.6907908111404757, "grad_norm": 0.14020781219005585, "learning_rate": 0.00013100783077392455, "loss": 1.1657, "step": 3398 }, { "epoch": 0.6909941044927831, "grad_norm": 0.1320771425962448, "learning_rate": 0.00013098749110139327, "loss": 0.9718, "step": 3399 }, { "epoch": 0.6911973978450905, "grad_norm": 0.13931889832019806, "learning_rate": 0.000130967151428862, "loss": 1.1547, "step": 3400 }, { "epoch": 0.6914006911973979, "grad_norm": 0.13345004618167877, "learning_rate": 0.00013094681175633072, "loss": 1.1103, "step": 3401 }, { "epoch": 0.6916039845497052, "grad_norm": 0.1303638517856598, "learning_rate": 0.00013092647208379947, "loss": 1.06, "step": 3402 }, { "epoch": 0.6918072779020126, "grad_norm": 0.12979425489902496, "learning_rate": 0.0001309061324112682, "loss": 1.025, "step": 3403 }, { "epoch": 0.69201057125432, "grad_norm": 0.1420203149318695, "learning_rate": 0.00013088579273873692, "loss": 1.3724, "step": 3404 }, { "epoch": 0.6922138646066274, "grad_norm": 0.13811589777469635, "learning_rate": 0.00013086545306620565, "loss": 1.1415, "step": 3405 }, { "epoch": 0.6924171579589348, "grad_norm": 0.1472085863351822, "learning_rate": 0.00013084511339367437, "loss": 1.2374, "step": 3406 }, { "epoch": 0.6926204513112422, "grad_norm": 0.12186230719089508, "learning_rate": 0.0001308247737211431, "loss": 0.8606, "step": 3407 }, { "epoch": 0.6928237446635495, "grad_norm": 0.14273689687252045, "learning_rate": 0.00013080443404861182, "loss": 0.9656, "step": 3408 }, { "epoch": 0.6930270380158569, "grad_norm": 0.1363956779241562, "learning_rate": 0.00013078409437608055, "loss": 1.1867, "step": 3409 }, { "epoch": 0.6932303313681643, "grad_norm": 0.1353340446949005, "learning_rate": 0.00013076375470354927, "loss": 1.0755, "step": 3410 }, { "epoch": 0.6934336247204717, "grad_norm": 0.12223875522613525, "learning_rate": 0.00013074341503101802, "loss": 0.9282, "step": 3411 }, { "epoch": 0.693636918072779, "grad_norm": 0.13030283153057098, "learning_rate": 0.00013072307535848675, "loss": 1.0846, "step": 3412 }, { "epoch": 0.6938402114250865, "grad_norm": 0.14898596704006195, "learning_rate": 0.00013070273568595547, "loss": 1.1225, "step": 3413 }, { "epoch": 0.6940435047773937, "grad_norm": 0.13688309490680695, "learning_rate": 0.00013068239601342417, "loss": 1.1666, "step": 3414 }, { "epoch": 0.6942467981297011, "grad_norm": 0.1352292150259018, "learning_rate": 0.00013066205634089292, "loss": 1.1107, "step": 3415 }, { "epoch": 0.6944500914820085, "grad_norm": 0.1321742832660675, "learning_rate": 0.00013064171666836164, "loss": 1.1059, "step": 3416 }, { "epoch": 0.6946533848343159, "grad_norm": 0.11616258323192596, "learning_rate": 0.00013062137699583037, "loss": 0.9273, "step": 3417 }, { "epoch": 0.6948566781866233, "grad_norm": 0.13355232775211334, "learning_rate": 0.0001306010373232991, "loss": 0.9926, "step": 3418 }, { "epoch": 0.6950599715389307, "grad_norm": 0.12835095822811127, "learning_rate": 0.00013058069765076784, "loss": 0.9222, "step": 3419 }, { "epoch": 0.695263264891238, "grad_norm": 0.13715249300003052, "learning_rate": 0.00013056035797823657, "loss": 1.0299, "step": 3420 }, { "epoch": 0.6954665582435454, "grad_norm": 0.12749621272087097, "learning_rate": 0.0001305400183057053, "loss": 0.8392, "step": 3421 }, { "epoch": 0.6956698515958528, "grad_norm": 0.12953422963619232, "learning_rate": 0.000130519678633174, "loss": 0.9364, "step": 3422 }, { "epoch": 0.6958731449481602, "grad_norm": 0.1335253268480301, "learning_rate": 0.00013049933896064274, "loss": 1.1024, "step": 3423 }, { "epoch": 0.6960764383004676, "grad_norm": 0.1350051760673523, "learning_rate": 0.00013047899928811147, "loss": 0.9491, "step": 3424 }, { "epoch": 0.6962797316527749, "grad_norm": 0.12581254541873932, "learning_rate": 0.0001304586596155802, "loss": 1.1341, "step": 3425 }, { "epoch": 0.6964830250050823, "grad_norm": 0.12518788874149323, "learning_rate": 0.00013043831994304892, "loss": 1.0329, "step": 3426 }, { "epoch": 0.6966863183573897, "grad_norm": 0.12527361512184143, "learning_rate": 0.00013041798027051767, "loss": 0.9014, "step": 3427 }, { "epoch": 0.6968896117096971, "grad_norm": 0.13964787125587463, "learning_rate": 0.0001303976405979864, "loss": 1.1231, "step": 3428 }, { "epoch": 0.6970929050620045, "grad_norm": 0.1401492953300476, "learning_rate": 0.00013037730092545512, "loss": 1.0639, "step": 3429 }, { "epoch": 0.6972961984143119, "grad_norm": 0.1398945450782776, "learning_rate": 0.0001303569612529238, "loss": 1.0882, "step": 3430 }, { "epoch": 0.6974994917666192, "grad_norm": 0.15390872955322266, "learning_rate": 0.00013033662158039256, "loss": 1.3927, "step": 3431 }, { "epoch": 0.6977027851189266, "grad_norm": 0.11634422838687897, "learning_rate": 0.0001303162819078613, "loss": 0.9213, "step": 3432 }, { "epoch": 0.697906078471234, "grad_norm": 0.14000141620635986, "learning_rate": 0.00013029594223533001, "loss": 1.1035, "step": 3433 }, { "epoch": 0.6981093718235414, "grad_norm": 0.13036206364631653, "learning_rate": 0.00013027560256279874, "loss": 1.0366, "step": 3434 }, { "epoch": 0.6983126651758488, "grad_norm": 0.1375044733285904, "learning_rate": 0.0001302552628902675, "loss": 1.1924, "step": 3435 }, { "epoch": 0.6985159585281562, "grad_norm": 0.13283680379390717, "learning_rate": 0.00013023492321773621, "loss": 1.1097, "step": 3436 }, { "epoch": 0.6987192518804635, "grad_norm": 0.14721041917800903, "learning_rate": 0.00013021458354520494, "loss": 1.1784, "step": 3437 }, { "epoch": 0.6989225452327709, "grad_norm": 0.1452692449092865, "learning_rate": 0.00013019424387267364, "loss": 1.1948, "step": 3438 }, { "epoch": 0.6991258385850783, "grad_norm": 0.12445453554391861, "learning_rate": 0.0001301739042001424, "loss": 1.0154, "step": 3439 }, { "epoch": 0.6993291319373857, "grad_norm": 0.13780944049358368, "learning_rate": 0.0001301535645276111, "loss": 1.1673, "step": 3440 }, { "epoch": 0.6995324252896931, "grad_norm": 0.14468298852443695, "learning_rate": 0.00013013322485507984, "loss": 1.2753, "step": 3441 }, { "epoch": 0.6997357186420005, "grad_norm": 0.13938096165657043, "learning_rate": 0.00013011288518254856, "loss": 1.1033, "step": 3442 }, { "epoch": 0.6999390119943077, "grad_norm": 0.12781304121017456, "learning_rate": 0.0001300925455100173, "loss": 0.8622, "step": 3443 }, { "epoch": 0.7001423053466151, "grad_norm": 0.12039446085691452, "learning_rate": 0.00013007220583748604, "loss": 0.923, "step": 3444 }, { "epoch": 0.7003455986989225, "grad_norm": 0.11263223737478256, "learning_rate": 0.00013005186616495476, "loss": 0.889, "step": 3445 }, { "epoch": 0.7005488920512299, "grad_norm": 0.10796971619129181, "learning_rate": 0.00013003152649242346, "loss": 0.852, "step": 3446 }, { "epoch": 0.7007521854035373, "grad_norm": 0.12779220938682556, "learning_rate": 0.0001300111868198922, "loss": 0.9844, "step": 3447 }, { "epoch": 0.7009554787558446, "grad_norm": 0.12436182051897049, "learning_rate": 0.00012999084714736093, "loss": 1.0729, "step": 3448 }, { "epoch": 0.701158772108152, "grad_norm": 0.12066857516765594, "learning_rate": 0.00012997050747482966, "loss": 0.9179, "step": 3449 }, { "epoch": 0.7013620654604594, "grad_norm": 0.12307177484035492, "learning_rate": 0.00012995016780229838, "loss": 0.9204, "step": 3450 }, { "epoch": 0.7015653588127668, "grad_norm": 0.1301327794790268, "learning_rate": 0.0001299298281297671, "loss": 1.1486, "step": 3451 }, { "epoch": 0.7017686521650742, "grad_norm": 0.13147859275341034, "learning_rate": 0.00012990948845723586, "loss": 1.0035, "step": 3452 }, { "epoch": 0.7019719455173816, "grad_norm": 0.13557538390159607, "learning_rate": 0.00012988914878470458, "loss": 1.1641, "step": 3453 }, { "epoch": 0.7021752388696889, "grad_norm": 0.11187610030174255, "learning_rate": 0.0001298688091121733, "loss": 0.9894, "step": 3454 }, { "epoch": 0.7023785322219963, "grad_norm": 0.12350699305534363, "learning_rate": 0.000129848469439642, "loss": 1.1538, "step": 3455 }, { "epoch": 0.7025818255743037, "grad_norm": 0.1363372653722763, "learning_rate": 0.00012982812976711076, "loss": 1.0104, "step": 3456 }, { "epoch": 0.7027851189266111, "grad_norm": 0.12748870253562927, "learning_rate": 0.00012980779009457948, "loss": 0.9978, "step": 3457 }, { "epoch": 0.7029884122789185, "grad_norm": 0.12273624539375305, "learning_rate": 0.0001297874504220482, "loss": 0.967, "step": 3458 }, { "epoch": 0.7031917056312259, "grad_norm": 0.13453403115272522, "learning_rate": 0.00012976711074951693, "loss": 1.1081, "step": 3459 }, { "epoch": 0.7033949989835332, "grad_norm": 0.13335007429122925, "learning_rate": 0.00012974677107698568, "loss": 0.9803, "step": 3460 }, { "epoch": 0.7035982923358406, "grad_norm": 0.13500504195690155, "learning_rate": 0.0001297264314044544, "loss": 0.967, "step": 3461 }, { "epoch": 0.703801585688148, "grad_norm": 0.145028218626976, "learning_rate": 0.00012970609173192313, "loss": 1.1643, "step": 3462 }, { "epoch": 0.7040048790404554, "grad_norm": 0.14210622012615204, "learning_rate": 0.00012968575205939183, "loss": 1.1898, "step": 3463 }, { "epoch": 0.7042081723927628, "grad_norm": 0.1239437535405159, "learning_rate": 0.00012966541238686058, "loss": 1.0432, "step": 3464 }, { "epoch": 0.7044114657450702, "grad_norm": 0.14510378241539001, "learning_rate": 0.0001296450727143293, "loss": 1.025, "step": 3465 }, { "epoch": 0.7046147590973775, "grad_norm": 0.13489870727062225, "learning_rate": 0.00012962473304179803, "loss": 1.1407, "step": 3466 }, { "epoch": 0.7048180524496849, "grad_norm": 0.12685105204582214, "learning_rate": 0.00012960439336926675, "loss": 1.0256, "step": 3467 }, { "epoch": 0.7050213458019923, "grad_norm": 0.14244306087493896, "learning_rate": 0.0001295840536967355, "loss": 1.1148, "step": 3468 }, { "epoch": 0.7052246391542997, "grad_norm": 0.13121604919433594, "learning_rate": 0.00012956371402420423, "loss": 0.9663, "step": 3469 }, { "epoch": 0.7054279325066071, "grad_norm": 0.14584699273109436, "learning_rate": 0.00012954337435167295, "loss": 1.1258, "step": 3470 }, { "epoch": 0.7056312258589145, "grad_norm": 0.130800262093544, "learning_rate": 0.00012952303467914165, "loss": 0.9679, "step": 3471 }, { "epoch": 0.7058345192112218, "grad_norm": 0.13025017082691193, "learning_rate": 0.0001295026950066104, "loss": 1.0312, "step": 3472 }, { "epoch": 0.7060378125635292, "grad_norm": 0.13392165303230286, "learning_rate": 0.00012948235533407913, "loss": 1.1851, "step": 3473 }, { "epoch": 0.7062411059158366, "grad_norm": 0.1399383693933487, "learning_rate": 0.00012946201566154785, "loss": 1.1286, "step": 3474 }, { "epoch": 0.706444399268144, "grad_norm": 0.12997640669345856, "learning_rate": 0.00012944167598901658, "loss": 1.0042, "step": 3475 }, { "epoch": 0.7066476926204514, "grad_norm": 0.1388452649116516, "learning_rate": 0.00012942133631648533, "loss": 1.1952, "step": 3476 }, { "epoch": 0.7068509859727586, "grad_norm": 0.13053801655769348, "learning_rate": 0.00012940099664395405, "loss": 0.9092, "step": 3477 }, { "epoch": 0.707054279325066, "grad_norm": 0.1334877759218216, "learning_rate": 0.00012938065697142278, "loss": 0.9635, "step": 3478 }, { "epoch": 0.7072575726773734, "grad_norm": 0.13902603089809418, "learning_rate": 0.00012936031729889147, "loss": 1.0507, "step": 3479 }, { "epoch": 0.7074608660296808, "grad_norm": 0.13863757252693176, "learning_rate": 0.00012933997762636023, "loss": 0.9798, "step": 3480 }, { "epoch": 0.7076641593819882, "grad_norm": 0.12283840775489807, "learning_rate": 0.00012931963795382895, "loss": 0.8997, "step": 3481 }, { "epoch": 0.7078674527342956, "grad_norm": 0.13135948777198792, "learning_rate": 0.00012929929828129768, "loss": 1.2408, "step": 3482 }, { "epoch": 0.7080707460866029, "grad_norm": 0.15379171073436737, "learning_rate": 0.0001292789586087664, "loss": 1.195, "step": 3483 }, { "epoch": 0.7082740394389103, "grad_norm": 0.13256476819515228, "learning_rate": 0.00012925861893623515, "loss": 1.0582, "step": 3484 }, { "epoch": 0.7084773327912177, "grad_norm": 0.12100596725940704, "learning_rate": 0.00012923827926370388, "loss": 0.8826, "step": 3485 }, { "epoch": 0.7086806261435251, "grad_norm": 0.13334119319915771, "learning_rate": 0.0001292179395911726, "loss": 1.2063, "step": 3486 }, { "epoch": 0.7088839194958325, "grad_norm": 0.14578770101070404, "learning_rate": 0.0001291975999186413, "loss": 1.2067, "step": 3487 }, { "epoch": 0.7090872128481399, "grad_norm": 0.13182413578033447, "learning_rate": 0.00012917726024611005, "loss": 1.0382, "step": 3488 }, { "epoch": 0.7092905062004472, "grad_norm": 0.13377144932746887, "learning_rate": 0.00012915692057357877, "loss": 1.0798, "step": 3489 }, { "epoch": 0.7094937995527546, "grad_norm": 0.15311647951602936, "learning_rate": 0.0001291365809010475, "loss": 1.1925, "step": 3490 }, { "epoch": 0.709697092905062, "grad_norm": 0.13385489583015442, "learning_rate": 0.00012911624122851622, "loss": 1.0437, "step": 3491 }, { "epoch": 0.7099003862573694, "grad_norm": 0.12438102066516876, "learning_rate": 0.00012909590155598495, "loss": 0.9325, "step": 3492 }, { "epoch": 0.7101036796096768, "grad_norm": 0.11423248052597046, "learning_rate": 0.0001290755618834537, "loss": 0.955, "step": 3493 }, { "epoch": 0.7103069729619842, "grad_norm": 0.1257968544960022, "learning_rate": 0.00012905522221092242, "loss": 0.9734, "step": 3494 }, { "epoch": 0.7105102663142915, "grad_norm": 0.12875302135944366, "learning_rate": 0.00012903488253839112, "loss": 0.9762, "step": 3495 }, { "epoch": 0.7107135596665989, "grad_norm": 0.15575377643108368, "learning_rate": 0.00012901454286585984, "loss": 1.199, "step": 3496 }, { "epoch": 0.7109168530189063, "grad_norm": 0.12835876643657684, "learning_rate": 0.0001289942031933286, "loss": 1.0026, "step": 3497 }, { "epoch": 0.7111201463712137, "grad_norm": 0.13237829506397247, "learning_rate": 0.00012897386352079732, "loss": 1.1309, "step": 3498 }, { "epoch": 0.7113234397235211, "grad_norm": 0.13372915983200073, "learning_rate": 0.00012895352384826605, "loss": 1.0218, "step": 3499 }, { "epoch": 0.7115267330758284, "grad_norm": 0.1394553929567337, "learning_rate": 0.00012893318417573477, "loss": 1.1678, "step": 3500 }, { "epoch": 0.7117300264281358, "grad_norm": 0.12859494984149933, "learning_rate": 0.00012891284450320352, "loss": 0.9039, "step": 3501 }, { "epoch": 0.7119333197804432, "grad_norm": 0.13142433762550354, "learning_rate": 0.00012889250483067225, "loss": 0.9692, "step": 3502 }, { "epoch": 0.7121366131327506, "grad_norm": 0.15247346460819244, "learning_rate": 0.00012887216515814094, "loss": 1.1068, "step": 3503 }, { "epoch": 0.712339906485058, "grad_norm": 0.1271810233592987, "learning_rate": 0.00012885182548560967, "loss": 1.05, "step": 3504 }, { "epoch": 0.7125431998373654, "grad_norm": 0.12222661077976227, "learning_rate": 0.00012883148581307842, "loss": 0.9563, "step": 3505 }, { "epoch": 0.7127464931896726, "grad_norm": 0.14147427678108215, "learning_rate": 0.00012881114614054714, "loss": 1.0697, "step": 3506 }, { "epoch": 0.71294978654198, "grad_norm": 0.12644895911216736, "learning_rate": 0.00012879080646801587, "loss": 0.9251, "step": 3507 }, { "epoch": 0.7131530798942874, "grad_norm": 0.126128152012825, "learning_rate": 0.0001287704667954846, "loss": 0.9617, "step": 3508 }, { "epoch": 0.7133563732465948, "grad_norm": 0.12538930773735046, "learning_rate": 0.00012875012712295334, "loss": 1.084, "step": 3509 }, { "epoch": 0.7135596665989022, "grad_norm": 0.1261541098356247, "learning_rate": 0.00012872978745042207, "loss": 1.0671, "step": 3510 }, { "epoch": 0.7137629599512096, "grad_norm": 0.15382623672485352, "learning_rate": 0.00012870944777789077, "loss": 1.1679, "step": 3511 }, { "epoch": 0.7139662533035169, "grad_norm": 0.14954978227615356, "learning_rate": 0.0001286891081053595, "loss": 1.0822, "step": 3512 }, { "epoch": 0.7141695466558243, "grad_norm": 0.12342054396867752, "learning_rate": 0.00012866876843282824, "loss": 1.0252, "step": 3513 }, { "epoch": 0.7143728400081317, "grad_norm": 0.13561514019966125, "learning_rate": 0.00012864842876029697, "loss": 1.0025, "step": 3514 }, { "epoch": 0.7145761333604391, "grad_norm": 0.13942426443099976, "learning_rate": 0.0001286280890877657, "loss": 1.0978, "step": 3515 }, { "epoch": 0.7147794267127465, "grad_norm": 0.13418523967266083, "learning_rate": 0.00012860774941523442, "loss": 0.9401, "step": 3516 }, { "epoch": 0.7149827200650539, "grad_norm": 0.13293065130710602, "learning_rate": 0.00012858740974270317, "loss": 0.9563, "step": 3517 }, { "epoch": 0.7151860134173612, "grad_norm": 0.13507983088493347, "learning_rate": 0.0001285670700701719, "loss": 1.1482, "step": 3518 }, { "epoch": 0.7153893067696686, "grad_norm": 0.13518783450126648, "learning_rate": 0.00012854673039764062, "loss": 1.0677, "step": 3519 }, { "epoch": 0.715592600121976, "grad_norm": 0.14185848832130432, "learning_rate": 0.0001285263907251093, "loss": 1.1828, "step": 3520 }, { "epoch": 0.7157958934742834, "grad_norm": 0.14050935208797455, "learning_rate": 0.00012850605105257806, "loss": 1.137, "step": 3521 }, { "epoch": 0.7159991868265908, "grad_norm": 0.12821073830127716, "learning_rate": 0.0001284857113800468, "loss": 0.9265, "step": 3522 }, { "epoch": 0.7162024801788982, "grad_norm": 0.14910835027694702, "learning_rate": 0.0001284653717075155, "loss": 1.1298, "step": 3523 }, { "epoch": 0.7164057735312055, "grad_norm": 0.12309451401233673, "learning_rate": 0.00012844503203498424, "loss": 0.9552, "step": 3524 }, { "epoch": 0.7166090668835129, "grad_norm": 0.13226357102394104, "learning_rate": 0.000128424692362453, "loss": 1.0618, "step": 3525 }, { "epoch": 0.7168123602358203, "grad_norm": 0.14502473175525665, "learning_rate": 0.00012840435268992171, "loss": 1.0766, "step": 3526 }, { "epoch": 0.7170156535881277, "grad_norm": 0.15114335715770721, "learning_rate": 0.00012838401301739044, "loss": 1.3117, "step": 3527 }, { "epoch": 0.7172189469404351, "grad_norm": 0.14016559720039368, "learning_rate": 0.00012836367334485914, "loss": 1.0747, "step": 3528 }, { "epoch": 0.7174222402927424, "grad_norm": 0.10465826839208603, "learning_rate": 0.0001283433336723279, "loss": 0.9244, "step": 3529 }, { "epoch": 0.7176255336450498, "grad_norm": 0.14475956559181213, "learning_rate": 0.0001283229939997966, "loss": 1.1458, "step": 3530 }, { "epoch": 0.7178288269973572, "grad_norm": 0.15967129170894623, "learning_rate": 0.00012830265432726534, "loss": 1.218, "step": 3531 }, { "epoch": 0.7180321203496646, "grad_norm": 0.16239500045776367, "learning_rate": 0.00012828231465473406, "loss": 1.1088, "step": 3532 }, { "epoch": 0.718235413701972, "grad_norm": 0.13778537511825562, "learning_rate": 0.00012826197498220279, "loss": 1.2284, "step": 3533 }, { "epoch": 0.7184387070542794, "grad_norm": 0.15743795037269592, "learning_rate": 0.00012824163530967154, "loss": 1.2057, "step": 3534 }, { "epoch": 0.7186420004065867, "grad_norm": 0.13260531425476074, "learning_rate": 0.00012822129563714026, "loss": 1.1239, "step": 3535 }, { "epoch": 0.718845293758894, "grad_norm": 0.12460935115814209, "learning_rate": 0.00012820095596460896, "loss": 1.0504, "step": 3536 }, { "epoch": 0.7190485871112015, "grad_norm": 0.13355574011802673, "learning_rate": 0.00012818061629207768, "loss": 1.0719, "step": 3537 }, { "epoch": 0.7192518804635089, "grad_norm": 0.1280195564031601, "learning_rate": 0.00012816027661954643, "loss": 1.012, "step": 3538 }, { "epoch": 0.7194551738158163, "grad_norm": 0.13986103236675262, "learning_rate": 0.00012813993694701516, "loss": 1.0658, "step": 3539 }, { "epoch": 0.7196584671681237, "grad_norm": 0.1170068234205246, "learning_rate": 0.00012811959727448388, "loss": 0.9056, "step": 3540 }, { "epoch": 0.7198617605204309, "grad_norm": 0.13569694757461548, "learning_rate": 0.0001280992576019526, "loss": 1.116, "step": 3541 }, { "epoch": 0.7200650538727383, "grad_norm": 0.1323375403881073, "learning_rate": 0.00012807891792942136, "loss": 1.0629, "step": 3542 }, { "epoch": 0.7202683472250457, "grad_norm": 0.12855368852615356, "learning_rate": 0.00012805857825689008, "loss": 0.89, "step": 3543 }, { "epoch": 0.7204716405773531, "grad_norm": 0.12019526958465576, "learning_rate": 0.00012803823858435878, "loss": 1.063, "step": 3544 }, { "epoch": 0.7206749339296605, "grad_norm": 0.13612791895866394, "learning_rate": 0.0001280178989118275, "loss": 0.9844, "step": 3545 }, { "epoch": 0.7208782272819679, "grad_norm": 0.1345546394586563, "learning_rate": 0.00012799755923929626, "loss": 0.9661, "step": 3546 }, { "epoch": 0.7210815206342752, "grad_norm": 0.12953819334506989, "learning_rate": 0.00012797721956676498, "loss": 0.968, "step": 3547 }, { "epoch": 0.7212848139865826, "grad_norm": 0.15265563130378723, "learning_rate": 0.0001279568798942337, "loss": 1.2455, "step": 3548 }, { "epoch": 0.72148810733889, "grad_norm": 0.16196173429489136, "learning_rate": 0.00012793654022170243, "loss": 1.1605, "step": 3549 }, { "epoch": 0.7216914006911974, "grad_norm": 0.13228391110897064, "learning_rate": 0.00012791620054917118, "loss": 1.1216, "step": 3550 }, { "epoch": 0.7218946940435048, "grad_norm": 0.1355789303779602, "learning_rate": 0.0001278958608766399, "loss": 1.1065, "step": 3551 }, { "epoch": 0.7220979873958121, "grad_norm": 0.13458067178726196, "learning_rate": 0.0001278755212041086, "loss": 1.173, "step": 3552 }, { "epoch": 0.7223012807481195, "grad_norm": 0.12841463088989258, "learning_rate": 0.00012785518153157733, "loss": 0.9425, "step": 3553 }, { "epoch": 0.7225045741004269, "grad_norm": 0.1273353099822998, "learning_rate": 0.00012783484185904608, "loss": 0.9527, "step": 3554 }, { "epoch": 0.7227078674527343, "grad_norm": 0.13753145933151245, "learning_rate": 0.0001278145021865148, "loss": 0.9917, "step": 3555 }, { "epoch": 0.7229111608050417, "grad_norm": 0.15175598859786987, "learning_rate": 0.00012779416251398353, "loss": 1.1534, "step": 3556 }, { "epoch": 0.7231144541573491, "grad_norm": 0.12491641193628311, "learning_rate": 0.00012777382284145225, "loss": 0.9861, "step": 3557 }, { "epoch": 0.7233177475096564, "grad_norm": 0.135353222489357, "learning_rate": 0.000127753483168921, "loss": 1.1638, "step": 3558 }, { "epoch": 0.7235210408619638, "grad_norm": 0.14735917747020721, "learning_rate": 0.00012773314349638973, "loss": 1.1341, "step": 3559 }, { "epoch": 0.7237243342142712, "grad_norm": 0.11300304532051086, "learning_rate": 0.00012771280382385843, "loss": 0.9734, "step": 3560 }, { "epoch": 0.7239276275665786, "grad_norm": 0.12589031457901, "learning_rate": 0.00012769246415132715, "loss": 0.9325, "step": 3561 }, { "epoch": 0.724130920918886, "grad_norm": 0.14478862285614014, "learning_rate": 0.0001276721244787959, "loss": 1.2025, "step": 3562 }, { "epoch": 0.7243342142711934, "grad_norm": 0.12382597476243973, "learning_rate": 0.00012765178480626463, "loss": 1.0629, "step": 3563 }, { "epoch": 0.7245375076235007, "grad_norm": 0.13786040246486664, "learning_rate": 0.00012763144513373335, "loss": 1.142, "step": 3564 }, { "epoch": 0.7247408009758081, "grad_norm": 0.13986682891845703, "learning_rate": 0.00012761110546120208, "loss": 1.1434, "step": 3565 }, { "epoch": 0.7249440943281155, "grad_norm": 0.13523870706558228, "learning_rate": 0.00012759076578867083, "loss": 0.889, "step": 3566 }, { "epoch": 0.7251473876804229, "grad_norm": 0.12312185764312744, "learning_rate": 0.00012757042611613955, "loss": 1.0227, "step": 3567 }, { "epoch": 0.7253506810327303, "grad_norm": 0.1324312686920166, "learning_rate": 0.00012755008644360825, "loss": 0.9739, "step": 3568 }, { "epoch": 0.7255539743850377, "grad_norm": 0.13704247772693634, "learning_rate": 0.00012752974677107697, "loss": 1.0356, "step": 3569 }, { "epoch": 0.725757267737345, "grad_norm": 0.12928558886051178, "learning_rate": 0.00012750940709854573, "loss": 0.9112, "step": 3570 }, { "epoch": 0.7259605610896523, "grad_norm": 0.12993620336055756, "learning_rate": 0.00012748906742601445, "loss": 1.0499, "step": 3571 }, { "epoch": 0.7261638544419597, "grad_norm": 0.13459739089012146, "learning_rate": 0.00012746872775348317, "loss": 1.1546, "step": 3572 }, { "epoch": 0.7263671477942671, "grad_norm": 0.14553983509540558, "learning_rate": 0.0001274483880809519, "loss": 1.1466, "step": 3573 }, { "epoch": 0.7265704411465745, "grad_norm": 0.1270923614501953, "learning_rate": 0.00012742804840842065, "loss": 0.9953, "step": 3574 }, { "epoch": 0.7267737344988819, "grad_norm": 0.11883358657360077, "learning_rate": 0.00012740770873588938, "loss": 0.9983, "step": 3575 }, { "epoch": 0.7269770278511892, "grad_norm": 0.13899964094161987, "learning_rate": 0.0001273873690633581, "loss": 1.0463, "step": 3576 }, { "epoch": 0.7271803212034966, "grad_norm": 0.12887227535247803, "learning_rate": 0.0001273670293908268, "loss": 0.9673, "step": 3577 }, { "epoch": 0.727383614555804, "grad_norm": 0.15297862887382507, "learning_rate": 0.00012734668971829552, "loss": 1.1517, "step": 3578 }, { "epoch": 0.7275869079081114, "grad_norm": 0.12056870758533478, "learning_rate": 0.00012732635004576427, "loss": 0.9494, "step": 3579 }, { "epoch": 0.7277902012604188, "grad_norm": 0.13357582688331604, "learning_rate": 0.000127306010373233, "loss": 0.9368, "step": 3580 }, { "epoch": 0.7279934946127261, "grad_norm": 0.1344243586063385, "learning_rate": 0.00012728567070070172, "loss": 0.9977, "step": 3581 }, { "epoch": 0.7281967879650335, "grad_norm": 0.12713217735290527, "learning_rate": 0.00012726533102817045, "loss": 0.9283, "step": 3582 }, { "epoch": 0.7284000813173409, "grad_norm": 0.1435747891664505, "learning_rate": 0.0001272449913556392, "loss": 1.0784, "step": 3583 }, { "epoch": 0.7286033746696483, "grad_norm": 0.1342409998178482, "learning_rate": 0.00012722465168310792, "loss": 0.99, "step": 3584 }, { "epoch": 0.7288066680219557, "grad_norm": 0.13497351109981537, "learning_rate": 0.00012720431201057662, "loss": 1.0655, "step": 3585 }, { "epoch": 0.7290099613742631, "grad_norm": 0.13522464036941528, "learning_rate": 0.00012718397233804534, "loss": 1.1785, "step": 3586 }, { "epoch": 0.7292132547265704, "grad_norm": 0.12016705423593521, "learning_rate": 0.0001271636326655141, "loss": 0.93, "step": 3587 }, { "epoch": 0.7294165480788778, "grad_norm": 0.1265437752008438, "learning_rate": 0.00012714329299298282, "loss": 1.0289, "step": 3588 }, { "epoch": 0.7296198414311852, "grad_norm": 0.12697303295135498, "learning_rate": 0.00012712295332045154, "loss": 1.0539, "step": 3589 }, { "epoch": 0.7298231347834926, "grad_norm": 0.14529366791248322, "learning_rate": 0.00012710261364792027, "loss": 1.134, "step": 3590 }, { "epoch": 0.7300264281358, "grad_norm": 0.1143953874707222, "learning_rate": 0.00012708227397538902, "loss": 0.7966, "step": 3591 }, { "epoch": 0.7302297214881074, "grad_norm": 0.14083142578601837, "learning_rate": 0.00012706193430285775, "loss": 1.1776, "step": 3592 }, { "epoch": 0.7304330148404147, "grad_norm": 0.12843115627765656, "learning_rate": 0.00012704159463032644, "loss": 1.075, "step": 3593 }, { "epoch": 0.7306363081927221, "grad_norm": 0.12742142379283905, "learning_rate": 0.00012702125495779517, "loss": 1.1087, "step": 3594 }, { "epoch": 0.7308396015450295, "grad_norm": 0.13479192554950714, "learning_rate": 0.00012700091528526392, "loss": 1.0954, "step": 3595 }, { "epoch": 0.7310428948973369, "grad_norm": 0.13784924149513245, "learning_rate": 0.00012698057561273264, "loss": 1.1773, "step": 3596 }, { "epoch": 0.7312461882496443, "grad_norm": 0.13474421203136444, "learning_rate": 0.00012696023594020137, "loss": 1.0023, "step": 3597 }, { "epoch": 0.7314494816019517, "grad_norm": 0.14545200765132904, "learning_rate": 0.0001269398962676701, "loss": 1.1166, "step": 3598 }, { "epoch": 0.731652774954259, "grad_norm": 0.11699052155017853, "learning_rate": 0.00012691955659513884, "loss": 0.982, "step": 3599 }, { "epoch": 0.7318560683065664, "grad_norm": 0.13108402490615845, "learning_rate": 0.00012689921692260757, "loss": 0.9805, "step": 3600 }, { "epoch": 0.7320593616588738, "grad_norm": 0.12493366003036499, "learning_rate": 0.00012687887725007627, "loss": 1.03, "step": 3601 }, { "epoch": 0.7322626550111812, "grad_norm": 0.12673288583755493, "learning_rate": 0.000126858537577545, "loss": 0.99, "step": 3602 }, { "epoch": 0.7324659483634886, "grad_norm": 0.14298030734062195, "learning_rate": 0.00012683819790501374, "loss": 0.9816, "step": 3603 }, { "epoch": 0.7326692417157958, "grad_norm": 0.1383986473083496, "learning_rate": 0.00012681785823248247, "loss": 1.0635, "step": 3604 }, { "epoch": 0.7328725350681032, "grad_norm": 0.13229741156101227, "learning_rate": 0.0001267975185599512, "loss": 1.0752, "step": 3605 }, { "epoch": 0.7330758284204106, "grad_norm": 0.12566420435905457, "learning_rate": 0.00012677717888741991, "loss": 1.0984, "step": 3606 }, { "epoch": 0.733279121772718, "grad_norm": 0.14375749230384827, "learning_rate": 0.00012675683921488867, "loss": 1.0459, "step": 3607 }, { "epoch": 0.7334824151250254, "grad_norm": 0.1541428565979004, "learning_rate": 0.0001267364995423574, "loss": 1.2405, "step": 3608 }, { "epoch": 0.7336857084773328, "grad_norm": 0.1277463287115097, "learning_rate": 0.0001267161598698261, "loss": 0.9784, "step": 3609 }, { "epoch": 0.7338890018296401, "grad_norm": 0.11879061907529831, "learning_rate": 0.0001266958201972948, "loss": 0.9301, "step": 3610 }, { "epoch": 0.7340922951819475, "grad_norm": 0.1343902200460434, "learning_rate": 0.00012667548052476356, "loss": 1.2015, "step": 3611 }, { "epoch": 0.7342955885342549, "grad_norm": 0.12574651837348938, "learning_rate": 0.0001266551408522323, "loss": 1.0215, "step": 3612 }, { "epoch": 0.7344988818865623, "grad_norm": 0.12160508334636688, "learning_rate": 0.000126634801179701, "loss": 1.0208, "step": 3613 }, { "epoch": 0.7347021752388697, "grad_norm": 0.13637933135032654, "learning_rate": 0.00012661446150716974, "loss": 1.1179, "step": 3614 }, { "epoch": 0.7349054685911771, "grad_norm": 0.14247237145900726, "learning_rate": 0.0001265941218346385, "loss": 1.1746, "step": 3615 }, { "epoch": 0.7351087619434844, "grad_norm": 0.14084017276763916, "learning_rate": 0.00012657378216210721, "loss": 0.9984, "step": 3616 }, { "epoch": 0.7353120552957918, "grad_norm": 0.1475144922733307, "learning_rate": 0.0001265534424895759, "loss": 1.0763, "step": 3617 }, { "epoch": 0.7355153486480992, "grad_norm": 0.12147875130176544, "learning_rate": 0.00012653310281704464, "loss": 1.0814, "step": 3618 }, { "epoch": 0.7357186420004066, "grad_norm": 0.12373865395784378, "learning_rate": 0.00012651276314451336, "loss": 0.9432, "step": 3619 }, { "epoch": 0.735921935352714, "grad_norm": 0.11395063996315002, "learning_rate": 0.0001264924234719821, "loss": 0.8629, "step": 3620 }, { "epoch": 0.7361252287050214, "grad_norm": 0.12742386758327484, "learning_rate": 0.00012647208379945084, "loss": 1.1148, "step": 3621 }, { "epoch": 0.7363285220573287, "grad_norm": 0.13474571704864502, "learning_rate": 0.00012645174412691956, "loss": 1.0862, "step": 3622 }, { "epoch": 0.7365318154096361, "grad_norm": 0.14104367792606354, "learning_rate": 0.00012643140445438828, "loss": 1.1224, "step": 3623 }, { "epoch": 0.7367351087619435, "grad_norm": 0.1266336888074875, "learning_rate": 0.00012641106478185704, "loss": 1.0388, "step": 3624 }, { "epoch": 0.7369384021142509, "grad_norm": 0.1418471783399582, "learning_rate": 0.00012639072510932573, "loss": 1.0211, "step": 3625 }, { "epoch": 0.7371416954665583, "grad_norm": 0.14462773501873016, "learning_rate": 0.00012637038543679446, "loss": 1.0212, "step": 3626 }, { "epoch": 0.7373449888188657, "grad_norm": 0.14509986340999603, "learning_rate": 0.00012635004576426318, "loss": 1.1922, "step": 3627 }, { "epoch": 0.737548282171173, "grad_norm": 0.13801227509975433, "learning_rate": 0.00012632970609173193, "loss": 1.1082, "step": 3628 }, { "epoch": 0.7377515755234804, "grad_norm": 0.13551753759384155, "learning_rate": 0.00012630936641920066, "loss": 1.0822, "step": 3629 }, { "epoch": 0.7379548688757878, "grad_norm": 0.12872062623500824, "learning_rate": 0.00012628902674666938, "loss": 0.9366, "step": 3630 }, { "epoch": 0.7381581622280952, "grad_norm": 0.13623321056365967, "learning_rate": 0.0001262686870741381, "loss": 1.1129, "step": 3631 }, { "epoch": 0.7383614555804026, "grad_norm": 0.14300891757011414, "learning_rate": 0.00012624834740160686, "loss": 1.1259, "step": 3632 }, { "epoch": 0.7385647489327098, "grad_norm": 0.13372913002967834, "learning_rate": 0.00012622800772907558, "loss": 1.0011, "step": 3633 }, { "epoch": 0.7387680422850172, "grad_norm": 0.11722072213888168, "learning_rate": 0.00012620766805654428, "loss": 1.0085, "step": 3634 }, { "epoch": 0.7389713356373246, "grad_norm": 0.13151319324970245, "learning_rate": 0.000126187328384013, "loss": 1.1878, "step": 3635 }, { "epoch": 0.739174628989632, "grad_norm": 0.13933278620243073, "learning_rate": 0.00012616698871148176, "loss": 1.1763, "step": 3636 }, { "epoch": 0.7393779223419394, "grad_norm": 0.13536275923252106, "learning_rate": 0.00012614664903895048, "loss": 1.2087, "step": 3637 }, { "epoch": 0.7395812156942468, "grad_norm": 0.13037016987800598, "learning_rate": 0.0001261263093664192, "loss": 1.0033, "step": 3638 }, { "epoch": 0.7397845090465541, "grad_norm": 0.1575489491224289, "learning_rate": 0.00012610596969388793, "loss": 1.1683, "step": 3639 }, { "epoch": 0.7399878023988615, "grad_norm": 0.12989576160907745, "learning_rate": 0.00012608563002135668, "loss": 1.127, "step": 3640 }, { "epoch": 0.7401910957511689, "grad_norm": 0.14293938875198364, "learning_rate": 0.0001260652903488254, "loss": 1.1641, "step": 3641 }, { "epoch": 0.7403943891034763, "grad_norm": 0.12480568885803223, "learning_rate": 0.0001260449506762941, "loss": 0.9612, "step": 3642 }, { "epoch": 0.7405976824557837, "grad_norm": 0.1387239396572113, "learning_rate": 0.00012602461100376283, "loss": 1.1289, "step": 3643 }, { "epoch": 0.7408009758080911, "grad_norm": 0.12974587082862854, "learning_rate": 0.00012600427133123158, "loss": 1.0032, "step": 3644 }, { "epoch": 0.7410042691603984, "grad_norm": 0.14558175206184387, "learning_rate": 0.0001259839316587003, "loss": 1.0957, "step": 3645 }, { "epoch": 0.7412075625127058, "grad_norm": 0.1346643716096878, "learning_rate": 0.00012596359198616903, "loss": 1.0644, "step": 3646 }, { "epoch": 0.7414108558650132, "grad_norm": 0.1275978684425354, "learning_rate": 0.00012594325231363775, "loss": 1.0638, "step": 3647 }, { "epoch": 0.7416141492173206, "grad_norm": 0.11669638752937317, "learning_rate": 0.0001259229126411065, "loss": 0.9147, "step": 3648 }, { "epoch": 0.741817442569628, "grad_norm": 0.12056609243154526, "learning_rate": 0.00012590257296857523, "loss": 0.8193, "step": 3649 }, { "epoch": 0.7420207359219354, "grad_norm": 0.1389569789171219, "learning_rate": 0.00012588223329604393, "loss": 1.1273, "step": 3650 }, { "epoch": 0.7422240292742427, "grad_norm": 0.1116948276758194, "learning_rate": 0.00012586189362351265, "loss": 0.8484, "step": 3651 }, { "epoch": 0.7424273226265501, "grad_norm": 0.13268932700157166, "learning_rate": 0.0001258415539509814, "loss": 1.0092, "step": 3652 }, { "epoch": 0.7426306159788575, "grad_norm": 0.13985766470432281, "learning_rate": 0.00012582121427845013, "loss": 1.0907, "step": 3653 }, { "epoch": 0.7428339093311649, "grad_norm": 0.13794921338558197, "learning_rate": 0.00012580087460591885, "loss": 1.1482, "step": 3654 }, { "epoch": 0.7430372026834723, "grad_norm": 0.13911883533000946, "learning_rate": 0.00012578053493338758, "loss": 1.1641, "step": 3655 }, { "epoch": 0.7432404960357797, "grad_norm": 0.11809851974248886, "learning_rate": 0.00012576019526085633, "loss": 0.9128, "step": 3656 }, { "epoch": 0.743443789388087, "grad_norm": 0.17306208610534668, "learning_rate": 0.00012573985558832505, "loss": 1.1575, "step": 3657 }, { "epoch": 0.7436470827403944, "grad_norm": 0.14551490545272827, "learning_rate": 0.00012571951591579375, "loss": 1.2794, "step": 3658 }, { "epoch": 0.7438503760927018, "grad_norm": 0.14065933227539062, "learning_rate": 0.00012569917624326247, "loss": 1.0652, "step": 3659 }, { "epoch": 0.7440536694450092, "grad_norm": 0.13010179996490479, "learning_rate": 0.0001256788365707312, "loss": 0.9541, "step": 3660 }, { "epoch": 0.7442569627973166, "grad_norm": 0.1264103651046753, "learning_rate": 0.00012565849689819995, "loss": 0.9767, "step": 3661 }, { "epoch": 0.7444602561496239, "grad_norm": 0.12325896322727203, "learning_rate": 0.00012563815722566867, "loss": 1.1016, "step": 3662 }, { "epoch": 0.7446635495019313, "grad_norm": 0.12374068796634674, "learning_rate": 0.0001256178175531374, "loss": 0.9282, "step": 3663 }, { "epoch": 0.7448668428542387, "grad_norm": 0.14597944915294647, "learning_rate": 0.00012559747788060612, "loss": 1.1961, "step": 3664 }, { "epoch": 0.745070136206546, "grad_norm": 0.128509983420372, "learning_rate": 0.00012557713820807487, "loss": 0.9838, "step": 3665 }, { "epoch": 0.7452734295588535, "grad_norm": 0.1421680897474289, "learning_rate": 0.00012555679853554357, "loss": 1.1563, "step": 3666 }, { "epoch": 0.7454767229111608, "grad_norm": 0.1468690037727356, "learning_rate": 0.0001255364588630123, "loss": 1.1203, "step": 3667 }, { "epoch": 0.7456800162634681, "grad_norm": 0.1539076417684555, "learning_rate": 0.00012551611919048102, "loss": 1.1775, "step": 3668 }, { "epoch": 0.7458833096157755, "grad_norm": 0.15628856420516968, "learning_rate": 0.00012549577951794977, "loss": 1.1265, "step": 3669 }, { "epoch": 0.7460866029680829, "grad_norm": 0.1251571774482727, "learning_rate": 0.0001254754398454185, "loss": 1.0525, "step": 3670 }, { "epoch": 0.7462898963203903, "grad_norm": 0.13868333399295807, "learning_rate": 0.00012545510017288722, "loss": 1.0449, "step": 3671 }, { "epoch": 0.7464931896726977, "grad_norm": 0.15435542166233063, "learning_rate": 0.00012543476050035595, "loss": 1.2156, "step": 3672 }, { "epoch": 0.7466964830250051, "grad_norm": 0.13579222559928894, "learning_rate": 0.0001254144208278247, "loss": 0.9683, "step": 3673 }, { "epoch": 0.7468997763773124, "grad_norm": 0.14346475899219513, "learning_rate": 0.0001253940811552934, "loss": 1.1188, "step": 3674 }, { "epoch": 0.7471030697296198, "grad_norm": 0.13663546741008759, "learning_rate": 0.00012537374148276212, "loss": 0.9814, "step": 3675 }, { "epoch": 0.7473063630819272, "grad_norm": 0.14386685192584991, "learning_rate": 0.00012535340181023084, "loss": 1.2206, "step": 3676 }, { "epoch": 0.7475096564342346, "grad_norm": 0.1263144165277481, "learning_rate": 0.0001253330621376996, "loss": 0.9859, "step": 3677 }, { "epoch": 0.747712949786542, "grad_norm": 0.12757907807826996, "learning_rate": 0.00012531272246516832, "loss": 0.9453, "step": 3678 }, { "epoch": 0.7479162431388494, "grad_norm": 0.13055284321308136, "learning_rate": 0.00012529238279263704, "loss": 0.9623, "step": 3679 }, { "epoch": 0.7481195364911567, "grad_norm": 0.15445955097675323, "learning_rate": 0.00012527204312010577, "loss": 1.3087, "step": 3680 }, { "epoch": 0.7483228298434641, "grad_norm": 0.1479884386062622, "learning_rate": 0.00012525170344757452, "loss": 1.2992, "step": 3681 }, { "epoch": 0.7485261231957715, "grad_norm": 0.14582955837249756, "learning_rate": 0.00012523136377504322, "loss": 1.0265, "step": 3682 }, { "epoch": 0.7487294165480789, "grad_norm": 0.1448071002960205, "learning_rate": 0.00012521102410251194, "loss": 1.0637, "step": 3683 }, { "epoch": 0.7489327099003863, "grad_norm": 0.13632971048355103, "learning_rate": 0.00012519068442998067, "loss": 1.204, "step": 3684 }, { "epoch": 0.7491360032526936, "grad_norm": 0.11724304407835007, "learning_rate": 0.00012517034475744942, "loss": 0.8373, "step": 3685 }, { "epoch": 0.749339296605001, "grad_norm": 0.12346580624580383, "learning_rate": 0.00012515000508491814, "loss": 0.9118, "step": 3686 }, { "epoch": 0.7495425899573084, "grad_norm": 0.12630046904087067, "learning_rate": 0.00012512966541238687, "loss": 1.0533, "step": 3687 }, { "epoch": 0.7497458833096158, "grad_norm": 0.14778174459934235, "learning_rate": 0.0001251093257398556, "loss": 1.2604, "step": 3688 }, { "epoch": 0.7499491766619232, "grad_norm": 0.13751018047332764, "learning_rate": 0.00012508898606732434, "loss": 1.0252, "step": 3689 }, { "epoch": 0.7501524700142306, "grad_norm": 0.14556734263896942, "learning_rate": 0.00012506864639479307, "loss": 1.0671, "step": 3690 }, { "epoch": 0.7503557633665379, "grad_norm": 0.12826183438301086, "learning_rate": 0.00012504830672226177, "loss": 1.0401, "step": 3691 }, { "epoch": 0.7505590567188453, "grad_norm": 0.12293746322393417, "learning_rate": 0.0001250279670497305, "loss": 1.0505, "step": 3692 }, { "epoch": 0.7507623500711527, "grad_norm": 0.14679206907749176, "learning_rate": 0.00012500762737719924, "loss": 1.1754, "step": 3693 }, { "epoch": 0.7509656434234601, "grad_norm": 0.13845571875572205, "learning_rate": 0.00012498728770466797, "loss": 1.1254, "step": 3694 }, { "epoch": 0.7511689367757675, "grad_norm": 0.126956969499588, "learning_rate": 0.0001249669480321367, "loss": 0.9822, "step": 3695 }, { "epoch": 0.7513722301280749, "grad_norm": 0.13764221966266632, "learning_rate": 0.00012494660835960541, "loss": 1.1326, "step": 3696 }, { "epoch": 0.7515755234803821, "grad_norm": 0.13586993515491486, "learning_rate": 0.00012492626868707417, "loss": 1.0949, "step": 3697 }, { "epoch": 0.7517788168326895, "grad_norm": 0.1523975282907486, "learning_rate": 0.0001249059290145429, "loss": 1.1545, "step": 3698 }, { "epoch": 0.7519821101849969, "grad_norm": 0.13115784525871277, "learning_rate": 0.0001248855893420116, "loss": 1.0304, "step": 3699 }, { "epoch": 0.7521854035373043, "grad_norm": 0.13575038313865662, "learning_rate": 0.0001248652496694803, "loss": 1.1775, "step": 3700 }, { "epoch": 0.7523886968896117, "grad_norm": 0.14293211698532104, "learning_rate": 0.00012484490999694904, "loss": 0.987, "step": 3701 }, { "epoch": 0.7525919902419191, "grad_norm": 0.13629594445228577, "learning_rate": 0.0001248245703244178, "loss": 0.9952, "step": 3702 }, { "epoch": 0.7527952835942264, "grad_norm": 0.1200501024723053, "learning_rate": 0.0001248042306518865, "loss": 0.9087, "step": 3703 }, { "epoch": 0.7529985769465338, "grad_norm": 0.12878622114658356, "learning_rate": 0.00012478389097935524, "loss": 1.0632, "step": 3704 }, { "epoch": 0.7532018702988412, "grad_norm": 0.13195644319057465, "learning_rate": 0.00012476355130682396, "loss": 1.1865, "step": 3705 }, { "epoch": 0.7534051636511486, "grad_norm": 0.13144764304161072, "learning_rate": 0.0001247432116342927, "loss": 0.9257, "step": 3706 }, { "epoch": 0.753608457003456, "grad_norm": 0.12141410261392593, "learning_rate": 0.0001247228719617614, "loss": 0.9196, "step": 3707 }, { "epoch": 0.7538117503557634, "grad_norm": 0.13238899409770966, "learning_rate": 0.00012470253228923014, "loss": 1.1404, "step": 3708 }, { "epoch": 0.7540150437080707, "grad_norm": 0.1436709612607956, "learning_rate": 0.00012468219261669886, "loss": 1.1169, "step": 3709 }, { "epoch": 0.7542183370603781, "grad_norm": 0.14147412776947021, "learning_rate": 0.0001246618529441676, "loss": 1.2736, "step": 3710 }, { "epoch": 0.7544216304126855, "grad_norm": 0.13145607709884644, "learning_rate": 0.00012464151327163634, "loss": 1.0299, "step": 3711 }, { "epoch": 0.7546249237649929, "grad_norm": 0.11213693022727966, "learning_rate": 0.00012462117359910506, "loss": 0.8738, "step": 3712 }, { "epoch": 0.7548282171173003, "grad_norm": 0.14354929327964783, "learning_rate": 0.00012460083392657378, "loss": 1.0871, "step": 3713 }, { "epoch": 0.7550315104696076, "grad_norm": 0.15012142062187195, "learning_rate": 0.00012458049425404254, "loss": 1.0701, "step": 3714 }, { "epoch": 0.755234803821915, "grad_norm": 0.15194512903690338, "learning_rate": 0.00012456015458151123, "loss": 1.0018, "step": 3715 }, { "epoch": 0.7554380971742224, "grad_norm": 0.13199283182621002, "learning_rate": 0.00012453981490897996, "loss": 0.9806, "step": 3716 }, { "epoch": 0.7556413905265298, "grad_norm": 0.1443110853433609, "learning_rate": 0.00012451947523644868, "loss": 1.1558, "step": 3717 }, { "epoch": 0.7558446838788372, "grad_norm": 0.1215786412358284, "learning_rate": 0.00012449913556391743, "loss": 1.0642, "step": 3718 }, { "epoch": 0.7560479772311446, "grad_norm": 0.12462542206048965, "learning_rate": 0.00012447879589138616, "loss": 0.9799, "step": 3719 }, { "epoch": 0.7562512705834519, "grad_norm": 0.1319034993648529, "learning_rate": 0.00012445845621885488, "loss": 1.0646, "step": 3720 }, { "epoch": 0.7564545639357593, "grad_norm": 0.14364975690841675, "learning_rate": 0.0001244381165463236, "loss": 1.0853, "step": 3721 }, { "epoch": 0.7566578572880667, "grad_norm": 0.13716979324817657, "learning_rate": 0.00012441777687379236, "loss": 1.2145, "step": 3722 }, { "epoch": 0.7568611506403741, "grad_norm": 0.1348930448293686, "learning_rate": 0.00012439743720126106, "loss": 1.1096, "step": 3723 }, { "epoch": 0.7570644439926815, "grad_norm": 0.15214388072490692, "learning_rate": 0.00012437709752872978, "loss": 1.1986, "step": 3724 }, { "epoch": 0.7572677373449889, "grad_norm": 0.14679096639156342, "learning_rate": 0.0001243567578561985, "loss": 1.2809, "step": 3725 }, { "epoch": 0.7574710306972962, "grad_norm": 0.1401345431804657, "learning_rate": 0.00012433641818366726, "loss": 1.013, "step": 3726 }, { "epoch": 0.7576743240496036, "grad_norm": 0.14611610770225525, "learning_rate": 0.00012431607851113598, "loss": 1.2145, "step": 3727 }, { "epoch": 0.757877617401911, "grad_norm": 0.13954514265060425, "learning_rate": 0.0001242957388386047, "loss": 1.1382, "step": 3728 }, { "epoch": 0.7580809107542184, "grad_norm": 0.1354246586561203, "learning_rate": 0.00012427539916607343, "loss": 1.0877, "step": 3729 }, { "epoch": 0.7582842041065257, "grad_norm": 0.12606988847255707, "learning_rate": 0.00012425505949354218, "loss": 1.0592, "step": 3730 }, { "epoch": 0.7584874974588331, "grad_norm": 0.13141503930091858, "learning_rate": 0.00012423471982101088, "loss": 0.9414, "step": 3731 }, { "epoch": 0.7586907908111404, "grad_norm": 0.1343068927526474, "learning_rate": 0.0001242143801484796, "loss": 0.9862, "step": 3732 }, { "epoch": 0.7588940841634478, "grad_norm": 0.13875959813594818, "learning_rate": 0.00012419404047594833, "loss": 1.3412, "step": 3733 }, { "epoch": 0.7590973775157552, "grad_norm": 0.14184454083442688, "learning_rate": 0.00012417370080341708, "loss": 1.0852, "step": 3734 }, { "epoch": 0.7593006708680626, "grad_norm": 0.13765336573123932, "learning_rate": 0.0001241533611308858, "loss": 1.1018, "step": 3735 }, { "epoch": 0.75950396422037, "grad_norm": 0.11607436835765839, "learning_rate": 0.00012413302145835453, "loss": 0.8447, "step": 3736 }, { "epoch": 0.7597072575726773, "grad_norm": 0.13869017362594604, "learning_rate": 0.00012411268178582325, "loss": 1.125, "step": 3737 }, { "epoch": 0.7599105509249847, "grad_norm": 0.14673906564712524, "learning_rate": 0.000124092342113292, "loss": 1.1876, "step": 3738 }, { "epoch": 0.7601138442772921, "grad_norm": 0.1397872269153595, "learning_rate": 0.0001240720024407607, "loss": 1.0398, "step": 3739 }, { "epoch": 0.7603171376295995, "grad_norm": 0.1220252513885498, "learning_rate": 0.00012405166276822943, "loss": 0.9359, "step": 3740 }, { "epoch": 0.7605204309819069, "grad_norm": 0.1372562199831009, "learning_rate": 0.00012403132309569815, "loss": 1.0704, "step": 3741 }, { "epoch": 0.7607237243342143, "grad_norm": 0.12001727521419525, "learning_rate": 0.00012401098342316688, "loss": 1.0201, "step": 3742 }, { "epoch": 0.7609270176865216, "grad_norm": 0.12373898923397064, "learning_rate": 0.00012399064375063563, "loss": 0.9927, "step": 3743 }, { "epoch": 0.761130311038829, "grad_norm": 0.1459614783525467, "learning_rate": 0.00012397030407810435, "loss": 1.23, "step": 3744 }, { "epoch": 0.7613336043911364, "grad_norm": 0.14972059428691864, "learning_rate": 0.00012394996440557308, "loss": 1.2213, "step": 3745 }, { "epoch": 0.7615368977434438, "grad_norm": 0.125379741191864, "learning_rate": 0.0001239296247330418, "loss": 0.9302, "step": 3746 }, { "epoch": 0.7617401910957512, "grad_norm": 0.13220852613449097, "learning_rate": 0.00012390928506051055, "loss": 0.9221, "step": 3747 }, { "epoch": 0.7619434844480586, "grad_norm": 0.1334318369626999, "learning_rate": 0.00012388894538797925, "loss": 1.1046, "step": 3748 }, { "epoch": 0.7621467778003659, "grad_norm": 0.11617275327444077, "learning_rate": 0.00012386860571544797, "loss": 0.8787, "step": 3749 }, { "epoch": 0.7623500711526733, "grad_norm": 0.12812359631061554, "learning_rate": 0.0001238482660429167, "loss": 1.0109, "step": 3750 }, { "epoch": 0.7625533645049807, "grad_norm": 0.1491006761789322, "learning_rate": 0.00012382792637038545, "loss": 1.1522, "step": 3751 }, { "epoch": 0.7627566578572881, "grad_norm": 0.1386028379201889, "learning_rate": 0.00012380758669785417, "loss": 1.0567, "step": 3752 }, { "epoch": 0.7629599512095955, "grad_norm": 0.12961892783641815, "learning_rate": 0.0001237872470253229, "loss": 0.9436, "step": 3753 }, { "epoch": 0.7631632445619029, "grad_norm": 0.13355448842048645, "learning_rate": 0.00012376690735279162, "loss": 1.063, "step": 3754 }, { "epoch": 0.7633665379142102, "grad_norm": 0.1302691102027893, "learning_rate": 0.00012374656768026037, "loss": 1.019, "step": 3755 }, { "epoch": 0.7635698312665176, "grad_norm": 0.12183891981840134, "learning_rate": 0.00012372622800772907, "loss": 0.964, "step": 3756 }, { "epoch": 0.763773124618825, "grad_norm": 0.12347770482301712, "learning_rate": 0.0001237058883351978, "loss": 0.9031, "step": 3757 }, { "epoch": 0.7639764179711324, "grad_norm": 0.12646906077861786, "learning_rate": 0.00012368554866266652, "loss": 1.009, "step": 3758 }, { "epoch": 0.7641797113234398, "grad_norm": 0.15650388598442078, "learning_rate": 0.00012366520899013527, "loss": 1.2043, "step": 3759 }, { "epoch": 0.7643830046757472, "grad_norm": 0.13092441856861115, "learning_rate": 0.000123644869317604, "loss": 1.038, "step": 3760 }, { "epoch": 0.7645862980280544, "grad_norm": 0.11747883260250092, "learning_rate": 0.00012362452964507272, "loss": 1.0147, "step": 3761 }, { "epoch": 0.7647895913803618, "grad_norm": 0.13621081411838531, "learning_rate": 0.00012360418997254145, "loss": 1.0798, "step": 3762 }, { "epoch": 0.7649928847326692, "grad_norm": 0.1359243243932724, "learning_rate": 0.0001235838503000102, "loss": 1.0795, "step": 3763 }, { "epoch": 0.7651961780849766, "grad_norm": 0.14412851631641388, "learning_rate": 0.0001235635106274789, "loss": 1.0907, "step": 3764 }, { "epoch": 0.765399471437284, "grad_norm": 0.14425703883171082, "learning_rate": 0.00012354317095494762, "loss": 1.1903, "step": 3765 }, { "epoch": 0.7656027647895913, "grad_norm": 0.15288187563419342, "learning_rate": 0.00012352283128241634, "loss": 1.1775, "step": 3766 }, { "epoch": 0.7658060581418987, "grad_norm": 0.149856299161911, "learning_rate": 0.0001235024916098851, "loss": 1.172, "step": 3767 }, { "epoch": 0.7660093514942061, "grad_norm": 0.1374143660068512, "learning_rate": 0.00012348215193735382, "loss": 1.2566, "step": 3768 }, { "epoch": 0.7662126448465135, "grad_norm": 0.13301679491996765, "learning_rate": 0.00012346181226482254, "loss": 1.0046, "step": 3769 }, { "epoch": 0.7664159381988209, "grad_norm": 0.1274683177471161, "learning_rate": 0.00012344147259229127, "loss": 0.8951, "step": 3770 }, { "epoch": 0.7666192315511283, "grad_norm": 0.12883058190345764, "learning_rate": 0.00012342113291976002, "loss": 0.955, "step": 3771 }, { "epoch": 0.7668225249034356, "grad_norm": 0.13394391536712646, "learning_rate": 0.00012340079324722872, "loss": 1.1987, "step": 3772 }, { "epoch": 0.767025818255743, "grad_norm": 0.1280052363872528, "learning_rate": 0.00012338045357469744, "loss": 0.9552, "step": 3773 }, { "epoch": 0.7672291116080504, "grad_norm": 0.13542263209819794, "learning_rate": 0.00012336011390216617, "loss": 0.9411, "step": 3774 }, { "epoch": 0.7674324049603578, "grad_norm": 0.13187946379184723, "learning_rate": 0.00012333977422963492, "loss": 1.0447, "step": 3775 }, { "epoch": 0.7676356983126652, "grad_norm": 0.13274554908275604, "learning_rate": 0.00012331943455710364, "loss": 1.0556, "step": 3776 }, { "epoch": 0.7678389916649726, "grad_norm": 0.1356000155210495, "learning_rate": 0.00012329909488457237, "loss": 1.0791, "step": 3777 }, { "epoch": 0.7680422850172799, "grad_norm": 0.1446497142314911, "learning_rate": 0.0001232787552120411, "loss": 1.1708, "step": 3778 }, { "epoch": 0.7682455783695873, "grad_norm": 0.14726495742797852, "learning_rate": 0.00012325841553950984, "loss": 1.0312, "step": 3779 }, { "epoch": 0.7684488717218947, "grad_norm": 0.1248805895447731, "learning_rate": 0.00012323807586697854, "loss": 1.001, "step": 3780 }, { "epoch": 0.7686521650742021, "grad_norm": 0.13720335066318512, "learning_rate": 0.00012321773619444726, "loss": 1.1051, "step": 3781 }, { "epoch": 0.7688554584265095, "grad_norm": 0.12258980423212051, "learning_rate": 0.000123197396521916, "loss": 1.014, "step": 3782 }, { "epoch": 0.7690587517788169, "grad_norm": 0.14602990448474884, "learning_rate": 0.00012317705684938471, "loss": 1.1083, "step": 3783 }, { "epoch": 0.7692620451311242, "grad_norm": 0.12902162969112396, "learning_rate": 0.00012315671717685347, "loss": 0.9431, "step": 3784 }, { "epoch": 0.7694653384834316, "grad_norm": 0.1396799236536026, "learning_rate": 0.0001231363775043222, "loss": 1.1026, "step": 3785 }, { "epoch": 0.769668631835739, "grad_norm": 0.13856211304664612, "learning_rate": 0.00012311603783179091, "loss": 1.2084, "step": 3786 }, { "epoch": 0.7698719251880464, "grad_norm": 0.12457921355962753, "learning_rate": 0.00012309569815925964, "loss": 1.0894, "step": 3787 }, { "epoch": 0.7700752185403538, "grad_norm": 0.12745535373687744, "learning_rate": 0.00012307535848672836, "loss": 0.9223, "step": 3788 }, { "epoch": 0.770278511892661, "grad_norm": 0.12804381549358368, "learning_rate": 0.0001230550188141971, "loss": 1.139, "step": 3789 }, { "epoch": 0.7704818052449685, "grad_norm": 0.12827259302139282, "learning_rate": 0.0001230346791416658, "loss": 1.0397, "step": 3790 }, { "epoch": 0.7706850985972759, "grad_norm": 0.14576175808906555, "learning_rate": 0.00012301433946913454, "loss": 1.1592, "step": 3791 }, { "epoch": 0.7708883919495833, "grad_norm": 0.13071264326572418, "learning_rate": 0.0001229939997966033, "loss": 1.1051, "step": 3792 }, { "epoch": 0.7710916853018907, "grad_norm": 0.1294952780008316, "learning_rate": 0.000122973660124072, "loss": 0.8815, "step": 3793 }, { "epoch": 0.771294978654198, "grad_norm": 0.13996455073356628, "learning_rate": 0.00012295332045154074, "loss": 1.1277, "step": 3794 }, { "epoch": 0.7714982720065053, "grad_norm": 0.14250068366527557, "learning_rate": 0.00012293298077900946, "loss": 1.2124, "step": 3795 }, { "epoch": 0.7717015653588127, "grad_norm": 0.12336855381727219, "learning_rate": 0.00012291264110647819, "loss": 1.0764, "step": 3796 }, { "epoch": 0.7719048587111201, "grad_norm": 0.14124532043933868, "learning_rate": 0.0001228923014339469, "loss": 1.156, "step": 3797 }, { "epoch": 0.7721081520634275, "grad_norm": 0.12716175615787506, "learning_rate": 0.00012287196176141563, "loss": 0.9228, "step": 3798 }, { "epoch": 0.7723114454157349, "grad_norm": 0.1456788033246994, "learning_rate": 0.00012285162208888436, "loss": 1.1462, "step": 3799 }, { "epoch": 0.7725147387680423, "grad_norm": 0.14236094057559967, "learning_rate": 0.0001228312824163531, "loss": 1.2849, "step": 3800 }, { "epoch": 0.7727180321203496, "grad_norm": 0.12564775347709656, "learning_rate": 0.00012281094274382184, "loss": 1.1172, "step": 3801 }, { "epoch": 0.772921325472657, "grad_norm": 0.11949034035205841, "learning_rate": 0.00012279060307129056, "loss": 0.8353, "step": 3802 }, { "epoch": 0.7731246188249644, "grad_norm": 0.13106048107147217, "learning_rate": 0.00012277026339875928, "loss": 1.0445, "step": 3803 }, { "epoch": 0.7733279121772718, "grad_norm": 0.11542949080467224, "learning_rate": 0.000122749923726228, "loss": 0.8345, "step": 3804 }, { "epoch": 0.7735312055295792, "grad_norm": 0.1267216056585312, "learning_rate": 0.00012272958405369673, "loss": 0.9304, "step": 3805 }, { "epoch": 0.7737344988818866, "grad_norm": 0.14101152122020721, "learning_rate": 0.00012270924438116546, "loss": 1.1243, "step": 3806 }, { "epoch": 0.7739377922341939, "grad_norm": 0.12236955761909485, "learning_rate": 0.00012268890470863418, "loss": 0.855, "step": 3807 }, { "epoch": 0.7741410855865013, "grad_norm": 0.11732099950313568, "learning_rate": 0.00012266856503610293, "loss": 0.8707, "step": 3808 }, { "epoch": 0.7743443789388087, "grad_norm": 0.12826688587665558, "learning_rate": 0.00012264822536357166, "loss": 1.0896, "step": 3809 }, { "epoch": 0.7745476722911161, "grad_norm": 0.13263994455337524, "learning_rate": 0.00012262788569104038, "loss": 1.0278, "step": 3810 }, { "epoch": 0.7747509656434235, "grad_norm": 0.12591175734996796, "learning_rate": 0.0001226075460185091, "loss": 0.8749, "step": 3811 }, { "epoch": 0.7749542589957309, "grad_norm": 0.13671188056468964, "learning_rate": 0.00012258720634597786, "loss": 1.0305, "step": 3812 }, { "epoch": 0.7751575523480382, "grad_norm": 0.13743267953395844, "learning_rate": 0.00012256686667344656, "loss": 1.0444, "step": 3813 }, { "epoch": 0.7753608457003456, "grad_norm": 0.12400873005390167, "learning_rate": 0.00012254652700091528, "loss": 0.8696, "step": 3814 }, { "epoch": 0.775564139052653, "grad_norm": 0.13085900247097015, "learning_rate": 0.000122526187328384, "loss": 1.0268, "step": 3815 }, { "epoch": 0.7757674324049604, "grad_norm": 0.13684894144535065, "learning_rate": 0.00012250584765585276, "loss": 1.1531, "step": 3816 }, { "epoch": 0.7759707257572678, "grad_norm": 0.12287990748882294, "learning_rate": 0.00012248550798332148, "loss": 0.9642, "step": 3817 }, { "epoch": 0.7761740191095751, "grad_norm": 0.1277002990245819, "learning_rate": 0.0001224651683107902, "loss": 0.9986, "step": 3818 }, { "epoch": 0.7763773124618825, "grad_norm": 0.13747401535511017, "learning_rate": 0.00012244482863825893, "loss": 1.084, "step": 3819 }, { "epoch": 0.7765806058141899, "grad_norm": 0.14822441339492798, "learning_rate": 0.00012242448896572768, "loss": 0.9895, "step": 3820 }, { "epoch": 0.7767838991664973, "grad_norm": 0.13352279365062714, "learning_rate": 0.00012240414929319638, "loss": 1.0556, "step": 3821 }, { "epoch": 0.7769871925188047, "grad_norm": 0.14219939708709717, "learning_rate": 0.0001223838096206651, "loss": 1.2207, "step": 3822 }, { "epoch": 0.7771904858711121, "grad_norm": 0.14143721759319305, "learning_rate": 0.00012236346994813383, "loss": 1.1582, "step": 3823 }, { "epoch": 0.7773937792234193, "grad_norm": 0.14316944777965546, "learning_rate": 0.00012234313027560255, "loss": 1.1662, "step": 3824 }, { "epoch": 0.7775970725757267, "grad_norm": 0.1315951645374298, "learning_rate": 0.0001223227906030713, "loss": 0.9502, "step": 3825 }, { "epoch": 0.7778003659280341, "grad_norm": 0.14173437654972076, "learning_rate": 0.00012230245093054003, "loss": 1.0675, "step": 3826 }, { "epoch": 0.7780036592803415, "grad_norm": 0.11854992806911469, "learning_rate": 0.00012228211125800875, "loss": 1.0158, "step": 3827 }, { "epoch": 0.7782069526326489, "grad_norm": 0.1441982090473175, "learning_rate": 0.00012226177158547748, "loss": 0.9432, "step": 3828 }, { "epoch": 0.7784102459849563, "grad_norm": 0.13182631134986877, "learning_rate": 0.0001222414319129462, "loss": 0.9372, "step": 3829 }, { "epoch": 0.7786135393372636, "grad_norm": 0.12839631736278534, "learning_rate": 0.00012222109224041493, "loss": 1.0888, "step": 3830 }, { "epoch": 0.778816832689571, "grad_norm": 0.14066271483898163, "learning_rate": 0.00012220075256788365, "loss": 1.0749, "step": 3831 }, { "epoch": 0.7790201260418784, "grad_norm": 0.13321231305599213, "learning_rate": 0.00012218041289535237, "loss": 0.9994, "step": 3832 }, { "epoch": 0.7792234193941858, "grad_norm": 0.12681256234645844, "learning_rate": 0.00012216007322282113, "loss": 0.9195, "step": 3833 }, { "epoch": 0.7794267127464932, "grad_norm": 0.14184892177581787, "learning_rate": 0.00012213973355028985, "loss": 1.2513, "step": 3834 }, { "epoch": 0.7796300060988006, "grad_norm": 0.10922446101903915, "learning_rate": 0.00012211939387775858, "loss": 0.8768, "step": 3835 }, { "epoch": 0.7798332994511079, "grad_norm": 0.135145902633667, "learning_rate": 0.0001220990542052273, "loss": 1.1626, "step": 3836 }, { "epoch": 0.7800365928034153, "grad_norm": 0.1317375898361206, "learning_rate": 0.00012207871453269602, "loss": 1.1827, "step": 3837 }, { "epoch": 0.7802398861557227, "grad_norm": 0.1291121393442154, "learning_rate": 0.00012205837486016476, "loss": 1.0689, "step": 3838 }, { "epoch": 0.7804431795080301, "grad_norm": 0.12500061094760895, "learning_rate": 0.00012203803518763349, "loss": 0.9881, "step": 3839 }, { "epoch": 0.7806464728603375, "grad_norm": 0.1250467747449875, "learning_rate": 0.0001220176955151022, "loss": 0.953, "step": 3840 }, { "epoch": 0.7808497662126448, "grad_norm": 0.1281813234090805, "learning_rate": 0.00012199735584257095, "loss": 1.0164, "step": 3841 }, { "epoch": 0.7810530595649522, "grad_norm": 0.11305128782987595, "learning_rate": 0.00012197701617003967, "loss": 0.8044, "step": 3842 }, { "epoch": 0.7812563529172596, "grad_norm": 0.14512300491333008, "learning_rate": 0.0001219566764975084, "loss": 1.121, "step": 3843 }, { "epoch": 0.781459646269567, "grad_norm": 0.1404501348733902, "learning_rate": 0.00012193633682497711, "loss": 1.1358, "step": 3844 }, { "epoch": 0.7816629396218744, "grad_norm": 0.13621416687965393, "learning_rate": 0.00012191599715244586, "loss": 1.0535, "step": 3845 }, { "epoch": 0.7818662329741818, "grad_norm": 0.11842742562294006, "learning_rate": 0.00012189565747991459, "loss": 0.9138, "step": 3846 }, { "epoch": 0.7820695263264891, "grad_norm": 0.14051960408687592, "learning_rate": 0.00012187531780738331, "loss": 1.0012, "step": 3847 }, { "epoch": 0.7822728196787965, "grad_norm": 0.12643176317214966, "learning_rate": 0.00012185497813485202, "loss": 0.9594, "step": 3848 }, { "epoch": 0.7824761130311039, "grad_norm": 0.13830742239952087, "learning_rate": 0.00012183463846232077, "loss": 1.0394, "step": 3849 }, { "epoch": 0.7826794063834113, "grad_norm": 0.12013565003871918, "learning_rate": 0.0001218142987897895, "loss": 0.8788, "step": 3850 }, { "epoch": 0.7828826997357187, "grad_norm": 0.14404936134815216, "learning_rate": 0.00012179395911725822, "loss": 1.0755, "step": 3851 }, { "epoch": 0.7830859930880261, "grad_norm": 0.1242976263165474, "learning_rate": 0.00012177361944472693, "loss": 0.9733, "step": 3852 }, { "epoch": 0.7832892864403334, "grad_norm": 0.13720235228538513, "learning_rate": 0.00012175327977219568, "loss": 1.1624, "step": 3853 }, { "epoch": 0.7834925797926408, "grad_norm": 0.11972963809967041, "learning_rate": 0.00012173294009966441, "loss": 0.8956, "step": 3854 }, { "epoch": 0.7836958731449482, "grad_norm": 0.1539568156003952, "learning_rate": 0.00012171260042713313, "loss": 1.1909, "step": 3855 }, { "epoch": 0.7838991664972556, "grad_norm": 0.11814553290605545, "learning_rate": 0.00012169226075460186, "loss": 0.8093, "step": 3856 }, { "epoch": 0.784102459849563, "grad_norm": 0.1346539407968521, "learning_rate": 0.0001216719210820706, "loss": 1.0815, "step": 3857 }, { "epoch": 0.7843057532018703, "grad_norm": 0.12297804653644562, "learning_rate": 0.00012165158140953932, "loss": 0.9216, "step": 3858 }, { "epoch": 0.7845090465541776, "grad_norm": 0.13503801822662354, "learning_rate": 0.00012163124173700804, "loss": 1.2251, "step": 3859 }, { "epoch": 0.784712339906485, "grad_norm": 0.1486554741859436, "learning_rate": 0.00012161090206447677, "loss": 1.0849, "step": 3860 }, { "epoch": 0.7849156332587924, "grad_norm": 0.13975729048252106, "learning_rate": 0.0001215905623919455, "loss": 1.0914, "step": 3861 }, { "epoch": 0.7851189266110998, "grad_norm": 0.14087165892124176, "learning_rate": 0.00012157022271941423, "loss": 1.2099, "step": 3862 }, { "epoch": 0.7853222199634072, "grad_norm": 0.14210177958011627, "learning_rate": 0.00012154988304688296, "loss": 1.1608, "step": 3863 }, { "epoch": 0.7855255133157146, "grad_norm": 0.14731276035308838, "learning_rate": 0.00012152954337435168, "loss": 1.1699, "step": 3864 }, { "epoch": 0.7857288066680219, "grad_norm": 0.12955504655838013, "learning_rate": 0.00012150920370182039, "loss": 1.0048, "step": 3865 }, { "epoch": 0.7859321000203293, "grad_norm": 0.13563545048236847, "learning_rate": 0.00012148886402928914, "loss": 1.0157, "step": 3866 }, { "epoch": 0.7861353933726367, "grad_norm": 0.12085787951946259, "learning_rate": 0.00012146852435675787, "loss": 0.9674, "step": 3867 }, { "epoch": 0.7863386867249441, "grad_norm": 0.12295902520418167, "learning_rate": 0.00012144818468422659, "loss": 0.9919, "step": 3868 }, { "epoch": 0.7865419800772515, "grad_norm": 0.12418414652347565, "learning_rate": 0.0001214278450116953, "loss": 1.0134, "step": 3869 }, { "epoch": 0.7867452734295588, "grad_norm": 0.13931810855865479, "learning_rate": 0.00012140750533916405, "loss": 0.9817, "step": 3870 }, { "epoch": 0.7869485667818662, "grad_norm": 0.14642778038978577, "learning_rate": 0.00012138716566663278, "loss": 1.0308, "step": 3871 }, { "epoch": 0.7871518601341736, "grad_norm": 0.1386035829782486, "learning_rate": 0.0001213668259941015, "loss": 1.0426, "step": 3872 }, { "epoch": 0.787355153486481, "grad_norm": 0.1395215541124344, "learning_rate": 0.00012134648632157021, "loss": 1.1342, "step": 3873 }, { "epoch": 0.7875584468387884, "grad_norm": 0.11670932918787003, "learning_rate": 0.00012132614664903896, "loss": 1.0713, "step": 3874 }, { "epoch": 0.7877617401910958, "grad_norm": 0.13401034474372864, "learning_rate": 0.00012130580697650769, "loss": 1.1358, "step": 3875 }, { "epoch": 0.7879650335434031, "grad_norm": 0.14385886490345, "learning_rate": 0.00012128546730397641, "loss": 1.1176, "step": 3876 }, { "epoch": 0.7881683268957105, "grad_norm": 0.12759087979793549, "learning_rate": 0.00012126512763144512, "loss": 1.0423, "step": 3877 }, { "epoch": 0.7883716202480179, "grad_norm": 0.155388742685318, "learning_rate": 0.00012124478795891388, "loss": 1.2041, "step": 3878 }, { "epoch": 0.7885749136003253, "grad_norm": 0.13593384623527527, "learning_rate": 0.0001212244482863826, "loss": 1.1223, "step": 3879 }, { "epoch": 0.7887782069526327, "grad_norm": 0.12947037816047668, "learning_rate": 0.00012120410861385133, "loss": 0.8951, "step": 3880 }, { "epoch": 0.7889815003049401, "grad_norm": 0.13151758909225464, "learning_rate": 0.00012118376894132004, "loss": 1.0879, "step": 3881 }, { "epoch": 0.7891847936572474, "grad_norm": 0.12490543723106384, "learning_rate": 0.00012116342926878879, "loss": 1.0054, "step": 3882 }, { "epoch": 0.7893880870095548, "grad_norm": 0.13789287209510803, "learning_rate": 0.00012114308959625751, "loss": 1.1069, "step": 3883 }, { "epoch": 0.7895913803618622, "grad_norm": 0.13065920770168304, "learning_rate": 0.00012112274992372624, "loss": 1.034, "step": 3884 }, { "epoch": 0.7897946737141696, "grad_norm": 0.1365562081336975, "learning_rate": 0.00012110241025119495, "loss": 1.1702, "step": 3885 }, { "epoch": 0.789997967066477, "grad_norm": 0.14647583663463593, "learning_rate": 0.0001210820705786637, "loss": 1.1919, "step": 3886 }, { "epoch": 0.7902012604187844, "grad_norm": 0.1417173445224762, "learning_rate": 0.00012106173090613242, "loss": 0.9846, "step": 3887 }, { "epoch": 0.7904045537710916, "grad_norm": 0.12423622608184814, "learning_rate": 0.00012104139123360115, "loss": 1.0007, "step": 3888 }, { "epoch": 0.790607847123399, "grad_norm": 0.1554161161184311, "learning_rate": 0.00012102105156106986, "loss": 1.3904, "step": 3889 }, { "epoch": 0.7908111404757064, "grad_norm": 0.14858123660087585, "learning_rate": 0.00012100071188853861, "loss": 1.1642, "step": 3890 }, { "epoch": 0.7910144338280138, "grad_norm": 0.1301809698343277, "learning_rate": 0.00012098037221600733, "loss": 1.0904, "step": 3891 }, { "epoch": 0.7912177271803212, "grad_norm": 0.13561727106571198, "learning_rate": 0.00012096003254347606, "loss": 1.1204, "step": 3892 }, { "epoch": 0.7914210205326286, "grad_norm": 0.13255447149276733, "learning_rate": 0.00012093969287094477, "loss": 1.0225, "step": 3893 }, { "epoch": 0.7916243138849359, "grad_norm": 0.14348706603050232, "learning_rate": 0.00012091935319841352, "loss": 1.1129, "step": 3894 }, { "epoch": 0.7918276072372433, "grad_norm": 0.1501035988330841, "learning_rate": 0.00012089901352588225, "loss": 1.1329, "step": 3895 }, { "epoch": 0.7920309005895507, "grad_norm": 0.11853793263435364, "learning_rate": 0.00012087867385335097, "loss": 1.0178, "step": 3896 }, { "epoch": 0.7922341939418581, "grad_norm": 0.12795880436897278, "learning_rate": 0.00012085833418081968, "loss": 0.9846, "step": 3897 }, { "epoch": 0.7924374872941655, "grad_norm": 0.13203004002571106, "learning_rate": 0.00012083799450828843, "loss": 1.0961, "step": 3898 }, { "epoch": 0.7926407806464728, "grad_norm": 0.1426658183336258, "learning_rate": 0.00012081765483575716, "loss": 1.101, "step": 3899 }, { "epoch": 0.7928440739987802, "grad_norm": 0.1364053338766098, "learning_rate": 0.00012079731516322588, "loss": 1.0818, "step": 3900 }, { "epoch": 0.7930473673510876, "grad_norm": 0.1505763679742813, "learning_rate": 0.00012077697549069459, "loss": 1.4502, "step": 3901 }, { "epoch": 0.793250660703395, "grad_norm": 0.133381187915802, "learning_rate": 0.00012075663581816334, "loss": 1.0036, "step": 3902 }, { "epoch": 0.7934539540557024, "grad_norm": 0.12915650010108948, "learning_rate": 0.00012073629614563207, "loss": 0.8927, "step": 3903 }, { "epoch": 0.7936572474080098, "grad_norm": 0.12571905553340912, "learning_rate": 0.0001207159564731008, "loss": 0.8493, "step": 3904 }, { "epoch": 0.7938605407603171, "grad_norm": 0.13056504726409912, "learning_rate": 0.0001206956168005695, "loss": 1.0281, "step": 3905 }, { "epoch": 0.7940638341126245, "grad_norm": 0.13789808750152588, "learning_rate": 0.00012067527712803826, "loss": 1.1499, "step": 3906 }, { "epoch": 0.7942671274649319, "grad_norm": 0.136823832988739, "learning_rate": 0.00012065493745550698, "loss": 1.122, "step": 3907 }, { "epoch": 0.7944704208172393, "grad_norm": 0.1468329280614853, "learning_rate": 0.0001206345977829757, "loss": 1.1591, "step": 3908 }, { "epoch": 0.7946737141695467, "grad_norm": 0.12451114505529404, "learning_rate": 0.00012061425811044442, "loss": 0.9909, "step": 3909 }, { "epoch": 0.7948770075218541, "grad_norm": 0.13311980664730072, "learning_rate": 0.00012059391843791314, "loss": 1.0117, "step": 3910 }, { "epoch": 0.7950803008741614, "grad_norm": 0.13651201128959656, "learning_rate": 0.00012057357876538189, "loss": 1.0057, "step": 3911 }, { "epoch": 0.7952835942264688, "grad_norm": 0.14184725284576416, "learning_rate": 0.00012055323909285062, "loss": 1.1766, "step": 3912 }, { "epoch": 0.7954868875787762, "grad_norm": 0.12303798645734787, "learning_rate": 0.00012053289942031933, "loss": 0.9439, "step": 3913 }, { "epoch": 0.7956901809310836, "grad_norm": 0.12423896044492722, "learning_rate": 0.00012051255974778805, "loss": 1.0675, "step": 3914 }, { "epoch": 0.795893474283391, "grad_norm": 0.16398456692695618, "learning_rate": 0.0001204922200752568, "loss": 1.2481, "step": 3915 }, { "epoch": 0.7960967676356984, "grad_norm": 0.13194435834884644, "learning_rate": 0.00012047188040272553, "loss": 1.0274, "step": 3916 }, { "epoch": 0.7963000609880057, "grad_norm": 0.13115055859088898, "learning_rate": 0.00012045154073019425, "loss": 0.9719, "step": 3917 }, { "epoch": 0.796503354340313, "grad_norm": 0.1376492977142334, "learning_rate": 0.00012043120105766296, "loss": 1.1586, "step": 3918 }, { "epoch": 0.7967066476926205, "grad_norm": 0.13110294938087463, "learning_rate": 0.00012041086138513171, "loss": 1.0523, "step": 3919 }, { "epoch": 0.7969099410449278, "grad_norm": 0.13019250333309174, "learning_rate": 0.00012039052171260044, "loss": 0.9233, "step": 3920 }, { "epoch": 0.7971132343972352, "grad_norm": 0.14026646316051483, "learning_rate": 0.00012037018204006916, "loss": 1.1442, "step": 3921 }, { "epoch": 0.7973165277495425, "grad_norm": 0.13189871609210968, "learning_rate": 0.00012034984236753787, "loss": 1.0618, "step": 3922 }, { "epoch": 0.7975198211018499, "grad_norm": 0.1411222368478775, "learning_rate": 0.00012032950269500663, "loss": 1.161, "step": 3923 }, { "epoch": 0.7977231144541573, "grad_norm": 0.13838204741477966, "learning_rate": 0.00012030916302247535, "loss": 0.9388, "step": 3924 }, { "epoch": 0.7979264078064647, "grad_norm": 0.13982777297496796, "learning_rate": 0.00012028882334994408, "loss": 1.2115, "step": 3925 }, { "epoch": 0.7981297011587721, "grad_norm": 0.16525013744831085, "learning_rate": 0.00012026848367741279, "loss": 1.3161, "step": 3926 }, { "epoch": 0.7983329945110795, "grad_norm": 0.12277159839868546, "learning_rate": 0.00012024814400488154, "loss": 0.9172, "step": 3927 }, { "epoch": 0.7985362878633868, "grad_norm": 0.13560567796230316, "learning_rate": 0.00012022780433235026, "loss": 1.1087, "step": 3928 }, { "epoch": 0.7987395812156942, "grad_norm": 0.13598614931106567, "learning_rate": 0.00012020746465981899, "loss": 1.0942, "step": 3929 }, { "epoch": 0.7989428745680016, "grad_norm": 0.12996844947338104, "learning_rate": 0.0001201871249872877, "loss": 1.1133, "step": 3930 }, { "epoch": 0.799146167920309, "grad_norm": 0.14834141731262207, "learning_rate": 0.00012016678531475645, "loss": 1.0568, "step": 3931 }, { "epoch": 0.7993494612726164, "grad_norm": 0.14238953590393066, "learning_rate": 0.00012014644564222517, "loss": 1.0885, "step": 3932 }, { "epoch": 0.7995527546249238, "grad_norm": 0.14690124988555908, "learning_rate": 0.0001201261059696939, "loss": 1.1593, "step": 3933 }, { "epoch": 0.7997560479772311, "grad_norm": 0.12939292192459106, "learning_rate": 0.00012010576629716261, "loss": 1.0157, "step": 3934 }, { "epoch": 0.7999593413295385, "grad_norm": 0.14343731105327606, "learning_rate": 0.00012008542662463136, "loss": 1.1929, "step": 3935 }, { "epoch": 0.8001626346818459, "grad_norm": 0.14443133771419525, "learning_rate": 0.00012006508695210008, "loss": 1.1939, "step": 3936 }, { "epoch": 0.8003659280341533, "grad_norm": 0.1503942608833313, "learning_rate": 0.00012004474727956881, "loss": 1.2368, "step": 3937 }, { "epoch": 0.8005692213864607, "grad_norm": 0.14112812280654907, "learning_rate": 0.00012002440760703752, "loss": 1.2037, "step": 3938 }, { "epoch": 0.8007725147387681, "grad_norm": 0.1394345462322235, "learning_rate": 0.00012000406793450627, "loss": 1.0076, "step": 3939 }, { "epoch": 0.8009758080910754, "grad_norm": 0.12623324990272522, "learning_rate": 0.000119983728261975, "loss": 1.0405, "step": 3940 }, { "epoch": 0.8011791014433828, "grad_norm": 0.14469188451766968, "learning_rate": 0.00011996338858944372, "loss": 1.2748, "step": 3941 }, { "epoch": 0.8013823947956902, "grad_norm": 0.14259637892246246, "learning_rate": 0.00011994304891691243, "loss": 1.1081, "step": 3942 }, { "epoch": 0.8015856881479976, "grad_norm": 0.13012677431106567, "learning_rate": 0.00011992270924438118, "loss": 0.9882, "step": 3943 }, { "epoch": 0.801788981500305, "grad_norm": 0.1390579491853714, "learning_rate": 0.00011990236957184991, "loss": 1.0806, "step": 3944 }, { "epoch": 0.8019922748526124, "grad_norm": 0.11860685795545578, "learning_rate": 0.00011988202989931863, "loss": 0.9036, "step": 3945 }, { "epoch": 0.8021955682049197, "grad_norm": 0.12979279458522797, "learning_rate": 0.00011986169022678734, "loss": 0.9918, "step": 3946 }, { "epoch": 0.8023988615572271, "grad_norm": 0.1302417367696762, "learning_rate": 0.0001198413505542561, "loss": 1.0498, "step": 3947 }, { "epoch": 0.8026021549095345, "grad_norm": 0.12292234599590302, "learning_rate": 0.00011982101088172482, "loss": 1.0181, "step": 3948 }, { "epoch": 0.8028054482618419, "grad_norm": 0.12002125382423401, "learning_rate": 0.00011980067120919354, "loss": 0.9503, "step": 3949 }, { "epoch": 0.8030087416141493, "grad_norm": 0.1403089463710785, "learning_rate": 0.00011978033153666225, "loss": 1.033, "step": 3950 }, { "epoch": 0.8032120349664565, "grad_norm": 0.13033491373062134, "learning_rate": 0.00011975999186413098, "loss": 0.9713, "step": 3951 }, { "epoch": 0.8034153283187639, "grad_norm": 0.14898493885993958, "learning_rate": 0.00011973965219159973, "loss": 1.1566, "step": 3952 }, { "epoch": 0.8036186216710713, "grad_norm": 0.1320907324552536, "learning_rate": 0.00011971931251906845, "loss": 0.9786, "step": 3953 }, { "epoch": 0.8038219150233787, "grad_norm": 0.14695419371128082, "learning_rate": 0.00011969897284653717, "loss": 1.1951, "step": 3954 }, { "epoch": 0.8040252083756861, "grad_norm": 0.13139761984348297, "learning_rate": 0.00011967863317400589, "loss": 1.0117, "step": 3955 }, { "epoch": 0.8042285017279935, "grad_norm": 0.13509678840637207, "learning_rate": 0.00011965829350147464, "loss": 1.0577, "step": 3956 }, { "epoch": 0.8044317950803008, "grad_norm": 0.12979759275913239, "learning_rate": 0.00011963795382894337, "loss": 1.0552, "step": 3957 }, { "epoch": 0.8046350884326082, "grad_norm": 0.12381764501333237, "learning_rate": 0.00011961761415641208, "loss": 1.045, "step": 3958 }, { "epoch": 0.8048383817849156, "grad_norm": 0.1273113489151001, "learning_rate": 0.0001195972744838808, "loss": 1.0352, "step": 3959 }, { "epoch": 0.805041675137223, "grad_norm": 0.13506780564785004, "learning_rate": 0.00011957693481134955, "loss": 1.0183, "step": 3960 }, { "epoch": 0.8052449684895304, "grad_norm": 0.13643690943717957, "learning_rate": 0.00011955659513881828, "loss": 0.9271, "step": 3961 }, { "epoch": 0.8054482618418378, "grad_norm": 0.13394352793693542, "learning_rate": 0.00011953625546628699, "loss": 1.01, "step": 3962 }, { "epoch": 0.8056515551941451, "grad_norm": 0.12335560470819473, "learning_rate": 0.00011951591579375571, "loss": 1.0166, "step": 3963 }, { "epoch": 0.8058548485464525, "grad_norm": 0.13454771041870117, "learning_rate": 0.00011949557612122446, "loss": 0.9962, "step": 3964 }, { "epoch": 0.8060581418987599, "grad_norm": 0.13157734274864197, "learning_rate": 0.00011947523644869319, "loss": 1.0938, "step": 3965 }, { "epoch": 0.8062614352510673, "grad_norm": 0.1234944686293602, "learning_rate": 0.0001194548967761619, "loss": 0.962, "step": 3966 }, { "epoch": 0.8064647286033747, "grad_norm": 0.1396668702363968, "learning_rate": 0.00011943455710363062, "loss": 1.1383, "step": 3967 }, { "epoch": 0.8066680219556821, "grad_norm": 0.14107537269592285, "learning_rate": 0.00011941421743109938, "loss": 0.9585, "step": 3968 }, { "epoch": 0.8068713153079894, "grad_norm": 0.14701543748378754, "learning_rate": 0.0001193938777585681, "loss": 1.1863, "step": 3969 }, { "epoch": 0.8070746086602968, "grad_norm": 0.13169316947460175, "learning_rate": 0.00011937353808603681, "loss": 1.1441, "step": 3970 }, { "epoch": 0.8072779020126042, "grad_norm": 0.1322471648454666, "learning_rate": 0.00011935319841350554, "loss": 1.0807, "step": 3971 }, { "epoch": 0.8074811953649116, "grad_norm": 0.1394157111644745, "learning_rate": 0.00011933285874097429, "loss": 1.1423, "step": 3972 }, { "epoch": 0.807684488717219, "grad_norm": 0.13044828176498413, "learning_rate": 0.00011931251906844301, "loss": 1.0136, "step": 3973 }, { "epoch": 0.8078877820695263, "grad_norm": 0.14347662031650543, "learning_rate": 0.00011929217939591174, "loss": 1.0989, "step": 3974 }, { "epoch": 0.8080910754218337, "grad_norm": 0.13873308897018433, "learning_rate": 0.00011927183972338045, "loss": 1.0194, "step": 3975 }, { "epoch": 0.8082943687741411, "grad_norm": 0.1266692876815796, "learning_rate": 0.0001192515000508492, "loss": 1.0114, "step": 3976 }, { "epoch": 0.8084976621264485, "grad_norm": 0.13810694217681885, "learning_rate": 0.00011923116037831792, "loss": 1.0433, "step": 3977 }, { "epoch": 0.8087009554787559, "grad_norm": 0.14580698311328888, "learning_rate": 0.00011921082070578665, "loss": 1.1527, "step": 3978 }, { "epoch": 0.8089042488310633, "grad_norm": 0.14972800016403198, "learning_rate": 0.00011919048103325536, "loss": 1.2265, "step": 3979 }, { "epoch": 0.8091075421833706, "grad_norm": 0.14041751623153687, "learning_rate": 0.00011917014136072411, "loss": 1.0299, "step": 3980 }, { "epoch": 0.809310835535678, "grad_norm": 0.14178511500358582, "learning_rate": 0.00011914980168819283, "loss": 1.1151, "step": 3981 }, { "epoch": 0.8095141288879854, "grad_norm": 0.14620938897132874, "learning_rate": 0.00011912946201566156, "loss": 1.1133, "step": 3982 }, { "epoch": 0.8097174222402928, "grad_norm": 0.14342685043811798, "learning_rate": 0.00011910912234313027, "loss": 0.9168, "step": 3983 }, { "epoch": 0.8099207155926001, "grad_norm": 0.1431896686553955, "learning_rate": 0.00011908878267059902, "loss": 1.1063, "step": 3984 }, { "epoch": 0.8101240089449075, "grad_norm": 0.13744968175888062, "learning_rate": 0.00011906844299806775, "loss": 1.0406, "step": 3985 }, { "epoch": 0.8103273022972148, "grad_norm": 0.1373889297246933, "learning_rate": 0.00011904810332553647, "loss": 1.0499, "step": 3986 }, { "epoch": 0.8105305956495222, "grad_norm": 0.13776156306266785, "learning_rate": 0.00011902776365300518, "loss": 1.0377, "step": 3987 }, { "epoch": 0.8107338890018296, "grad_norm": 0.13153620064258575, "learning_rate": 0.00011900742398047393, "loss": 0.995, "step": 3988 }, { "epoch": 0.810937182354137, "grad_norm": 0.153978168964386, "learning_rate": 0.00011898708430794266, "loss": 1.1654, "step": 3989 }, { "epoch": 0.8111404757064444, "grad_norm": 0.14656215906143188, "learning_rate": 0.00011896674463541138, "loss": 0.9426, "step": 3990 }, { "epoch": 0.8113437690587518, "grad_norm": 0.13378344476222992, "learning_rate": 0.00011894640496288009, "loss": 0.9992, "step": 3991 }, { "epoch": 0.8115470624110591, "grad_norm": 0.14194630086421967, "learning_rate": 0.00011892606529034882, "loss": 0.9897, "step": 3992 }, { "epoch": 0.8117503557633665, "grad_norm": 0.12744341790676117, "learning_rate": 0.00011890572561781757, "loss": 1.0271, "step": 3993 }, { "epoch": 0.8119536491156739, "grad_norm": 0.12593428790569305, "learning_rate": 0.0001188853859452863, "loss": 0.8918, "step": 3994 }, { "epoch": 0.8121569424679813, "grad_norm": 0.1353382021188736, "learning_rate": 0.000118865046272755, "loss": 0.9951, "step": 3995 }, { "epoch": 0.8123602358202887, "grad_norm": 0.13705074787139893, "learning_rate": 0.00011884470660022373, "loss": 0.9689, "step": 3996 }, { "epoch": 0.8125635291725961, "grad_norm": 0.1512332260608673, "learning_rate": 0.00011882436692769248, "loss": 1.1077, "step": 3997 }, { "epoch": 0.8127668225249034, "grad_norm": 0.1288524866104126, "learning_rate": 0.0001188040272551612, "loss": 1.0678, "step": 3998 }, { "epoch": 0.8129701158772108, "grad_norm": 0.13065043091773987, "learning_rate": 0.00011878368758262992, "loss": 1.0389, "step": 3999 }, { "epoch": 0.8131734092295182, "grad_norm": 0.14276419579982758, "learning_rate": 0.00011876334791009864, "loss": 1.0811, "step": 4000 }, { "epoch": 0.8133767025818256, "grad_norm": 0.12206871807575226, "learning_rate": 0.00011874300823756739, "loss": 0.8628, "step": 4001 }, { "epoch": 0.813579995934133, "grad_norm": 0.13889804482460022, "learning_rate": 0.00011872266856503612, "loss": 1.026, "step": 4002 }, { "epoch": 0.8137832892864403, "grad_norm": 0.13615025579929352, "learning_rate": 0.00011870232889250483, "loss": 1.0019, "step": 4003 }, { "epoch": 0.8139865826387477, "grad_norm": 0.12381166964769363, "learning_rate": 0.00011868198921997355, "loss": 0.9913, "step": 4004 }, { "epoch": 0.8141898759910551, "grad_norm": 0.13503628969192505, "learning_rate": 0.0001186616495474423, "loss": 1.0766, "step": 4005 }, { "epoch": 0.8143931693433625, "grad_norm": 0.143154576420784, "learning_rate": 0.00011864130987491103, "loss": 1.1219, "step": 4006 }, { "epoch": 0.8145964626956699, "grad_norm": 0.15656810998916626, "learning_rate": 0.00011862097020237974, "loss": 1.193, "step": 4007 }, { "epoch": 0.8147997560479773, "grad_norm": 0.14031293988227844, "learning_rate": 0.00011860063052984846, "loss": 0.9923, "step": 4008 }, { "epoch": 0.8150030494002846, "grad_norm": 0.13734276592731476, "learning_rate": 0.00011858029085731721, "loss": 1.0802, "step": 4009 }, { "epoch": 0.815206342752592, "grad_norm": 0.1422613561153412, "learning_rate": 0.00011855995118478594, "loss": 1.1129, "step": 4010 }, { "epoch": 0.8154096361048994, "grad_norm": 0.12899209558963776, "learning_rate": 0.00011853961151225465, "loss": 0.8745, "step": 4011 }, { "epoch": 0.8156129294572068, "grad_norm": 0.1424800455570221, "learning_rate": 0.00011851927183972337, "loss": 1.1192, "step": 4012 }, { "epoch": 0.8158162228095142, "grad_norm": 0.12653465569019318, "learning_rate": 0.00011849893216719213, "loss": 1.0847, "step": 4013 }, { "epoch": 0.8160195161618216, "grad_norm": 0.13867273926734924, "learning_rate": 0.00011847859249466085, "loss": 1.104, "step": 4014 }, { "epoch": 0.8162228095141288, "grad_norm": 0.12459316849708557, "learning_rate": 0.00011845825282212956, "loss": 0.9407, "step": 4015 }, { "epoch": 0.8164261028664362, "grad_norm": 0.11136915534734726, "learning_rate": 0.00011843791314959829, "loss": 0.8423, "step": 4016 }, { "epoch": 0.8166293962187436, "grad_norm": 0.13761533796787262, "learning_rate": 0.00011841757347706704, "loss": 1.0824, "step": 4017 }, { "epoch": 0.816832689571051, "grad_norm": 0.12137118726968765, "learning_rate": 0.00011839723380453576, "loss": 0.8622, "step": 4018 }, { "epoch": 0.8170359829233584, "grad_norm": 0.13748018443584442, "learning_rate": 0.00011837689413200447, "loss": 1.0754, "step": 4019 }, { "epoch": 0.8172392762756658, "grad_norm": 0.13673032820224762, "learning_rate": 0.0001183565544594732, "loss": 1.1766, "step": 4020 }, { "epoch": 0.8174425696279731, "grad_norm": 0.13649223744869232, "learning_rate": 0.00011833621478694195, "loss": 1.0473, "step": 4021 }, { "epoch": 0.8176458629802805, "grad_norm": 0.1405959576368332, "learning_rate": 0.00011831587511441067, "loss": 1.1525, "step": 4022 }, { "epoch": 0.8178491563325879, "grad_norm": 0.13079403340816498, "learning_rate": 0.00011829553544187938, "loss": 1.0007, "step": 4023 }, { "epoch": 0.8180524496848953, "grad_norm": 0.1415160447359085, "learning_rate": 0.00011827519576934811, "loss": 1.1784, "step": 4024 }, { "epoch": 0.8182557430372027, "grad_norm": 0.15281791985034943, "learning_rate": 0.00011825485609681686, "loss": 1.2122, "step": 4025 }, { "epoch": 0.81845903638951, "grad_norm": 0.138424813747406, "learning_rate": 0.00011823451642428558, "loss": 1.171, "step": 4026 }, { "epoch": 0.8186623297418174, "grad_norm": 0.13693661987781525, "learning_rate": 0.0001182141767517543, "loss": 1.1098, "step": 4027 }, { "epoch": 0.8188656230941248, "grad_norm": 0.13167519867420197, "learning_rate": 0.00011819383707922302, "loss": 0.9446, "step": 4028 }, { "epoch": 0.8190689164464322, "grad_norm": 0.13005776703357697, "learning_rate": 0.00011817349740669177, "loss": 0.9349, "step": 4029 }, { "epoch": 0.8192722097987396, "grad_norm": 0.1340433955192566, "learning_rate": 0.0001181531577341605, "loss": 1.1293, "step": 4030 }, { "epoch": 0.819475503151047, "grad_norm": 0.13463421165943146, "learning_rate": 0.0001181328180616292, "loss": 0.995, "step": 4031 }, { "epoch": 0.8196787965033543, "grad_norm": 0.13709862530231476, "learning_rate": 0.00011811247838909793, "loss": 1.0882, "step": 4032 }, { "epoch": 0.8198820898556617, "grad_norm": 0.1316375732421875, "learning_rate": 0.00011809213871656666, "loss": 1.1652, "step": 4033 }, { "epoch": 0.8200853832079691, "grad_norm": 0.12954191863536835, "learning_rate": 0.00011807179904403541, "loss": 1.0258, "step": 4034 }, { "epoch": 0.8202886765602765, "grad_norm": 0.15265285968780518, "learning_rate": 0.00011805145937150413, "loss": 1.162, "step": 4035 }, { "epoch": 0.8204919699125839, "grad_norm": 0.14315763115882874, "learning_rate": 0.00011803111969897284, "loss": 1.1478, "step": 4036 }, { "epoch": 0.8206952632648913, "grad_norm": 0.1538948267698288, "learning_rate": 0.00011801078002644157, "loss": 1.2476, "step": 4037 }, { "epoch": 0.8208985566171986, "grad_norm": 0.13834591209888458, "learning_rate": 0.00011799044035391032, "loss": 1.0098, "step": 4038 }, { "epoch": 0.821101849969506, "grad_norm": 0.12419674545526505, "learning_rate": 0.00011797010068137904, "loss": 0.9885, "step": 4039 }, { "epoch": 0.8213051433218134, "grad_norm": 0.12338082492351532, "learning_rate": 0.00011794976100884775, "loss": 0.8725, "step": 4040 }, { "epoch": 0.8215084366741208, "grad_norm": 0.12143974751234055, "learning_rate": 0.00011792942133631648, "loss": 0.8032, "step": 4041 }, { "epoch": 0.8217117300264282, "grad_norm": 0.11844722181558609, "learning_rate": 0.00011790908166378523, "loss": 0.9635, "step": 4042 }, { "epoch": 0.8219150233787356, "grad_norm": 0.12964794039726257, "learning_rate": 0.00011788874199125395, "loss": 0.9951, "step": 4043 }, { "epoch": 0.8221183167310429, "grad_norm": 0.11839304864406586, "learning_rate": 0.00011786840231872267, "loss": 0.9279, "step": 4044 }, { "epoch": 0.8223216100833503, "grad_norm": 0.1318419724702835, "learning_rate": 0.00011784806264619139, "loss": 0.9903, "step": 4045 }, { "epoch": 0.8225249034356577, "grad_norm": 0.12436816096305847, "learning_rate": 0.00011782772297366014, "loss": 0.9732, "step": 4046 }, { "epoch": 0.822728196787965, "grad_norm": 0.14113591611385345, "learning_rate": 0.00011780738330112887, "loss": 1.1141, "step": 4047 }, { "epoch": 0.8229314901402724, "grad_norm": 0.12076539546251297, "learning_rate": 0.00011778704362859758, "loss": 0.9827, "step": 4048 }, { "epoch": 0.8231347834925798, "grad_norm": 0.13398289680480957, "learning_rate": 0.0001177667039560663, "loss": 1.0446, "step": 4049 }, { "epoch": 0.8233380768448871, "grad_norm": 0.14761167764663696, "learning_rate": 0.00011774636428353505, "loss": 1.1347, "step": 4050 }, { "epoch": 0.8235413701971945, "grad_norm": 0.13124649226665497, "learning_rate": 0.00011772602461100378, "loss": 1.1484, "step": 4051 }, { "epoch": 0.8237446635495019, "grad_norm": 0.1205100268125534, "learning_rate": 0.00011770568493847249, "loss": 0.8889, "step": 4052 }, { "epoch": 0.8239479569018093, "grad_norm": 0.14440268278121948, "learning_rate": 0.00011768534526594121, "loss": 1.0888, "step": 4053 }, { "epoch": 0.8241512502541167, "grad_norm": 0.12991003692150116, "learning_rate": 0.00011766500559340996, "loss": 1.1111, "step": 4054 }, { "epoch": 0.824354543606424, "grad_norm": 0.13993045687675476, "learning_rate": 0.00011764466592087869, "loss": 1.1363, "step": 4055 }, { "epoch": 0.8245578369587314, "grad_norm": 0.14084355533123016, "learning_rate": 0.0001176243262483474, "loss": 1.2527, "step": 4056 }, { "epoch": 0.8247611303110388, "grad_norm": 0.13060720264911652, "learning_rate": 0.00011760398657581612, "loss": 0.9782, "step": 4057 }, { "epoch": 0.8249644236633462, "grad_norm": 0.13368849456310272, "learning_rate": 0.00011758364690328488, "loss": 1.0588, "step": 4058 }, { "epoch": 0.8251677170156536, "grad_norm": 0.1443461924791336, "learning_rate": 0.0001175633072307536, "loss": 1.0314, "step": 4059 }, { "epoch": 0.825371010367961, "grad_norm": 0.1611374020576477, "learning_rate": 0.00011754296755822231, "loss": 1.0414, "step": 4060 }, { "epoch": 0.8255743037202683, "grad_norm": 0.13659845292568207, "learning_rate": 0.00011752262788569104, "loss": 0.9511, "step": 4061 }, { "epoch": 0.8257775970725757, "grad_norm": 0.13321594893932343, "learning_rate": 0.00011750228821315979, "loss": 1.1095, "step": 4062 }, { "epoch": 0.8259808904248831, "grad_norm": 0.13120754063129425, "learning_rate": 0.00011748194854062851, "loss": 1.011, "step": 4063 }, { "epoch": 0.8261841837771905, "grad_norm": 0.12870921194553375, "learning_rate": 0.00011746160886809722, "loss": 1.0436, "step": 4064 }, { "epoch": 0.8263874771294979, "grad_norm": 0.14104719460010529, "learning_rate": 0.00011744126919556595, "loss": 1.1661, "step": 4065 }, { "epoch": 0.8265907704818053, "grad_norm": 0.12897245585918427, "learning_rate": 0.0001174209295230347, "loss": 0.9585, "step": 4066 }, { "epoch": 0.8267940638341126, "grad_norm": 0.13888487219810486, "learning_rate": 0.00011740058985050342, "loss": 1.1015, "step": 4067 }, { "epoch": 0.82699735718642, "grad_norm": 0.14110806584358215, "learning_rate": 0.00011738025017797213, "loss": 1.2207, "step": 4068 }, { "epoch": 0.8272006505387274, "grad_norm": 0.14423434436321259, "learning_rate": 0.00011735991050544086, "loss": 1.1922, "step": 4069 }, { "epoch": 0.8274039438910348, "grad_norm": 0.12947557866573334, "learning_rate": 0.00011733957083290961, "loss": 1.0836, "step": 4070 }, { "epoch": 0.8276072372433422, "grad_norm": 0.12978830933570862, "learning_rate": 0.00011731923116037833, "loss": 1.0118, "step": 4071 }, { "epoch": 0.8278105305956496, "grad_norm": 0.1388140469789505, "learning_rate": 0.00011729889148784705, "loss": 1.0495, "step": 4072 }, { "epoch": 0.8280138239479569, "grad_norm": 0.13801120221614838, "learning_rate": 0.00011727855181531577, "loss": 1.2402, "step": 4073 }, { "epoch": 0.8282171173002643, "grad_norm": 0.12440764904022217, "learning_rate": 0.0001172582121427845, "loss": 0.9277, "step": 4074 }, { "epoch": 0.8284204106525717, "grad_norm": 0.14315354824066162, "learning_rate": 0.00011723787247025325, "loss": 1.0844, "step": 4075 }, { "epoch": 0.8286237040048791, "grad_norm": 0.15074683725833893, "learning_rate": 0.00011721753279772196, "loss": 1.2032, "step": 4076 }, { "epoch": 0.8288269973571865, "grad_norm": 0.14017608761787415, "learning_rate": 0.00011719719312519068, "loss": 1.1352, "step": 4077 }, { "epoch": 0.8290302907094937, "grad_norm": 0.1432233303785324, "learning_rate": 0.0001171768534526594, "loss": 1.0653, "step": 4078 }, { "epoch": 0.8292335840618011, "grad_norm": 0.14064320921897888, "learning_rate": 0.00011715651378012816, "loss": 1.0208, "step": 4079 }, { "epoch": 0.8294368774141085, "grad_norm": 0.13602322340011597, "learning_rate": 0.00011713617410759687, "loss": 0.9782, "step": 4080 }, { "epoch": 0.8296401707664159, "grad_norm": 0.14761172235012054, "learning_rate": 0.00011711583443506559, "loss": 1.1826, "step": 4081 }, { "epoch": 0.8298434641187233, "grad_norm": 0.14076586067676544, "learning_rate": 0.00011709549476253432, "loss": 1.0883, "step": 4082 }, { "epoch": 0.8300467574710307, "grad_norm": 0.1385519951581955, "learning_rate": 0.00011707515509000307, "loss": 1.0841, "step": 4083 }, { "epoch": 0.830250050823338, "grad_norm": 0.1392289251089096, "learning_rate": 0.00011705481541747178, "loss": 1.1386, "step": 4084 }, { "epoch": 0.8304533441756454, "grad_norm": 0.14094628393650055, "learning_rate": 0.0001170344757449405, "loss": 0.983, "step": 4085 }, { "epoch": 0.8306566375279528, "grad_norm": 0.13715529441833496, "learning_rate": 0.00011701413607240923, "loss": 1.089, "step": 4086 }, { "epoch": 0.8308599308802602, "grad_norm": 0.13627447187900543, "learning_rate": 0.00011699379639987798, "loss": 1.2017, "step": 4087 }, { "epoch": 0.8310632242325676, "grad_norm": 0.12237659841775894, "learning_rate": 0.00011697345672734669, "loss": 0.9606, "step": 4088 }, { "epoch": 0.831266517584875, "grad_norm": 0.12152927368879318, "learning_rate": 0.00011695311705481542, "loss": 1.0262, "step": 4089 }, { "epoch": 0.8314698109371823, "grad_norm": 0.13828657567501068, "learning_rate": 0.00011693277738228414, "loss": 1.0506, "step": 4090 }, { "epoch": 0.8316731042894897, "grad_norm": 0.13322405517101288, "learning_rate": 0.00011691243770975289, "loss": 0.9554, "step": 4091 }, { "epoch": 0.8318763976417971, "grad_norm": 0.1526733636856079, "learning_rate": 0.00011689209803722162, "loss": 1.2395, "step": 4092 }, { "epoch": 0.8320796909941045, "grad_norm": 0.12804892659187317, "learning_rate": 0.00011687175836469033, "loss": 1.0469, "step": 4093 }, { "epoch": 0.8322829843464119, "grad_norm": 0.13679049909114838, "learning_rate": 0.00011685141869215905, "loss": 1.069, "step": 4094 }, { "epoch": 0.8324862776987193, "grad_norm": 0.15435020625591278, "learning_rate": 0.0001168310790196278, "loss": 1.2415, "step": 4095 }, { "epoch": 0.8326895710510266, "grad_norm": 0.13897407054901123, "learning_rate": 0.00011681073934709653, "loss": 1.1871, "step": 4096 }, { "epoch": 0.832892864403334, "grad_norm": 0.12856152653694153, "learning_rate": 0.00011679039967456524, "loss": 1.0499, "step": 4097 }, { "epoch": 0.8330961577556414, "grad_norm": 0.12705758213996887, "learning_rate": 0.00011677006000203396, "loss": 1.0097, "step": 4098 }, { "epoch": 0.8332994511079488, "grad_norm": 0.1295822411775589, "learning_rate": 0.00011674972032950271, "loss": 0.8506, "step": 4099 }, { "epoch": 0.8335027444602562, "grad_norm": 0.13831810653209686, "learning_rate": 0.00011672938065697144, "loss": 1.1784, "step": 4100 }, { "epoch": 0.8337060378125636, "grad_norm": 0.13451896607875824, "learning_rate": 0.00011670904098444015, "loss": 1.1079, "step": 4101 }, { "epoch": 0.8339093311648709, "grad_norm": 0.12430407106876373, "learning_rate": 0.00011668870131190887, "loss": 0.8755, "step": 4102 }, { "epoch": 0.8341126245171783, "grad_norm": 0.13857564330101013, "learning_rate": 0.00011666836163937763, "loss": 1.0993, "step": 4103 }, { "epoch": 0.8343159178694857, "grad_norm": 0.14329898357391357, "learning_rate": 0.00011664802196684635, "loss": 1.2359, "step": 4104 }, { "epoch": 0.8345192112217931, "grad_norm": 0.14642906188964844, "learning_rate": 0.00011662768229431506, "loss": 1.1454, "step": 4105 }, { "epoch": 0.8347225045741005, "grad_norm": 0.1252523809671402, "learning_rate": 0.00011660734262178379, "loss": 1.0017, "step": 4106 }, { "epoch": 0.8349257979264078, "grad_norm": 0.12852495908737183, "learning_rate": 0.00011658700294925254, "loss": 1.0273, "step": 4107 }, { "epoch": 0.8351290912787152, "grad_norm": 0.12575671076774597, "learning_rate": 0.00011656666327672126, "loss": 0.9608, "step": 4108 }, { "epoch": 0.8353323846310226, "grad_norm": 0.15008383989334106, "learning_rate": 0.00011654632360418997, "loss": 1.1065, "step": 4109 }, { "epoch": 0.83553567798333, "grad_norm": 0.14088520407676697, "learning_rate": 0.0001165259839316587, "loss": 1.1224, "step": 4110 }, { "epoch": 0.8357389713356373, "grad_norm": 0.12474369257688522, "learning_rate": 0.00011650564425912745, "loss": 0.9834, "step": 4111 }, { "epoch": 0.8359422646879447, "grad_norm": 0.1329812854528427, "learning_rate": 0.00011648530458659617, "loss": 1.0876, "step": 4112 }, { "epoch": 0.836145558040252, "grad_norm": 0.12517108023166656, "learning_rate": 0.00011646496491406488, "loss": 0.9772, "step": 4113 }, { "epoch": 0.8363488513925594, "grad_norm": 0.13506385684013367, "learning_rate": 0.00011644462524153361, "loss": 1.0004, "step": 4114 }, { "epoch": 0.8365521447448668, "grad_norm": 0.13172465562820435, "learning_rate": 0.00011642428556900233, "loss": 1.0687, "step": 4115 }, { "epoch": 0.8367554380971742, "grad_norm": 0.13177163898944855, "learning_rate": 0.00011640394589647108, "loss": 1.0225, "step": 4116 }, { "epoch": 0.8369587314494816, "grad_norm": 0.1380792260169983, "learning_rate": 0.0001163836062239398, "loss": 1.0243, "step": 4117 }, { "epoch": 0.837162024801789, "grad_norm": 0.13012027740478516, "learning_rate": 0.00011636326655140852, "loss": 0.9618, "step": 4118 }, { "epoch": 0.8373653181540963, "grad_norm": 0.13312657177448273, "learning_rate": 0.00011634292687887724, "loss": 0.9299, "step": 4119 }, { "epoch": 0.8375686115064037, "grad_norm": 0.1449914276599884, "learning_rate": 0.000116322587206346, "loss": 1.1712, "step": 4120 }, { "epoch": 0.8377719048587111, "grad_norm": 0.14391463994979858, "learning_rate": 0.0001163022475338147, "loss": 1.2165, "step": 4121 }, { "epoch": 0.8379751982110185, "grad_norm": 0.14427267014980316, "learning_rate": 0.00011628190786128343, "loss": 1.1486, "step": 4122 }, { "epoch": 0.8381784915633259, "grad_norm": 0.15920564532279968, "learning_rate": 0.00011626156818875216, "loss": 1.3503, "step": 4123 }, { "epoch": 0.8383817849156333, "grad_norm": 0.14215265214443207, "learning_rate": 0.00011624122851622091, "loss": 1.0607, "step": 4124 }, { "epoch": 0.8385850782679406, "grad_norm": 0.12936022877693176, "learning_rate": 0.00011622088884368962, "loss": 0.9739, "step": 4125 }, { "epoch": 0.838788371620248, "grad_norm": 0.13270482420921326, "learning_rate": 0.00011620054917115834, "loss": 1.006, "step": 4126 }, { "epoch": 0.8389916649725554, "grad_norm": 0.13230706751346588, "learning_rate": 0.00011618020949862707, "loss": 1.0016, "step": 4127 }, { "epoch": 0.8391949583248628, "grad_norm": 0.1272687166929245, "learning_rate": 0.00011615986982609582, "loss": 0.8975, "step": 4128 }, { "epoch": 0.8393982516771702, "grad_norm": 0.13361401855945587, "learning_rate": 0.00011613953015356453, "loss": 0.9294, "step": 4129 }, { "epoch": 0.8396015450294775, "grad_norm": 0.12465297430753708, "learning_rate": 0.00011611919048103325, "loss": 0.8893, "step": 4130 }, { "epoch": 0.8398048383817849, "grad_norm": 0.1458294540643692, "learning_rate": 0.00011609885080850198, "loss": 1.1928, "step": 4131 }, { "epoch": 0.8400081317340923, "grad_norm": 0.1325213760137558, "learning_rate": 0.00011607851113597073, "loss": 0.9254, "step": 4132 }, { "epoch": 0.8402114250863997, "grad_norm": 0.1297135353088379, "learning_rate": 0.00011605817146343944, "loss": 0.8737, "step": 4133 }, { "epoch": 0.8404147184387071, "grad_norm": 0.1350976824760437, "learning_rate": 0.00011603783179090817, "loss": 1.1101, "step": 4134 }, { "epoch": 0.8406180117910145, "grad_norm": 0.13058003783226013, "learning_rate": 0.00011601749211837689, "loss": 1.0, "step": 4135 }, { "epoch": 0.8408213051433218, "grad_norm": 0.13314960896968842, "learning_rate": 0.00011599715244584564, "loss": 0.9872, "step": 4136 }, { "epoch": 0.8410245984956292, "grad_norm": 0.12905332446098328, "learning_rate": 0.00011597681277331435, "loss": 1.0097, "step": 4137 }, { "epoch": 0.8412278918479366, "grad_norm": 0.12162060290575027, "learning_rate": 0.00011595647310078308, "loss": 0.8788, "step": 4138 }, { "epoch": 0.841431185200244, "grad_norm": 0.12525275349617004, "learning_rate": 0.0001159361334282518, "loss": 0.8742, "step": 4139 }, { "epoch": 0.8416344785525514, "grad_norm": 0.13911886513233185, "learning_rate": 0.00011591579375572055, "loss": 1.3075, "step": 4140 }, { "epoch": 0.8418377719048588, "grad_norm": 0.13579173386096954, "learning_rate": 0.00011589545408318926, "loss": 1.0646, "step": 4141 }, { "epoch": 0.842041065257166, "grad_norm": 0.14429797232151031, "learning_rate": 0.00011587511441065799, "loss": 1.1266, "step": 4142 }, { "epoch": 0.8422443586094734, "grad_norm": 0.11546068638563156, "learning_rate": 0.00011585477473812671, "loss": 0.8928, "step": 4143 }, { "epoch": 0.8424476519617808, "grad_norm": 0.14215877652168274, "learning_rate": 0.00011583443506559546, "loss": 1.1816, "step": 4144 }, { "epoch": 0.8426509453140882, "grad_norm": 0.12982290983200073, "learning_rate": 0.00011581409539306417, "loss": 0.9241, "step": 4145 }, { "epoch": 0.8428542386663956, "grad_norm": 0.13759194314479828, "learning_rate": 0.0001157937557205329, "loss": 1.0254, "step": 4146 }, { "epoch": 0.843057532018703, "grad_norm": 0.13671040534973145, "learning_rate": 0.00011577341604800162, "loss": 1.1823, "step": 4147 }, { "epoch": 0.8432608253710103, "grad_norm": 0.11964955925941467, "learning_rate": 0.00011575307637547038, "loss": 0.99, "step": 4148 }, { "epoch": 0.8434641187233177, "grad_norm": 0.1388668566942215, "learning_rate": 0.0001157327367029391, "loss": 1.0667, "step": 4149 }, { "epoch": 0.8436674120756251, "grad_norm": 0.12363268435001373, "learning_rate": 0.00011571239703040781, "loss": 1.0156, "step": 4150 }, { "epoch": 0.8438707054279325, "grad_norm": 0.14275164902210236, "learning_rate": 0.00011569205735787654, "loss": 1.2653, "step": 4151 }, { "epoch": 0.8440739987802399, "grad_norm": 0.13494303822517395, "learning_rate": 0.00011567171768534529, "loss": 1.148, "step": 4152 }, { "epoch": 0.8442772921325473, "grad_norm": 0.12230674922466278, "learning_rate": 0.00011565137801281401, "loss": 0.9444, "step": 4153 }, { "epoch": 0.8444805854848546, "grad_norm": 0.12223172187805176, "learning_rate": 0.00011563103834028272, "loss": 1.0189, "step": 4154 }, { "epoch": 0.844683878837162, "grad_norm": 0.12231465429067612, "learning_rate": 0.00011561069866775145, "loss": 0.9234, "step": 4155 }, { "epoch": 0.8448871721894694, "grad_norm": 0.13856825232505798, "learning_rate": 0.00011559035899522017, "loss": 1.0455, "step": 4156 }, { "epoch": 0.8450904655417768, "grad_norm": 0.12258224934339523, "learning_rate": 0.00011557001932268892, "loss": 0.8723, "step": 4157 }, { "epoch": 0.8452937588940842, "grad_norm": 0.13906900584697723, "learning_rate": 0.00011554967965015763, "loss": 1.0314, "step": 4158 }, { "epoch": 0.8454970522463915, "grad_norm": 0.13625988364219666, "learning_rate": 0.00011552933997762636, "loss": 1.0259, "step": 4159 }, { "epoch": 0.8457003455986989, "grad_norm": 0.13316601514816284, "learning_rate": 0.00011550900030509508, "loss": 0.9816, "step": 4160 }, { "epoch": 0.8459036389510063, "grad_norm": 0.1430322825908661, "learning_rate": 0.00011548866063256383, "loss": 1.0748, "step": 4161 }, { "epoch": 0.8461069323033137, "grad_norm": 0.14025886356830597, "learning_rate": 0.00011546832096003254, "loss": 1.0884, "step": 4162 }, { "epoch": 0.8463102256556211, "grad_norm": 0.12264370173215866, "learning_rate": 0.00011544798128750127, "loss": 0.844, "step": 4163 }, { "epoch": 0.8465135190079285, "grad_norm": 0.14598575234413147, "learning_rate": 0.00011542764161497, "loss": 1.1999, "step": 4164 }, { "epoch": 0.8467168123602358, "grad_norm": 0.139155313372612, "learning_rate": 0.00011540730194243875, "loss": 1.0555, "step": 4165 }, { "epoch": 0.8469201057125432, "grad_norm": 0.13013023138046265, "learning_rate": 0.00011538696226990746, "loss": 1.0511, "step": 4166 }, { "epoch": 0.8471233990648506, "grad_norm": 0.13424082100391388, "learning_rate": 0.00011536662259737618, "loss": 1.0551, "step": 4167 }, { "epoch": 0.847326692417158, "grad_norm": 0.14205624163150787, "learning_rate": 0.0001153462829248449, "loss": 1.1582, "step": 4168 }, { "epoch": 0.8475299857694654, "grad_norm": 0.13570645451545715, "learning_rate": 0.00011532594325231366, "loss": 1.0911, "step": 4169 }, { "epoch": 0.8477332791217728, "grad_norm": 0.1343654841184616, "learning_rate": 0.00011530560357978237, "loss": 1.0519, "step": 4170 }, { "epoch": 0.84793657247408, "grad_norm": 0.12332738190889359, "learning_rate": 0.00011528526390725109, "loss": 0.9967, "step": 4171 }, { "epoch": 0.8481398658263875, "grad_norm": 0.13061444461345673, "learning_rate": 0.00011526492423471982, "loss": 0.998, "step": 4172 }, { "epoch": 0.8483431591786949, "grad_norm": 0.11777007579803467, "learning_rate": 0.00011524458456218857, "loss": 0.8942, "step": 4173 }, { "epoch": 0.8485464525310022, "grad_norm": 0.13091976940631866, "learning_rate": 0.00011522424488965728, "loss": 1.0612, "step": 4174 }, { "epoch": 0.8487497458833096, "grad_norm": 0.13466595113277435, "learning_rate": 0.000115203905217126, "loss": 1.0211, "step": 4175 }, { "epoch": 0.848953039235617, "grad_norm": 0.12775756418704987, "learning_rate": 0.00011518356554459473, "loss": 0.9925, "step": 4176 }, { "epoch": 0.8491563325879243, "grad_norm": 0.144356831908226, "learning_rate": 0.00011516322587206348, "loss": 1.1657, "step": 4177 }, { "epoch": 0.8493596259402317, "grad_norm": 0.1248125433921814, "learning_rate": 0.00011514288619953219, "loss": 0.9492, "step": 4178 }, { "epoch": 0.8495629192925391, "grad_norm": 0.13141238689422607, "learning_rate": 0.00011512254652700091, "loss": 0.9265, "step": 4179 }, { "epoch": 0.8497662126448465, "grad_norm": 0.13851980865001678, "learning_rate": 0.00011510220685446964, "loss": 1.1475, "step": 4180 }, { "epoch": 0.8499695059971539, "grad_norm": 0.13344109058380127, "learning_rate": 0.00011508186718193839, "loss": 1.0393, "step": 4181 }, { "epoch": 0.8501727993494613, "grad_norm": 0.15251140296459198, "learning_rate": 0.0001150615275094071, "loss": 1.1769, "step": 4182 }, { "epoch": 0.8503760927017686, "grad_norm": 0.1376708298921585, "learning_rate": 0.00011504118783687583, "loss": 1.1571, "step": 4183 }, { "epoch": 0.850579386054076, "grad_norm": 0.1312796026468277, "learning_rate": 0.00011502084816434455, "loss": 1.0176, "step": 4184 }, { "epoch": 0.8507826794063834, "grad_norm": 0.13133344054222107, "learning_rate": 0.0001150005084918133, "loss": 1.1126, "step": 4185 }, { "epoch": 0.8509859727586908, "grad_norm": 0.1404520869255066, "learning_rate": 0.00011498016881928201, "loss": 1.0467, "step": 4186 }, { "epoch": 0.8511892661109982, "grad_norm": 0.13041868805885315, "learning_rate": 0.00011495982914675074, "loss": 0.9282, "step": 4187 }, { "epoch": 0.8513925594633055, "grad_norm": 0.1341453641653061, "learning_rate": 0.00011493948947421946, "loss": 0.9568, "step": 4188 }, { "epoch": 0.8515958528156129, "grad_norm": 0.13047213852405548, "learning_rate": 0.00011491914980168821, "loss": 0.9724, "step": 4189 }, { "epoch": 0.8517991461679203, "grad_norm": 0.12841585278511047, "learning_rate": 0.00011489881012915692, "loss": 0.9021, "step": 4190 }, { "epoch": 0.8520024395202277, "grad_norm": 0.1475822478532791, "learning_rate": 0.00011487847045662565, "loss": 1.1875, "step": 4191 }, { "epoch": 0.8522057328725351, "grad_norm": 0.12445596605539322, "learning_rate": 0.00011485813078409437, "loss": 1.0242, "step": 4192 }, { "epoch": 0.8524090262248425, "grad_norm": 0.12460153549909592, "learning_rate": 0.00011483779111156313, "loss": 1.048, "step": 4193 }, { "epoch": 0.8526123195771498, "grad_norm": 0.1400919258594513, "learning_rate": 0.00011481745143903184, "loss": 1.0866, "step": 4194 }, { "epoch": 0.8528156129294572, "grad_norm": 0.13624945282936096, "learning_rate": 0.00011479711176650056, "loss": 1.0692, "step": 4195 }, { "epoch": 0.8530189062817646, "grad_norm": 0.13561497628688812, "learning_rate": 0.00011477677209396928, "loss": 1.1576, "step": 4196 }, { "epoch": 0.853222199634072, "grad_norm": 0.13115760684013367, "learning_rate": 0.00011475643242143801, "loss": 1.0431, "step": 4197 }, { "epoch": 0.8534254929863794, "grad_norm": 0.14327464997768402, "learning_rate": 0.00011473609274890675, "loss": 1.1245, "step": 4198 }, { "epoch": 0.8536287863386868, "grad_norm": 0.14745375514030457, "learning_rate": 0.00011471575307637547, "loss": 1.1246, "step": 4199 }, { "epoch": 0.8538320796909941, "grad_norm": 0.13001082837581635, "learning_rate": 0.0001146954134038442, "loss": 0.979, "step": 4200 }, { "epoch": 0.8540353730433015, "grad_norm": 0.14172659814357758, "learning_rate": 0.00011467507373131292, "loss": 1.0395, "step": 4201 }, { "epoch": 0.8542386663956089, "grad_norm": 0.12964913249015808, "learning_rate": 0.00011465473405878166, "loss": 0.9479, "step": 4202 }, { "epoch": 0.8544419597479163, "grad_norm": 0.13716383278369904, "learning_rate": 0.00011463439438625038, "loss": 1.1041, "step": 4203 }, { "epoch": 0.8546452531002237, "grad_norm": 0.1309032440185547, "learning_rate": 0.00011461405471371911, "loss": 1.0235, "step": 4204 }, { "epoch": 0.854848546452531, "grad_norm": 0.13954511284828186, "learning_rate": 0.00011459371504118783, "loss": 1.077, "step": 4205 }, { "epoch": 0.8550518398048383, "grad_norm": 0.12911508977413177, "learning_rate": 0.00011457337536865657, "loss": 1.0156, "step": 4206 }, { "epoch": 0.8552551331571457, "grad_norm": 0.13105571269989014, "learning_rate": 0.0001145530356961253, "loss": 0.9734, "step": 4207 }, { "epoch": 0.8554584265094531, "grad_norm": 0.1499045491218567, "learning_rate": 0.00011453269602359402, "loss": 1.0985, "step": 4208 }, { "epoch": 0.8556617198617605, "grad_norm": 0.1255357265472412, "learning_rate": 0.00011451235635106274, "loss": 1.0147, "step": 4209 }, { "epoch": 0.8558650132140679, "grad_norm": 0.1335058957338333, "learning_rate": 0.0001144920166785315, "loss": 1.0306, "step": 4210 }, { "epoch": 0.8560683065663752, "grad_norm": 0.12359452992677689, "learning_rate": 0.0001144716770060002, "loss": 0.8835, "step": 4211 }, { "epoch": 0.8562715999186826, "grad_norm": 0.14083559811115265, "learning_rate": 0.00011445133733346893, "loss": 0.9393, "step": 4212 }, { "epoch": 0.85647489327099, "grad_norm": 0.13426551222801208, "learning_rate": 0.00011443099766093765, "loss": 0.8895, "step": 4213 }, { "epoch": 0.8566781866232974, "grad_norm": 0.13291719555854797, "learning_rate": 0.0001144106579884064, "loss": 1.0563, "step": 4214 }, { "epoch": 0.8568814799756048, "grad_norm": 0.14882031083106995, "learning_rate": 0.00011439031831587512, "loss": 1.1625, "step": 4215 }, { "epoch": 0.8570847733279122, "grad_norm": 0.13581587374210358, "learning_rate": 0.00011436997864334384, "loss": 1.065, "step": 4216 }, { "epoch": 0.8572880666802195, "grad_norm": 0.1269901543855667, "learning_rate": 0.00011434963897081257, "loss": 1.045, "step": 4217 }, { "epoch": 0.8574913600325269, "grad_norm": 0.13057155907154083, "learning_rate": 0.00011432929929828132, "loss": 0.8866, "step": 4218 }, { "epoch": 0.8576946533848343, "grad_norm": 0.1503707617521286, "learning_rate": 0.00011430895962575003, "loss": 1.1888, "step": 4219 }, { "epoch": 0.8578979467371417, "grad_norm": 0.1276797503232956, "learning_rate": 0.00011428861995321875, "loss": 1.0497, "step": 4220 }, { "epoch": 0.8581012400894491, "grad_norm": 0.15582577884197235, "learning_rate": 0.00011426828028068748, "loss": 1.2019, "step": 4221 }, { "epoch": 0.8583045334417565, "grad_norm": 0.1253650039434433, "learning_rate": 0.00011424794060815623, "loss": 0.9089, "step": 4222 }, { "epoch": 0.8585078267940638, "grad_norm": 0.13212646543979645, "learning_rate": 0.00011422760093562494, "loss": 0.8912, "step": 4223 }, { "epoch": 0.8587111201463712, "grad_norm": 0.13876405358314514, "learning_rate": 0.00011420726126309366, "loss": 1.0144, "step": 4224 }, { "epoch": 0.8589144134986786, "grad_norm": 0.15017178654670715, "learning_rate": 0.00011418692159056239, "loss": 1.2437, "step": 4225 }, { "epoch": 0.859117706850986, "grad_norm": 0.1450318694114685, "learning_rate": 0.00011416658191803114, "loss": 1.1114, "step": 4226 }, { "epoch": 0.8593210002032934, "grad_norm": 0.14307589828968048, "learning_rate": 0.00011414624224549985, "loss": 1.1785, "step": 4227 }, { "epoch": 0.8595242935556008, "grad_norm": 0.13084810972213745, "learning_rate": 0.00011412590257296858, "loss": 1.0871, "step": 4228 }, { "epoch": 0.8597275869079081, "grad_norm": 0.12914970517158508, "learning_rate": 0.0001141055629004373, "loss": 1.0432, "step": 4229 }, { "epoch": 0.8599308802602155, "grad_norm": 0.14787475764751434, "learning_rate": 0.00011408522322790605, "loss": 1.0795, "step": 4230 }, { "epoch": 0.8601341736125229, "grad_norm": 0.1235564798116684, "learning_rate": 0.00011406488355537476, "loss": 0.946, "step": 4231 }, { "epoch": 0.8603374669648303, "grad_norm": 0.11737848818302155, "learning_rate": 0.00011404454388284349, "loss": 0.9549, "step": 4232 }, { "epoch": 0.8605407603171377, "grad_norm": 0.12434041500091553, "learning_rate": 0.00011402420421031221, "loss": 1.0672, "step": 4233 }, { "epoch": 0.8607440536694451, "grad_norm": 0.14527469873428345, "learning_rate": 0.00011400386453778096, "loss": 1.1508, "step": 4234 }, { "epoch": 0.8609473470217524, "grad_norm": 0.14363646507263184, "learning_rate": 0.00011398352486524967, "loss": 1.1063, "step": 4235 }, { "epoch": 0.8611506403740598, "grad_norm": 0.14891605079174042, "learning_rate": 0.0001139631851927184, "loss": 1.1027, "step": 4236 }, { "epoch": 0.8613539337263671, "grad_norm": 0.13269458711147308, "learning_rate": 0.00011394284552018712, "loss": 0.9506, "step": 4237 }, { "epoch": 0.8615572270786745, "grad_norm": 0.13667765259742737, "learning_rate": 0.00011392250584765585, "loss": 1.0362, "step": 4238 }, { "epoch": 0.861760520430982, "grad_norm": 0.1343078464269638, "learning_rate": 0.00011390216617512459, "loss": 1.0644, "step": 4239 }, { "epoch": 0.8619638137832892, "grad_norm": 0.12829913198947906, "learning_rate": 0.00011388182650259331, "loss": 0.9775, "step": 4240 }, { "epoch": 0.8621671071355966, "grad_norm": 0.14578650891780853, "learning_rate": 0.00011386148683006203, "loss": 1.1502, "step": 4241 }, { "epoch": 0.862370400487904, "grad_norm": 0.12001452594995499, "learning_rate": 0.00011384114715753076, "loss": 0.9496, "step": 4242 }, { "epoch": 0.8625736938402114, "grad_norm": 0.14932505786418915, "learning_rate": 0.0001138208074849995, "loss": 1.0731, "step": 4243 }, { "epoch": 0.8627769871925188, "grad_norm": 0.12832188606262207, "learning_rate": 0.00011380046781246822, "loss": 0.9428, "step": 4244 }, { "epoch": 0.8629802805448262, "grad_norm": 0.15174297988414764, "learning_rate": 0.00011378012813993695, "loss": 1.259, "step": 4245 }, { "epoch": 0.8631835738971335, "grad_norm": 0.1397685408592224, "learning_rate": 0.00011375978846740567, "loss": 1.204, "step": 4246 }, { "epoch": 0.8633868672494409, "grad_norm": 0.1386864334344864, "learning_rate": 0.00011373944879487441, "loss": 1.2689, "step": 4247 }, { "epoch": 0.8635901606017483, "grad_norm": 0.13151347637176514, "learning_rate": 0.00011371910912234313, "loss": 0.9938, "step": 4248 }, { "epoch": 0.8637934539540557, "grad_norm": 0.15212032198905945, "learning_rate": 0.00011369876944981186, "loss": 1.0839, "step": 4249 }, { "epoch": 0.8639967473063631, "grad_norm": 0.1537848860025406, "learning_rate": 0.00011367842977728058, "loss": 1.1586, "step": 4250 }, { "epoch": 0.8642000406586705, "grad_norm": 0.12098225951194763, "learning_rate": 0.00011365809010474932, "loss": 0.9585, "step": 4251 }, { "epoch": 0.8644033340109778, "grad_norm": 0.1198868602514267, "learning_rate": 0.00011363775043221804, "loss": 0.9055, "step": 4252 }, { "epoch": 0.8646066273632852, "grad_norm": 0.13080951571464539, "learning_rate": 0.00011361741075968677, "loss": 1.0171, "step": 4253 }, { "epoch": 0.8648099207155926, "grad_norm": 0.13523563742637634, "learning_rate": 0.0001135970710871555, "loss": 0.9592, "step": 4254 }, { "epoch": 0.8650132140679, "grad_norm": 0.12738974392414093, "learning_rate": 0.00011357673141462423, "loss": 0.9856, "step": 4255 }, { "epoch": 0.8652165074202074, "grad_norm": 0.13558736443519592, "learning_rate": 0.00011355639174209296, "loss": 1.0518, "step": 4256 }, { "epoch": 0.8654198007725148, "grad_norm": 0.13021017611026764, "learning_rate": 0.00011353605206956168, "loss": 0.9969, "step": 4257 }, { "epoch": 0.8656230941248221, "grad_norm": 0.13365107774734497, "learning_rate": 0.0001135157123970304, "loss": 1.0569, "step": 4258 }, { "epoch": 0.8658263874771295, "grad_norm": 0.14550118148326874, "learning_rate": 0.00011349537272449914, "loss": 1.0975, "step": 4259 }, { "epoch": 0.8660296808294369, "grad_norm": 0.12725263833999634, "learning_rate": 0.00011347503305196787, "loss": 0.9558, "step": 4260 }, { "epoch": 0.8662329741817443, "grad_norm": 0.13404077291488647, "learning_rate": 0.00011345469337943659, "loss": 1.1282, "step": 4261 }, { "epoch": 0.8664362675340517, "grad_norm": 0.12755300104618073, "learning_rate": 0.00011343435370690532, "loss": 1.0461, "step": 4262 }, { "epoch": 0.866639560886359, "grad_norm": 0.13699626922607422, "learning_rate": 0.00011341401403437405, "loss": 1.0606, "step": 4263 }, { "epoch": 0.8668428542386664, "grad_norm": 0.13077600300312042, "learning_rate": 0.00011339367436184278, "loss": 1.0505, "step": 4264 }, { "epoch": 0.8670461475909738, "grad_norm": 0.13652461767196655, "learning_rate": 0.0001133733346893115, "loss": 0.9115, "step": 4265 }, { "epoch": 0.8672494409432812, "grad_norm": 0.1255892813205719, "learning_rate": 0.00011335299501678023, "loss": 0.9152, "step": 4266 }, { "epoch": 0.8674527342955886, "grad_norm": 0.13048523664474487, "learning_rate": 0.00011333265534424898, "loss": 1.0926, "step": 4267 }, { "epoch": 0.867656027647896, "grad_norm": 0.13757598400115967, "learning_rate": 0.00011331231567171769, "loss": 0.9713, "step": 4268 }, { "epoch": 0.8678593210002032, "grad_norm": 0.12873396277427673, "learning_rate": 0.00011329197599918641, "loss": 1.1876, "step": 4269 }, { "epoch": 0.8680626143525106, "grad_norm": 0.12738154828548431, "learning_rate": 0.00011327163632665514, "loss": 1.0222, "step": 4270 }, { "epoch": 0.868265907704818, "grad_norm": 0.11265822499990463, "learning_rate": 0.00011325129665412389, "loss": 0.8971, "step": 4271 }, { "epoch": 0.8684692010571254, "grad_norm": 0.1351097822189331, "learning_rate": 0.0001132309569815926, "loss": 1.0784, "step": 4272 }, { "epoch": 0.8686724944094328, "grad_norm": 0.1455054134130478, "learning_rate": 0.00011321061730906133, "loss": 1.1519, "step": 4273 }, { "epoch": 0.8688757877617402, "grad_norm": 0.13560084998607635, "learning_rate": 0.00011319027763653005, "loss": 1.1245, "step": 4274 }, { "epoch": 0.8690790811140475, "grad_norm": 0.13816951215267181, "learning_rate": 0.0001131699379639988, "loss": 1.1679, "step": 4275 }, { "epoch": 0.8692823744663549, "grad_norm": 0.13416263461112976, "learning_rate": 0.00011314959829146751, "loss": 1.0392, "step": 4276 }, { "epoch": 0.8694856678186623, "grad_norm": 0.1267019808292389, "learning_rate": 0.00011312925861893624, "loss": 0.9492, "step": 4277 }, { "epoch": 0.8696889611709697, "grad_norm": 0.14063285291194916, "learning_rate": 0.00011310891894640496, "loss": 1.1432, "step": 4278 }, { "epoch": 0.8698922545232771, "grad_norm": 0.149309441447258, "learning_rate": 0.00011308857927387371, "loss": 1.2309, "step": 4279 }, { "epoch": 0.8700955478755845, "grad_norm": 0.1392187476158142, "learning_rate": 0.00011306823960134242, "loss": 1.0393, "step": 4280 }, { "epoch": 0.8702988412278918, "grad_norm": 0.12659290432929993, "learning_rate": 0.00011304789992881115, "loss": 0.8555, "step": 4281 }, { "epoch": 0.8705021345801992, "grad_norm": 0.11759068816900253, "learning_rate": 0.00011302756025627987, "loss": 0.938, "step": 4282 }, { "epoch": 0.8707054279325066, "grad_norm": 0.13261142373085022, "learning_rate": 0.0001130072205837486, "loss": 1.0559, "step": 4283 }, { "epoch": 0.870908721284814, "grad_norm": 0.12003304809331894, "learning_rate": 0.00011298688091121734, "loss": 0.9442, "step": 4284 }, { "epoch": 0.8711120146371214, "grad_norm": 0.13861103355884552, "learning_rate": 0.00011296654123868606, "loss": 1.0972, "step": 4285 }, { "epoch": 0.8713153079894288, "grad_norm": 0.12716351449489594, "learning_rate": 0.00011294620156615478, "loss": 0.9511, "step": 4286 }, { "epoch": 0.8715186013417361, "grad_norm": 0.1347339004278183, "learning_rate": 0.00011292586189362351, "loss": 0.9783, "step": 4287 }, { "epoch": 0.8717218946940435, "grad_norm": 0.14212962985038757, "learning_rate": 0.00011290552222109225, "loss": 0.9568, "step": 4288 }, { "epoch": 0.8719251880463509, "grad_norm": 0.11800102889537811, "learning_rate": 0.00011288518254856097, "loss": 0.9524, "step": 4289 }, { "epoch": 0.8721284813986583, "grad_norm": 0.1591940075159073, "learning_rate": 0.0001128648428760297, "loss": 1.2547, "step": 4290 }, { "epoch": 0.8723317747509657, "grad_norm": 0.12767143547534943, "learning_rate": 0.00011284450320349842, "loss": 1.0574, "step": 4291 }, { "epoch": 0.872535068103273, "grad_norm": 0.1308542639017105, "learning_rate": 0.00011282416353096716, "loss": 0.9813, "step": 4292 }, { "epoch": 0.8727383614555804, "grad_norm": 0.15340617299079895, "learning_rate": 0.00011280382385843588, "loss": 1.2684, "step": 4293 }, { "epoch": 0.8729416548078878, "grad_norm": 0.14063572883605957, "learning_rate": 0.00011278348418590461, "loss": 1.0218, "step": 4294 }, { "epoch": 0.8731449481601952, "grad_norm": 0.12035755813121796, "learning_rate": 0.00011276314451337333, "loss": 1.0237, "step": 4295 }, { "epoch": 0.8733482415125026, "grad_norm": 0.1521058976650238, "learning_rate": 0.00011274280484084207, "loss": 1.1983, "step": 4296 }, { "epoch": 0.87355153486481, "grad_norm": 0.1308029145002365, "learning_rate": 0.0001127224651683108, "loss": 0.9343, "step": 4297 }, { "epoch": 0.8737548282171173, "grad_norm": 0.13655021786689758, "learning_rate": 0.00011270212549577952, "loss": 1.1253, "step": 4298 }, { "epoch": 0.8739581215694247, "grad_norm": 0.13754834234714508, "learning_rate": 0.00011268178582324824, "loss": 1.1018, "step": 4299 }, { "epoch": 0.874161414921732, "grad_norm": 0.14539092779159546, "learning_rate": 0.00011266144615071698, "loss": 1.1647, "step": 4300 }, { "epoch": 0.8743647082740394, "grad_norm": 0.1390954852104187, "learning_rate": 0.0001126411064781857, "loss": 1.0717, "step": 4301 }, { "epoch": 0.8745680016263468, "grad_norm": 0.13942857086658478, "learning_rate": 0.00011262076680565443, "loss": 1.0457, "step": 4302 }, { "epoch": 0.8747712949786542, "grad_norm": 0.13453049957752228, "learning_rate": 0.00011260042713312315, "loss": 1.1522, "step": 4303 }, { "epoch": 0.8749745883309615, "grad_norm": 0.13158947229385376, "learning_rate": 0.00011258008746059189, "loss": 0.969, "step": 4304 }, { "epoch": 0.8751778816832689, "grad_norm": 0.1394949108362198, "learning_rate": 0.00011255974778806062, "loss": 0.9924, "step": 4305 }, { "epoch": 0.8753811750355763, "grad_norm": 0.14436380565166473, "learning_rate": 0.00011253940811552934, "loss": 1.0416, "step": 4306 }, { "epoch": 0.8755844683878837, "grad_norm": 0.12444054335355759, "learning_rate": 0.00011251906844299807, "loss": 0.9734, "step": 4307 }, { "epoch": 0.8757877617401911, "grad_norm": 0.1411658078432083, "learning_rate": 0.0001124987287704668, "loss": 0.9828, "step": 4308 }, { "epoch": 0.8759910550924985, "grad_norm": 0.13278289139270782, "learning_rate": 0.00011247838909793553, "loss": 1.0184, "step": 4309 }, { "epoch": 0.8761943484448058, "grad_norm": 0.13630905747413635, "learning_rate": 0.00011245804942540425, "loss": 1.1146, "step": 4310 }, { "epoch": 0.8763976417971132, "grad_norm": 0.12063156068325043, "learning_rate": 0.00011243770975287298, "loss": 0.9559, "step": 4311 }, { "epoch": 0.8766009351494206, "grad_norm": 0.12756480276584625, "learning_rate": 0.00011241737008034172, "loss": 0.935, "step": 4312 }, { "epoch": 0.876804228501728, "grad_norm": 0.1388019174337387, "learning_rate": 0.00011239703040781044, "loss": 1.102, "step": 4313 }, { "epoch": 0.8770075218540354, "grad_norm": 0.13372951745986938, "learning_rate": 0.00011237669073527916, "loss": 0.9758, "step": 4314 }, { "epoch": 0.8772108152063427, "grad_norm": 0.12079128623008728, "learning_rate": 0.00011235635106274789, "loss": 0.9984, "step": 4315 }, { "epoch": 0.8774141085586501, "grad_norm": 0.1439303755760193, "learning_rate": 0.00011233601139021663, "loss": 1.131, "step": 4316 }, { "epoch": 0.8776174019109575, "grad_norm": 0.131261944770813, "learning_rate": 0.00011231567171768535, "loss": 0.9801, "step": 4317 }, { "epoch": 0.8778206952632649, "grad_norm": 0.1571865677833557, "learning_rate": 0.00011229533204515408, "loss": 1.0769, "step": 4318 }, { "epoch": 0.8780239886155723, "grad_norm": 0.1357412189245224, "learning_rate": 0.0001122749923726228, "loss": 0.992, "step": 4319 }, { "epoch": 0.8782272819678797, "grad_norm": 0.12698335945606232, "learning_rate": 0.00011225465270009154, "loss": 0.9678, "step": 4320 }, { "epoch": 0.878430575320187, "grad_norm": 0.15510526299476624, "learning_rate": 0.00011223431302756026, "loss": 1.2756, "step": 4321 }, { "epoch": 0.8786338686724944, "grad_norm": 0.13490548729896545, "learning_rate": 0.00011221397335502899, "loss": 0.9746, "step": 4322 }, { "epoch": 0.8788371620248018, "grad_norm": 0.1362731158733368, "learning_rate": 0.00011219363368249771, "loss": 1.1591, "step": 4323 }, { "epoch": 0.8790404553771092, "grad_norm": 0.12086111307144165, "learning_rate": 0.00011217329400996644, "loss": 0.9592, "step": 4324 }, { "epoch": 0.8792437487294166, "grad_norm": 0.13338525593280792, "learning_rate": 0.00011215295433743517, "loss": 1.0375, "step": 4325 }, { "epoch": 0.879447042081724, "grad_norm": 0.13681508600711823, "learning_rate": 0.0001121326146649039, "loss": 1.2068, "step": 4326 }, { "epoch": 0.8796503354340313, "grad_norm": 0.12971334159374237, "learning_rate": 0.00011211227499237262, "loss": 0.9959, "step": 4327 }, { "epoch": 0.8798536287863387, "grad_norm": 0.13908180594444275, "learning_rate": 0.00011209193531984135, "loss": 1.119, "step": 4328 }, { "epoch": 0.8800569221386461, "grad_norm": 0.13482098281383514, "learning_rate": 0.00011207159564731009, "loss": 1.0079, "step": 4329 }, { "epoch": 0.8802602154909535, "grad_norm": 0.14087046682834625, "learning_rate": 0.00011205125597477881, "loss": 1.0478, "step": 4330 }, { "epoch": 0.8804635088432609, "grad_norm": 0.12133046984672546, "learning_rate": 0.00011203091630224753, "loss": 0.8026, "step": 4331 }, { "epoch": 0.8806668021955683, "grad_norm": 0.12162627279758453, "learning_rate": 0.00011201057662971626, "loss": 1.0147, "step": 4332 }, { "epoch": 0.8808700955478755, "grad_norm": 0.1315440535545349, "learning_rate": 0.000111990236957185, "loss": 1.1736, "step": 4333 }, { "epoch": 0.8810733889001829, "grad_norm": 0.1336052566766739, "learning_rate": 0.00011196989728465372, "loss": 1.0814, "step": 4334 }, { "epoch": 0.8812766822524903, "grad_norm": 0.12887480854988098, "learning_rate": 0.00011194955761212245, "loss": 1.0392, "step": 4335 }, { "epoch": 0.8814799756047977, "grad_norm": 0.12557265162467957, "learning_rate": 0.00011192921793959117, "loss": 0.9376, "step": 4336 }, { "epoch": 0.8816832689571051, "grad_norm": 0.13946324586868286, "learning_rate": 0.00011190887826705991, "loss": 1.0847, "step": 4337 }, { "epoch": 0.8818865623094125, "grad_norm": 0.14429444074630737, "learning_rate": 0.00011188853859452863, "loss": 1.0925, "step": 4338 }, { "epoch": 0.8820898556617198, "grad_norm": 0.13866104185581207, "learning_rate": 0.00011186819892199736, "loss": 1.1063, "step": 4339 }, { "epoch": 0.8822931490140272, "grad_norm": 0.1266574114561081, "learning_rate": 0.00011184785924946608, "loss": 1.0786, "step": 4340 }, { "epoch": 0.8824964423663346, "grad_norm": 0.14879325032234192, "learning_rate": 0.00011182751957693482, "loss": 1.1059, "step": 4341 }, { "epoch": 0.882699735718642, "grad_norm": 0.11987625062465668, "learning_rate": 0.00011180717990440354, "loss": 0.8947, "step": 4342 }, { "epoch": 0.8829030290709494, "grad_norm": 0.13331225514411926, "learning_rate": 0.00011178684023187227, "loss": 1.1468, "step": 4343 }, { "epoch": 0.8831063224232567, "grad_norm": 0.13890080153942108, "learning_rate": 0.00011176650055934099, "loss": 1.0535, "step": 4344 }, { "epoch": 0.8833096157755641, "grad_norm": 0.14050957560539246, "learning_rate": 0.00011174616088680973, "loss": 1.1156, "step": 4345 }, { "epoch": 0.8835129091278715, "grad_norm": 0.14118660986423492, "learning_rate": 0.00011172582121427846, "loss": 0.9597, "step": 4346 }, { "epoch": 0.8837162024801789, "grad_norm": 0.13197362422943115, "learning_rate": 0.00011170548154174718, "loss": 0.9801, "step": 4347 }, { "epoch": 0.8839194958324863, "grad_norm": 0.1429329663515091, "learning_rate": 0.0001116851418692159, "loss": 1.1501, "step": 4348 }, { "epoch": 0.8841227891847937, "grad_norm": 0.14236941933631897, "learning_rate": 0.00011166480219668464, "loss": 1.0295, "step": 4349 }, { "epoch": 0.884326082537101, "grad_norm": 0.13247445225715637, "learning_rate": 0.00011164446252415337, "loss": 0.9995, "step": 4350 }, { "epoch": 0.8845293758894084, "grad_norm": 0.1475542187690735, "learning_rate": 0.00011162412285162209, "loss": 1.1062, "step": 4351 }, { "epoch": 0.8847326692417158, "grad_norm": 0.14314448833465576, "learning_rate": 0.00011160378317909082, "loss": 1.1257, "step": 4352 }, { "epoch": 0.8849359625940232, "grad_norm": 0.1297428011894226, "learning_rate": 0.00011158344350655955, "loss": 0.8739, "step": 4353 }, { "epoch": 0.8851392559463306, "grad_norm": 0.15738995373249054, "learning_rate": 0.00011156310383402828, "loss": 1.2495, "step": 4354 }, { "epoch": 0.885342549298638, "grad_norm": 0.13949069380760193, "learning_rate": 0.000111542764161497, "loss": 1.0207, "step": 4355 }, { "epoch": 0.8855458426509453, "grad_norm": 0.1462063193321228, "learning_rate": 0.00011152242448896573, "loss": 0.9444, "step": 4356 }, { "epoch": 0.8857491360032527, "grad_norm": 0.13881848752498627, "learning_rate": 0.00011150208481643447, "loss": 1.0071, "step": 4357 }, { "epoch": 0.8859524293555601, "grad_norm": 0.13828495144844055, "learning_rate": 0.00011148174514390319, "loss": 1.0035, "step": 4358 }, { "epoch": 0.8861557227078675, "grad_norm": 0.12428104132413864, "learning_rate": 0.00011146140547137191, "loss": 0.94, "step": 4359 }, { "epoch": 0.8863590160601749, "grad_norm": 0.14945100247859955, "learning_rate": 0.00011144106579884064, "loss": 1.262, "step": 4360 }, { "epoch": 0.8865623094124823, "grad_norm": 0.13491201400756836, "learning_rate": 0.00011142072612630938, "loss": 0.9794, "step": 4361 }, { "epoch": 0.8867656027647896, "grad_norm": 0.1441691815853119, "learning_rate": 0.0001114003864537781, "loss": 1.0569, "step": 4362 }, { "epoch": 0.886968896117097, "grad_norm": 0.14696361124515533, "learning_rate": 0.00011138004678124683, "loss": 1.263, "step": 4363 }, { "epoch": 0.8871721894694043, "grad_norm": 0.131379634141922, "learning_rate": 0.00011135970710871555, "loss": 0.8765, "step": 4364 }, { "epoch": 0.8873754828217117, "grad_norm": 0.13199898600578308, "learning_rate": 0.00011133936743618427, "loss": 0.9504, "step": 4365 }, { "epoch": 0.8875787761740191, "grad_norm": 0.12538810074329376, "learning_rate": 0.00011131902776365301, "loss": 0.9167, "step": 4366 }, { "epoch": 0.8877820695263264, "grad_norm": 0.14858978986740112, "learning_rate": 0.00011129868809112174, "loss": 1.1652, "step": 4367 }, { "epoch": 0.8879853628786338, "grad_norm": 0.12117012590169907, "learning_rate": 0.00011127834841859046, "loss": 0.9091, "step": 4368 }, { "epoch": 0.8881886562309412, "grad_norm": 0.13053376972675323, "learning_rate": 0.00011125800874605919, "loss": 0.9876, "step": 4369 }, { "epoch": 0.8883919495832486, "grad_norm": 0.15164178609848022, "learning_rate": 0.00011123766907352792, "loss": 1.1477, "step": 4370 }, { "epoch": 0.888595242935556, "grad_norm": 0.13139276206493378, "learning_rate": 0.00011121732940099665, "loss": 1.0332, "step": 4371 }, { "epoch": 0.8887985362878634, "grad_norm": 0.14275844395160675, "learning_rate": 0.00011119698972846537, "loss": 1.2397, "step": 4372 }, { "epoch": 0.8890018296401707, "grad_norm": 0.14269821345806122, "learning_rate": 0.0001111766500559341, "loss": 1.0065, "step": 4373 }, { "epoch": 0.8892051229924781, "grad_norm": 0.12749828398227692, "learning_rate": 0.00011115631038340284, "loss": 0.9364, "step": 4374 }, { "epoch": 0.8894084163447855, "grad_norm": 0.13233932852745056, "learning_rate": 0.00011113597071087156, "loss": 1.035, "step": 4375 }, { "epoch": 0.8896117096970929, "grad_norm": 0.14462941884994507, "learning_rate": 0.00011111563103834028, "loss": 1.1277, "step": 4376 }, { "epoch": 0.8898150030494003, "grad_norm": 0.14381466805934906, "learning_rate": 0.00011109529136580901, "loss": 1.166, "step": 4377 }, { "epoch": 0.8900182964017077, "grad_norm": 0.1264910101890564, "learning_rate": 0.00011107495169327775, "loss": 1.0343, "step": 4378 }, { "epoch": 0.890221589754015, "grad_norm": 0.12185248732566833, "learning_rate": 0.00011105461202074647, "loss": 1.0195, "step": 4379 }, { "epoch": 0.8904248831063224, "grad_norm": 0.13510321080684662, "learning_rate": 0.0001110342723482152, "loss": 0.9245, "step": 4380 }, { "epoch": 0.8906281764586298, "grad_norm": 0.13467377424240112, "learning_rate": 0.00011101393267568392, "loss": 0.9795, "step": 4381 }, { "epoch": 0.8908314698109372, "grad_norm": 0.1266263723373413, "learning_rate": 0.00011099359300315266, "loss": 0.9728, "step": 4382 }, { "epoch": 0.8910347631632446, "grad_norm": 0.12397301942110062, "learning_rate": 0.00011097325333062138, "loss": 0.9325, "step": 4383 }, { "epoch": 0.891238056515552, "grad_norm": 0.14966972172260284, "learning_rate": 0.00011095291365809011, "loss": 1.2973, "step": 4384 }, { "epoch": 0.8914413498678593, "grad_norm": 0.13662739098072052, "learning_rate": 0.00011093257398555883, "loss": 1.0907, "step": 4385 }, { "epoch": 0.8916446432201667, "grad_norm": 0.1289726197719574, "learning_rate": 0.00011091223431302757, "loss": 1.047, "step": 4386 }, { "epoch": 0.8918479365724741, "grad_norm": 0.13556358218193054, "learning_rate": 0.0001108918946404963, "loss": 0.9938, "step": 4387 }, { "epoch": 0.8920512299247815, "grad_norm": 0.13389402627944946, "learning_rate": 0.00011087155496796502, "loss": 1.1329, "step": 4388 }, { "epoch": 0.8922545232770889, "grad_norm": 0.13192865252494812, "learning_rate": 0.00011085121529543374, "loss": 0.945, "step": 4389 }, { "epoch": 0.8924578166293963, "grad_norm": 0.14545689523220062, "learning_rate": 0.00011083087562290248, "loss": 1.0199, "step": 4390 }, { "epoch": 0.8926611099817036, "grad_norm": 0.1357770413160324, "learning_rate": 0.0001108105359503712, "loss": 1.1277, "step": 4391 }, { "epoch": 0.892864403334011, "grad_norm": 0.1452401578426361, "learning_rate": 0.00011079019627783993, "loss": 1.2486, "step": 4392 }, { "epoch": 0.8930676966863184, "grad_norm": 0.12674301862716675, "learning_rate": 0.00011076985660530865, "loss": 0.9128, "step": 4393 }, { "epoch": 0.8932709900386258, "grad_norm": 0.14735066890716553, "learning_rate": 0.00011074951693277739, "loss": 1.0515, "step": 4394 }, { "epoch": 0.8934742833909332, "grad_norm": 0.14510585367679596, "learning_rate": 0.00011072917726024612, "loss": 1.106, "step": 4395 }, { "epoch": 0.8936775767432404, "grad_norm": 0.14333130419254303, "learning_rate": 0.00011070883758771484, "loss": 1.1362, "step": 4396 }, { "epoch": 0.8938808700955478, "grad_norm": 0.1307590752840042, "learning_rate": 0.00011068849791518357, "loss": 0.9749, "step": 4397 }, { "epoch": 0.8940841634478552, "grad_norm": 0.12639310956001282, "learning_rate": 0.0001106681582426523, "loss": 0.9695, "step": 4398 }, { "epoch": 0.8942874568001626, "grad_norm": 0.13830193877220154, "learning_rate": 0.00011064781857012103, "loss": 0.9268, "step": 4399 }, { "epoch": 0.89449075015247, "grad_norm": 0.1438985913991928, "learning_rate": 0.00011062747889758975, "loss": 1.0869, "step": 4400 }, { "epoch": 0.8946940435047774, "grad_norm": 0.1423654854297638, "learning_rate": 0.00011060713922505848, "loss": 1.1083, "step": 4401 }, { "epoch": 0.8948973368570847, "grad_norm": 0.1318962126970291, "learning_rate": 0.00011058679955252722, "loss": 0.8641, "step": 4402 }, { "epoch": 0.8951006302093921, "grad_norm": 0.13388904929161072, "learning_rate": 0.00011056645987999594, "loss": 0.9573, "step": 4403 }, { "epoch": 0.8953039235616995, "grad_norm": 0.13502460718154907, "learning_rate": 0.00011054612020746466, "loss": 1.0003, "step": 4404 }, { "epoch": 0.8955072169140069, "grad_norm": 0.13359855115413666, "learning_rate": 0.00011052578053493339, "loss": 1.0714, "step": 4405 }, { "epoch": 0.8957105102663143, "grad_norm": 0.12817350029945374, "learning_rate": 0.00011050544086240211, "loss": 1.0177, "step": 4406 }, { "epoch": 0.8959138036186217, "grad_norm": 0.13135068118572235, "learning_rate": 0.00011048510118987085, "loss": 0.9071, "step": 4407 }, { "epoch": 0.896117096970929, "grad_norm": 0.13310706615447998, "learning_rate": 0.00011046476151733958, "loss": 1.0575, "step": 4408 }, { "epoch": 0.8963203903232364, "grad_norm": 0.12109819054603577, "learning_rate": 0.0001104444218448083, "loss": 0.902, "step": 4409 }, { "epoch": 0.8965236836755438, "grad_norm": 0.1310720294713974, "learning_rate": 0.00011042408217227702, "loss": 0.9465, "step": 4410 }, { "epoch": 0.8967269770278512, "grad_norm": 0.1330663412809372, "learning_rate": 0.00011040374249974576, "loss": 1.0053, "step": 4411 }, { "epoch": 0.8969302703801586, "grad_norm": 0.14403831958770752, "learning_rate": 0.00011038340282721449, "loss": 1.1808, "step": 4412 }, { "epoch": 0.897133563732466, "grad_norm": 0.1323632299900055, "learning_rate": 0.00011036306315468321, "loss": 0.9314, "step": 4413 }, { "epoch": 0.8973368570847733, "grad_norm": 0.12776096165180206, "learning_rate": 0.00011034272348215194, "loss": 1.0343, "step": 4414 }, { "epoch": 0.8975401504370807, "grad_norm": 0.12130887806415558, "learning_rate": 0.00011032238380962067, "loss": 1.0066, "step": 4415 }, { "epoch": 0.8977434437893881, "grad_norm": 0.11282986402511597, "learning_rate": 0.0001103020441370894, "loss": 0.8551, "step": 4416 }, { "epoch": 0.8979467371416955, "grad_norm": 0.14610666036605835, "learning_rate": 0.00011028170446455812, "loss": 1.2288, "step": 4417 }, { "epoch": 0.8981500304940029, "grad_norm": 0.14186285436153412, "learning_rate": 0.00011026136479202685, "loss": 1.183, "step": 4418 }, { "epoch": 0.8983533238463103, "grad_norm": 0.1389775425195694, "learning_rate": 0.00011024102511949559, "loss": 1.1684, "step": 4419 }, { "epoch": 0.8985566171986176, "grad_norm": 0.12318051606416702, "learning_rate": 0.00011022068544696431, "loss": 0.8705, "step": 4420 }, { "epoch": 0.898759910550925, "grad_norm": 0.12933410704135895, "learning_rate": 0.00011020034577443303, "loss": 1.0982, "step": 4421 }, { "epoch": 0.8989632039032324, "grad_norm": 0.14935623109340668, "learning_rate": 0.00011018000610190176, "loss": 1.1293, "step": 4422 }, { "epoch": 0.8991664972555398, "grad_norm": 0.13630087673664093, "learning_rate": 0.0001101596664293705, "loss": 1.0667, "step": 4423 }, { "epoch": 0.8993697906078472, "grad_norm": 0.14735549688339233, "learning_rate": 0.00011013932675683922, "loss": 1.0931, "step": 4424 }, { "epoch": 0.8995730839601545, "grad_norm": 0.13349930942058563, "learning_rate": 0.00011011898708430795, "loss": 1.0843, "step": 4425 }, { "epoch": 0.8997763773124619, "grad_norm": 0.13748763501644135, "learning_rate": 0.00011009864741177667, "loss": 1.0533, "step": 4426 }, { "epoch": 0.8999796706647692, "grad_norm": 0.1320018768310547, "learning_rate": 0.00011007830773924541, "loss": 1.0631, "step": 4427 }, { "epoch": 0.9001829640170766, "grad_norm": 0.1377144604921341, "learning_rate": 0.00011005796806671413, "loss": 1.0616, "step": 4428 }, { "epoch": 0.900386257369384, "grad_norm": 0.13794207572937012, "learning_rate": 0.00011003762839418286, "loss": 1.043, "step": 4429 }, { "epoch": 0.9005895507216914, "grad_norm": 0.12091651558876038, "learning_rate": 0.00011001728872165158, "loss": 0.9347, "step": 4430 }, { "epoch": 0.9007928440739987, "grad_norm": 0.13244852423667908, "learning_rate": 0.00010999694904912032, "loss": 0.9094, "step": 4431 }, { "epoch": 0.9009961374263061, "grad_norm": 0.1419922262430191, "learning_rate": 0.00010997660937658904, "loss": 1.0749, "step": 4432 }, { "epoch": 0.9011994307786135, "grad_norm": 0.138065367937088, "learning_rate": 0.00010995626970405777, "loss": 1.0559, "step": 4433 }, { "epoch": 0.9014027241309209, "grad_norm": 0.13192395865917206, "learning_rate": 0.00010993593003152649, "loss": 0.9556, "step": 4434 }, { "epoch": 0.9016060174832283, "grad_norm": 0.13181698322296143, "learning_rate": 0.00010991559035899523, "loss": 1.0103, "step": 4435 }, { "epoch": 0.9018093108355357, "grad_norm": 0.1360086053609848, "learning_rate": 0.00010989525068646396, "loss": 0.9737, "step": 4436 }, { "epoch": 0.902012604187843, "grad_norm": 0.14762909710407257, "learning_rate": 0.00010987491101393268, "loss": 1.0592, "step": 4437 }, { "epoch": 0.9022158975401504, "grad_norm": 0.13677798211574554, "learning_rate": 0.0001098545713414014, "loss": 1.0893, "step": 4438 }, { "epoch": 0.9024191908924578, "grad_norm": 0.13737376034259796, "learning_rate": 0.00010983423166887014, "loss": 1.1157, "step": 4439 }, { "epoch": 0.9026224842447652, "grad_norm": 0.13454869389533997, "learning_rate": 0.00010981389199633887, "loss": 1.0837, "step": 4440 }, { "epoch": 0.9028257775970726, "grad_norm": 0.1382821798324585, "learning_rate": 0.00010979355232380759, "loss": 0.9586, "step": 4441 }, { "epoch": 0.90302907094938, "grad_norm": 0.12248346954584122, "learning_rate": 0.00010977321265127632, "loss": 0.9251, "step": 4442 }, { "epoch": 0.9032323643016873, "grad_norm": 0.13722175359725952, "learning_rate": 0.00010975287297874505, "loss": 1.192, "step": 4443 }, { "epoch": 0.9034356576539947, "grad_norm": 0.14339371025562286, "learning_rate": 0.00010973253330621378, "loss": 1.1055, "step": 4444 }, { "epoch": 0.9036389510063021, "grad_norm": 0.1536564826965332, "learning_rate": 0.0001097121936336825, "loss": 1.1969, "step": 4445 }, { "epoch": 0.9038422443586095, "grad_norm": 0.12401420623064041, "learning_rate": 0.00010969185396115123, "loss": 1.0346, "step": 4446 }, { "epoch": 0.9040455377109169, "grad_norm": 0.12466490268707275, "learning_rate": 0.00010967151428861995, "loss": 0.903, "step": 4447 }, { "epoch": 0.9042488310632242, "grad_norm": 0.1398215889930725, "learning_rate": 0.00010965117461608869, "loss": 1.0548, "step": 4448 }, { "epoch": 0.9044521244155316, "grad_norm": 0.1224413737654686, "learning_rate": 0.00010963083494355741, "loss": 0.9738, "step": 4449 }, { "epoch": 0.904655417767839, "grad_norm": 0.13140305876731873, "learning_rate": 0.00010961049527102614, "loss": 1.1668, "step": 4450 }, { "epoch": 0.9048587111201464, "grad_norm": 0.13816101849079132, "learning_rate": 0.00010959015559849486, "loss": 1.0785, "step": 4451 }, { "epoch": 0.9050620044724538, "grad_norm": 0.19513925909996033, "learning_rate": 0.0001095698159259636, "loss": 1.2728, "step": 4452 }, { "epoch": 0.9052652978247612, "grad_norm": 0.1294509470462799, "learning_rate": 0.00010954947625343233, "loss": 0.9757, "step": 4453 }, { "epoch": 0.9054685911770685, "grad_norm": 0.13822956383228302, "learning_rate": 0.00010952913658090105, "loss": 1.064, "step": 4454 }, { "epoch": 0.9056718845293759, "grad_norm": 0.13722215592861176, "learning_rate": 0.00010950879690836977, "loss": 1.0321, "step": 4455 }, { "epoch": 0.9058751778816833, "grad_norm": 0.1313597559928894, "learning_rate": 0.00010948845723583851, "loss": 1.117, "step": 4456 }, { "epoch": 0.9060784712339907, "grad_norm": 0.13262207806110382, "learning_rate": 0.00010946811756330724, "loss": 1.0395, "step": 4457 }, { "epoch": 0.906281764586298, "grad_norm": 0.15121689438819885, "learning_rate": 0.00010944777789077596, "loss": 1.1235, "step": 4458 }, { "epoch": 0.9064850579386055, "grad_norm": 0.14262863993644714, "learning_rate": 0.00010942743821824469, "loss": 1.2066, "step": 4459 }, { "epoch": 0.9066883512909127, "grad_norm": 0.13933706283569336, "learning_rate": 0.00010940709854571342, "loss": 1.15, "step": 4460 }, { "epoch": 0.9068916446432201, "grad_norm": 0.14884263277053833, "learning_rate": 0.00010938675887318215, "loss": 1.1161, "step": 4461 }, { "epoch": 0.9070949379955275, "grad_norm": 0.1426582783460617, "learning_rate": 0.00010936641920065087, "loss": 1.1462, "step": 4462 }, { "epoch": 0.9072982313478349, "grad_norm": 0.14341098070144653, "learning_rate": 0.0001093460795281196, "loss": 1.0028, "step": 4463 }, { "epoch": 0.9075015247001423, "grad_norm": 0.13192780315876007, "learning_rate": 0.00010932573985558834, "loss": 0.9766, "step": 4464 }, { "epoch": 0.9077048180524497, "grad_norm": 0.13691288232803345, "learning_rate": 0.00010930540018305706, "loss": 1.0718, "step": 4465 }, { "epoch": 0.907908111404757, "grad_norm": 0.1597934365272522, "learning_rate": 0.00010928506051052578, "loss": 1.2067, "step": 4466 }, { "epoch": 0.9081114047570644, "grad_norm": 0.128030464053154, "learning_rate": 0.00010926472083799451, "loss": 0.9896, "step": 4467 }, { "epoch": 0.9083146981093718, "grad_norm": 0.13701699674129486, "learning_rate": 0.00010924438116546325, "loss": 1.0203, "step": 4468 }, { "epoch": 0.9085179914616792, "grad_norm": 0.13079933822155, "learning_rate": 0.00010922404149293197, "loss": 1.0287, "step": 4469 }, { "epoch": 0.9087212848139866, "grad_norm": 0.15257249772548676, "learning_rate": 0.0001092037018204007, "loss": 1.0392, "step": 4470 }, { "epoch": 0.908924578166294, "grad_norm": 0.134558767080307, "learning_rate": 0.00010918336214786942, "loss": 1.1618, "step": 4471 }, { "epoch": 0.9091278715186013, "grad_norm": 0.13755445182323456, "learning_rate": 0.00010916302247533816, "loss": 1.0579, "step": 4472 }, { "epoch": 0.9093311648709087, "grad_norm": 0.14956828951835632, "learning_rate": 0.00010914268280280688, "loss": 1.0457, "step": 4473 }, { "epoch": 0.9095344582232161, "grad_norm": 0.138174906373024, "learning_rate": 0.0001091223431302756, "loss": 0.9902, "step": 4474 }, { "epoch": 0.9097377515755235, "grad_norm": 0.14548815786838531, "learning_rate": 0.00010910200345774433, "loss": 1.1674, "step": 4475 }, { "epoch": 0.9099410449278309, "grad_norm": 0.13372185826301575, "learning_rate": 0.00010908166378521307, "loss": 1.1141, "step": 4476 }, { "epoch": 0.9101443382801382, "grad_norm": 0.1349831521511078, "learning_rate": 0.0001090613241126818, "loss": 1.0383, "step": 4477 }, { "epoch": 0.9103476316324456, "grad_norm": 0.12056616693735123, "learning_rate": 0.00010904098444015052, "loss": 0.7852, "step": 4478 }, { "epoch": 0.910550924984753, "grad_norm": 0.14333753287792206, "learning_rate": 0.00010902064476761924, "loss": 1.0264, "step": 4479 }, { "epoch": 0.9107542183370604, "grad_norm": 0.1312333047389984, "learning_rate": 0.00010900030509508798, "loss": 0.8746, "step": 4480 }, { "epoch": 0.9109575116893678, "grad_norm": 0.14129756391048431, "learning_rate": 0.0001089799654225567, "loss": 1.1576, "step": 4481 }, { "epoch": 0.9111608050416752, "grad_norm": 0.135942742228508, "learning_rate": 0.00010895962575002543, "loss": 1.1842, "step": 4482 }, { "epoch": 0.9113640983939825, "grad_norm": 0.1423972100019455, "learning_rate": 0.00010893928607749415, "loss": 1.051, "step": 4483 }, { "epoch": 0.9115673917462899, "grad_norm": 0.13322605192661285, "learning_rate": 0.00010891894640496289, "loss": 0.9931, "step": 4484 }, { "epoch": 0.9117706850985973, "grad_norm": 0.14480316638946533, "learning_rate": 0.00010889860673243162, "loss": 1.0453, "step": 4485 }, { "epoch": 0.9119739784509047, "grad_norm": 0.1365094780921936, "learning_rate": 0.00010887826705990034, "loss": 1.0413, "step": 4486 }, { "epoch": 0.9121772718032121, "grad_norm": 0.128956139087677, "learning_rate": 0.00010885792738736907, "loss": 1.0774, "step": 4487 }, { "epoch": 0.9123805651555195, "grad_norm": 0.11314928531646729, "learning_rate": 0.00010883758771483779, "loss": 0.8729, "step": 4488 }, { "epoch": 0.9125838585078268, "grad_norm": 0.13904598355293274, "learning_rate": 0.00010881724804230653, "loss": 1.07, "step": 4489 }, { "epoch": 0.9127871518601341, "grad_norm": 0.1325247436761856, "learning_rate": 0.00010879690836977525, "loss": 1.1768, "step": 4490 }, { "epoch": 0.9129904452124415, "grad_norm": 0.13978269696235657, "learning_rate": 0.00010877656869724398, "loss": 1.0877, "step": 4491 }, { "epoch": 0.913193738564749, "grad_norm": 0.13564588129520416, "learning_rate": 0.0001087562290247127, "loss": 1.0527, "step": 4492 }, { "epoch": 0.9133970319170563, "grad_norm": 0.14008729159832, "learning_rate": 0.00010873588935218144, "loss": 1.0295, "step": 4493 }, { "epoch": 0.9136003252693637, "grad_norm": 0.14307157695293427, "learning_rate": 0.00010871554967965016, "loss": 1.0371, "step": 4494 }, { "epoch": 0.913803618621671, "grad_norm": 0.13670316338539124, "learning_rate": 0.00010869521000711889, "loss": 1.0557, "step": 4495 }, { "epoch": 0.9140069119739784, "grad_norm": 0.138756662607193, "learning_rate": 0.00010867487033458761, "loss": 0.9865, "step": 4496 }, { "epoch": 0.9142102053262858, "grad_norm": 0.132290780544281, "learning_rate": 0.00010865453066205635, "loss": 1.041, "step": 4497 }, { "epoch": 0.9144134986785932, "grad_norm": 0.13535267114639282, "learning_rate": 0.00010863419098952508, "loss": 1.0592, "step": 4498 }, { "epoch": 0.9146167920309006, "grad_norm": 0.12333885580301285, "learning_rate": 0.0001086138513169938, "loss": 1.0101, "step": 4499 }, { "epoch": 0.9148200853832079, "grad_norm": 0.14777310192584991, "learning_rate": 0.00010859351164446252, "loss": 1.0622, "step": 4500 }, { "epoch": 0.9150233787355153, "grad_norm": 0.11419006437063217, "learning_rate": 0.00010857317197193126, "loss": 0.8548, "step": 4501 }, { "epoch": 0.9152266720878227, "grad_norm": 0.12761832773685455, "learning_rate": 0.00010855283229939999, "loss": 0.9198, "step": 4502 }, { "epoch": 0.9154299654401301, "grad_norm": 0.1387338936328888, "learning_rate": 0.00010853249262686871, "loss": 1.0402, "step": 4503 }, { "epoch": 0.9156332587924375, "grad_norm": 0.13915283977985382, "learning_rate": 0.00010851215295433744, "loss": 1.1062, "step": 4504 }, { "epoch": 0.9158365521447449, "grad_norm": 0.13649246096611023, "learning_rate": 0.00010849181328180617, "loss": 1.1802, "step": 4505 }, { "epoch": 0.9160398454970522, "grad_norm": 0.15227414667606354, "learning_rate": 0.0001084714736092749, "loss": 1.3089, "step": 4506 }, { "epoch": 0.9162431388493596, "grad_norm": 0.1522645801305771, "learning_rate": 0.00010845113393674362, "loss": 1.1328, "step": 4507 }, { "epoch": 0.916446432201667, "grad_norm": 0.13502533733844757, "learning_rate": 0.00010843079426421235, "loss": 1.0838, "step": 4508 }, { "epoch": 0.9166497255539744, "grad_norm": 0.1440073549747467, "learning_rate": 0.00010841045459168108, "loss": 1.1632, "step": 4509 }, { "epoch": 0.9168530189062818, "grad_norm": 0.1380605548620224, "learning_rate": 0.00010839011491914981, "loss": 1.0128, "step": 4510 }, { "epoch": 0.9170563122585892, "grad_norm": 0.14944829046726227, "learning_rate": 0.00010836977524661853, "loss": 1.2041, "step": 4511 }, { "epoch": 0.9172596056108965, "grad_norm": 0.13469955325126648, "learning_rate": 0.00010834943557408726, "loss": 1.1208, "step": 4512 }, { "epoch": 0.9174628989632039, "grad_norm": 0.1321646124124527, "learning_rate": 0.000108329095901556, "loss": 0.9967, "step": 4513 }, { "epoch": 0.9176661923155113, "grad_norm": 0.1304931789636612, "learning_rate": 0.00010830875622902472, "loss": 1.0428, "step": 4514 }, { "epoch": 0.9178694856678187, "grad_norm": 0.12599384784698486, "learning_rate": 0.00010828841655649345, "loss": 1.0719, "step": 4515 }, { "epoch": 0.9180727790201261, "grad_norm": 0.12788186967372894, "learning_rate": 0.00010826807688396217, "loss": 1.0551, "step": 4516 }, { "epoch": 0.9182760723724335, "grad_norm": 0.16241435706615448, "learning_rate": 0.00010824773721143091, "loss": 1.3277, "step": 4517 }, { "epoch": 0.9184793657247408, "grad_norm": 0.12297213822603226, "learning_rate": 0.00010822739753889963, "loss": 0.9117, "step": 4518 }, { "epoch": 0.9186826590770482, "grad_norm": 0.13010992109775543, "learning_rate": 0.00010820705786636836, "loss": 0.9313, "step": 4519 }, { "epoch": 0.9188859524293556, "grad_norm": 0.13779647648334503, "learning_rate": 0.00010818671819383708, "loss": 1.1851, "step": 4520 }, { "epoch": 0.919089245781663, "grad_norm": 0.15298517048358917, "learning_rate": 0.00010816637852130582, "loss": 1.2739, "step": 4521 }, { "epoch": 0.9192925391339704, "grad_norm": 0.1386537402868271, "learning_rate": 0.00010814603884877454, "loss": 1.1061, "step": 4522 }, { "epoch": 0.9194958324862778, "grad_norm": 0.14241141080856323, "learning_rate": 0.00010812569917624327, "loss": 1.1449, "step": 4523 }, { "epoch": 0.919699125838585, "grad_norm": 0.14428827166557312, "learning_rate": 0.00010810535950371199, "loss": 0.9884, "step": 4524 }, { "epoch": 0.9199024191908924, "grad_norm": 0.15264667570590973, "learning_rate": 0.00010808501983118073, "loss": 1.2018, "step": 4525 }, { "epoch": 0.9201057125431998, "grad_norm": 0.14881928265094757, "learning_rate": 0.00010806468015864945, "loss": 1.0599, "step": 4526 }, { "epoch": 0.9203090058955072, "grad_norm": 0.12393801659345627, "learning_rate": 0.00010804434048611818, "loss": 1.0684, "step": 4527 }, { "epoch": 0.9205122992478146, "grad_norm": 0.1288781762123108, "learning_rate": 0.0001080240008135869, "loss": 0.9667, "step": 4528 }, { "epoch": 0.9207155926001219, "grad_norm": 0.12993919849395752, "learning_rate": 0.00010800366114105563, "loss": 0.985, "step": 4529 }, { "epoch": 0.9209188859524293, "grad_norm": 0.14005163311958313, "learning_rate": 0.00010798332146852437, "loss": 1.0121, "step": 4530 }, { "epoch": 0.9211221793047367, "grad_norm": 0.1298326551914215, "learning_rate": 0.00010796298179599309, "loss": 1.0357, "step": 4531 }, { "epoch": 0.9213254726570441, "grad_norm": 0.1444677710533142, "learning_rate": 0.00010794264212346182, "loss": 1.0837, "step": 4532 }, { "epoch": 0.9215287660093515, "grad_norm": 0.1372900754213333, "learning_rate": 0.00010792230245093054, "loss": 1.1198, "step": 4533 }, { "epoch": 0.9217320593616589, "grad_norm": 0.13712218403816223, "learning_rate": 0.00010790196277839928, "loss": 1.1203, "step": 4534 }, { "epoch": 0.9219353527139662, "grad_norm": 0.13176938891410828, "learning_rate": 0.000107881623105868, "loss": 0.9814, "step": 4535 }, { "epoch": 0.9221386460662736, "grad_norm": 0.14285510778427124, "learning_rate": 0.00010786128343333673, "loss": 1.0725, "step": 4536 }, { "epoch": 0.922341939418581, "grad_norm": 0.14509692788124084, "learning_rate": 0.00010784094376080545, "loss": 1.2281, "step": 4537 }, { "epoch": 0.9225452327708884, "grad_norm": 0.12854382395744324, "learning_rate": 0.00010782060408827419, "loss": 0.95, "step": 4538 }, { "epoch": 0.9227485261231958, "grad_norm": 0.13784833252429962, "learning_rate": 0.00010780026441574291, "loss": 1.0091, "step": 4539 }, { "epoch": 0.9229518194755032, "grad_norm": 0.12507863342761993, "learning_rate": 0.00010777992474321164, "loss": 0.9746, "step": 4540 }, { "epoch": 0.9231551128278105, "grad_norm": 0.14005503058433533, "learning_rate": 0.00010775958507068036, "loss": 0.9599, "step": 4541 }, { "epoch": 0.9233584061801179, "grad_norm": 0.15629933774471283, "learning_rate": 0.0001077392453981491, "loss": 1.1292, "step": 4542 }, { "epoch": 0.9235616995324253, "grad_norm": 0.12826746702194214, "learning_rate": 0.00010771890572561782, "loss": 1.0791, "step": 4543 }, { "epoch": 0.9237649928847327, "grad_norm": 0.1537964642047882, "learning_rate": 0.00010769856605308655, "loss": 1.2648, "step": 4544 }, { "epoch": 0.9239682862370401, "grad_norm": 0.13459934294223785, "learning_rate": 0.00010767822638055527, "loss": 1.1297, "step": 4545 }, { "epoch": 0.9241715795893475, "grad_norm": 0.1457410752773285, "learning_rate": 0.00010765788670802401, "loss": 1.0294, "step": 4546 }, { "epoch": 0.9243748729416548, "grad_norm": 0.12394455820322037, "learning_rate": 0.00010763754703549274, "loss": 0.9905, "step": 4547 }, { "epoch": 0.9245781662939622, "grad_norm": 0.14204509556293488, "learning_rate": 0.00010761720736296146, "loss": 1.1691, "step": 4548 }, { "epoch": 0.9247814596462696, "grad_norm": 0.1345042586326599, "learning_rate": 0.00010759686769043019, "loss": 1.0468, "step": 4549 }, { "epoch": 0.924984752998577, "grad_norm": 0.13902144134044647, "learning_rate": 0.00010757652801789892, "loss": 1.0194, "step": 4550 }, { "epoch": 0.9251880463508844, "grad_norm": 0.1317700892686844, "learning_rate": 0.00010755618834536765, "loss": 0.923, "step": 4551 }, { "epoch": 0.9253913397031917, "grad_norm": 0.15080450475215912, "learning_rate": 0.00010753584867283637, "loss": 1.2244, "step": 4552 }, { "epoch": 0.925594633055499, "grad_norm": 0.14415398240089417, "learning_rate": 0.0001075155090003051, "loss": 1.0809, "step": 4553 }, { "epoch": 0.9257979264078064, "grad_norm": 0.12147921323776245, "learning_rate": 0.00010749516932777383, "loss": 0.8445, "step": 4554 }, { "epoch": 0.9260012197601138, "grad_norm": 0.1352618932723999, "learning_rate": 0.00010747482965524256, "loss": 1.0397, "step": 4555 }, { "epoch": 0.9262045131124212, "grad_norm": 0.1354973316192627, "learning_rate": 0.00010745448998271128, "loss": 1.0103, "step": 4556 }, { "epoch": 0.9264078064647286, "grad_norm": 0.13657426834106445, "learning_rate": 0.00010743415031018001, "loss": 0.9979, "step": 4557 }, { "epoch": 0.9266110998170359, "grad_norm": 0.13294103741645813, "learning_rate": 0.00010741381063764875, "loss": 0.9317, "step": 4558 }, { "epoch": 0.9268143931693433, "grad_norm": 0.14303997159004211, "learning_rate": 0.00010739347096511747, "loss": 1.1488, "step": 4559 }, { "epoch": 0.9270176865216507, "grad_norm": 0.12142444401979446, "learning_rate": 0.0001073731312925862, "loss": 0.9833, "step": 4560 }, { "epoch": 0.9272209798739581, "grad_norm": 0.1350148767232895, "learning_rate": 0.00010735279162005492, "loss": 0.9737, "step": 4561 }, { "epoch": 0.9274242732262655, "grad_norm": 0.15613560378551483, "learning_rate": 0.00010733245194752366, "loss": 1.1749, "step": 4562 }, { "epoch": 0.9276275665785729, "grad_norm": 0.13186268508434296, "learning_rate": 0.00010731211227499238, "loss": 1.0103, "step": 4563 }, { "epoch": 0.9278308599308802, "grad_norm": 0.14699916541576385, "learning_rate": 0.0001072917726024611, "loss": 1.1501, "step": 4564 }, { "epoch": 0.9280341532831876, "grad_norm": 0.13133716583251953, "learning_rate": 0.00010727143292992983, "loss": 1.026, "step": 4565 }, { "epoch": 0.928237446635495, "grad_norm": 0.1365920603275299, "learning_rate": 0.00010725109325739857, "loss": 1.0866, "step": 4566 }, { "epoch": 0.9284407399878024, "grad_norm": 0.12985709309577942, "learning_rate": 0.0001072307535848673, "loss": 1.1098, "step": 4567 }, { "epoch": 0.9286440333401098, "grad_norm": 0.14012043178081512, "learning_rate": 0.00010721041391233602, "loss": 0.9724, "step": 4568 }, { "epoch": 0.9288473266924172, "grad_norm": 0.12195601314306259, "learning_rate": 0.00010719007423980474, "loss": 0.9906, "step": 4569 }, { "epoch": 0.9290506200447245, "grad_norm": 0.12102338671684265, "learning_rate": 0.00010716973456727347, "loss": 0.8993, "step": 4570 }, { "epoch": 0.9292539133970319, "grad_norm": 0.16343897581100464, "learning_rate": 0.0001071493948947422, "loss": 1.3669, "step": 4571 }, { "epoch": 0.9294572067493393, "grad_norm": 0.12324689328670502, "learning_rate": 0.00010712905522221093, "loss": 1.018, "step": 4572 }, { "epoch": 0.9296605001016467, "grad_norm": 0.14391222596168518, "learning_rate": 0.00010710871554967965, "loss": 1.0502, "step": 4573 }, { "epoch": 0.9298637934539541, "grad_norm": 0.13690593838691711, "learning_rate": 0.00010708837587714838, "loss": 1.0015, "step": 4574 }, { "epoch": 0.9300670868062615, "grad_norm": 0.11955592036247253, "learning_rate": 0.00010706803620461712, "loss": 0.8207, "step": 4575 }, { "epoch": 0.9302703801585688, "grad_norm": 0.12728698551654816, "learning_rate": 0.00010704769653208584, "loss": 0.8813, "step": 4576 }, { "epoch": 0.9304736735108762, "grad_norm": 0.14534975588321686, "learning_rate": 0.00010702735685955457, "loss": 1.0923, "step": 4577 }, { "epoch": 0.9306769668631836, "grad_norm": 0.12908664345741272, "learning_rate": 0.00010700701718702329, "loss": 1.1333, "step": 4578 }, { "epoch": 0.930880260215491, "grad_norm": 0.14262458682060242, "learning_rate": 0.00010698667751449203, "loss": 1.0474, "step": 4579 }, { "epoch": 0.9310835535677984, "grad_norm": 0.13423089683055878, "learning_rate": 0.00010696633784196075, "loss": 1.0581, "step": 4580 }, { "epoch": 0.9312868469201057, "grad_norm": 0.1267002373933792, "learning_rate": 0.00010694599816942948, "loss": 0.958, "step": 4581 }, { "epoch": 0.9314901402724131, "grad_norm": 0.13516265153884888, "learning_rate": 0.0001069256584968982, "loss": 1.0636, "step": 4582 }, { "epoch": 0.9316934336247205, "grad_norm": 0.14232146739959717, "learning_rate": 0.00010690531882436694, "loss": 1.1371, "step": 4583 }, { "epoch": 0.9318967269770279, "grad_norm": 0.13286015391349792, "learning_rate": 0.00010688497915183566, "loss": 0.9935, "step": 4584 }, { "epoch": 0.9321000203293353, "grad_norm": 0.1338234841823578, "learning_rate": 0.00010686463947930439, "loss": 1.0371, "step": 4585 }, { "epoch": 0.9323033136816427, "grad_norm": 0.13574783504009247, "learning_rate": 0.00010684429980677311, "loss": 1.0583, "step": 4586 }, { "epoch": 0.9325066070339499, "grad_norm": 0.1322636902332306, "learning_rate": 0.00010682396013424185, "loss": 0.9872, "step": 4587 }, { "epoch": 0.9327099003862573, "grad_norm": 0.13177639245986938, "learning_rate": 0.00010680362046171057, "loss": 0.9844, "step": 4588 }, { "epoch": 0.9329131937385647, "grad_norm": 0.13709305226802826, "learning_rate": 0.0001067832807891793, "loss": 1.0352, "step": 4589 }, { "epoch": 0.9331164870908721, "grad_norm": 0.13158872723579407, "learning_rate": 0.00010676294111664802, "loss": 0.9291, "step": 4590 }, { "epoch": 0.9333197804431795, "grad_norm": 0.1440209448337555, "learning_rate": 0.00010674260144411676, "loss": 1.1299, "step": 4591 }, { "epoch": 0.9335230737954869, "grad_norm": 0.14185591042041779, "learning_rate": 0.00010672226177158549, "loss": 1.0265, "step": 4592 }, { "epoch": 0.9337263671477942, "grad_norm": 0.13720087707042694, "learning_rate": 0.00010670192209905421, "loss": 1.065, "step": 4593 }, { "epoch": 0.9339296605001016, "grad_norm": 0.1312158852815628, "learning_rate": 0.00010668158242652294, "loss": 0.97, "step": 4594 }, { "epoch": 0.934132953852409, "grad_norm": 0.13442127406597137, "learning_rate": 0.00010666124275399167, "loss": 1.0517, "step": 4595 }, { "epoch": 0.9343362472047164, "grad_norm": 0.1302952766418457, "learning_rate": 0.0001066409030814604, "loss": 1.0052, "step": 4596 }, { "epoch": 0.9345395405570238, "grad_norm": 0.14878568053245544, "learning_rate": 0.00010662056340892912, "loss": 1.2183, "step": 4597 }, { "epoch": 0.9347428339093312, "grad_norm": 0.13958996534347534, "learning_rate": 0.00010660022373639785, "loss": 1.0758, "step": 4598 }, { "epoch": 0.9349461272616385, "grad_norm": 0.14994315803050995, "learning_rate": 0.00010657988406386658, "loss": 1.1696, "step": 4599 }, { "epoch": 0.9351494206139459, "grad_norm": 0.13476385176181793, "learning_rate": 0.00010655954439133531, "loss": 0.9507, "step": 4600 }, { "epoch": 0.9353527139662533, "grad_norm": 0.13115908205509186, "learning_rate": 0.00010653920471880403, "loss": 1.1128, "step": 4601 }, { "epoch": 0.9355560073185607, "grad_norm": 0.12260119616985321, "learning_rate": 0.00010651886504627276, "loss": 0.8933, "step": 4602 }, { "epoch": 0.9357593006708681, "grad_norm": 0.12978796660900116, "learning_rate": 0.0001064985253737415, "loss": 1.0094, "step": 4603 }, { "epoch": 0.9359625940231754, "grad_norm": 0.13168974220752716, "learning_rate": 0.00010647818570121022, "loss": 1.0007, "step": 4604 }, { "epoch": 0.9361658873754828, "grad_norm": 0.13790659606456757, "learning_rate": 0.00010645784602867894, "loss": 0.9609, "step": 4605 }, { "epoch": 0.9363691807277902, "grad_norm": 0.13622581958770752, "learning_rate": 0.00010643750635614767, "loss": 0.9742, "step": 4606 }, { "epoch": 0.9365724740800976, "grad_norm": 0.13826538622379303, "learning_rate": 0.00010641716668361641, "loss": 1.1061, "step": 4607 }, { "epoch": 0.936775767432405, "grad_norm": 0.13676097989082336, "learning_rate": 0.00010639682701108513, "loss": 1.1522, "step": 4608 }, { "epoch": 0.9369790607847124, "grad_norm": 0.13370144367218018, "learning_rate": 0.00010637648733855386, "loss": 0.9657, "step": 4609 }, { "epoch": 0.9371823541370197, "grad_norm": 0.12708503007888794, "learning_rate": 0.00010635614766602258, "loss": 1.0229, "step": 4610 }, { "epoch": 0.9373856474893271, "grad_norm": 0.14301814138889313, "learning_rate": 0.00010633580799349132, "loss": 0.9909, "step": 4611 }, { "epoch": 0.9375889408416345, "grad_norm": 0.14644454419612885, "learning_rate": 0.00010631546832096004, "loss": 1.1386, "step": 4612 }, { "epoch": 0.9377922341939419, "grad_norm": 0.13054661452770233, "learning_rate": 0.00010629512864842877, "loss": 0.9233, "step": 4613 }, { "epoch": 0.9379955275462493, "grad_norm": 0.13898830115795135, "learning_rate": 0.00010627478897589749, "loss": 1.2265, "step": 4614 }, { "epoch": 0.9381988208985567, "grad_norm": 0.13503706455230713, "learning_rate": 0.00010625444930336622, "loss": 1.0694, "step": 4615 }, { "epoch": 0.938402114250864, "grad_norm": 0.12382601946592331, "learning_rate": 0.00010623410963083495, "loss": 0.8756, "step": 4616 }, { "epoch": 0.9386054076031713, "grad_norm": 0.12934790551662445, "learning_rate": 0.00010621376995830368, "loss": 1.0435, "step": 4617 }, { "epoch": 0.9388087009554787, "grad_norm": 0.14618442952632904, "learning_rate": 0.0001061934302857724, "loss": 1.209, "step": 4618 }, { "epoch": 0.9390119943077861, "grad_norm": 0.1417202651500702, "learning_rate": 0.00010617309061324113, "loss": 1.1805, "step": 4619 }, { "epoch": 0.9392152876600935, "grad_norm": 0.14158600568771362, "learning_rate": 0.00010615275094070987, "loss": 1.0833, "step": 4620 }, { "epoch": 0.9394185810124009, "grad_norm": 0.13389776647090912, "learning_rate": 0.00010613241126817859, "loss": 1.1245, "step": 4621 }, { "epoch": 0.9396218743647082, "grad_norm": 0.1260322481393814, "learning_rate": 0.00010611207159564731, "loss": 0.8908, "step": 4622 }, { "epoch": 0.9398251677170156, "grad_norm": 0.1375802904367447, "learning_rate": 0.00010609173192311604, "loss": 1.0062, "step": 4623 }, { "epoch": 0.940028461069323, "grad_norm": 0.13388384878635406, "learning_rate": 0.00010607139225058478, "loss": 1.082, "step": 4624 }, { "epoch": 0.9402317544216304, "grad_norm": 0.13197797536849976, "learning_rate": 0.0001060510525780535, "loss": 0.9478, "step": 4625 }, { "epoch": 0.9404350477739378, "grad_norm": 0.1293218582868576, "learning_rate": 0.00010603071290552223, "loss": 0.9416, "step": 4626 }, { "epoch": 0.9406383411262452, "grad_norm": 0.1269448846578598, "learning_rate": 0.00010601037323299095, "loss": 0.9556, "step": 4627 }, { "epoch": 0.9408416344785525, "grad_norm": 0.15124647319316864, "learning_rate": 0.00010599003356045969, "loss": 1.1097, "step": 4628 }, { "epoch": 0.9410449278308599, "grad_norm": 0.12264547497034073, "learning_rate": 0.00010596969388792841, "loss": 1.0336, "step": 4629 }, { "epoch": 0.9412482211831673, "grad_norm": 0.13190335035324097, "learning_rate": 0.00010594935421539714, "loss": 1.0159, "step": 4630 }, { "epoch": 0.9414515145354747, "grad_norm": 0.13107061386108398, "learning_rate": 0.00010592901454286586, "loss": 1.0756, "step": 4631 }, { "epoch": 0.9416548078877821, "grad_norm": 0.13843277096748352, "learning_rate": 0.0001059086748703346, "loss": 0.9752, "step": 4632 }, { "epoch": 0.9418581012400894, "grad_norm": 0.12323298305273056, "learning_rate": 0.00010588833519780332, "loss": 0.8268, "step": 4633 }, { "epoch": 0.9420613945923968, "grad_norm": 0.136516734957695, "learning_rate": 0.00010586799552527205, "loss": 1.0694, "step": 4634 }, { "epoch": 0.9422646879447042, "grad_norm": 0.13739456236362457, "learning_rate": 0.00010584765585274077, "loss": 1.0251, "step": 4635 }, { "epoch": 0.9424679812970116, "grad_norm": 0.13358846306800842, "learning_rate": 0.00010582731618020951, "loss": 1.1104, "step": 4636 }, { "epoch": 0.942671274649319, "grad_norm": 0.13964349031448364, "learning_rate": 0.00010580697650767824, "loss": 0.987, "step": 4637 }, { "epoch": 0.9428745680016264, "grad_norm": 0.1372976303100586, "learning_rate": 0.00010578663683514696, "loss": 0.9547, "step": 4638 }, { "epoch": 0.9430778613539337, "grad_norm": 0.14359022676944733, "learning_rate": 0.00010576629716261568, "loss": 1.1619, "step": 4639 }, { "epoch": 0.9432811547062411, "grad_norm": 0.12636056542396545, "learning_rate": 0.00010574595749008442, "loss": 0.9485, "step": 4640 }, { "epoch": 0.9434844480585485, "grad_norm": 0.15746049582958221, "learning_rate": 0.00010572561781755315, "loss": 1.2212, "step": 4641 }, { "epoch": 0.9436877414108559, "grad_norm": 0.13888253271579742, "learning_rate": 0.00010570527814502187, "loss": 1.1069, "step": 4642 }, { "epoch": 0.9438910347631633, "grad_norm": 0.12905919551849365, "learning_rate": 0.0001056849384724906, "loss": 1.0055, "step": 4643 }, { "epoch": 0.9440943281154707, "grad_norm": 0.14576807618141174, "learning_rate": 0.00010566459879995933, "loss": 1.1185, "step": 4644 }, { "epoch": 0.944297621467778, "grad_norm": 0.15471163392066956, "learning_rate": 0.00010564425912742806, "loss": 1.2705, "step": 4645 }, { "epoch": 0.9445009148200854, "grad_norm": 0.1389993131160736, "learning_rate": 0.00010562391945489678, "loss": 1.0862, "step": 4646 }, { "epoch": 0.9447042081723928, "grad_norm": 0.1482502818107605, "learning_rate": 0.00010560357978236551, "loss": 1.1031, "step": 4647 }, { "epoch": 0.9449075015247002, "grad_norm": 0.12542898952960968, "learning_rate": 0.00010558324010983425, "loss": 0.8559, "step": 4648 }, { "epoch": 0.9451107948770076, "grad_norm": 0.12403812259435654, "learning_rate": 0.00010556290043730297, "loss": 0.9117, "step": 4649 }, { "epoch": 0.945314088229315, "grad_norm": 0.11655326187610626, "learning_rate": 0.0001055425607647717, "loss": 0.9568, "step": 4650 }, { "epoch": 0.9455173815816222, "grad_norm": 0.13780179619789124, "learning_rate": 0.00010552222109224042, "loss": 1.0537, "step": 4651 }, { "epoch": 0.9457206749339296, "grad_norm": 0.13035158812999725, "learning_rate": 0.00010550188141970916, "loss": 1.0461, "step": 4652 }, { "epoch": 0.945923968286237, "grad_norm": 0.13049277663230896, "learning_rate": 0.00010548154174717788, "loss": 0.8681, "step": 4653 }, { "epoch": 0.9461272616385444, "grad_norm": 0.149881973862648, "learning_rate": 0.0001054612020746466, "loss": 1.2271, "step": 4654 }, { "epoch": 0.9463305549908518, "grad_norm": 0.11799302697181702, "learning_rate": 0.00010544086240211533, "loss": 0.8407, "step": 4655 }, { "epoch": 0.9465338483431591, "grad_norm": 0.16021724045276642, "learning_rate": 0.00010542052272958404, "loss": 1.2769, "step": 4656 }, { "epoch": 0.9467371416954665, "grad_norm": 0.14058107137680054, "learning_rate": 0.00010540018305705279, "loss": 1.0519, "step": 4657 }, { "epoch": 0.9469404350477739, "grad_norm": 0.14473353326320648, "learning_rate": 0.00010537984338452152, "loss": 1.0002, "step": 4658 }, { "epoch": 0.9471437284000813, "grad_norm": 0.12458368390798569, "learning_rate": 0.00010535950371199024, "loss": 0.9717, "step": 4659 }, { "epoch": 0.9473470217523887, "grad_norm": 0.13984310626983643, "learning_rate": 0.00010533916403945897, "loss": 1.0642, "step": 4660 }, { "epoch": 0.9475503151046961, "grad_norm": 0.13739560544490814, "learning_rate": 0.0001053188243669277, "loss": 1.0281, "step": 4661 }, { "epoch": 0.9477536084570034, "grad_norm": 0.1382581740617752, "learning_rate": 0.00010529848469439643, "loss": 1.0816, "step": 4662 }, { "epoch": 0.9479569018093108, "grad_norm": 0.14696218073368073, "learning_rate": 0.00010527814502186515, "loss": 1.2176, "step": 4663 }, { "epoch": 0.9481601951616182, "grad_norm": 0.12849698960781097, "learning_rate": 0.00010525780534933388, "loss": 1.0097, "step": 4664 }, { "epoch": 0.9483634885139256, "grad_norm": 0.14687961339950562, "learning_rate": 0.00010523746567680262, "loss": 1.1417, "step": 4665 }, { "epoch": 0.948566781866233, "grad_norm": 0.14504985511302948, "learning_rate": 0.00010521712600427134, "loss": 1.1261, "step": 4666 }, { "epoch": 0.9487700752185404, "grad_norm": 0.12274103611707687, "learning_rate": 0.00010519678633174006, "loss": 0.9646, "step": 4667 }, { "epoch": 0.9489733685708477, "grad_norm": 0.11958125233650208, "learning_rate": 0.00010517644665920879, "loss": 0.9111, "step": 4668 }, { "epoch": 0.9491766619231551, "grad_norm": 0.14991825819015503, "learning_rate": 0.00010515610698667753, "loss": 1.1415, "step": 4669 }, { "epoch": 0.9493799552754625, "grad_norm": 0.14164093136787415, "learning_rate": 0.00010513576731414625, "loss": 1.2477, "step": 4670 }, { "epoch": 0.9495832486277699, "grad_norm": 0.13947711884975433, "learning_rate": 0.00010511542764161498, "loss": 1.0371, "step": 4671 }, { "epoch": 0.9497865419800773, "grad_norm": 0.12946373224258423, "learning_rate": 0.0001050950879690837, "loss": 1.0401, "step": 4672 }, { "epoch": 0.9499898353323847, "grad_norm": 0.1340869963169098, "learning_rate": 0.00010507474829655244, "loss": 1.0419, "step": 4673 }, { "epoch": 0.950193128684692, "grad_norm": 0.1419667899608612, "learning_rate": 0.00010505440862402116, "loss": 1.0027, "step": 4674 }, { "epoch": 0.9503964220369994, "grad_norm": 0.13332538306713104, "learning_rate": 0.00010503406895148989, "loss": 0.8884, "step": 4675 }, { "epoch": 0.9505997153893068, "grad_norm": 0.13271865248680115, "learning_rate": 0.00010501372927895861, "loss": 1.005, "step": 4676 }, { "epoch": 0.9508030087416142, "grad_norm": 0.15168990194797516, "learning_rate": 0.00010499338960642735, "loss": 1.168, "step": 4677 }, { "epoch": 0.9510063020939216, "grad_norm": 0.12381100654602051, "learning_rate": 0.00010497304993389607, "loss": 1.0467, "step": 4678 }, { "epoch": 0.951209595446229, "grad_norm": 0.14847104251384735, "learning_rate": 0.0001049527102613648, "loss": 1.1576, "step": 4679 }, { "epoch": 0.9514128887985362, "grad_norm": 0.1450118124485016, "learning_rate": 0.00010493237058883352, "loss": 1.0424, "step": 4680 }, { "epoch": 0.9516161821508436, "grad_norm": 0.13652002811431885, "learning_rate": 0.00010491203091630226, "loss": 1.0449, "step": 4681 }, { "epoch": 0.951819475503151, "grad_norm": 0.14836739003658295, "learning_rate": 0.00010489169124377099, "loss": 1.1076, "step": 4682 }, { "epoch": 0.9520227688554584, "grad_norm": 0.12465627491474152, "learning_rate": 0.00010487135157123971, "loss": 1.0594, "step": 4683 }, { "epoch": 0.9522260622077658, "grad_norm": 0.14319440722465515, "learning_rate": 0.00010485101189870843, "loss": 1.0954, "step": 4684 }, { "epoch": 0.9524293555600731, "grad_norm": 0.1305132359266281, "learning_rate": 0.00010483067222617717, "loss": 1.126, "step": 4685 }, { "epoch": 0.9526326489123805, "grad_norm": 0.14411622285842896, "learning_rate": 0.0001048103325536459, "loss": 1.069, "step": 4686 }, { "epoch": 0.9528359422646879, "grad_norm": 0.1547628790140152, "learning_rate": 0.00010478999288111462, "loss": 1.185, "step": 4687 }, { "epoch": 0.9530392356169953, "grad_norm": 0.1339641660451889, "learning_rate": 0.00010476965320858335, "loss": 1.1261, "step": 4688 }, { "epoch": 0.9532425289693027, "grad_norm": 0.15180015563964844, "learning_rate": 0.00010474931353605208, "loss": 1.2143, "step": 4689 }, { "epoch": 0.9534458223216101, "grad_norm": 0.13662739098072052, "learning_rate": 0.00010472897386352081, "loss": 1.1649, "step": 4690 }, { "epoch": 0.9536491156739174, "grad_norm": 0.14575301110744476, "learning_rate": 0.00010470863419098953, "loss": 1.0256, "step": 4691 }, { "epoch": 0.9538524090262248, "grad_norm": 0.13986723124980927, "learning_rate": 0.00010468829451845826, "loss": 1.1673, "step": 4692 }, { "epoch": 0.9540557023785322, "grad_norm": 0.14431442320346832, "learning_rate": 0.000104667954845927, "loss": 1.1316, "step": 4693 }, { "epoch": 0.9542589957308396, "grad_norm": 0.13795843720436096, "learning_rate": 0.00010464761517339572, "loss": 1.0331, "step": 4694 }, { "epoch": 0.954462289083147, "grad_norm": 0.1303069293498993, "learning_rate": 0.00010462727550086444, "loss": 0.9968, "step": 4695 }, { "epoch": 0.9546655824354544, "grad_norm": 0.15158216655254364, "learning_rate": 0.00010460693582833317, "loss": 1.1864, "step": 4696 }, { "epoch": 0.9548688757877617, "grad_norm": 0.1332157850265503, "learning_rate": 0.00010458659615580188, "loss": 1.0648, "step": 4697 }, { "epoch": 0.9550721691400691, "grad_norm": 0.14668650925159454, "learning_rate": 0.00010456625648327063, "loss": 1.0517, "step": 4698 }, { "epoch": 0.9552754624923765, "grad_norm": 0.15106475353240967, "learning_rate": 0.00010454591681073936, "loss": 1.2643, "step": 4699 }, { "epoch": 0.9554787558446839, "grad_norm": 0.1326945424079895, "learning_rate": 0.00010452557713820808, "loss": 1.0154, "step": 4700 }, { "epoch": 0.9556820491969913, "grad_norm": 0.14156071841716766, "learning_rate": 0.00010450523746567679, "loss": 0.988, "step": 4701 }, { "epoch": 0.9558853425492987, "grad_norm": 0.13619528710842133, "learning_rate": 0.00010448489779314554, "loss": 0.9761, "step": 4702 }, { "epoch": 0.956088635901606, "grad_norm": 0.1572863608598709, "learning_rate": 0.00010446455812061427, "loss": 1.2895, "step": 4703 }, { "epoch": 0.9562919292539134, "grad_norm": 0.13423630595207214, "learning_rate": 0.00010444421844808299, "loss": 0.9817, "step": 4704 }, { "epoch": 0.9564952226062208, "grad_norm": 0.13150696456432343, "learning_rate": 0.0001044238787755517, "loss": 0.9353, "step": 4705 }, { "epoch": 0.9566985159585282, "grad_norm": 0.13118426501750946, "learning_rate": 0.00010440353910302045, "loss": 0.956, "step": 4706 }, { "epoch": 0.9569018093108356, "grad_norm": 0.1445060670375824, "learning_rate": 0.00010438319943048918, "loss": 1.1343, "step": 4707 }, { "epoch": 0.957105102663143, "grad_norm": 0.12421584874391556, "learning_rate": 0.0001043628597579579, "loss": 0.9257, "step": 4708 }, { "epoch": 0.9573083960154503, "grad_norm": 0.1518603265285492, "learning_rate": 0.00010434252008542661, "loss": 1.0934, "step": 4709 }, { "epoch": 0.9575116893677577, "grad_norm": 0.13642071187496185, "learning_rate": 0.00010432218041289537, "loss": 1.1008, "step": 4710 }, { "epoch": 0.9577149827200651, "grad_norm": 0.13501964509487152, "learning_rate": 0.00010430184074036409, "loss": 1.0246, "step": 4711 }, { "epoch": 0.9579182760723725, "grad_norm": 0.14059419929981232, "learning_rate": 0.00010428150106783281, "loss": 1.0548, "step": 4712 }, { "epoch": 0.9581215694246799, "grad_norm": 0.13295401632785797, "learning_rate": 0.00010426116139530153, "loss": 1.0868, "step": 4713 }, { "epoch": 0.9583248627769871, "grad_norm": 0.1419042944908142, "learning_rate": 0.00010424082172277028, "loss": 1.0969, "step": 4714 }, { "epoch": 0.9585281561292945, "grad_norm": 0.13607093691825867, "learning_rate": 0.000104220482050239, "loss": 1.1988, "step": 4715 }, { "epoch": 0.9587314494816019, "grad_norm": 0.15295760333538055, "learning_rate": 0.00010420014237770773, "loss": 1.1384, "step": 4716 }, { "epoch": 0.9589347428339093, "grad_norm": 0.13830776512622833, "learning_rate": 0.00010417980270517644, "loss": 1.1107, "step": 4717 }, { "epoch": 0.9591380361862167, "grad_norm": 0.15392519533634186, "learning_rate": 0.00010415946303264519, "loss": 1.3216, "step": 4718 }, { "epoch": 0.9593413295385241, "grad_norm": 0.1344476342201233, "learning_rate": 0.00010413912336011391, "loss": 1.1368, "step": 4719 }, { "epoch": 0.9595446228908314, "grad_norm": 0.142112597823143, "learning_rate": 0.00010411878368758264, "loss": 0.997, "step": 4720 }, { "epoch": 0.9597479162431388, "grad_norm": 0.12999044358730316, "learning_rate": 0.00010409844401505136, "loss": 0.978, "step": 4721 }, { "epoch": 0.9599512095954462, "grad_norm": 0.13146638870239258, "learning_rate": 0.0001040781043425201, "loss": 1.0563, "step": 4722 }, { "epoch": 0.9601545029477536, "grad_norm": 0.15361693501472473, "learning_rate": 0.00010405776466998882, "loss": 1.1783, "step": 4723 }, { "epoch": 0.960357796300061, "grad_norm": 0.13315477967262268, "learning_rate": 0.00010403742499745755, "loss": 0.97, "step": 4724 }, { "epoch": 0.9605610896523684, "grad_norm": 0.13661111891269684, "learning_rate": 0.00010401708532492627, "loss": 1.06, "step": 4725 }, { "epoch": 0.9607643830046757, "grad_norm": 0.13284547626972198, "learning_rate": 0.00010399674565239501, "loss": 1.0313, "step": 4726 }, { "epoch": 0.9609676763569831, "grad_norm": 0.13400302827358246, "learning_rate": 0.00010397640597986374, "loss": 1.0858, "step": 4727 }, { "epoch": 0.9611709697092905, "grad_norm": 0.12329299002885818, "learning_rate": 0.00010395606630733246, "loss": 0.8973, "step": 4728 }, { "epoch": 0.9613742630615979, "grad_norm": 0.14118091762065887, "learning_rate": 0.00010393572663480118, "loss": 1.0407, "step": 4729 }, { "epoch": 0.9615775564139053, "grad_norm": 0.13104970753192902, "learning_rate": 0.00010391538696226992, "loss": 0.9438, "step": 4730 }, { "epoch": 0.9617808497662127, "grad_norm": 0.12976235151290894, "learning_rate": 0.00010389504728973865, "loss": 0.989, "step": 4731 }, { "epoch": 0.96198414311852, "grad_norm": 0.12546932697296143, "learning_rate": 0.00010387470761720737, "loss": 0.9445, "step": 4732 }, { "epoch": 0.9621874364708274, "grad_norm": 0.14191336929798126, "learning_rate": 0.0001038543679446761, "loss": 1.132, "step": 4733 }, { "epoch": 0.9623907298231348, "grad_norm": 0.14218741655349731, "learning_rate": 0.00010383402827214483, "loss": 1.046, "step": 4734 }, { "epoch": 0.9625940231754422, "grad_norm": 0.15692010521888733, "learning_rate": 0.00010381368859961356, "loss": 1.1352, "step": 4735 }, { "epoch": 0.9627973165277496, "grad_norm": 0.1295771300792694, "learning_rate": 0.00010379334892708228, "loss": 1.0675, "step": 4736 }, { "epoch": 0.9630006098800569, "grad_norm": 0.15568415820598602, "learning_rate": 0.00010377300925455101, "loss": 1.132, "step": 4737 }, { "epoch": 0.9632039032323643, "grad_norm": 0.12996648252010345, "learning_rate": 0.00010375266958201972, "loss": 1.0338, "step": 4738 }, { "epoch": 0.9634071965846717, "grad_norm": 0.14026613533496857, "learning_rate": 0.00010373232990948847, "loss": 1.1937, "step": 4739 }, { "epoch": 0.9636104899369791, "grad_norm": 0.13028547167778015, "learning_rate": 0.0001037119902369572, "loss": 0.8526, "step": 4740 }, { "epoch": 0.9638137832892865, "grad_norm": 0.12742145359516144, "learning_rate": 0.00010369165056442592, "loss": 0.85, "step": 4741 }, { "epoch": 0.9640170766415939, "grad_norm": 0.11644089221954346, "learning_rate": 0.00010367131089189463, "loss": 0.8878, "step": 4742 }, { "epoch": 0.9642203699939011, "grad_norm": 0.1318705528974533, "learning_rate": 0.00010365097121936338, "loss": 1.0346, "step": 4743 }, { "epoch": 0.9644236633462085, "grad_norm": 0.14477600157260895, "learning_rate": 0.0001036306315468321, "loss": 1.1933, "step": 4744 }, { "epoch": 0.964626956698516, "grad_norm": 0.13914555311203003, "learning_rate": 0.00010361029187430083, "loss": 1.1292, "step": 4745 }, { "epoch": 0.9648302500508233, "grad_norm": 0.1304524540901184, "learning_rate": 0.00010358995220176954, "loss": 0.9745, "step": 4746 }, { "epoch": 0.9650335434031307, "grad_norm": 0.1401352435350418, "learning_rate": 0.00010356961252923829, "loss": 1.072, "step": 4747 }, { "epoch": 0.9652368367554381, "grad_norm": 0.1341739445924759, "learning_rate": 0.00010354927285670702, "loss": 0.9751, "step": 4748 }, { "epoch": 0.9654401301077454, "grad_norm": 0.13538521528244019, "learning_rate": 0.00010352893318417574, "loss": 1.0814, "step": 4749 }, { "epoch": 0.9656434234600528, "grad_norm": 0.14047326147556305, "learning_rate": 0.00010350859351164445, "loss": 1.1741, "step": 4750 }, { "epoch": 0.9658467168123602, "grad_norm": 0.13722112774848938, "learning_rate": 0.0001034882538391132, "loss": 0.928, "step": 4751 }, { "epoch": 0.9660500101646676, "grad_norm": 0.12714186310768127, "learning_rate": 0.00010346791416658193, "loss": 0.7725, "step": 4752 }, { "epoch": 0.966253303516975, "grad_norm": 0.12423626333475113, "learning_rate": 0.00010344757449405065, "loss": 0.8907, "step": 4753 }, { "epoch": 0.9664565968692824, "grad_norm": 0.1605733186006546, "learning_rate": 0.00010342723482151936, "loss": 1.2174, "step": 4754 }, { "epoch": 0.9666598902215897, "grad_norm": 0.14010462164878845, "learning_rate": 0.00010340689514898812, "loss": 1.0363, "step": 4755 }, { "epoch": 0.9668631835738971, "grad_norm": 0.13562703132629395, "learning_rate": 0.00010338655547645684, "loss": 0.9911, "step": 4756 }, { "epoch": 0.9670664769262045, "grad_norm": 0.1469573825597763, "learning_rate": 0.00010336621580392556, "loss": 1.0719, "step": 4757 }, { "epoch": 0.9672697702785119, "grad_norm": 0.1374790370464325, "learning_rate": 0.00010334587613139428, "loss": 1.1695, "step": 4758 }, { "epoch": 0.9674730636308193, "grad_norm": 0.12282276153564453, "learning_rate": 0.00010332553645886303, "loss": 0.9288, "step": 4759 }, { "epoch": 0.9676763569831267, "grad_norm": 0.12597915530204773, "learning_rate": 0.00010330519678633175, "loss": 0.9853, "step": 4760 }, { "epoch": 0.967879650335434, "grad_norm": 0.13501757383346558, "learning_rate": 0.00010328485711380048, "loss": 1.0153, "step": 4761 }, { "epoch": 0.9680829436877414, "grad_norm": 0.1333313286304474, "learning_rate": 0.00010326451744126919, "loss": 1.0339, "step": 4762 }, { "epoch": 0.9682862370400488, "grad_norm": 0.13838358223438263, "learning_rate": 0.00010324417776873794, "loss": 0.9584, "step": 4763 }, { "epoch": 0.9684895303923562, "grad_norm": 0.14973820745944977, "learning_rate": 0.00010322383809620666, "loss": 0.9757, "step": 4764 }, { "epoch": 0.9686928237446636, "grad_norm": 0.12162914872169495, "learning_rate": 0.00010320349842367539, "loss": 0.9844, "step": 4765 }, { "epoch": 0.9688961170969709, "grad_norm": 0.1409245729446411, "learning_rate": 0.0001031831587511441, "loss": 1.0499, "step": 4766 }, { "epoch": 0.9690994104492783, "grad_norm": 0.1342407464981079, "learning_rate": 0.00010316281907861285, "loss": 1.0489, "step": 4767 }, { "epoch": 0.9693027038015857, "grad_norm": 0.12758475542068481, "learning_rate": 0.00010314247940608157, "loss": 0.8951, "step": 4768 }, { "epoch": 0.9695059971538931, "grad_norm": 0.13202863931655884, "learning_rate": 0.0001031221397335503, "loss": 1.0063, "step": 4769 }, { "epoch": 0.9697092905062005, "grad_norm": 0.14444518089294434, "learning_rate": 0.00010310180006101901, "loss": 1.1248, "step": 4770 }, { "epoch": 0.9699125838585079, "grad_norm": 0.12190812826156616, "learning_rate": 0.00010308146038848776, "loss": 0.9866, "step": 4771 }, { "epoch": 0.9701158772108152, "grad_norm": 0.1404780000448227, "learning_rate": 0.00010306112071595649, "loss": 1.1731, "step": 4772 }, { "epoch": 0.9703191705631226, "grad_norm": 0.13559852540493011, "learning_rate": 0.00010304078104342521, "loss": 0.8862, "step": 4773 }, { "epoch": 0.97052246391543, "grad_norm": 0.12813040614128113, "learning_rate": 0.00010302044137089392, "loss": 1.0137, "step": 4774 }, { "epoch": 0.9707257572677374, "grad_norm": 0.14801639318466187, "learning_rate": 0.00010300010169836267, "loss": 1.1613, "step": 4775 }, { "epoch": 0.9709290506200448, "grad_norm": 0.13504531979560852, "learning_rate": 0.0001029797620258314, "loss": 1.0006, "step": 4776 }, { "epoch": 0.9711323439723522, "grad_norm": 0.14465901255607605, "learning_rate": 0.00010295942235330012, "loss": 1.2234, "step": 4777 }, { "epoch": 0.9713356373246594, "grad_norm": 0.1441653072834015, "learning_rate": 0.00010293908268076885, "loss": 1.1343, "step": 4778 }, { "epoch": 0.9715389306769668, "grad_norm": 0.14294147491455078, "learning_rate": 0.00010291874300823756, "loss": 1.0931, "step": 4779 }, { "epoch": 0.9717422240292742, "grad_norm": 0.13316182792186737, "learning_rate": 0.00010289840333570631, "loss": 0.9781, "step": 4780 }, { "epoch": 0.9719455173815816, "grad_norm": 0.12570516765117645, "learning_rate": 0.00010287806366317503, "loss": 0.9451, "step": 4781 }, { "epoch": 0.972148810733889, "grad_norm": 0.14120420813560486, "learning_rate": 0.00010285772399064376, "loss": 1.0823, "step": 4782 }, { "epoch": 0.9723521040861964, "grad_norm": 0.12957200407981873, "learning_rate": 0.00010283738431811247, "loss": 0.9029, "step": 4783 }, { "epoch": 0.9725553974385037, "grad_norm": 0.1534145623445511, "learning_rate": 0.00010281704464558122, "loss": 1.0809, "step": 4784 }, { "epoch": 0.9727586907908111, "grad_norm": 0.1441192328929901, "learning_rate": 0.00010279670497304994, "loss": 1.0464, "step": 4785 }, { "epoch": 0.9729619841431185, "grad_norm": 0.14043961465358734, "learning_rate": 0.00010277636530051867, "loss": 1.2524, "step": 4786 }, { "epoch": 0.9731652774954259, "grad_norm": 0.1316906362771988, "learning_rate": 0.00010275602562798738, "loss": 0.9884, "step": 4787 }, { "epoch": 0.9733685708477333, "grad_norm": 0.14289647340774536, "learning_rate": 0.00010273568595545613, "loss": 1.0516, "step": 4788 }, { "epoch": 0.9735718642000406, "grad_norm": 0.14448580145835876, "learning_rate": 0.00010271534628292486, "loss": 1.0681, "step": 4789 }, { "epoch": 0.973775157552348, "grad_norm": 0.1232059895992279, "learning_rate": 0.00010269500661039358, "loss": 0.8642, "step": 4790 }, { "epoch": 0.9739784509046554, "grad_norm": 0.14698442816734314, "learning_rate": 0.00010267466693786229, "loss": 1.3623, "step": 4791 }, { "epoch": 0.9741817442569628, "grad_norm": 0.13110321760177612, "learning_rate": 0.00010265432726533104, "loss": 0.9757, "step": 4792 }, { "epoch": 0.9743850376092702, "grad_norm": 0.14491407573223114, "learning_rate": 0.00010263398759279977, "loss": 1.0556, "step": 4793 }, { "epoch": 0.9745883309615776, "grad_norm": 0.13034255802631378, "learning_rate": 0.00010261364792026849, "loss": 1.0508, "step": 4794 }, { "epoch": 0.9747916243138849, "grad_norm": 0.14367951452732086, "learning_rate": 0.0001025933082477372, "loss": 1.0724, "step": 4795 }, { "epoch": 0.9749949176661923, "grad_norm": 0.14647988975048065, "learning_rate": 0.00010257296857520595, "loss": 1.2706, "step": 4796 }, { "epoch": 0.9751982110184997, "grad_norm": 0.1294867992401123, "learning_rate": 0.00010255262890267468, "loss": 0.8556, "step": 4797 }, { "epoch": 0.9754015043708071, "grad_norm": 0.1383471041917801, "learning_rate": 0.0001025322892301434, "loss": 1.0519, "step": 4798 }, { "epoch": 0.9756047977231145, "grad_norm": 0.12977120280265808, "learning_rate": 0.00010251194955761211, "loss": 0.8823, "step": 4799 }, { "epoch": 0.9758080910754219, "grad_norm": 0.13635462522506714, "learning_rate": 0.00010249160988508087, "loss": 1.0164, "step": 4800 }, { "epoch": 0.9760113844277292, "grad_norm": 0.14203500747680664, "learning_rate": 0.00010247127021254959, "loss": 1.0384, "step": 4801 }, { "epoch": 0.9762146777800366, "grad_norm": 0.12203938513994217, "learning_rate": 0.00010245093054001831, "loss": 0.9222, "step": 4802 }, { "epoch": 0.976417971132344, "grad_norm": 0.16445662081241608, "learning_rate": 0.00010243059086748703, "loss": 1.2655, "step": 4803 }, { "epoch": 0.9766212644846514, "grad_norm": 0.13465256989002228, "learning_rate": 0.00010241025119495578, "loss": 1.0028, "step": 4804 }, { "epoch": 0.9768245578369588, "grad_norm": 0.13303688168525696, "learning_rate": 0.0001023899115224245, "loss": 0.9123, "step": 4805 }, { "epoch": 0.9770278511892662, "grad_norm": 0.15964846312999725, "learning_rate": 0.00010236957184989323, "loss": 1.203, "step": 4806 }, { "epoch": 0.9772311445415734, "grad_norm": 0.13582561910152435, "learning_rate": 0.00010234923217736194, "loss": 1.0838, "step": 4807 }, { "epoch": 0.9774344378938808, "grad_norm": 0.12856504321098328, "learning_rate": 0.00010232889250483069, "loss": 1.075, "step": 4808 }, { "epoch": 0.9776377312461882, "grad_norm": 0.15734675526618958, "learning_rate": 0.00010230855283229941, "loss": 1.0715, "step": 4809 }, { "epoch": 0.9778410245984956, "grad_norm": 0.12550866603851318, "learning_rate": 0.00010228821315976814, "loss": 0.9564, "step": 4810 }, { "epoch": 0.978044317950803, "grad_norm": 0.14596353471279144, "learning_rate": 0.00010226787348723685, "loss": 1.1543, "step": 4811 }, { "epoch": 0.9782476113031104, "grad_norm": 0.13755320012569427, "learning_rate": 0.0001022475338147056, "loss": 1.1465, "step": 4812 }, { "epoch": 0.9784509046554177, "grad_norm": 0.15162555873394012, "learning_rate": 0.00010222719414217432, "loss": 1.1004, "step": 4813 }, { "epoch": 0.9786541980077251, "grad_norm": 0.1351086050271988, "learning_rate": 0.00010220685446964305, "loss": 1.0812, "step": 4814 }, { "epoch": 0.9788574913600325, "grad_norm": 0.14810827374458313, "learning_rate": 0.00010218651479711176, "loss": 1.1604, "step": 4815 }, { "epoch": 0.9790607847123399, "grad_norm": 0.13627979159355164, "learning_rate": 0.00010216617512458051, "loss": 1.0392, "step": 4816 }, { "epoch": 0.9792640780646473, "grad_norm": 0.14900024235248566, "learning_rate": 0.00010214583545204924, "loss": 1.0946, "step": 4817 }, { "epoch": 0.9794673714169546, "grad_norm": 0.14076335728168488, "learning_rate": 0.00010212549577951796, "loss": 1.0511, "step": 4818 }, { "epoch": 0.979670664769262, "grad_norm": 0.12886211276054382, "learning_rate": 0.00010210515610698667, "loss": 1.0244, "step": 4819 }, { "epoch": 0.9798739581215694, "grad_norm": 0.1440308839082718, "learning_rate": 0.0001020848164344554, "loss": 1.1237, "step": 4820 }, { "epoch": 0.9800772514738768, "grad_norm": 0.1480611264705658, "learning_rate": 0.00010206447676192415, "loss": 1.1623, "step": 4821 }, { "epoch": 0.9802805448261842, "grad_norm": 0.1361786127090454, "learning_rate": 0.00010204413708939287, "loss": 1.0342, "step": 4822 }, { "epoch": 0.9804838381784916, "grad_norm": 0.13941383361816406, "learning_rate": 0.00010202379741686158, "loss": 1.2155, "step": 4823 }, { "epoch": 0.9806871315307989, "grad_norm": 0.13382181525230408, "learning_rate": 0.0001020034577443303, "loss": 1.0683, "step": 4824 }, { "epoch": 0.9808904248831063, "grad_norm": 0.14079181849956512, "learning_rate": 0.00010198311807179906, "loss": 1.1914, "step": 4825 }, { "epoch": 0.9810937182354137, "grad_norm": 0.1520659178495407, "learning_rate": 0.00010196277839926778, "loss": 1.0614, "step": 4826 }, { "epoch": 0.9812970115877211, "grad_norm": 0.12844201922416687, "learning_rate": 0.0001019424387267365, "loss": 0.9881, "step": 4827 }, { "epoch": 0.9815003049400285, "grad_norm": 0.12539590895175934, "learning_rate": 0.00010192209905420522, "loss": 1.0109, "step": 4828 }, { "epoch": 0.9817035982923359, "grad_norm": 0.11923157423734665, "learning_rate": 0.00010190175938167397, "loss": 0.8702, "step": 4829 }, { "epoch": 0.9819068916446432, "grad_norm": 0.1370554268360138, "learning_rate": 0.0001018814197091427, "loss": 0.9956, "step": 4830 }, { "epoch": 0.9821101849969506, "grad_norm": 0.14605580270290375, "learning_rate": 0.0001018610800366114, "loss": 1.1405, "step": 4831 }, { "epoch": 0.982313478349258, "grad_norm": 0.13594669103622437, "learning_rate": 0.00010184074036408013, "loss": 1.04, "step": 4832 }, { "epoch": 0.9825167717015654, "grad_norm": 0.13905544579029083, "learning_rate": 0.00010182040069154888, "loss": 1.0421, "step": 4833 }, { "epoch": 0.9827200650538728, "grad_norm": 0.13536664843559265, "learning_rate": 0.0001018000610190176, "loss": 0.952, "step": 4834 }, { "epoch": 0.9829233584061802, "grad_norm": 0.12906044721603394, "learning_rate": 0.00010177972134648633, "loss": 1.0468, "step": 4835 }, { "epoch": 0.9831266517584875, "grad_norm": 0.14076603949069977, "learning_rate": 0.00010175938167395504, "loss": 1.03, "step": 4836 }, { "epoch": 0.9833299451107949, "grad_norm": 0.12814532220363617, "learning_rate": 0.00010173904200142379, "loss": 0.9141, "step": 4837 }, { "epoch": 0.9835332384631023, "grad_norm": 0.13187144696712494, "learning_rate": 0.00010171870232889252, "loss": 0.9983, "step": 4838 }, { "epoch": 0.9837365318154097, "grad_norm": 0.12513421475887299, "learning_rate": 0.00010169836265636124, "loss": 0.9726, "step": 4839 }, { "epoch": 0.983939825167717, "grad_norm": 0.1411403864622116, "learning_rate": 0.00010167802298382995, "loss": 1.1544, "step": 4840 }, { "epoch": 0.9841431185200243, "grad_norm": 0.13814745843410492, "learning_rate": 0.0001016576833112987, "loss": 1.0902, "step": 4841 }, { "epoch": 0.9843464118723317, "grad_norm": 0.15102095901966095, "learning_rate": 0.00010163734363876743, "loss": 1.2006, "step": 4842 }, { "epoch": 0.9845497052246391, "grad_norm": 0.12632615864276886, "learning_rate": 0.00010161700396623615, "loss": 0.9465, "step": 4843 }, { "epoch": 0.9847529985769465, "grad_norm": 0.14967390894889832, "learning_rate": 0.00010159666429370486, "loss": 1.1275, "step": 4844 }, { "epoch": 0.9849562919292539, "grad_norm": 0.12356138229370117, "learning_rate": 0.00010157632462117362, "loss": 1.0253, "step": 4845 }, { "epoch": 0.9851595852815613, "grad_norm": 0.12783940136432648, "learning_rate": 0.00010155598494864234, "loss": 1.0647, "step": 4846 }, { "epoch": 0.9853628786338686, "grad_norm": 0.12966394424438477, "learning_rate": 0.00010153564527611106, "loss": 0.961, "step": 4847 }, { "epoch": 0.985566171986176, "grad_norm": 0.13176754117012024, "learning_rate": 0.00010151530560357977, "loss": 0.9036, "step": 4848 }, { "epoch": 0.9857694653384834, "grad_norm": 0.1355212777853012, "learning_rate": 0.00010149496593104853, "loss": 1.1102, "step": 4849 }, { "epoch": 0.9859727586907908, "grad_norm": 0.14852295815944672, "learning_rate": 0.00010147462625851725, "loss": 1.1925, "step": 4850 }, { "epoch": 0.9861760520430982, "grad_norm": 0.1206142008304596, "learning_rate": 0.00010145428658598598, "loss": 0.8656, "step": 4851 }, { "epoch": 0.9863793453954056, "grad_norm": 0.1338338702917099, "learning_rate": 0.00010143394691345469, "loss": 1.0639, "step": 4852 }, { "epoch": 0.9865826387477129, "grad_norm": 0.1332140564918518, "learning_rate": 0.00010141360724092344, "loss": 1.1448, "step": 4853 }, { "epoch": 0.9867859321000203, "grad_norm": 0.14103251695632935, "learning_rate": 0.00010139326756839216, "loss": 1.174, "step": 4854 }, { "epoch": 0.9869892254523277, "grad_norm": 0.13589176535606384, "learning_rate": 0.00010137292789586089, "loss": 0.9713, "step": 4855 }, { "epoch": 0.9871925188046351, "grad_norm": 0.13444828987121582, "learning_rate": 0.0001013525882233296, "loss": 1.0798, "step": 4856 }, { "epoch": 0.9873958121569425, "grad_norm": 0.15302203595638275, "learning_rate": 0.00010133224855079835, "loss": 1.1335, "step": 4857 }, { "epoch": 0.9875991055092499, "grad_norm": 0.1333010047674179, "learning_rate": 0.00010131190887826707, "loss": 1.0179, "step": 4858 }, { "epoch": 0.9878023988615572, "grad_norm": 0.1340804398059845, "learning_rate": 0.0001012915692057358, "loss": 0.9451, "step": 4859 }, { "epoch": 0.9880056922138646, "grad_norm": 0.14129623770713806, "learning_rate": 0.00010127122953320451, "loss": 1.1247, "step": 4860 }, { "epoch": 0.988208985566172, "grad_norm": 0.1130819022655487, "learning_rate": 0.00010125088986067323, "loss": 0.9078, "step": 4861 }, { "epoch": 0.9884122789184794, "grad_norm": 0.13647128641605377, "learning_rate": 0.00010123055018814199, "loss": 1.0182, "step": 4862 }, { "epoch": 0.9886155722707868, "grad_norm": 0.1271669566631317, "learning_rate": 0.00010121021051561071, "loss": 1.067, "step": 4863 }, { "epoch": 0.9888188656230942, "grad_norm": 0.12524248659610748, "learning_rate": 0.00010118987084307942, "loss": 0.9665, "step": 4864 }, { "epoch": 0.9890221589754015, "grad_norm": 0.13268783688545227, "learning_rate": 0.00010116953117054814, "loss": 1.1719, "step": 4865 }, { "epoch": 0.9892254523277089, "grad_norm": 0.13032928109169006, "learning_rate": 0.0001011491914980169, "loss": 1.1512, "step": 4866 }, { "epoch": 0.9894287456800163, "grad_norm": 0.1325322538614273, "learning_rate": 0.00010112885182548562, "loss": 0.954, "step": 4867 }, { "epoch": 0.9896320390323237, "grad_norm": 0.13537730276584625, "learning_rate": 0.00010110851215295433, "loss": 0.9071, "step": 4868 }, { "epoch": 0.9898353323846311, "grad_norm": 0.13004449009895325, "learning_rate": 0.00010108817248042306, "loss": 1.0135, "step": 4869 }, { "epoch": 0.9900386257369383, "grad_norm": 0.1332450807094574, "learning_rate": 0.00010106783280789181, "loss": 0.9139, "step": 4870 }, { "epoch": 0.9902419190892457, "grad_norm": 0.1302601844072342, "learning_rate": 0.00010104749313536053, "loss": 1.0315, "step": 4871 }, { "epoch": 0.9904452124415531, "grad_norm": 0.13632024824619293, "learning_rate": 0.00010102715346282924, "loss": 0.8711, "step": 4872 }, { "epoch": 0.9906485057938605, "grad_norm": 0.1373220980167389, "learning_rate": 0.00010100681379029797, "loss": 1.1581, "step": 4873 }, { "epoch": 0.9908517991461679, "grad_norm": 0.11876034736633301, "learning_rate": 0.00010098647411776672, "loss": 0.9445, "step": 4874 }, { "epoch": 0.9910550924984753, "grad_norm": 0.1467241793870926, "learning_rate": 0.00010096613444523544, "loss": 1.1618, "step": 4875 }, { "epoch": 0.9912583858507826, "grad_norm": 0.13242608308792114, "learning_rate": 0.00010094579477270415, "loss": 0.9251, "step": 4876 }, { "epoch": 0.99146167920309, "grad_norm": 0.1570916622877121, "learning_rate": 0.00010092545510017288, "loss": 1.1391, "step": 4877 }, { "epoch": 0.9916649725553974, "grad_norm": 0.1410514861345291, "learning_rate": 0.00010090511542764163, "loss": 1.067, "step": 4878 }, { "epoch": 0.9918682659077048, "grad_norm": 0.13417792320251465, "learning_rate": 0.00010088477575511036, "loss": 1.0545, "step": 4879 }, { "epoch": 0.9920715592600122, "grad_norm": 0.14980773627758026, "learning_rate": 0.00010086443608257907, "loss": 1.1653, "step": 4880 }, { "epoch": 0.9922748526123196, "grad_norm": 0.1144283264875412, "learning_rate": 0.00010084409641004779, "loss": 0.9679, "step": 4881 }, { "epoch": 0.9924781459646269, "grad_norm": 0.13899140059947968, "learning_rate": 0.00010082375673751654, "loss": 1.0324, "step": 4882 }, { "epoch": 0.9926814393169343, "grad_norm": 0.13620680570602417, "learning_rate": 0.00010080341706498527, "loss": 1.0691, "step": 4883 }, { "epoch": 0.9928847326692417, "grad_norm": 0.13614429533481598, "learning_rate": 0.00010078307739245398, "loss": 1.0911, "step": 4884 }, { "epoch": 0.9930880260215491, "grad_norm": 0.15049585700035095, "learning_rate": 0.0001007627377199227, "loss": 1.2096, "step": 4885 }, { "epoch": 0.9932913193738565, "grad_norm": 0.11920803040266037, "learning_rate": 0.00010074239804739145, "loss": 0.8579, "step": 4886 }, { "epoch": 0.9934946127261639, "grad_norm": 0.148103266954422, "learning_rate": 0.00010072205837486018, "loss": 0.9659, "step": 4887 }, { "epoch": 0.9936979060784712, "grad_norm": 0.14606502652168274, "learning_rate": 0.00010070171870232889, "loss": 1.1369, "step": 4888 }, { "epoch": 0.9939011994307786, "grad_norm": 0.15074527263641357, "learning_rate": 0.00010068137902979761, "loss": 1.1467, "step": 4889 }, { "epoch": 0.994104492783086, "grad_norm": 0.1285044550895691, "learning_rate": 0.00010066103935726636, "loss": 0.8631, "step": 4890 }, { "epoch": 0.9943077861353934, "grad_norm": 0.14599795639514923, "learning_rate": 0.00010064069968473509, "loss": 1.136, "step": 4891 }, { "epoch": 0.9945110794877008, "grad_norm": 0.15505965054035187, "learning_rate": 0.0001006203600122038, "loss": 1.2104, "step": 4892 }, { "epoch": 0.9947143728400081, "grad_norm": 0.13279956579208374, "learning_rate": 0.00010060002033967252, "loss": 1.0342, "step": 4893 }, { "epoch": 0.9949176661923155, "grad_norm": 0.1346520632505417, "learning_rate": 0.00010057968066714128, "loss": 1.0414, "step": 4894 }, { "epoch": 0.9951209595446229, "grad_norm": 0.1434224247932434, "learning_rate": 0.00010055934099461, "loss": 1.1259, "step": 4895 }, { "epoch": 0.9953242528969303, "grad_norm": 0.1336824744939804, "learning_rate": 0.00010053900132207873, "loss": 1.1013, "step": 4896 }, { "epoch": 0.9955275462492377, "grad_norm": 0.13659413158893585, "learning_rate": 0.00010051866164954744, "loss": 0.9543, "step": 4897 }, { "epoch": 0.9957308396015451, "grad_norm": 0.13040059804916382, "learning_rate": 0.00010049832197701619, "loss": 0.9669, "step": 4898 }, { "epoch": 0.9959341329538524, "grad_norm": 0.1415984034538269, "learning_rate": 0.00010047798230448491, "loss": 1.0195, "step": 4899 }, { "epoch": 0.9961374263061598, "grad_norm": 0.13746584951877594, "learning_rate": 0.00010045764263195364, "loss": 1.0605, "step": 4900 }, { "epoch": 0.9963407196584672, "grad_norm": 0.1430116593837738, "learning_rate": 0.00010043730295942235, "loss": 1.107, "step": 4901 }, { "epoch": 0.9965440130107746, "grad_norm": 0.11865589022636414, "learning_rate": 0.00010041696328689107, "loss": 0.8887, "step": 4902 }, { "epoch": 0.996747306363082, "grad_norm": 0.11495467275381088, "learning_rate": 0.00010039662361435982, "loss": 0.8365, "step": 4903 }, { "epoch": 0.9969505997153894, "grad_norm": 0.1354401409626007, "learning_rate": 0.00010037628394182855, "loss": 1.1705, "step": 4904 }, { "epoch": 0.9971538930676966, "grad_norm": 0.13998205959796906, "learning_rate": 0.00010035594426929726, "loss": 1.0365, "step": 4905 }, { "epoch": 0.997357186420004, "grad_norm": 0.15044035017490387, "learning_rate": 0.00010033560459676598, "loss": 1.1061, "step": 4906 }, { "epoch": 0.9975604797723114, "grad_norm": 0.1416459083557129, "learning_rate": 0.00010031526492423473, "loss": 1.1155, "step": 4907 }, { "epoch": 0.9977637731246188, "grad_norm": 0.13485343754291534, "learning_rate": 0.00010029492525170346, "loss": 0.9937, "step": 4908 }, { "epoch": 0.9979670664769262, "grad_norm": 0.14948885142803192, "learning_rate": 0.00010027458557917217, "loss": 1.1689, "step": 4909 }, { "epoch": 0.9981703598292336, "grad_norm": 0.1309768706560135, "learning_rate": 0.0001002542459066409, "loss": 0.9428, "step": 4910 }, { "epoch": 0.9983736531815409, "grad_norm": 0.11928943544626236, "learning_rate": 0.00010023390623410965, "loss": 0.8238, "step": 4911 }, { "epoch": 0.9985769465338483, "grad_norm": 0.1389857530593872, "learning_rate": 0.00010021356656157837, "loss": 1.0459, "step": 4912 }, { "epoch": 0.9987802398861557, "grad_norm": 0.14047744870185852, "learning_rate": 0.00010019322688904708, "loss": 0.9594, "step": 4913 }, { "epoch": 0.9989835332384631, "grad_norm": 0.1307019144296646, "learning_rate": 0.0001001728872165158, "loss": 1.1549, "step": 4914 }, { "epoch": 0.9991868265907705, "grad_norm": 0.13652239739894867, "learning_rate": 0.00010015254754398456, "loss": 1.142, "step": 4915 }, { "epoch": 0.9993901199430779, "grad_norm": 0.1404002457857132, "learning_rate": 0.00010013220787145328, "loss": 1.0275, "step": 4916 }, { "epoch": 0.9995934132953852, "grad_norm": 0.14137892425060272, "learning_rate": 0.00010011186819892199, "loss": 1.1169, "step": 4917 }, { "epoch": 0.9997967066476926, "grad_norm": 0.12362517416477203, "learning_rate": 0.00010009152852639072, "loss": 0.9733, "step": 4918 }, { "epoch": 1.0, "grad_norm": 0.16257604956626892, "learning_rate": 0.00010007118885385947, "loss": 1.214, "step": 4919 }, { "epoch": 1.0002032933523073, "grad_norm": 0.13455824553966522, "learning_rate": 0.0001000508491813282, "loss": 1.1717, "step": 4920 }, { "epoch": 1.0004065867046148, "grad_norm": 0.1244397908449173, "learning_rate": 0.0001000305095087969, "loss": 0.9873, "step": 4921 }, { "epoch": 1.000609880056922, "grad_norm": 0.13148358464241028, "learning_rate": 0.00010001016983626563, "loss": 1.0512, "step": 4922 }, { "epoch": 1.0008131734092296, "grad_norm": 0.14207464456558228, "learning_rate": 9.998983016373437e-05, "loss": 1.1071, "step": 4923 }, { "epoch": 1.0010164667615369, "grad_norm": 0.1350506693124771, "learning_rate": 9.99694904912031e-05, "loss": 1.1134, "step": 4924 }, { "epoch": 1.0012197601138442, "grad_norm": 0.14575833082199097, "learning_rate": 9.994915081867182e-05, "loss": 1.0793, "step": 4925 }, { "epoch": 1.0014230534661517, "grad_norm": 0.13254649937152863, "learning_rate": 9.992881114614055e-05, "loss": 0.9843, "step": 4926 }, { "epoch": 1.001626346818459, "grad_norm": 0.13385853171348572, "learning_rate": 9.990847147360928e-05, "loss": 1.0446, "step": 4927 }, { "epoch": 1.0018296401707665, "grad_norm": 0.13908478617668152, "learning_rate": 9.988813180107802e-05, "loss": 0.9968, "step": 4928 }, { "epoch": 1.0020329335230738, "grad_norm": 0.13923251628875732, "learning_rate": 9.986779212854673e-05, "loss": 1.0023, "step": 4929 }, { "epoch": 1.0022362268753813, "grad_norm": 0.1373911201953888, "learning_rate": 9.984745245601547e-05, "loss": 1.1753, "step": 4930 }, { "epoch": 1.0024395202276886, "grad_norm": 0.13491371273994446, "learning_rate": 9.982711278348419e-05, "loss": 0.893, "step": 4931 }, { "epoch": 1.0026428135799959, "grad_norm": 0.12279137223958969, "learning_rate": 9.980677311095293e-05, "loss": 0.8334, "step": 4932 }, { "epoch": 1.0028461069323034, "grad_norm": 0.1489049643278122, "learning_rate": 9.978643343842164e-05, "loss": 1.2196, "step": 4933 }, { "epoch": 1.0030494002846106, "grad_norm": 0.15800416469573975, "learning_rate": 9.976609376589038e-05, "loss": 1.1065, "step": 4934 }, { "epoch": 1.0032526936369182, "grad_norm": 0.12695717811584473, "learning_rate": 9.97457540933591e-05, "loss": 0.8969, "step": 4935 }, { "epoch": 1.0034559869892254, "grad_norm": 0.12970462441444397, "learning_rate": 9.972541442082784e-05, "loss": 0.9748, "step": 4936 }, { "epoch": 1.0036592803415327, "grad_norm": 0.13583384454250336, "learning_rate": 9.970507474829655e-05, "loss": 0.9943, "step": 4937 }, { "epoch": 1.0038625736938402, "grad_norm": 0.13171210885047913, "learning_rate": 9.968473507576529e-05, "loss": 1.0066, "step": 4938 }, { "epoch": 1.0040658670461475, "grad_norm": 0.140077605843544, "learning_rate": 9.966439540323401e-05, "loss": 1.0276, "step": 4939 }, { "epoch": 1.004269160398455, "grad_norm": 0.13248348236083984, "learning_rate": 9.964405573070275e-05, "loss": 0.9836, "step": 4940 }, { "epoch": 1.0044724537507623, "grad_norm": 0.1502828449010849, "learning_rate": 9.962371605817146e-05, "loss": 1.175, "step": 4941 }, { "epoch": 1.0046757471030698, "grad_norm": 0.14695493876934052, "learning_rate": 9.96033763856402e-05, "loss": 0.963, "step": 4942 }, { "epoch": 1.0048790404553771, "grad_norm": 0.14214938879013062, "learning_rate": 9.958303671310892e-05, "loss": 1.0651, "step": 4943 }, { "epoch": 1.0050823338076844, "grad_norm": 0.14761728048324585, "learning_rate": 9.956269704057765e-05, "loss": 0.9907, "step": 4944 }, { "epoch": 1.005285627159992, "grad_norm": 0.13151785731315613, "learning_rate": 9.954235736804637e-05, "loss": 0.8793, "step": 4945 }, { "epoch": 1.0054889205122992, "grad_norm": 0.1452670693397522, "learning_rate": 9.95220176955151e-05, "loss": 1.0906, "step": 4946 }, { "epoch": 1.0056922138646067, "grad_norm": 0.13930079340934753, "learning_rate": 9.950167802298384e-05, "loss": 0.9598, "step": 4947 }, { "epoch": 1.005895507216914, "grad_norm": 0.12317246198654175, "learning_rate": 9.948133835045256e-05, "loss": 0.9429, "step": 4948 }, { "epoch": 1.0060988005692213, "grad_norm": 0.13415516912937164, "learning_rate": 9.946099867792128e-05, "loss": 1.0848, "step": 4949 }, { "epoch": 1.0063020939215288, "grad_norm": 0.13976556062698364, "learning_rate": 9.944065900539001e-05, "loss": 0.934, "step": 4950 }, { "epoch": 1.006505387273836, "grad_norm": 0.13384398818016052, "learning_rate": 9.942031933285875e-05, "loss": 0.955, "step": 4951 }, { "epoch": 1.0067086806261436, "grad_norm": 0.14308519661426544, "learning_rate": 9.939997966032747e-05, "loss": 0.9543, "step": 4952 }, { "epoch": 1.006911973978451, "grad_norm": 0.14340607821941376, "learning_rate": 9.937963998779621e-05, "loss": 1.047, "step": 4953 }, { "epoch": 1.0071152673307582, "grad_norm": 0.14457905292510986, "learning_rate": 9.935930031526492e-05, "loss": 0.9937, "step": 4954 }, { "epoch": 1.0073185606830657, "grad_norm": 0.13555844128131866, "learning_rate": 9.933896064273366e-05, "loss": 1.0211, "step": 4955 }, { "epoch": 1.007521854035373, "grad_norm": 0.1536429524421692, "learning_rate": 9.931862097020238e-05, "loss": 1.188, "step": 4956 }, { "epoch": 1.0077251473876805, "grad_norm": 0.13193362951278687, "learning_rate": 9.929828129767112e-05, "loss": 0.9143, "step": 4957 }, { "epoch": 1.0079284407399878, "grad_norm": 0.14066417515277863, "learning_rate": 9.927794162513983e-05, "loss": 1.0662, "step": 4958 }, { "epoch": 1.0081317340922953, "grad_norm": 0.13579119741916656, "learning_rate": 9.925760195260857e-05, "loss": 0.8999, "step": 4959 }, { "epoch": 1.0083350274446026, "grad_norm": 0.14911122620105743, "learning_rate": 9.92372622800773e-05, "loss": 1.3171, "step": 4960 }, { "epoch": 1.0085383207969099, "grad_norm": 0.1447262316942215, "learning_rate": 9.921692260754603e-05, "loss": 1.0899, "step": 4961 }, { "epoch": 1.0087416141492174, "grad_norm": 0.1513487845659256, "learning_rate": 9.919658293501474e-05, "loss": 1.0844, "step": 4962 }, { "epoch": 1.0089449075015247, "grad_norm": 0.1470583975315094, "learning_rate": 9.917624326248348e-05, "loss": 1.1176, "step": 4963 }, { "epoch": 1.0091482008538322, "grad_norm": 0.13596630096435547, "learning_rate": 9.91559035899522e-05, "loss": 1.0829, "step": 4964 }, { "epoch": 1.0093514942061395, "grad_norm": 0.1411203145980835, "learning_rate": 9.913556391742094e-05, "loss": 1.0523, "step": 4965 }, { "epoch": 1.0095547875584467, "grad_norm": 0.14842981100082397, "learning_rate": 9.911522424488965e-05, "loss": 1.0513, "step": 4966 }, { "epoch": 1.0097580809107543, "grad_norm": 0.1505335569381714, "learning_rate": 9.909488457235839e-05, "loss": 0.9964, "step": 4967 }, { "epoch": 1.0099613742630615, "grad_norm": 0.12677620351314545, "learning_rate": 9.907454489982712e-05, "loss": 0.9546, "step": 4968 }, { "epoch": 1.010164667615369, "grad_norm": 0.13651777803897858, "learning_rate": 9.905420522729585e-05, "loss": 1.0823, "step": 4969 }, { "epoch": 1.0103679609676763, "grad_norm": 0.1392572969198227, "learning_rate": 9.903386555476457e-05, "loss": 0.9032, "step": 4970 }, { "epoch": 1.0105712543199838, "grad_norm": 0.16775289177894592, "learning_rate": 9.90135258822333e-05, "loss": 1.1434, "step": 4971 }, { "epoch": 1.0107745476722911, "grad_norm": 0.1534387320280075, "learning_rate": 9.899318620970203e-05, "loss": 1.166, "step": 4972 }, { "epoch": 1.0109778410245984, "grad_norm": 0.14180676639080048, "learning_rate": 9.897284653717077e-05, "loss": 1.0688, "step": 4973 }, { "epoch": 1.011181134376906, "grad_norm": 0.13633224368095398, "learning_rate": 9.895250686463948e-05, "loss": 1.0413, "step": 4974 }, { "epoch": 1.0113844277292132, "grad_norm": 0.15582099556922913, "learning_rate": 9.893216719210822e-05, "loss": 1.256, "step": 4975 }, { "epoch": 1.0115877210815207, "grad_norm": 0.16052106022834778, "learning_rate": 9.891182751957694e-05, "loss": 1.3048, "step": 4976 }, { "epoch": 1.011791014433828, "grad_norm": 0.15733475983142853, "learning_rate": 9.889148784704568e-05, "loss": 1.1024, "step": 4977 }, { "epoch": 1.0119943077861353, "grad_norm": 0.1398230642080307, "learning_rate": 9.887114817451439e-05, "loss": 1.0691, "step": 4978 }, { "epoch": 1.0121976011384428, "grad_norm": 0.15575705468654633, "learning_rate": 9.885080850198313e-05, "loss": 1.0019, "step": 4979 }, { "epoch": 1.01240089449075, "grad_norm": 0.13900624215602875, "learning_rate": 9.883046882945185e-05, "loss": 1.0318, "step": 4980 }, { "epoch": 1.0126041878430576, "grad_norm": 0.1266520619392395, "learning_rate": 9.881012915692059e-05, "loss": 0.9455, "step": 4981 }, { "epoch": 1.012807481195365, "grad_norm": 0.14327497780323029, "learning_rate": 9.87897894843893e-05, "loss": 1.1133, "step": 4982 }, { "epoch": 1.0130107745476722, "grad_norm": 0.14177127182483673, "learning_rate": 9.876944981185804e-05, "loss": 0.9969, "step": 4983 }, { "epoch": 1.0132140678999797, "grad_norm": 0.14066456258296967, "learning_rate": 9.874911013932676e-05, "loss": 0.9261, "step": 4984 }, { "epoch": 1.013417361252287, "grad_norm": 0.14441144466400146, "learning_rate": 9.872877046679549e-05, "loss": 1.0065, "step": 4985 }, { "epoch": 1.0136206546045945, "grad_norm": 0.12858086824417114, "learning_rate": 9.870843079426421e-05, "loss": 0.9306, "step": 4986 }, { "epoch": 1.0138239479569018, "grad_norm": 0.1305333971977234, "learning_rate": 9.868809112173294e-05, "loss": 1.0058, "step": 4987 }, { "epoch": 1.0140272413092093, "grad_norm": 0.1652311384677887, "learning_rate": 9.866775144920167e-05, "loss": 1.1992, "step": 4988 }, { "epoch": 1.0142305346615166, "grad_norm": 0.1123913899064064, "learning_rate": 9.86474117766704e-05, "loss": 0.8779, "step": 4989 }, { "epoch": 1.0144338280138239, "grad_norm": 0.15201310813426971, "learning_rate": 9.862707210413912e-05, "loss": 1.1553, "step": 4990 }, { "epoch": 1.0146371213661314, "grad_norm": 0.13241463899612427, "learning_rate": 9.860673243160785e-05, "loss": 0.9276, "step": 4991 }, { "epoch": 1.0148404147184387, "grad_norm": 0.15238632261753082, "learning_rate": 9.858639275907659e-05, "loss": 1.1528, "step": 4992 }, { "epoch": 1.0150437080707462, "grad_norm": 0.13771474361419678, "learning_rate": 9.856605308654531e-05, "loss": 1.1871, "step": 4993 }, { "epoch": 1.0152470014230535, "grad_norm": 0.135041743516922, "learning_rate": 9.854571341401403e-05, "loss": 0.9718, "step": 4994 }, { "epoch": 1.0154502947753608, "grad_norm": 0.14199897646903992, "learning_rate": 9.852537374148276e-05, "loss": 1.0454, "step": 4995 }, { "epoch": 1.0156535881276683, "grad_norm": 0.14556720852851868, "learning_rate": 9.85050340689515e-05, "loss": 1.102, "step": 4996 }, { "epoch": 1.0158568814799755, "grad_norm": 0.1287354975938797, "learning_rate": 9.848469439642022e-05, "loss": 0.929, "step": 4997 }, { "epoch": 1.016060174832283, "grad_norm": 0.15297791361808777, "learning_rate": 9.846435472388895e-05, "loss": 1.0234, "step": 4998 }, { "epoch": 1.0162634681845903, "grad_norm": 0.1549387276172638, "learning_rate": 9.844401505135767e-05, "loss": 1.1666, "step": 4999 }, { "epoch": 1.0164667615368979, "grad_norm": 0.15455321967601776, "learning_rate": 9.842367537882641e-05, "loss": 1.0845, "step": 5000 }, { "epoch": 1.0166700548892051, "grad_norm": 0.1259438842535019, "learning_rate": 9.840333570629513e-05, "loss": 0.8045, "step": 5001 }, { "epoch": 1.0168733482415124, "grad_norm": 0.12807638943195343, "learning_rate": 9.838299603376386e-05, "loss": 0.8213, "step": 5002 }, { "epoch": 1.01707664159382, "grad_norm": 0.14414066076278687, "learning_rate": 9.836265636123258e-05, "loss": 0.9676, "step": 5003 }, { "epoch": 1.0172799349461272, "grad_norm": 0.1384848803281784, "learning_rate": 9.834231668870132e-05, "loss": 1.0864, "step": 5004 }, { "epoch": 1.0174832282984347, "grad_norm": 0.1476108878850937, "learning_rate": 9.832197701617004e-05, "loss": 1.0906, "step": 5005 }, { "epoch": 1.017686521650742, "grad_norm": 0.14003942906856537, "learning_rate": 9.830163734363877e-05, "loss": 1.0768, "step": 5006 }, { "epoch": 1.0178898150030493, "grad_norm": 0.15422053635120392, "learning_rate": 9.828129767110749e-05, "loss": 1.0807, "step": 5007 }, { "epoch": 1.0180931083553568, "grad_norm": 0.15271607041358948, "learning_rate": 9.826095799857623e-05, "loss": 1.0285, "step": 5008 }, { "epoch": 1.0182964017076641, "grad_norm": 0.14479339122772217, "learning_rate": 9.824061832604496e-05, "loss": 1.0763, "step": 5009 }, { "epoch": 1.0184996950599716, "grad_norm": 0.16509747505187988, "learning_rate": 9.822027865351368e-05, "loss": 1.2167, "step": 5010 }, { "epoch": 1.018702988412279, "grad_norm": 0.12720637023448944, "learning_rate": 9.81999389809824e-05, "loss": 0.8985, "step": 5011 }, { "epoch": 1.0189062817645862, "grad_norm": 0.15708747506141663, "learning_rate": 9.817959930845114e-05, "loss": 1.0261, "step": 5012 }, { "epoch": 1.0191095751168937, "grad_norm": 0.1485368311405182, "learning_rate": 9.815925963591987e-05, "loss": 1.0739, "step": 5013 }, { "epoch": 1.019312868469201, "grad_norm": 0.1357945054769516, "learning_rate": 9.81389199633886e-05, "loss": 0.9953, "step": 5014 }, { "epoch": 1.0195161618215085, "grad_norm": 0.13788209855556488, "learning_rate": 9.811858029085732e-05, "loss": 0.9359, "step": 5015 }, { "epoch": 1.0197194551738158, "grad_norm": 0.13577666878700256, "learning_rate": 9.809824061832605e-05, "loss": 0.8714, "step": 5016 }, { "epoch": 1.0199227485261233, "grad_norm": 0.13565793633460999, "learning_rate": 9.807790094579478e-05, "loss": 1.0903, "step": 5017 }, { "epoch": 1.0201260418784306, "grad_norm": 0.13475680351257324, "learning_rate": 9.805756127326352e-05, "loss": 0.936, "step": 5018 }, { "epoch": 1.0203293352307379, "grad_norm": 0.12296731770038605, "learning_rate": 9.803722160073223e-05, "loss": 0.8435, "step": 5019 }, { "epoch": 1.0205326285830454, "grad_norm": 0.1502811461687088, "learning_rate": 9.801688192820097e-05, "loss": 1.22, "step": 5020 }, { "epoch": 1.0207359219353527, "grad_norm": 0.15247604250907898, "learning_rate": 9.799654225566969e-05, "loss": 1.0246, "step": 5021 }, { "epoch": 1.0209392152876602, "grad_norm": 0.1271592229604721, "learning_rate": 9.797620258313843e-05, "loss": 0.9396, "step": 5022 }, { "epoch": 1.0211425086399675, "grad_norm": 0.12880264222621918, "learning_rate": 9.795586291060714e-05, "loss": 0.9512, "step": 5023 }, { "epoch": 1.0213458019922748, "grad_norm": 0.14689882099628448, "learning_rate": 9.793552323807588e-05, "loss": 1.0981, "step": 5024 }, { "epoch": 1.0215490953445823, "grad_norm": 0.15388964116573334, "learning_rate": 9.79151835655446e-05, "loss": 1.0772, "step": 5025 }, { "epoch": 1.0217523886968896, "grad_norm": 0.13570699095726013, "learning_rate": 9.789484389301334e-05, "loss": 0.9322, "step": 5026 }, { "epoch": 1.021955682049197, "grad_norm": 0.1563068926334381, "learning_rate": 9.787450422048205e-05, "loss": 1.1555, "step": 5027 }, { "epoch": 1.0221589754015044, "grad_norm": 0.14098972082138062, "learning_rate": 9.785416454795077e-05, "loss": 0.9733, "step": 5028 }, { "epoch": 1.0223622687538116, "grad_norm": 0.15485285222530365, "learning_rate": 9.783382487541951e-05, "loss": 1.0956, "step": 5029 }, { "epoch": 1.0225655621061192, "grad_norm": 0.1376192718744278, "learning_rate": 9.781348520288824e-05, "loss": 0.9571, "step": 5030 }, { "epoch": 1.0227688554584264, "grad_norm": 0.1520523577928543, "learning_rate": 9.779314553035696e-05, "loss": 0.9815, "step": 5031 }, { "epoch": 1.022972148810734, "grad_norm": 0.14404673874378204, "learning_rate": 9.777280585782569e-05, "loss": 1.025, "step": 5032 }, { "epoch": 1.0231754421630412, "grad_norm": 0.15610331296920776, "learning_rate": 9.775246618529442e-05, "loss": 1.2032, "step": 5033 }, { "epoch": 1.0233787355153487, "grad_norm": 0.14844031631946564, "learning_rate": 9.773212651276315e-05, "loss": 1.133, "step": 5034 }, { "epoch": 1.023582028867656, "grad_norm": 0.15106390416622162, "learning_rate": 9.771178684023187e-05, "loss": 1.0273, "step": 5035 }, { "epoch": 1.0237853222199633, "grad_norm": 0.15353932976722717, "learning_rate": 9.76914471677006e-05, "loss": 1.0613, "step": 5036 }, { "epoch": 1.0239886155722708, "grad_norm": 0.14371348917484283, "learning_rate": 9.767110749516934e-05, "loss": 1.1526, "step": 5037 }, { "epoch": 1.0241919089245781, "grad_norm": 0.1600302904844284, "learning_rate": 9.765076782263806e-05, "loss": 1.2017, "step": 5038 }, { "epoch": 1.0243952022768856, "grad_norm": 0.1495140641927719, "learning_rate": 9.763042815010678e-05, "loss": 1.1276, "step": 5039 }, { "epoch": 1.024598495629193, "grad_norm": 0.13793641328811646, "learning_rate": 9.761008847757551e-05, "loss": 1.0703, "step": 5040 }, { "epoch": 1.0248017889815002, "grad_norm": 0.14516127109527588, "learning_rate": 9.758974880504425e-05, "loss": 0.9643, "step": 5041 }, { "epoch": 1.0250050823338077, "grad_norm": 0.14541226625442505, "learning_rate": 9.756940913251297e-05, "loss": 1.0796, "step": 5042 }, { "epoch": 1.025208375686115, "grad_norm": 0.1590830236673355, "learning_rate": 9.75490694599817e-05, "loss": 1.0886, "step": 5043 }, { "epoch": 1.0254116690384225, "grad_norm": 0.14074791967868805, "learning_rate": 9.752872978745042e-05, "loss": 1.0064, "step": 5044 }, { "epoch": 1.0256149623907298, "grad_norm": 0.13759545981884003, "learning_rate": 9.750839011491916e-05, "loss": 0.9204, "step": 5045 }, { "epoch": 1.0258182557430373, "grad_norm": 0.1406768560409546, "learning_rate": 9.748805044238788e-05, "loss": 1.0328, "step": 5046 }, { "epoch": 1.0260215490953446, "grad_norm": 0.14663250744342804, "learning_rate": 9.746771076985661e-05, "loss": 1.0868, "step": 5047 }, { "epoch": 1.0262248424476519, "grad_norm": 0.15370012819766998, "learning_rate": 9.744737109732533e-05, "loss": 1.1454, "step": 5048 }, { "epoch": 1.0264281357999594, "grad_norm": 0.16050153970718384, "learning_rate": 9.742703142479407e-05, "loss": 1.2114, "step": 5049 }, { "epoch": 1.0266314291522667, "grad_norm": 0.15520580112934113, "learning_rate": 9.74066917522628e-05, "loss": 1.1716, "step": 5050 }, { "epoch": 1.0268347225045742, "grad_norm": 0.14273221790790558, "learning_rate": 9.738635207973152e-05, "loss": 1.0047, "step": 5051 }, { "epoch": 1.0270380158568815, "grad_norm": 0.14701679348945618, "learning_rate": 9.736601240720024e-05, "loss": 1.1173, "step": 5052 }, { "epoch": 1.0272413092091888, "grad_norm": 0.1339159458875656, "learning_rate": 9.734567273466898e-05, "loss": 0.8771, "step": 5053 }, { "epoch": 1.0274446025614963, "grad_norm": 0.13483907282352448, "learning_rate": 9.73253330621377e-05, "loss": 0.9459, "step": 5054 }, { "epoch": 1.0276478959138036, "grad_norm": 0.13941912353038788, "learning_rate": 9.730499338960643e-05, "loss": 1.0684, "step": 5055 }, { "epoch": 1.027851189266111, "grad_norm": 0.15202683210372925, "learning_rate": 9.728465371707515e-05, "loss": 1.0361, "step": 5056 }, { "epoch": 1.0280544826184184, "grad_norm": 0.13405011594295502, "learning_rate": 9.726431404454389e-05, "loss": 0.9139, "step": 5057 }, { "epoch": 1.0282577759707257, "grad_norm": 0.15052016079425812, "learning_rate": 9.724397437201262e-05, "loss": 1.1181, "step": 5058 }, { "epoch": 1.0284610693230332, "grad_norm": 0.13862478733062744, "learning_rate": 9.722363469948134e-05, "loss": 0.9646, "step": 5059 }, { "epoch": 1.0286643626753404, "grad_norm": 0.13493207097053528, "learning_rate": 9.720329502695007e-05, "loss": 0.9966, "step": 5060 }, { "epoch": 1.028867656027648, "grad_norm": 0.13806003332138062, "learning_rate": 9.71829553544188e-05, "loss": 1.03, "step": 5061 }, { "epoch": 1.0290709493799552, "grad_norm": 0.13732276856899261, "learning_rate": 9.716261568188753e-05, "loss": 1.0235, "step": 5062 }, { "epoch": 1.0292742427322628, "grad_norm": 0.1543864756822586, "learning_rate": 9.714227600935625e-05, "loss": 1.124, "step": 5063 }, { "epoch": 1.02947753608457, "grad_norm": 0.12406022101640701, "learning_rate": 9.712193633682498e-05, "loss": 0.936, "step": 5064 }, { "epoch": 1.0296808294368773, "grad_norm": 0.1271096020936966, "learning_rate": 9.710159666429371e-05, "loss": 0.9351, "step": 5065 }, { "epoch": 1.0298841227891848, "grad_norm": 0.15488265454769135, "learning_rate": 9.708125699176244e-05, "loss": 1.1175, "step": 5066 }, { "epoch": 1.0300874161414921, "grad_norm": 0.15858517587184906, "learning_rate": 9.706091731923116e-05, "loss": 1.107, "step": 5067 }, { "epoch": 1.0302907094937996, "grad_norm": 0.12946265935897827, "learning_rate": 9.704057764669989e-05, "loss": 0.8796, "step": 5068 }, { "epoch": 1.030494002846107, "grad_norm": 0.1463090032339096, "learning_rate": 9.702023797416861e-05, "loss": 0.9929, "step": 5069 }, { "epoch": 1.0306972961984142, "grad_norm": 0.1322004348039627, "learning_rate": 9.699989830163735e-05, "loss": 0.9455, "step": 5070 }, { "epoch": 1.0309005895507217, "grad_norm": 0.1394505351781845, "learning_rate": 9.697955862910608e-05, "loss": 0.8651, "step": 5071 }, { "epoch": 1.031103882903029, "grad_norm": 0.14322896301746368, "learning_rate": 9.69592189565748e-05, "loss": 0.9789, "step": 5072 }, { "epoch": 1.0313071762553365, "grad_norm": 0.1413803994655609, "learning_rate": 9.693887928404352e-05, "loss": 0.9716, "step": 5073 }, { "epoch": 1.0315104696076438, "grad_norm": 0.14607295393943787, "learning_rate": 9.691853961151226e-05, "loss": 1.2096, "step": 5074 }, { "epoch": 1.031713762959951, "grad_norm": 0.13793663680553436, "learning_rate": 9.689819993898099e-05, "loss": 0.9774, "step": 5075 }, { "epoch": 1.0319170563122586, "grad_norm": 0.15151038765907288, "learning_rate": 9.687786026644971e-05, "loss": 1.1573, "step": 5076 }, { "epoch": 1.032120349664566, "grad_norm": 0.15216906368732452, "learning_rate": 9.685752059391844e-05, "loss": 0.9275, "step": 5077 }, { "epoch": 1.0323236430168734, "grad_norm": 0.13646896183490753, "learning_rate": 9.683718092138717e-05, "loss": 0.9482, "step": 5078 }, { "epoch": 1.0325269363691807, "grad_norm": 0.13924843072891235, "learning_rate": 9.68168412488559e-05, "loss": 0.9035, "step": 5079 }, { "epoch": 1.0327302297214882, "grad_norm": 0.15003299713134766, "learning_rate": 9.679650157632462e-05, "loss": 1.0521, "step": 5080 }, { "epoch": 1.0329335230737955, "grad_norm": 0.14902108907699585, "learning_rate": 9.677616190379335e-05, "loss": 1.0682, "step": 5081 }, { "epoch": 1.0331368164261028, "grad_norm": 0.14243461191654205, "learning_rate": 9.675582223126208e-05, "loss": 0.8912, "step": 5082 }, { "epoch": 1.0333401097784103, "grad_norm": 0.13720953464508057, "learning_rate": 9.673548255873081e-05, "loss": 0.9996, "step": 5083 }, { "epoch": 1.0335434031307176, "grad_norm": 0.1456315517425537, "learning_rate": 9.671514288619953e-05, "loss": 1.1464, "step": 5084 }, { "epoch": 1.033746696483025, "grad_norm": 0.14110687375068665, "learning_rate": 9.669480321366826e-05, "loss": 0.8695, "step": 5085 }, { "epoch": 1.0339499898353324, "grad_norm": 0.1465069204568863, "learning_rate": 9.6674463541137e-05, "loss": 1.1388, "step": 5086 }, { "epoch": 1.0341532831876397, "grad_norm": 0.15050126612186432, "learning_rate": 9.665412386860572e-05, "loss": 0.9978, "step": 5087 }, { "epoch": 1.0343565765399472, "grad_norm": 0.14004911482334137, "learning_rate": 9.663378419607445e-05, "loss": 0.89, "step": 5088 }, { "epoch": 1.0345598698922545, "grad_norm": 0.1353030502796173, "learning_rate": 9.661344452354317e-05, "loss": 0.9443, "step": 5089 }, { "epoch": 1.034763163244562, "grad_norm": 0.14427968859672546, "learning_rate": 9.659310485101191e-05, "loss": 1.0751, "step": 5090 }, { "epoch": 1.0349664565968693, "grad_norm": 0.1521504521369934, "learning_rate": 9.657276517848063e-05, "loss": 1.0727, "step": 5091 }, { "epoch": 1.0351697499491768, "grad_norm": 0.14062680304050446, "learning_rate": 9.655242550594936e-05, "loss": 0.9034, "step": 5092 }, { "epoch": 1.035373043301484, "grad_norm": 0.17620261013507843, "learning_rate": 9.653208583341808e-05, "loss": 1.2104, "step": 5093 }, { "epoch": 1.0355763366537913, "grad_norm": 0.1438150703907013, "learning_rate": 9.651174616088682e-05, "loss": 1.0179, "step": 5094 }, { "epoch": 1.0357796300060989, "grad_norm": 0.16635897755622864, "learning_rate": 9.649140648835554e-05, "loss": 1.1789, "step": 5095 }, { "epoch": 1.0359829233584061, "grad_norm": 0.14373865723609924, "learning_rate": 9.647106681582427e-05, "loss": 0.9776, "step": 5096 }, { "epoch": 1.0361862167107136, "grad_norm": 0.1490122526884079, "learning_rate": 9.645072714329299e-05, "loss": 1.0112, "step": 5097 }, { "epoch": 1.036389510063021, "grad_norm": 0.14242301881313324, "learning_rate": 9.643038747076173e-05, "loss": 1.0629, "step": 5098 }, { "epoch": 1.0365928034153282, "grad_norm": 0.13605085015296936, "learning_rate": 9.641004779823045e-05, "loss": 0.8603, "step": 5099 }, { "epoch": 1.0367960967676357, "grad_norm": 0.15348966419696808, "learning_rate": 9.638970812569918e-05, "loss": 1.1368, "step": 5100 }, { "epoch": 1.036999390119943, "grad_norm": 0.14863932132720947, "learning_rate": 9.63693684531679e-05, "loss": 0.8904, "step": 5101 }, { "epoch": 1.0372026834722505, "grad_norm": 0.15452273190021515, "learning_rate": 9.634902878063664e-05, "loss": 1.1195, "step": 5102 }, { "epoch": 1.0374059768245578, "grad_norm": 0.14655210077762604, "learning_rate": 9.632868910810537e-05, "loss": 1.0639, "step": 5103 }, { "epoch": 1.037609270176865, "grad_norm": 0.13452014327049255, "learning_rate": 9.630834943557409e-05, "loss": 0.9851, "step": 5104 }, { "epoch": 1.0378125635291726, "grad_norm": 0.15053342282772064, "learning_rate": 9.628800976304282e-05, "loss": 0.906, "step": 5105 }, { "epoch": 1.03801585688148, "grad_norm": 0.16290277242660522, "learning_rate": 9.626767009051155e-05, "loss": 1.1462, "step": 5106 }, { "epoch": 1.0382191502337874, "grad_norm": 0.1363389641046524, "learning_rate": 9.624733041798028e-05, "loss": 0.9143, "step": 5107 }, { "epoch": 1.0384224435860947, "grad_norm": 0.13875210285186768, "learning_rate": 9.6226990745449e-05, "loss": 0.823, "step": 5108 }, { "epoch": 1.0386257369384022, "grad_norm": 0.13508771359920502, "learning_rate": 9.620665107291773e-05, "loss": 1.061, "step": 5109 }, { "epoch": 1.0388290302907095, "grad_norm": 0.1293465942144394, "learning_rate": 9.618631140038645e-05, "loss": 0.8473, "step": 5110 }, { "epoch": 1.0390323236430168, "grad_norm": 0.1552773416042328, "learning_rate": 9.616597172785519e-05, "loss": 1.1096, "step": 5111 }, { "epoch": 1.0392356169953243, "grad_norm": 0.13282112777233124, "learning_rate": 9.614563205532391e-05, "loss": 0.8199, "step": 5112 }, { "epoch": 1.0394389103476316, "grad_norm": 0.12775538861751556, "learning_rate": 9.612529238279264e-05, "loss": 0.8483, "step": 5113 }, { "epoch": 1.039642203699939, "grad_norm": 0.14461298286914825, "learning_rate": 9.610495271026136e-05, "loss": 0.9767, "step": 5114 }, { "epoch": 1.0398454970522464, "grad_norm": 0.1476954072713852, "learning_rate": 9.60846130377301e-05, "loss": 0.9892, "step": 5115 }, { "epoch": 1.0400487904045537, "grad_norm": 0.13602770864963531, "learning_rate": 9.606427336519883e-05, "loss": 1.0392, "step": 5116 }, { "epoch": 1.0402520837568612, "grad_norm": 0.14463678002357483, "learning_rate": 9.604393369266755e-05, "loss": 0.8989, "step": 5117 }, { "epoch": 1.0404553771091685, "grad_norm": 0.1426476687192917, "learning_rate": 9.602359402013627e-05, "loss": 1.0073, "step": 5118 }, { "epoch": 1.040658670461476, "grad_norm": 0.14196395874023438, "learning_rate": 9.600325434760501e-05, "loss": 1.1132, "step": 5119 }, { "epoch": 1.0408619638137833, "grad_norm": 0.14742383360862732, "learning_rate": 9.598291467507374e-05, "loss": 0.9791, "step": 5120 }, { "epoch": 1.0410652571660908, "grad_norm": 0.13551802933216095, "learning_rate": 9.596257500254246e-05, "loss": 0.927, "step": 5121 }, { "epoch": 1.041268550518398, "grad_norm": 0.12378886342048645, "learning_rate": 9.594223533001119e-05, "loss": 0.9554, "step": 5122 }, { "epoch": 1.0414718438707053, "grad_norm": 0.13679155707359314, "learning_rate": 9.592189565747992e-05, "loss": 0.8767, "step": 5123 }, { "epoch": 1.0416751372230129, "grad_norm": 0.12900646030902863, "learning_rate": 9.590155598494865e-05, "loss": 1.0194, "step": 5124 }, { "epoch": 1.0418784305753201, "grad_norm": 0.14963632822036743, "learning_rate": 9.588121631241737e-05, "loss": 1.0235, "step": 5125 }, { "epoch": 1.0420817239276277, "grad_norm": 0.16666048765182495, "learning_rate": 9.58608766398861e-05, "loss": 1.0171, "step": 5126 }, { "epoch": 1.042285017279935, "grad_norm": 0.13959690928459167, "learning_rate": 9.584053696735483e-05, "loss": 1.0413, "step": 5127 }, { "epoch": 1.0424883106322422, "grad_norm": 0.1701730191707611, "learning_rate": 9.582019729482356e-05, "loss": 1.169, "step": 5128 }, { "epoch": 1.0426916039845497, "grad_norm": 0.147377148270607, "learning_rate": 9.579985762229228e-05, "loss": 1.0956, "step": 5129 }, { "epoch": 1.042894897336857, "grad_norm": 0.1471777856349945, "learning_rate": 9.577951794976101e-05, "loss": 1.1304, "step": 5130 }, { "epoch": 1.0430981906891645, "grad_norm": 0.14388343691825867, "learning_rate": 9.575917827722975e-05, "loss": 1.1626, "step": 5131 }, { "epoch": 1.0433014840414718, "grad_norm": 0.1318674087524414, "learning_rate": 9.573883860469847e-05, "loss": 0.958, "step": 5132 }, { "epoch": 1.0435047773937791, "grad_norm": 0.15218976140022278, "learning_rate": 9.57184989321672e-05, "loss": 1.0546, "step": 5133 }, { "epoch": 1.0437080707460866, "grad_norm": 0.13113273680210114, "learning_rate": 9.569815925963592e-05, "loss": 0.8427, "step": 5134 }, { "epoch": 1.043911364098394, "grad_norm": 0.12252294272184372, "learning_rate": 9.567781958710466e-05, "loss": 0.8234, "step": 5135 }, { "epoch": 1.0441146574507014, "grad_norm": 0.15691354870796204, "learning_rate": 9.565747991457338e-05, "loss": 1.0107, "step": 5136 }, { "epoch": 1.0443179508030087, "grad_norm": 0.13608700037002563, "learning_rate": 9.56371402420421e-05, "loss": 0.9373, "step": 5137 }, { "epoch": 1.0445212441553162, "grad_norm": 0.14586831629276276, "learning_rate": 9.561680056951083e-05, "loss": 0.9283, "step": 5138 }, { "epoch": 1.0447245375076235, "grad_norm": 0.15548555552959442, "learning_rate": 9.559646089697957e-05, "loss": 1.0762, "step": 5139 }, { "epoch": 1.0449278308599308, "grad_norm": 0.14028286933898926, "learning_rate": 9.55761212244483e-05, "loss": 1.0798, "step": 5140 }, { "epoch": 1.0451311242122383, "grad_norm": 0.13946202397346497, "learning_rate": 9.555578155191702e-05, "loss": 0.9123, "step": 5141 }, { "epoch": 1.0453344175645456, "grad_norm": 0.15305642783641815, "learning_rate": 9.553544187938574e-05, "loss": 1.1277, "step": 5142 }, { "epoch": 1.045537710916853, "grad_norm": 0.1336325705051422, "learning_rate": 9.551510220685448e-05, "loss": 0.943, "step": 5143 }, { "epoch": 1.0457410042691604, "grad_norm": 0.1418871283531189, "learning_rate": 9.54947625343232e-05, "loss": 0.9236, "step": 5144 }, { "epoch": 1.0459442976214677, "grad_norm": 0.1378573775291443, "learning_rate": 9.547442286179193e-05, "loss": 1.0725, "step": 5145 }, { "epoch": 1.0461475909737752, "grad_norm": 0.1409774124622345, "learning_rate": 9.545408318926065e-05, "loss": 1.0191, "step": 5146 }, { "epoch": 1.0463508843260825, "grad_norm": 0.14639027416706085, "learning_rate": 9.543374351672939e-05, "loss": 0.996, "step": 5147 }, { "epoch": 1.04655417767839, "grad_norm": 0.15926292538642883, "learning_rate": 9.541340384419812e-05, "loss": 1.1456, "step": 5148 }, { "epoch": 1.0467574710306973, "grad_norm": 0.15091705322265625, "learning_rate": 9.539306417166684e-05, "loss": 1.074, "step": 5149 }, { "epoch": 1.0469607643830048, "grad_norm": 0.1436360776424408, "learning_rate": 9.537272449913557e-05, "loss": 1.0315, "step": 5150 }, { "epoch": 1.047164057735312, "grad_norm": 0.15203581750392914, "learning_rate": 9.535238482660429e-05, "loss": 1.0482, "step": 5151 }, { "epoch": 1.0473673510876194, "grad_norm": 0.14809484779834747, "learning_rate": 9.533204515407303e-05, "loss": 1.1127, "step": 5152 }, { "epoch": 1.0475706444399269, "grad_norm": 0.142609104514122, "learning_rate": 9.531170548154175e-05, "loss": 1.0597, "step": 5153 }, { "epoch": 1.0477739377922342, "grad_norm": 0.14975500106811523, "learning_rate": 9.529136580901048e-05, "loss": 0.9545, "step": 5154 }, { "epoch": 1.0479772311445417, "grad_norm": 0.1335391253232956, "learning_rate": 9.52710261364792e-05, "loss": 0.9786, "step": 5155 }, { "epoch": 1.048180524496849, "grad_norm": 0.1525142341852188, "learning_rate": 9.525068646394794e-05, "loss": 1.1635, "step": 5156 }, { "epoch": 1.0483838178491562, "grad_norm": 0.14316526055335999, "learning_rate": 9.523034679141666e-05, "loss": 1.0514, "step": 5157 }, { "epoch": 1.0485871112014638, "grad_norm": 0.13840989768505096, "learning_rate": 9.521000711888539e-05, "loss": 0.9964, "step": 5158 }, { "epoch": 1.048790404553771, "grad_norm": 0.14186960458755493, "learning_rate": 9.518966744635411e-05, "loss": 1.0424, "step": 5159 }, { "epoch": 1.0489936979060785, "grad_norm": 0.14743170142173767, "learning_rate": 9.516932777382285e-05, "loss": 1.0868, "step": 5160 }, { "epoch": 1.0491969912583858, "grad_norm": 0.13749894499778748, "learning_rate": 9.514898810129157e-05, "loss": 1.1072, "step": 5161 }, { "epoch": 1.0494002846106931, "grad_norm": 0.15378478169441223, "learning_rate": 9.51286484287603e-05, "loss": 0.9729, "step": 5162 }, { "epoch": 1.0496035779630006, "grad_norm": 0.14535516500473022, "learning_rate": 9.510830875622902e-05, "loss": 0.9586, "step": 5163 }, { "epoch": 1.049806871315308, "grad_norm": 0.15049873292446136, "learning_rate": 9.508796908369776e-05, "loss": 1.076, "step": 5164 }, { "epoch": 1.0500101646676154, "grad_norm": 0.14930661022663116, "learning_rate": 9.506762941116649e-05, "loss": 1.1252, "step": 5165 }, { "epoch": 1.0502134580199227, "grad_norm": 0.13975845277309418, "learning_rate": 9.504728973863521e-05, "loss": 0.9545, "step": 5166 }, { "epoch": 1.0504167513722302, "grad_norm": 0.15114235877990723, "learning_rate": 9.502695006610394e-05, "loss": 1.0946, "step": 5167 }, { "epoch": 1.0506200447245375, "grad_norm": 0.1519806683063507, "learning_rate": 9.500661039357267e-05, "loss": 0.9512, "step": 5168 }, { "epoch": 1.0508233380768448, "grad_norm": 0.14767295122146606, "learning_rate": 9.49862707210414e-05, "loss": 0.9872, "step": 5169 }, { "epoch": 1.0510266314291523, "grad_norm": 0.1331123262643814, "learning_rate": 9.496593104851012e-05, "loss": 0.8648, "step": 5170 }, { "epoch": 1.0512299247814596, "grad_norm": 0.15055252611637115, "learning_rate": 9.494559137597885e-05, "loss": 0.9938, "step": 5171 }, { "epoch": 1.0514332181337671, "grad_norm": 0.14633582532405853, "learning_rate": 9.492525170344758e-05, "loss": 0.978, "step": 5172 }, { "epoch": 1.0516365114860744, "grad_norm": 0.13616123795509338, "learning_rate": 9.490491203091631e-05, "loss": 1.0177, "step": 5173 }, { "epoch": 1.0518398048383817, "grad_norm": 0.14793802797794342, "learning_rate": 9.488457235838503e-05, "loss": 1.1359, "step": 5174 }, { "epoch": 1.0520430981906892, "grad_norm": 0.14621852338314056, "learning_rate": 9.486423268585376e-05, "loss": 1.1179, "step": 5175 }, { "epoch": 1.0522463915429965, "grad_norm": 0.14446653425693512, "learning_rate": 9.48438930133225e-05, "loss": 0.9409, "step": 5176 }, { "epoch": 1.052449684895304, "grad_norm": 0.1389390528202057, "learning_rate": 9.482355334079122e-05, "loss": 0.8515, "step": 5177 }, { "epoch": 1.0526529782476113, "grad_norm": 0.144555002450943, "learning_rate": 9.480321366825994e-05, "loss": 1.0017, "step": 5178 }, { "epoch": 1.0528562715999188, "grad_norm": 0.1591474562883377, "learning_rate": 9.478287399572867e-05, "loss": 1.1194, "step": 5179 }, { "epoch": 1.053059564952226, "grad_norm": 0.13776561617851257, "learning_rate": 9.476253432319741e-05, "loss": 0.9357, "step": 5180 }, { "epoch": 1.0532628583045334, "grad_norm": 0.12213550508022308, "learning_rate": 9.474219465066613e-05, "loss": 0.8272, "step": 5181 }, { "epoch": 1.0534661516568409, "grad_norm": 0.15859781205654144, "learning_rate": 9.472185497813486e-05, "loss": 0.9405, "step": 5182 }, { "epoch": 1.0536694450091482, "grad_norm": 0.1493707299232483, "learning_rate": 9.470151530560358e-05, "loss": 1.037, "step": 5183 }, { "epoch": 1.0538727383614557, "grad_norm": 0.1524028778076172, "learning_rate": 9.468117563307232e-05, "loss": 1.1292, "step": 5184 }, { "epoch": 1.054076031713763, "grad_norm": 0.144907146692276, "learning_rate": 9.466083596054104e-05, "loss": 0.8658, "step": 5185 }, { "epoch": 1.0542793250660702, "grad_norm": 0.13830581307411194, "learning_rate": 9.464049628800977e-05, "loss": 0.9321, "step": 5186 }, { "epoch": 1.0544826184183778, "grad_norm": 0.13525360822677612, "learning_rate": 9.462015661547849e-05, "loss": 1.0633, "step": 5187 }, { "epoch": 1.054685911770685, "grad_norm": 0.14305701851844788, "learning_rate": 9.459981694294723e-05, "loss": 0.9783, "step": 5188 }, { "epoch": 1.0548892051229926, "grad_norm": 0.15196113288402557, "learning_rate": 9.457947727041595e-05, "loss": 1.0146, "step": 5189 }, { "epoch": 1.0550924984752998, "grad_norm": 0.14528249204158783, "learning_rate": 9.455913759788468e-05, "loss": 1.0114, "step": 5190 }, { "epoch": 1.0552957918276071, "grad_norm": 0.14285583794116974, "learning_rate": 9.45387979253534e-05, "loss": 1.0498, "step": 5191 }, { "epoch": 1.0554990851799146, "grad_norm": 0.15215423703193665, "learning_rate": 9.451845825282214e-05, "loss": 0.8786, "step": 5192 }, { "epoch": 1.055702378532222, "grad_norm": 0.15505358576774597, "learning_rate": 9.449811858029087e-05, "loss": 1.1052, "step": 5193 }, { "epoch": 1.0559056718845294, "grad_norm": 0.1445719599723816, "learning_rate": 9.447777890775959e-05, "loss": 0.9482, "step": 5194 }, { "epoch": 1.0561089652368367, "grad_norm": 0.1489405632019043, "learning_rate": 9.445743923522831e-05, "loss": 0.9503, "step": 5195 }, { "epoch": 1.0563122585891442, "grad_norm": 0.1727711409330368, "learning_rate": 9.443709956269704e-05, "loss": 1.2595, "step": 5196 }, { "epoch": 1.0565155519414515, "grad_norm": 0.1496013104915619, "learning_rate": 9.441675989016578e-05, "loss": 1.1557, "step": 5197 }, { "epoch": 1.0567188452937588, "grad_norm": 0.14185824990272522, "learning_rate": 9.43964202176345e-05, "loss": 1.04, "step": 5198 }, { "epoch": 1.0569221386460663, "grad_norm": 0.14532503485679626, "learning_rate": 9.437608054510323e-05, "loss": 1.0624, "step": 5199 }, { "epoch": 1.0571254319983736, "grad_norm": 0.13846822082996368, "learning_rate": 9.435574087257195e-05, "loss": 1.0004, "step": 5200 }, { "epoch": 1.0573287253506811, "grad_norm": 0.14008352160453796, "learning_rate": 9.433540120004069e-05, "loss": 0.9743, "step": 5201 }, { "epoch": 1.0575320187029884, "grad_norm": 0.14041657745838165, "learning_rate": 9.431506152750941e-05, "loss": 1.0694, "step": 5202 }, { "epoch": 1.0577353120552957, "grad_norm": 0.15184582769870758, "learning_rate": 9.429472185497814e-05, "loss": 1.0546, "step": 5203 }, { "epoch": 1.0579386054076032, "grad_norm": 0.14408542215824127, "learning_rate": 9.427438218244686e-05, "loss": 1.0136, "step": 5204 }, { "epoch": 1.0581418987599105, "grad_norm": 0.1553533673286438, "learning_rate": 9.42540425099156e-05, "loss": 1.0661, "step": 5205 }, { "epoch": 1.058345192112218, "grad_norm": 0.14355801045894623, "learning_rate": 9.423370283738432e-05, "loss": 1.0145, "step": 5206 }, { "epoch": 1.0585484854645253, "grad_norm": 0.15593981742858887, "learning_rate": 9.421336316485305e-05, "loss": 1.0197, "step": 5207 }, { "epoch": 1.0587517788168328, "grad_norm": 0.15501219034194946, "learning_rate": 9.419302349232177e-05, "loss": 1.1659, "step": 5208 }, { "epoch": 1.05895507216914, "grad_norm": 0.14046739041805267, "learning_rate": 9.417268381979051e-05, "loss": 1.0586, "step": 5209 }, { "epoch": 1.0591583655214474, "grad_norm": 0.13722114264965057, "learning_rate": 9.415234414725924e-05, "loss": 1.0901, "step": 5210 }, { "epoch": 1.0593616588737549, "grad_norm": 0.15568463504314423, "learning_rate": 9.413200447472796e-05, "loss": 1.1566, "step": 5211 }, { "epoch": 1.0595649522260622, "grad_norm": 0.1628820151090622, "learning_rate": 9.411166480219668e-05, "loss": 1.1257, "step": 5212 }, { "epoch": 1.0597682455783697, "grad_norm": 0.12070141732692719, "learning_rate": 9.409132512966542e-05, "loss": 0.8634, "step": 5213 }, { "epoch": 1.059971538930677, "grad_norm": 0.147191122174263, "learning_rate": 9.407098545713415e-05, "loss": 1.1329, "step": 5214 }, { "epoch": 1.0601748322829843, "grad_norm": 0.14349913597106934, "learning_rate": 9.405064578460287e-05, "loss": 1.061, "step": 5215 }, { "epoch": 1.0603781256352918, "grad_norm": 0.14183680713176727, "learning_rate": 9.40303061120716e-05, "loss": 0.9728, "step": 5216 }, { "epoch": 1.060581418987599, "grad_norm": 0.1697556972503662, "learning_rate": 9.400996643954033e-05, "loss": 1.2494, "step": 5217 }, { "epoch": 1.0607847123399066, "grad_norm": 0.1233520433306694, "learning_rate": 9.398962676700906e-05, "loss": 0.9116, "step": 5218 }, { "epoch": 1.0609880056922139, "grad_norm": 0.1457953006029129, "learning_rate": 9.396928709447778e-05, "loss": 1.0453, "step": 5219 }, { "epoch": 1.0611912990445211, "grad_norm": 0.13704726099967957, "learning_rate": 9.394894742194651e-05, "loss": 0.9965, "step": 5220 }, { "epoch": 1.0613945923968287, "grad_norm": 0.13508938252925873, "learning_rate": 9.392860774941525e-05, "loss": 0.9579, "step": 5221 }, { "epoch": 1.061597885749136, "grad_norm": 0.1393241584300995, "learning_rate": 9.390826807688397e-05, "loss": 0.9958, "step": 5222 }, { "epoch": 1.0618011791014434, "grad_norm": 0.15738067030906677, "learning_rate": 9.38879284043527e-05, "loss": 1.092, "step": 5223 }, { "epoch": 1.0620044724537507, "grad_norm": 0.1496880203485489, "learning_rate": 9.386758873182142e-05, "loss": 1.1966, "step": 5224 }, { "epoch": 1.0622077658060582, "grad_norm": 0.1586069017648697, "learning_rate": 9.384724905929016e-05, "loss": 1.1199, "step": 5225 }, { "epoch": 1.0624110591583655, "grad_norm": 0.14158375561237335, "learning_rate": 9.382690938675888e-05, "loss": 1.0343, "step": 5226 }, { "epoch": 1.0626143525106728, "grad_norm": 0.16692568361759186, "learning_rate": 9.38065697142276e-05, "loss": 1.1547, "step": 5227 }, { "epoch": 1.0628176458629803, "grad_norm": 0.14403125643730164, "learning_rate": 9.378623004169633e-05, "loss": 1.0403, "step": 5228 }, { "epoch": 1.0630209392152876, "grad_norm": 0.13261236250400543, "learning_rate": 9.376589036916507e-05, "loss": 0.9382, "step": 5229 }, { "epoch": 1.0632242325675951, "grad_norm": 0.15169022977352142, "learning_rate": 9.374555069663379e-05, "loss": 1.1496, "step": 5230 }, { "epoch": 1.0634275259199024, "grad_norm": 0.1648220419883728, "learning_rate": 9.372521102410252e-05, "loss": 1.2013, "step": 5231 }, { "epoch": 1.0636308192722097, "grad_norm": 0.1417074352502823, "learning_rate": 9.370487135157124e-05, "loss": 1.007, "step": 5232 }, { "epoch": 1.0638341126245172, "grad_norm": 0.15137724578380585, "learning_rate": 9.368453167903998e-05, "loss": 1.2016, "step": 5233 }, { "epoch": 1.0640374059768245, "grad_norm": 0.15120957791805267, "learning_rate": 9.36641920065087e-05, "loss": 1.1459, "step": 5234 }, { "epoch": 1.064240699329132, "grad_norm": 0.14129264652729034, "learning_rate": 9.364385233397742e-05, "loss": 0.9686, "step": 5235 }, { "epoch": 1.0644439926814393, "grad_norm": 0.13735541701316833, "learning_rate": 9.362351266144615e-05, "loss": 0.9117, "step": 5236 }, { "epoch": 1.0646472860337468, "grad_norm": 0.14280696213245392, "learning_rate": 9.360317298891488e-05, "loss": 0.9481, "step": 5237 }, { "epoch": 1.064850579386054, "grad_norm": 0.15263578295707703, "learning_rate": 9.358283331638362e-05, "loss": 1.0191, "step": 5238 }, { "epoch": 1.0650538727383614, "grad_norm": 0.14364738762378693, "learning_rate": 9.356249364385234e-05, "loss": 1.0692, "step": 5239 }, { "epoch": 1.065257166090669, "grad_norm": 0.15514370799064636, "learning_rate": 9.354215397132106e-05, "loss": 1.0808, "step": 5240 }, { "epoch": 1.0654604594429762, "grad_norm": 0.14693541824817657, "learning_rate": 9.352181429878979e-05, "loss": 1.0964, "step": 5241 }, { "epoch": 1.0656637527952837, "grad_norm": 0.15230287611484528, "learning_rate": 9.350147462625853e-05, "loss": 1.142, "step": 5242 }, { "epoch": 1.065867046147591, "grad_norm": 0.13272824883460999, "learning_rate": 9.348113495372725e-05, "loss": 0.954, "step": 5243 }, { "epoch": 1.0660703394998983, "grad_norm": 0.14376932382583618, "learning_rate": 9.346079528119598e-05, "loss": 0.9046, "step": 5244 }, { "epoch": 1.0662736328522058, "grad_norm": 0.15434645116329193, "learning_rate": 9.34404556086647e-05, "loss": 1.0499, "step": 5245 }, { "epoch": 1.066476926204513, "grad_norm": 0.14078587293624878, "learning_rate": 9.342011593613344e-05, "loss": 1.0289, "step": 5246 }, { "epoch": 1.0666802195568206, "grad_norm": 0.15372897684574127, "learning_rate": 9.339977626360216e-05, "loss": 1.0794, "step": 5247 }, { "epoch": 1.0668835129091279, "grad_norm": 0.1350528746843338, "learning_rate": 9.337943659107089e-05, "loss": 1.0136, "step": 5248 }, { "epoch": 1.0670868062614352, "grad_norm": 0.15009120106697083, "learning_rate": 9.335909691853961e-05, "loss": 1.0563, "step": 5249 }, { "epoch": 1.0672900996137427, "grad_norm": 0.15966548025608063, "learning_rate": 9.333875724600835e-05, "loss": 1.2194, "step": 5250 }, { "epoch": 1.06749339296605, "grad_norm": 0.14889566600322723, "learning_rate": 9.331841757347707e-05, "loss": 1.0898, "step": 5251 }, { "epoch": 1.0676966863183575, "grad_norm": 0.1444637030363083, "learning_rate": 9.32980779009458e-05, "loss": 0.9107, "step": 5252 }, { "epoch": 1.0678999796706647, "grad_norm": 0.13386781513690948, "learning_rate": 9.327773822841452e-05, "loss": 0.9542, "step": 5253 }, { "epoch": 1.068103273022972, "grad_norm": 0.16856461763381958, "learning_rate": 9.325739855588326e-05, "loss": 1.2317, "step": 5254 }, { "epoch": 1.0683065663752795, "grad_norm": 0.14999926090240479, "learning_rate": 9.323705888335199e-05, "loss": 0.9972, "step": 5255 }, { "epoch": 1.0685098597275868, "grad_norm": 0.16005565226078033, "learning_rate": 9.321671921082071e-05, "loss": 1.1752, "step": 5256 }, { "epoch": 1.0687131530798943, "grad_norm": 0.13948242366313934, "learning_rate": 9.319637953828943e-05, "loss": 0.9566, "step": 5257 }, { "epoch": 1.0689164464322016, "grad_norm": 0.14965321123600006, "learning_rate": 9.317603986575817e-05, "loss": 0.8965, "step": 5258 }, { "epoch": 1.0691197397845091, "grad_norm": 0.1279067099094391, "learning_rate": 9.31557001932269e-05, "loss": 0.894, "step": 5259 }, { "epoch": 1.0693230331368164, "grad_norm": 0.13281695544719696, "learning_rate": 9.313536052069562e-05, "loss": 1.0136, "step": 5260 }, { "epoch": 1.0695263264891237, "grad_norm": 0.16254045069217682, "learning_rate": 9.311502084816435e-05, "loss": 0.9889, "step": 5261 }, { "epoch": 1.0697296198414312, "grad_norm": 0.13825705647468567, "learning_rate": 9.309468117563308e-05, "loss": 0.7885, "step": 5262 }, { "epoch": 1.0699329131937385, "grad_norm": 0.13333582878112793, "learning_rate": 9.307434150310181e-05, "loss": 0.9819, "step": 5263 }, { "epoch": 1.070136206546046, "grad_norm": 0.13237889111042023, "learning_rate": 9.305400183057053e-05, "loss": 0.9114, "step": 5264 }, { "epoch": 1.0703394998983533, "grad_norm": 0.13572004437446594, "learning_rate": 9.303366215803926e-05, "loss": 1.0542, "step": 5265 }, { "epoch": 1.0705427932506608, "grad_norm": 0.1455228328704834, "learning_rate": 9.3013322485508e-05, "loss": 1.103, "step": 5266 }, { "epoch": 1.070746086602968, "grad_norm": 0.15622329711914062, "learning_rate": 9.299298281297672e-05, "loss": 1.115, "step": 5267 }, { "epoch": 1.0709493799552754, "grad_norm": 0.13965292274951935, "learning_rate": 9.297264314044544e-05, "loss": 0.9293, "step": 5268 }, { "epoch": 1.071152673307583, "grad_norm": 0.1426744908094406, "learning_rate": 9.295230346791417e-05, "loss": 0.9612, "step": 5269 }, { "epoch": 1.0713559666598902, "grad_norm": 0.1491684466600418, "learning_rate": 9.293196379538291e-05, "loss": 0.9873, "step": 5270 }, { "epoch": 1.0715592600121977, "grad_norm": 0.13515818119049072, "learning_rate": 9.291162412285163e-05, "loss": 0.9301, "step": 5271 }, { "epoch": 1.071762553364505, "grad_norm": 0.15374673902988434, "learning_rate": 9.289128445032036e-05, "loss": 1.093, "step": 5272 }, { "epoch": 1.0719658467168123, "grad_norm": 0.15958675742149353, "learning_rate": 9.287094477778908e-05, "loss": 1.017, "step": 5273 }, { "epoch": 1.0721691400691198, "grad_norm": 0.145900696516037, "learning_rate": 9.285060510525782e-05, "loss": 1.0264, "step": 5274 }, { "epoch": 1.072372433421427, "grad_norm": 0.14528687298297882, "learning_rate": 9.283026543272654e-05, "loss": 0.9493, "step": 5275 }, { "epoch": 1.0725757267737346, "grad_norm": 0.154060959815979, "learning_rate": 9.280992576019525e-05, "loss": 1.0978, "step": 5276 }, { "epoch": 1.0727790201260419, "grad_norm": 0.15888723731040955, "learning_rate": 9.278958608766399e-05, "loss": 1.1868, "step": 5277 }, { "epoch": 1.0729823134783492, "grad_norm": 0.15369616448879242, "learning_rate": 9.276924641513272e-05, "loss": 0.9594, "step": 5278 }, { "epoch": 1.0731856068306567, "grad_norm": 0.12550809979438782, "learning_rate": 9.274890674260145e-05, "loss": 0.903, "step": 5279 }, { "epoch": 1.073388900182964, "grad_norm": 0.14872346818447113, "learning_rate": 9.272856707007017e-05, "loss": 1.0651, "step": 5280 }, { "epoch": 1.0735921935352715, "grad_norm": 0.12688830494880676, "learning_rate": 9.27082273975389e-05, "loss": 0.8515, "step": 5281 }, { "epoch": 1.0737954868875788, "grad_norm": 0.14313896000385284, "learning_rate": 9.268788772500763e-05, "loss": 0.9985, "step": 5282 }, { "epoch": 1.073998780239886, "grad_norm": 0.1391676962375641, "learning_rate": 9.266754805247637e-05, "loss": 1.0091, "step": 5283 }, { "epoch": 1.0742020735921936, "grad_norm": 0.12431914359331131, "learning_rate": 9.264720837994508e-05, "loss": 0.8205, "step": 5284 }, { "epoch": 1.0744053669445008, "grad_norm": 0.13841819763183594, "learning_rate": 9.262686870741381e-05, "loss": 0.9676, "step": 5285 }, { "epoch": 1.0746086602968083, "grad_norm": 0.15717893838882446, "learning_rate": 9.260652903488254e-05, "loss": 0.9393, "step": 5286 }, { "epoch": 1.0748119536491156, "grad_norm": 0.1341715157032013, "learning_rate": 9.258618936235128e-05, "loss": 0.851, "step": 5287 }, { "epoch": 1.0750152470014231, "grad_norm": 0.165984645485878, "learning_rate": 9.256584968981999e-05, "loss": 1.1344, "step": 5288 }, { "epoch": 1.0752185403537304, "grad_norm": 0.13740302622318268, "learning_rate": 9.254551001728873e-05, "loss": 0.9482, "step": 5289 }, { "epoch": 1.0754218337060377, "grad_norm": 0.14151054620742798, "learning_rate": 9.252517034475745e-05, "loss": 0.926, "step": 5290 }, { "epoch": 1.0756251270583452, "grad_norm": 0.13971510529518127, "learning_rate": 9.250483067222619e-05, "loss": 0.8981, "step": 5291 }, { "epoch": 1.0758284204106525, "grad_norm": 0.13549084961414337, "learning_rate": 9.24844909996949e-05, "loss": 0.9937, "step": 5292 }, { "epoch": 1.07603171376296, "grad_norm": 0.1490660309791565, "learning_rate": 9.246415132716364e-05, "loss": 0.9437, "step": 5293 }, { "epoch": 1.0762350071152673, "grad_norm": 0.14780007302761078, "learning_rate": 9.244381165463236e-05, "loss": 0.9588, "step": 5294 }, { "epoch": 1.0764383004675748, "grad_norm": 0.15218015015125275, "learning_rate": 9.24234719821011e-05, "loss": 1.1914, "step": 5295 }, { "epoch": 1.0766415938198821, "grad_norm": 0.1452578455209732, "learning_rate": 9.240313230956982e-05, "loss": 1.0413, "step": 5296 }, { "epoch": 1.0768448871721894, "grad_norm": 0.15724782645702362, "learning_rate": 9.238279263703855e-05, "loss": 1.1071, "step": 5297 }, { "epoch": 1.077048180524497, "grad_norm": 0.13541807234287262, "learning_rate": 9.236245296450727e-05, "loss": 0.9328, "step": 5298 }, { "epoch": 1.0772514738768042, "grad_norm": 0.15523411333560944, "learning_rate": 9.234211329197601e-05, "loss": 1.0679, "step": 5299 }, { "epoch": 1.0774547672291117, "grad_norm": 0.15143822133541107, "learning_rate": 9.232177361944474e-05, "loss": 1.083, "step": 5300 }, { "epoch": 1.077658060581419, "grad_norm": 0.13371649384498596, "learning_rate": 9.230143394691346e-05, "loss": 0.8173, "step": 5301 }, { "epoch": 1.0778613539337263, "grad_norm": 0.15870672464370728, "learning_rate": 9.228109427438218e-05, "loss": 1.1819, "step": 5302 }, { "epoch": 1.0780646472860338, "grad_norm": 0.13374915719032288, "learning_rate": 9.226075460185092e-05, "loss": 0.8671, "step": 5303 }, { "epoch": 1.078267940638341, "grad_norm": 0.14287759363651276, "learning_rate": 9.224041492931965e-05, "loss": 0.9432, "step": 5304 }, { "epoch": 1.0784712339906486, "grad_norm": 0.14194057881832123, "learning_rate": 9.222007525678837e-05, "loss": 0.9783, "step": 5305 }, { "epoch": 1.0786745273429559, "grad_norm": 0.16892357170581818, "learning_rate": 9.21997355842571e-05, "loss": 1.1271, "step": 5306 }, { "epoch": 1.0788778206952632, "grad_norm": 0.13903899490833282, "learning_rate": 9.217939591172583e-05, "loss": 1.118, "step": 5307 }, { "epoch": 1.0790811140475707, "grad_norm": 0.1528656780719757, "learning_rate": 9.215905623919456e-05, "loss": 0.9426, "step": 5308 }, { "epoch": 1.079284407399878, "grad_norm": 0.14197883009910583, "learning_rate": 9.213871656666328e-05, "loss": 1.0157, "step": 5309 }, { "epoch": 1.0794877007521855, "grad_norm": 0.16497009992599487, "learning_rate": 9.211837689413201e-05, "loss": 1.1818, "step": 5310 }, { "epoch": 1.0796909941044928, "grad_norm": 0.1422882378101349, "learning_rate": 9.209803722160075e-05, "loss": 0.913, "step": 5311 }, { "epoch": 1.0798942874568, "grad_norm": 0.14429578185081482, "learning_rate": 9.207769754906947e-05, "loss": 1.0821, "step": 5312 }, { "epoch": 1.0800975808091076, "grad_norm": 0.1493476927280426, "learning_rate": 9.20573578765382e-05, "loss": 0.9935, "step": 5313 }, { "epoch": 1.0803008741614148, "grad_norm": 0.1300468146800995, "learning_rate": 9.203701820400692e-05, "loss": 0.8461, "step": 5314 }, { "epoch": 1.0805041675137224, "grad_norm": 0.15119443833827972, "learning_rate": 9.201667853147566e-05, "loss": 1.023, "step": 5315 }, { "epoch": 1.0807074608660296, "grad_norm": 0.11995972692966461, "learning_rate": 9.199633885894438e-05, "loss": 0.8724, "step": 5316 }, { "epoch": 1.0809107542183372, "grad_norm": 0.12740197777748108, "learning_rate": 9.197599918641309e-05, "loss": 0.8857, "step": 5317 }, { "epoch": 1.0811140475706444, "grad_norm": 0.1593119502067566, "learning_rate": 9.195565951388183e-05, "loss": 1.1509, "step": 5318 }, { "epoch": 1.0813173409229517, "grad_norm": 0.16390585899353027, "learning_rate": 9.193531984135055e-05, "loss": 0.9571, "step": 5319 }, { "epoch": 1.0815206342752592, "grad_norm": 0.1462492048740387, "learning_rate": 9.191498016881929e-05, "loss": 0.9622, "step": 5320 }, { "epoch": 1.0817239276275665, "grad_norm": 0.14783547818660736, "learning_rate": 9.1894640496288e-05, "loss": 0.9187, "step": 5321 }, { "epoch": 1.081927220979874, "grad_norm": 0.1550622433423996, "learning_rate": 9.187430082375674e-05, "loss": 1.0517, "step": 5322 }, { "epoch": 1.0821305143321813, "grad_norm": 0.16103710234165192, "learning_rate": 9.185396115122547e-05, "loss": 1.0936, "step": 5323 }, { "epoch": 1.0823338076844886, "grad_norm": 0.14218467473983765, "learning_rate": 9.18336214786942e-05, "loss": 1.0221, "step": 5324 }, { "epoch": 1.0825371010367961, "grad_norm": 0.13486315310001373, "learning_rate": 9.181328180616292e-05, "loss": 0.9754, "step": 5325 }, { "epoch": 1.0827403943891034, "grad_norm": 0.1479676365852356, "learning_rate": 9.179294213363165e-05, "loss": 1.077, "step": 5326 }, { "epoch": 1.082943687741411, "grad_norm": 0.16341105103492737, "learning_rate": 9.177260246110038e-05, "loss": 1.2277, "step": 5327 }, { "epoch": 1.0831469810937182, "grad_norm": 0.16840991377830505, "learning_rate": 9.175226278856912e-05, "loss": 1.2679, "step": 5328 }, { "epoch": 1.0833502744460257, "grad_norm": 0.1398116499185562, "learning_rate": 9.173192311603783e-05, "loss": 0.9881, "step": 5329 }, { "epoch": 1.083553567798333, "grad_norm": 0.13883648812770844, "learning_rate": 9.171158344350656e-05, "loss": 1.001, "step": 5330 }, { "epoch": 1.0837568611506403, "grad_norm": 0.1527661830186844, "learning_rate": 9.169124377097529e-05, "loss": 1.1497, "step": 5331 }, { "epoch": 1.0839601545029478, "grad_norm": 0.17001107335090637, "learning_rate": 9.167090409844403e-05, "loss": 1.1845, "step": 5332 }, { "epoch": 1.084163447855255, "grad_norm": 0.150950089097023, "learning_rate": 9.165056442591274e-05, "loss": 1.0365, "step": 5333 }, { "epoch": 1.0843667412075626, "grad_norm": 0.14451636373996735, "learning_rate": 9.163022475338148e-05, "loss": 0.9777, "step": 5334 }, { "epoch": 1.0845700345598699, "grad_norm": 0.16429436206817627, "learning_rate": 9.16098850808502e-05, "loss": 1.2175, "step": 5335 }, { "epoch": 1.0847733279121772, "grad_norm": 0.1558394581079483, "learning_rate": 9.158954540831894e-05, "loss": 1.1054, "step": 5336 }, { "epoch": 1.0849766212644847, "grad_norm": 0.13814805448055267, "learning_rate": 9.156920573578765e-05, "loss": 0.9972, "step": 5337 }, { "epoch": 1.085179914616792, "grad_norm": 0.15244194865226746, "learning_rate": 9.154886606325639e-05, "loss": 1.0955, "step": 5338 }, { "epoch": 1.0853832079690995, "grad_norm": 0.1596089005470276, "learning_rate": 9.152852639072511e-05, "loss": 1.1731, "step": 5339 }, { "epoch": 1.0855865013214068, "grad_norm": 0.15481899678707123, "learning_rate": 9.150818671819385e-05, "loss": 1.0905, "step": 5340 }, { "epoch": 1.085789794673714, "grad_norm": 0.14864175021648407, "learning_rate": 9.148784704566256e-05, "loss": 1.1572, "step": 5341 }, { "epoch": 1.0859930880260216, "grad_norm": 0.16066181659698486, "learning_rate": 9.14675073731313e-05, "loss": 1.1283, "step": 5342 }, { "epoch": 1.0861963813783289, "grad_norm": 0.13866567611694336, "learning_rate": 9.144716770060002e-05, "loss": 0.8998, "step": 5343 }, { "epoch": 1.0863996747306364, "grad_norm": 0.1368878036737442, "learning_rate": 9.142682802806876e-05, "loss": 0.9914, "step": 5344 }, { "epoch": 1.0866029680829437, "grad_norm": 0.12759725749492645, "learning_rate": 9.140648835553747e-05, "loss": 0.8174, "step": 5345 }, { "epoch": 1.0868062614352512, "grad_norm": 0.16169612109661102, "learning_rate": 9.138614868300621e-05, "loss": 1.13, "step": 5346 }, { "epoch": 1.0870095547875585, "grad_norm": 0.15470115840435028, "learning_rate": 9.136580901047493e-05, "loss": 1.1087, "step": 5347 }, { "epoch": 1.0872128481398657, "grad_norm": 0.1303420513868332, "learning_rate": 9.134546933794367e-05, "loss": 0.9249, "step": 5348 }, { "epoch": 1.0874161414921732, "grad_norm": 0.14182552695274353, "learning_rate": 9.132512966541238e-05, "loss": 0.9983, "step": 5349 }, { "epoch": 1.0876194348444805, "grad_norm": 0.14217984676361084, "learning_rate": 9.130478999288112e-05, "loss": 1.0707, "step": 5350 }, { "epoch": 1.087822728196788, "grad_norm": 0.14239375293254852, "learning_rate": 9.128445032034985e-05, "loss": 0.9731, "step": 5351 }, { "epoch": 1.0880260215490953, "grad_norm": 0.13025087118148804, "learning_rate": 9.126411064781858e-05, "loss": 0.8348, "step": 5352 }, { "epoch": 1.0882293149014026, "grad_norm": 0.15558715164661407, "learning_rate": 9.12437709752873e-05, "loss": 1.1197, "step": 5353 }, { "epoch": 1.0884326082537101, "grad_norm": 0.15348006784915924, "learning_rate": 9.122343130275603e-05, "loss": 1.1771, "step": 5354 }, { "epoch": 1.0886359016060174, "grad_norm": 0.15580473840236664, "learning_rate": 9.120309163022476e-05, "loss": 1.0984, "step": 5355 }, { "epoch": 1.088839194958325, "grad_norm": 0.1323358714580536, "learning_rate": 9.11827519576935e-05, "loss": 0.971, "step": 5356 }, { "epoch": 1.0890424883106322, "grad_norm": 0.14014890789985657, "learning_rate": 9.116241228516222e-05, "loss": 0.9644, "step": 5357 }, { "epoch": 1.0892457816629397, "grad_norm": 0.1312636435031891, "learning_rate": 9.114207261263094e-05, "loss": 0.8542, "step": 5358 }, { "epoch": 1.089449075015247, "grad_norm": 0.14939019083976746, "learning_rate": 9.112173294009967e-05, "loss": 1.053, "step": 5359 }, { "epoch": 1.0896523683675543, "grad_norm": 0.15127089619636536, "learning_rate": 9.110139326756839e-05, "loss": 1.0542, "step": 5360 }, { "epoch": 1.0898556617198618, "grad_norm": 0.14450344443321228, "learning_rate": 9.108105359503713e-05, "loss": 1.1007, "step": 5361 }, { "epoch": 1.090058955072169, "grad_norm": 0.1629301756620407, "learning_rate": 9.106071392250584e-05, "loss": 1.1548, "step": 5362 }, { "epoch": 1.0902622484244766, "grad_norm": 0.13532504439353943, "learning_rate": 9.104037424997458e-05, "loss": 0.9654, "step": 5363 }, { "epoch": 1.090465541776784, "grad_norm": 0.12162788212299347, "learning_rate": 9.10200345774433e-05, "loss": 0.8145, "step": 5364 }, { "epoch": 1.0906688351290912, "grad_norm": 0.1330665647983551, "learning_rate": 9.099969490491204e-05, "loss": 0.8723, "step": 5365 }, { "epoch": 1.0908721284813987, "grad_norm": 0.15818393230438232, "learning_rate": 9.097935523238075e-05, "loss": 0.9827, "step": 5366 }, { "epoch": 1.091075421833706, "grad_norm": 0.1608572006225586, "learning_rate": 9.095901555984949e-05, "loss": 1.0993, "step": 5367 }, { "epoch": 1.0912787151860135, "grad_norm": 0.14446404576301575, "learning_rate": 9.093867588731822e-05, "loss": 0.9614, "step": 5368 }, { "epoch": 1.0914820085383208, "grad_norm": 0.14879867434501648, "learning_rate": 9.091833621478695e-05, "loss": 1.0191, "step": 5369 }, { "epoch": 1.091685301890628, "grad_norm": 0.1518966406583786, "learning_rate": 9.089799654225566e-05, "loss": 1.0615, "step": 5370 }, { "epoch": 1.0918885952429356, "grad_norm": 0.15035811066627502, "learning_rate": 9.08776568697244e-05, "loss": 1.0437, "step": 5371 }, { "epoch": 1.0920918885952429, "grad_norm": 0.13971300423145294, "learning_rate": 9.085731719719313e-05, "loss": 0.9, "step": 5372 }, { "epoch": 1.0922951819475504, "grad_norm": 0.15553440153598785, "learning_rate": 9.083697752466187e-05, "loss": 1.1681, "step": 5373 }, { "epoch": 1.0924984752998577, "grad_norm": 0.1469622105360031, "learning_rate": 9.081663785213058e-05, "loss": 1.0313, "step": 5374 }, { "epoch": 1.0927017686521652, "grad_norm": 0.14632147550582886, "learning_rate": 9.079629817959931e-05, "loss": 0.9966, "step": 5375 }, { "epoch": 1.0929050620044725, "grad_norm": 0.14677052199840546, "learning_rate": 9.077595850706804e-05, "loss": 1.0508, "step": 5376 }, { "epoch": 1.0931083553567797, "grad_norm": 0.1287970244884491, "learning_rate": 9.075561883453678e-05, "loss": 0.8408, "step": 5377 }, { "epoch": 1.0933116487090873, "grad_norm": 0.14569233357906342, "learning_rate": 9.073527916200549e-05, "loss": 1.0676, "step": 5378 }, { "epoch": 1.0935149420613945, "grad_norm": 0.13696177303791046, "learning_rate": 9.071493948947423e-05, "loss": 0.9689, "step": 5379 }, { "epoch": 1.093718235413702, "grad_norm": 0.13838046789169312, "learning_rate": 9.069459981694295e-05, "loss": 0.9577, "step": 5380 }, { "epoch": 1.0939215287660093, "grad_norm": 0.13019011914730072, "learning_rate": 9.067426014441169e-05, "loss": 0.8568, "step": 5381 }, { "epoch": 1.0941248221183166, "grad_norm": 0.13824008405208588, "learning_rate": 9.06539204718804e-05, "loss": 0.8465, "step": 5382 }, { "epoch": 1.0943281154706241, "grad_norm": 0.15692386031150818, "learning_rate": 9.063358079934914e-05, "loss": 1.1576, "step": 5383 }, { "epoch": 1.0945314088229314, "grad_norm": 0.166419118642807, "learning_rate": 9.061324112681786e-05, "loss": 1.2284, "step": 5384 }, { "epoch": 1.094734702175239, "grad_norm": 0.1578996330499649, "learning_rate": 9.05929014542866e-05, "loss": 1.2505, "step": 5385 }, { "epoch": 1.0949379955275462, "grad_norm": 0.15102256834506989, "learning_rate": 9.057256178175531e-05, "loss": 1.2031, "step": 5386 }, { "epoch": 1.0951412888798537, "grad_norm": 0.15047991275787354, "learning_rate": 9.055222210922405e-05, "loss": 0.944, "step": 5387 }, { "epoch": 1.095344582232161, "grad_norm": 0.138466015458107, "learning_rate": 9.053188243669277e-05, "loss": 0.9546, "step": 5388 }, { "epoch": 1.0955478755844683, "grad_norm": 0.13472160696983337, "learning_rate": 9.051154276416151e-05, "loss": 0.8465, "step": 5389 }, { "epoch": 1.0957511689367758, "grad_norm": 0.14435645937919617, "learning_rate": 9.049120309163022e-05, "loss": 0.9928, "step": 5390 }, { "epoch": 1.095954462289083, "grad_norm": 0.16124622523784637, "learning_rate": 9.047086341909896e-05, "loss": 1.1246, "step": 5391 }, { "epoch": 1.0961577556413906, "grad_norm": 0.14154884219169617, "learning_rate": 9.045052374656768e-05, "loss": 1.0244, "step": 5392 }, { "epoch": 1.096361048993698, "grad_norm": 0.14055821299552917, "learning_rate": 9.043018407403642e-05, "loss": 0.8506, "step": 5393 }, { "epoch": 1.0965643423460052, "grad_norm": 0.16625897586345673, "learning_rate": 9.040984440150513e-05, "loss": 1.2688, "step": 5394 }, { "epoch": 1.0967676356983127, "grad_norm": 0.1494191437959671, "learning_rate": 9.038950472897387e-05, "loss": 1.1474, "step": 5395 }, { "epoch": 1.09697092905062, "grad_norm": 0.1614067256450653, "learning_rate": 9.03691650564426e-05, "loss": 1.2221, "step": 5396 }, { "epoch": 1.0971742224029275, "grad_norm": 0.1351841688156128, "learning_rate": 9.034882538391133e-05, "loss": 1.0106, "step": 5397 }, { "epoch": 1.0973775157552348, "grad_norm": 0.15088312327861786, "learning_rate": 9.032848571138004e-05, "loss": 1.0176, "step": 5398 }, { "epoch": 1.097580809107542, "grad_norm": 0.14838740229606628, "learning_rate": 9.030814603884878e-05, "loss": 1.1022, "step": 5399 }, { "epoch": 1.0977841024598496, "grad_norm": 0.13708491623401642, "learning_rate": 9.028780636631751e-05, "loss": 0.8909, "step": 5400 }, { "epoch": 1.0979873958121569, "grad_norm": 0.15131209790706635, "learning_rate": 9.026746669378623e-05, "loss": 1.218, "step": 5401 }, { "epoch": 1.0981906891644644, "grad_norm": 0.1643504798412323, "learning_rate": 9.024712702125496e-05, "loss": 1.1279, "step": 5402 }, { "epoch": 1.0983939825167717, "grad_norm": 0.1353635936975479, "learning_rate": 9.022678734872368e-05, "loss": 0.9174, "step": 5403 }, { "epoch": 1.0985972758690792, "grad_norm": 0.1374010145664215, "learning_rate": 9.020644767619242e-05, "loss": 1.0884, "step": 5404 }, { "epoch": 1.0988005692213865, "grad_norm": 0.15371763706207275, "learning_rate": 9.018610800366114e-05, "loss": 0.9519, "step": 5405 }, { "epoch": 1.0990038625736938, "grad_norm": 0.15383990108966827, "learning_rate": 9.016576833112987e-05, "loss": 1.2321, "step": 5406 }, { "epoch": 1.0992071559260013, "grad_norm": 0.1341893970966339, "learning_rate": 9.014542865859859e-05, "loss": 1.0372, "step": 5407 }, { "epoch": 1.0994104492783086, "grad_norm": 0.13906438648700714, "learning_rate": 9.012508898606733e-05, "loss": 0.8928, "step": 5408 }, { "epoch": 1.099613742630616, "grad_norm": 0.14861425757408142, "learning_rate": 9.010474931353605e-05, "loss": 1.0851, "step": 5409 }, { "epoch": 1.0998170359829234, "grad_norm": 0.1484142243862152, "learning_rate": 9.008440964100478e-05, "loss": 1.1029, "step": 5410 }, { "epoch": 1.1000203293352306, "grad_norm": 0.14029765129089355, "learning_rate": 9.00640699684735e-05, "loss": 0.9775, "step": 5411 }, { "epoch": 1.1002236226875381, "grad_norm": 0.1527182012796402, "learning_rate": 9.004373029594224e-05, "loss": 1.0849, "step": 5412 }, { "epoch": 1.1004269160398454, "grad_norm": 0.15019920468330383, "learning_rate": 9.002339062341097e-05, "loss": 0.978, "step": 5413 }, { "epoch": 1.100630209392153, "grad_norm": 0.15240661799907684, "learning_rate": 9.00030509508797e-05, "loss": 1.1096, "step": 5414 }, { "epoch": 1.1008335027444602, "grad_norm": 0.14962786436080933, "learning_rate": 8.998271127834841e-05, "loss": 1.1918, "step": 5415 }, { "epoch": 1.1010367960967677, "grad_norm": 0.13056641817092896, "learning_rate": 8.996237160581715e-05, "loss": 0.8757, "step": 5416 }, { "epoch": 1.101240089449075, "grad_norm": 0.1387203186750412, "learning_rate": 8.994203193328588e-05, "loss": 0.8188, "step": 5417 }, { "epoch": 1.1014433828013823, "grad_norm": 0.15652364492416382, "learning_rate": 8.992169226075462e-05, "loss": 0.9948, "step": 5418 }, { "epoch": 1.1016466761536898, "grad_norm": 0.16937415301799774, "learning_rate": 8.990135258822333e-05, "loss": 1.077, "step": 5419 }, { "epoch": 1.1018499695059971, "grad_norm": 0.1426754593849182, "learning_rate": 8.988101291569206e-05, "loss": 0.9465, "step": 5420 }, { "epoch": 1.1020532628583046, "grad_norm": 0.15397274494171143, "learning_rate": 8.986067324316079e-05, "loss": 1.0617, "step": 5421 }, { "epoch": 1.102256556210612, "grad_norm": 0.13782437145709991, "learning_rate": 8.984033357062953e-05, "loss": 0.9, "step": 5422 }, { "epoch": 1.1024598495629192, "grad_norm": 0.13675148785114288, "learning_rate": 8.981999389809824e-05, "loss": 0.9424, "step": 5423 }, { "epoch": 1.1026631429152267, "grad_norm": 0.15254589915275574, "learning_rate": 8.979965422556698e-05, "loss": 1.0416, "step": 5424 }, { "epoch": 1.102866436267534, "grad_norm": 0.15221473574638367, "learning_rate": 8.97793145530357e-05, "loss": 1.0762, "step": 5425 }, { "epoch": 1.1030697296198415, "grad_norm": 0.16279594600200653, "learning_rate": 8.975897488050444e-05, "loss": 1.116, "step": 5426 }, { "epoch": 1.1032730229721488, "grad_norm": 0.14563637971878052, "learning_rate": 8.973863520797315e-05, "loss": 0.9308, "step": 5427 }, { "epoch": 1.103476316324456, "grad_norm": 0.1332903355360031, "learning_rate": 8.971829553544189e-05, "loss": 0.901, "step": 5428 }, { "epoch": 1.1036796096767636, "grad_norm": 0.16691714525222778, "learning_rate": 8.969795586291061e-05, "loss": 1.1796, "step": 5429 }, { "epoch": 1.1038829030290709, "grad_norm": 0.15106704831123352, "learning_rate": 8.967761619037935e-05, "loss": 1.034, "step": 5430 }, { "epoch": 1.1040861963813784, "grad_norm": 0.14782746136188507, "learning_rate": 8.965727651784806e-05, "loss": 0.9958, "step": 5431 }, { "epoch": 1.1042894897336857, "grad_norm": 0.13478310406208038, "learning_rate": 8.96369368453168e-05, "loss": 0.9949, "step": 5432 }, { "epoch": 1.1044927830859932, "grad_norm": 0.1417469084262848, "learning_rate": 8.961659717278552e-05, "loss": 0.945, "step": 5433 }, { "epoch": 1.1046960764383005, "grad_norm": 0.15904608368873596, "learning_rate": 8.959625750025426e-05, "loss": 0.9804, "step": 5434 }, { "epoch": 1.1048993697906078, "grad_norm": 0.14991609752178192, "learning_rate": 8.957591782772297e-05, "loss": 0.9633, "step": 5435 }, { "epoch": 1.1051026631429153, "grad_norm": 0.14758650958538055, "learning_rate": 8.955557815519171e-05, "loss": 1.2216, "step": 5436 }, { "epoch": 1.1053059564952226, "grad_norm": 0.15049061179161072, "learning_rate": 8.953523848266043e-05, "loss": 0.975, "step": 5437 }, { "epoch": 1.10550924984753, "grad_norm": 0.15753579139709473, "learning_rate": 8.951489881012917e-05, "loss": 1.1518, "step": 5438 }, { "epoch": 1.1057125431998374, "grad_norm": 0.14249707758426666, "learning_rate": 8.949455913759788e-05, "loss": 1.0243, "step": 5439 }, { "epoch": 1.1059158365521446, "grad_norm": 0.14323362708091736, "learning_rate": 8.947421946506662e-05, "loss": 0.9114, "step": 5440 }, { "epoch": 1.1061191299044522, "grad_norm": 0.12730684876441956, "learning_rate": 8.945387979253535e-05, "loss": 0.8369, "step": 5441 }, { "epoch": 1.1063224232567594, "grad_norm": 0.16354194283485413, "learning_rate": 8.943354012000407e-05, "loss": 1.0706, "step": 5442 }, { "epoch": 1.106525716609067, "grad_norm": 0.1542551964521408, "learning_rate": 8.94132004474728e-05, "loss": 1.1125, "step": 5443 }, { "epoch": 1.1067290099613742, "grad_norm": 0.14237554371356964, "learning_rate": 8.939286077494152e-05, "loss": 0.8999, "step": 5444 }, { "epoch": 1.1069323033136818, "grad_norm": 0.15537720918655396, "learning_rate": 8.937252110241026e-05, "loss": 0.9615, "step": 5445 }, { "epoch": 1.107135596665989, "grad_norm": 0.15942874550819397, "learning_rate": 8.935218142987898e-05, "loss": 1.0923, "step": 5446 }, { "epoch": 1.1073388900182963, "grad_norm": 0.12632033228874207, "learning_rate": 8.93318417573477e-05, "loss": 0.8124, "step": 5447 }, { "epoch": 1.1075421833706038, "grad_norm": 0.1381000280380249, "learning_rate": 8.931150208481643e-05, "loss": 0.8588, "step": 5448 }, { "epoch": 1.1077454767229111, "grad_norm": 0.13437995314598083, "learning_rate": 8.929116241228517e-05, "loss": 0.9958, "step": 5449 }, { "epoch": 1.1079487700752186, "grad_norm": 0.14716283977031708, "learning_rate": 8.927082273975389e-05, "loss": 0.9405, "step": 5450 }, { "epoch": 1.108152063427526, "grad_norm": 0.1730317771434784, "learning_rate": 8.925048306722262e-05, "loss": 1.1552, "step": 5451 }, { "epoch": 1.1083553567798332, "grad_norm": 0.1391172856092453, "learning_rate": 8.923014339469134e-05, "loss": 0.8306, "step": 5452 }, { "epoch": 1.1085586501321407, "grad_norm": 0.12834659218788147, "learning_rate": 8.920980372216008e-05, "loss": 0.918, "step": 5453 }, { "epoch": 1.108761943484448, "grad_norm": 0.1469629555940628, "learning_rate": 8.91894640496288e-05, "loss": 1.066, "step": 5454 }, { "epoch": 1.1089652368367555, "grad_norm": 0.15368422865867615, "learning_rate": 8.916912437709753e-05, "loss": 1.097, "step": 5455 }, { "epoch": 1.1091685301890628, "grad_norm": 0.1652909219264984, "learning_rate": 8.914878470456625e-05, "loss": 0.9886, "step": 5456 }, { "epoch": 1.10937182354137, "grad_norm": 0.14498552680015564, "learning_rate": 8.912844503203499e-05, "loss": 0.9856, "step": 5457 }, { "epoch": 1.1095751168936776, "grad_norm": 0.15853099524974823, "learning_rate": 8.910810535950372e-05, "loss": 1.1654, "step": 5458 }, { "epoch": 1.109778410245985, "grad_norm": 0.14158296585083008, "learning_rate": 8.908776568697244e-05, "loss": 0.9035, "step": 5459 }, { "epoch": 1.1099817035982924, "grad_norm": 0.18183912336826324, "learning_rate": 8.906742601444116e-05, "loss": 1.3072, "step": 5460 }, { "epoch": 1.1101849969505997, "grad_norm": 0.13865096867084503, "learning_rate": 8.90470863419099e-05, "loss": 0.8537, "step": 5461 }, { "epoch": 1.110388290302907, "grad_norm": 0.15828418731689453, "learning_rate": 8.902674666937863e-05, "loss": 1.074, "step": 5462 }, { "epoch": 1.1105915836552145, "grad_norm": 0.1492200642824173, "learning_rate": 8.900640699684735e-05, "loss": 0.9791, "step": 5463 }, { "epoch": 1.1107948770075218, "grad_norm": 0.14875096082687378, "learning_rate": 8.898606732431608e-05, "loss": 0.9389, "step": 5464 }, { "epoch": 1.1109981703598293, "grad_norm": 0.1451048105955124, "learning_rate": 8.896572765178481e-05, "loss": 1.0095, "step": 5465 }, { "epoch": 1.1112014637121366, "grad_norm": 0.14733406901359558, "learning_rate": 8.894538797925354e-05, "loss": 0.9441, "step": 5466 }, { "epoch": 1.111404757064444, "grad_norm": 0.15560275316238403, "learning_rate": 8.892504830672226e-05, "loss": 1.0225, "step": 5467 }, { "epoch": 1.1116080504167514, "grad_norm": 0.15289406478405, "learning_rate": 8.890470863419099e-05, "loss": 1.044, "step": 5468 }, { "epoch": 1.1118113437690587, "grad_norm": 0.13992512226104736, "learning_rate": 8.888436896165973e-05, "loss": 0.9947, "step": 5469 }, { "epoch": 1.1120146371213662, "grad_norm": 0.150540292263031, "learning_rate": 8.886402928912845e-05, "loss": 1.0501, "step": 5470 }, { "epoch": 1.1122179304736735, "grad_norm": 0.1405702531337738, "learning_rate": 8.884368961659719e-05, "loss": 0.9497, "step": 5471 }, { "epoch": 1.112421223825981, "grad_norm": 0.15190722048282623, "learning_rate": 8.88233499440659e-05, "loss": 1.016, "step": 5472 }, { "epoch": 1.1126245171782883, "grad_norm": 0.14616787433624268, "learning_rate": 8.880301027153464e-05, "loss": 1.0629, "step": 5473 }, { "epoch": 1.1128278105305958, "grad_norm": 0.13169008493423462, "learning_rate": 8.878267059900336e-05, "loss": 0.9228, "step": 5474 }, { "epoch": 1.113031103882903, "grad_norm": 0.14765764772891998, "learning_rate": 8.87623309264721e-05, "loss": 1.0189, "step": 5475 }, { "epoch": 1.1132343972352103, "grad_norm": 0.1653669774532318, "learning_rate": 8.874199125394081e-05, "loss": 1.292, "step": 5476 }, { "epoch": 1.1134376905875178, "grad_norm": 0.12520195543766022, "learning_rate": 8.872165158140955e-05, "loss": 0.7488, "step": 5477 }, { "epoch": 1.1136409839398251, "grad_norm": 0.15892694890499115, "learning_rate": 8.870131190887827e-05, "loss": 1.1323, "step": 5478 }, { "epoch": 1.1138442772921326, "grad_norm": 0.15398187935352325, "learning_rate": 8.868097223634701e-05, "loss": 1.1707, "step": 5479 }, { "epoch": 1.11404757064444, "grad_norm": 0.15214598178863525, "learning_rate": 8.866063256381572e-05, "loss": 1.0544, "step": 5480 }, { "epoch": 1.1142508639967472, "grad_norm": 0.14790403842926025, "learning_rate": 8.864029289128446e-05, "loss": 1.073, "step": 5481 }, { "epoch": 1.1144541573490547, "grad_norm": 0.1339927762746811, "learning_rate": 8.861995321875318e-05, "loss": 0.8919, "step": 5482 }, { "epoch": 1.114657450701362, "grad_norm": 0.12794196605682373, "learning_rate": 8.859961354622191e-05, "loss": 0.8973, "step": 5483 }, { "epoch": 1.1148607440536695, "grad_norm": 0.1576857566833496, "learning_rate": 8.857927387369063e-05, "loss": 1.0195, "step": 5484 }, { "epoch": 1.1150640374059768, "grad_norm": 0.13559751212596893, "learning_rate": 8.855893420115936e-05, "loss": 0.8857, "step": 5485 }, { "epoch": 1.115267330758284, "grad_norm": 0.15552525222301483, "learning_rate": 8.85385945286281e-05, "loss": 1.1108, "step": 5486 }, { "epoch": 1.1154706241105916, "grad_norm": 0.14692267775535583, "learning_rate": 8.851825485609682e-05, "loss": 0.952, "step": 5487 }, { "epoch": 1.115673917462899, "grad_norm": 0.15186382830142975, "learning_rate": 8.849791518356554e-05, "loss": 1.0116, "step": 5488 }, { "epoch": 1.1158772108152064, "grad_norm": 0.1544073224067688, "learning_rate": 8.847757551103427e-05, "loss": 1.0548, "step": 5489 }, { "epoch": 1.1160805041675137, "grad_norm": 0.149419903755188, "learning_rate": 8.8457235838503e-05, "loss": 1.0116, "step": 5490 }, { "epoch": 1.116283797519821, "grad_norm": 0.14665424823760986, "learning_rate": 8.843689616597173e-05, "loss": 1.0848, "step": 5491 }, { "epoch": 1.1164870908721285, "grad_norm": 0.15898637473583221, "learning_rate": 8.841655649344046e-05, "loss": 1.0112, "step": 5492 }, { "epoch": 1.1166903842244358, "grad_norm": 0.1535964161157608, "learning_rate": 8.839621682090918e-05, "loss": 1.1055, "step": 5493 }, { "epoch": 1.1168936775767433, "grad_norm": 0.1359899640083313, "learning_rate": 8.837587714837792e-05, "loss": 0.9534, "step": 5494 }, { "epoch": 1.1170969709290506, "grad_norm": 0.1600065380334854, "learning_rate": 8.835553747584664e-05, "loss": 1.0518, "step": 5495 }, { "epoch": 1.117300264281358, "grad_norm": 0.15466952323913574, "learning_rate": 8.833519780331537e-05, "loss": 0.9669, "step": 5496 }, { "epoch": 1.1175035576336654, "grad_norm": 0.13147573173046112, "learning_rate": 8.831485813078409e-05, "loss": 0.8511, "step": 5497 }, { "epoch": 1.1177068509859727, "grad_norm": 0.15012076497077942, "learning_rate": 8.829451845825283e-05, "loss": 1.0451, "step": 5498 }, { "epoch": 1.1179101443382802, "grad_norm": 0.16345028579235077, "learning_rate": 8.827417878572155e-05, "loss": 1.3569, "step": 5499 }, { "epoch": 1.1181134376905875, "grad_norm": 0.1560164839029312, "learning_rate": 8.825383911319028e-05, "loss": 1.0785, "step": 5500 }, { "epoch": 1.118316731042895, "grad_norm": 0.16095221042633057, "learning_rate": 8.8233499440659e-05, "loss": 1.1265, "step": 5501 }, { "epoch": 1.1185200243952023, "grad_norm": 0.1449424773454666, "learning_rate": 8.821315976812774e-05, "loss": 1.0265, "step": 5502 }, { "epoch": 1.1187233177475098, "grad_norm": 0.13522765040397644, "learning_rate": 8.819282009559647e-05, "loss": 0.8884, "step": 5503 }, { "epoch": 1.118926611099817, "grad_norm": 0.15144997835159302, "learning_rate": 8.817248042306519e-05, "loss": 1.0492, "step": 5504 }, { "epoch": 1.1191299044521243, "grad_norm": 0.14827972650527954, "learning_rate": 8.815214075053391e-05, "loss": 0.8902, "step": 5505 }, { "epoch": 1.1193331978044319, "grad_norm": 0.15328602492809296, "learning_rate": 8.813180107800265e-05, "loss": 1.035, "step": 5506 }, { "epoch": 1.1195364911567391, "grad_norm": 0.1393328458070755, "learning_rate": 8.811146140547138e-05, "loss": 1.0063, "step": 5507 }, { "epoch": 1.1197397845090467, "grad_norm": 0.15321269631385803, "learning_rate": 8.80911217329401e-05, "loss": 0.8726, "step": 5508 }, { "epoch": 1.119943077861354, "grad_norm": 0.14183679223060608, "learning_rate": 8.807078206040883e-05, "loss": 0.9512, "step": 5509 }, { "epoch": 1.1201463712136612, "grad_norm": 0.14985686540603638, "learning_rate": 8.805044238787756e-05, "loss": 1.065, "step": 5510 }, { "epoch": 1.1203496645659687, "grad_norm": 0.13866209983825684, "learning_rate": 8.803010271534629e-05, "loss": 1.0288, "step": 5511 }, { "epoch": 1.120552957918276, "grad_norm": 0.14831571280956268, "learning_rate": 8.800976304281501e-05, "loss": 0.9417, "step": 5512 }, { "epoch": 1.1207562512705835, "grad_norm": 0.1543184220790863, "learning_rate": 8.798942337028374e-05, "loss": 1.1379, "step": 5513 }, { "epoch": 1.1209595446228908, "grad_norm": 0.14912952482700348, "learning_rate": 8.796908369775248e-05, "loss": 1.0517, "step": 5514 }, { "epoch": 1.1211628379751981, "grad_norm": 0.15913262963294983, "learning_rate": 8.79487440252212e-05, "loss": 1.0256, "step": 5515 }, { "epoch": 1.1213661313275056, "grad_norm": 0.14145322144031525, "learning_rate": 8.792840435268992e-05, "loss": 1.0517, "step": 5516 }, { "epoch": 1.121569424679813, "grad_norm": 0.1385679990053177, "learning_rate": 8.790806468015865e-05, "loss": 0.8375, "step": 5517 }, { "epoch": 1.1217727180321204, "grad_norm": 0.15951962769031525, "learning_rate": 8.788772500762739e-05, "loss": 1.0235, "step": 5518 }, { "epoch": 1.1219760113844277, "grad_norm": 0.14883315563201904, "learning_rate": 8.786738533509611e-05, "loss": 1.1446, "step": 5519 }, { "epoch": 1.122179304736735, "grad_norm": 0.15519806742668152, "learning_rate": 8.784704566256484e-05, "loss": 1.0978, "step": 5520 }, { "epoch": 1.1223825980890425, "grad_norm": 0.1433810442686081, "learning_rate": 8.782670599003356e-05, "loss": 0.9519, "step": 5521 }, { "epoch": 1.1225858914413498, "grad_norm": 0.13667570054531097, "learning_rate": 8.78063663175023e-05, "loss": 0.95, "step": 5522 }, { "epoch": 1.1227891847936573, "grad_norm": 0.1338377296924591, "learning_rate": 8.778602664497102e-05, "loss": 0.9165, "step": 5523 }, { "epoch": 1.1229924781459646, "grad_norm": 0.1531645655632019, "learning_rate": 8.776568697243975e-05, "loss": 1.2018, "step": 5524 }, { "epoch": 1.123195771498272, "grad_norm": 0.15291288495063782, "learning_rate": 8.774534729990847e-05, "loss": 1.1504, "step": 5525 }, { "epoch": 1.1233990648505794, "grad_norm": 0.1413464993238449, "learning_rate": 8.77250076273772e-05, "loss": 0.9624, "step": 5526 }, { "epoch": 1.1236023582028867, "grad_norm": 0.15262098610401154, "learning_rate": 8.770466795484593e-05, "loss": 1.1616, "step": 5527 }, { "epoch": 1.1238056515551942, "grad_norm": 0.1626216322183609, "learning_rate": 8.768432828231466e-05, "loss": 1.2003, "step": 5528 }, { "epoch": 1.1240089449075015, "grad_norm": 0.16189110279083252, "learning_rate": 8.766398860978338e-05, "loss": 1.0696, "step": 5529 }, { "epoch": 1.124212238259809, "grad_norm": 0.14506696164608002, "learning_rate": 8.764364893725211e-05, "loss": 1.0076, "step": 5530 }, { "epoch": 1.1244155316121163, "grad_norm": 0.135748028755188, "learning_rate": 8.762330926472085e-05, "loss": 0.8196, "step": 5531 }, { "epoch": 1.1246188249644238, "grad_norm": 0.15155237913131714, "learning_rate": 8.760296959218957e-05, "loss": 1.0832, "step": 5532 }, { "epoch": 1.124822118316731, "grad_norm": 0.14161360263824463, "learning_rate": 8.75826299196583e-05, "loss": 1.0264, "step": 5533 }, { "epoch": 1.1250254116690384, "grad_norm": 0.12882399559020996, "learning_rate": 8.756229024712702e-05, "loss": 0.8623, "step": 5534 }, { "epoch": 1.1252287050213459, "grad_norm": 0.14270846545696259, "learning_rate": 8.754195057459576e-05, "loss": 1.0163, "step": 5535 }, { "epoch": 1.1254319983736532, "grad_norm": 0.1428549438714981, "learning_rate": 8.752161090206448e-05, "loss": 0.9291, "step": 5536 }, { "epoch": 1.1256352917259607, "grad_norm": 0.13165485858917236, "learning_rate": 8.75012712295332e-05, "loss": 0.8326, "step": 5537 }, { "epoch": 1.125838585078268, "grad_norm": 0.14879177510738373, "learning_rate": 8.748093155700193e-05, "loss": 1.1023, "step": 5538 }, { "epoch": 1.1260418784305752, "grad_norm": 0.150389164686203, "learning_rate": 8.746059188447067e-05, "loss": 0.982, "step": 5539 }, { "epoch": 1.1262451717828827, "grad_norm": 0.16434215009212494, "learning_rate": 8.744025221193939e-05, "loss": 1.0485, "step": 5540 }, { "epoch": 1.12644846513519, "grad_norm": 0.13194431364536285, "learning_rate": 8.741991253940812e-05, "loss": 0.8257, "step": 5541 }, { "epoch": 1.1266517584874975, "grad_norm": 0.1411994993686676, "learning_rate": 8.739957286687684e-05, "loss": 0.9818, "step": 5542 }, { "epoch": 1.1268550518398048, "grad_norm": 0.13878868520259857, "learning_rate": 8.737923319434558e-05, "loss": 0.9704, "step": 5543 }, { "epoch": 1.1270583451921121, "grad_norm": 0.16277877986431122, "learning_rate": 8.73588935218143e-05, "loss": 1.0612, "step": 5544 }, { "epoch": 1.1272616385444196, "grad_norm": 0.16174069046974182, "learning_rate": 8.733855384928303e-05, "loss": 1.2214, "step": 5545 }, { "epoch": 1.127464931896727, "grad_norm": 0.1509084403514862, "learning_rate": 8.731821417675175e-05, "loss": 0.9823, "step": 5546 }, { "epoch": 1.1276682252490344, "grad_norm": 0.1313520222902298, "learning_rate": 8.729787450422049e-05, "loss": 0.8928, "step": 5547 }, { "epoch": 1.1278715186013417, "grad_norm": 0.14811810851097107, "learning_rate": 8.727753483168922e-05, "loss": 1.0147, "step": 5548 }, { "epoch": 1.128074811953649, "grad_norm": 0.16996385157108307, "learning_rate": 8.725719515915794e-05, "loss": 1.1564, "step": 5549 }, { "epoch": 1.1282781053059565, "grad_norm": 0.145395427942276, "learning_rate": 8.723685548662666e-05, "loss": 0.9943, "step": 5550 }, { "epoch": 1.1284813986582638, "grad_norm": 0.13872261345386505, "learning_rate": 8.72165158140954e-05, "loss": 0.9406, "step": 5551 }, { "epoch": 1.1286846920105713, "grad_norm": 0.15163040161132812, "learning_rate": 8.719617614156413e-05, "loss": 0.9178, "step": 5552 }, { "epoch": 1.1288879853628786, "grad_norm": 0.1619151532649994, "learning_rate": 8.717583646903285e-05, "loss": 1.184, "step": 5553 }, { "epoch": 1.129091278715186, "grad_norm": 0.1568858027458191, "learning_rate": 8.715549679650158e-05, "loss": 1.0981, "step": 5554 }, { "epoch": 1.1292945720674934, "grad_norm": 0.158450186252594, "learning_rate": 8.713515712397031e-05, "loss": 1.1141, "step": 5555 }, { "epoch": 1.1294978654198007, "grad_norm": 0.1398298293352127, "learning_rate": 8.711481745143904e-05, "loss": 0.9325, "step": 5556 }, { "epoch": 1.1297011587721082, "grad_norm": 0.14823031425476074, "learning_rate": 8.709447777890776e-05, "loss": 1.088, "step": 5557 }, { "epoch": 1.1299044521244155, "grad_norm": 0.1620652973651886, "learning_rate": 8.707413810637649e-05, "loss": 1.1094, "step": 5558 }, { "epoch": 1.130107745476723, "grad_norm": 0.15144942700862885, "learning_rate": 8.705379843384522e-05, "loss": 1.05, "step": 5559 }, { "epoch": 1.1303110388290303, "grad_norm": 0.14913499355316162, "learning_rate": 8.703345876131395e-05, "loss": 0.8884, "step": 5560 }, { "epoch": 1.1305143321813378, "grad_norm": 0.13716764748096466, "learning_rate": 8.701311908878267e-05, "loss": 0.96, "step": 5561 }, { "epoch": 1.130717625533645, "grad_norm": 0.14230920374393463, "learning_rate": 8.69927794162514e-05, "loss": 0.9695, "step": 5562 }, { "epoch": 1.1309209188859524, "grad_norm": 0.14691171050071716, "learning_rate": 8.697243974372014e-05, "loss": 1.0266, "step": 5563 }, { "epoch": 1.1311242122382599, "grad_norm": 0.15973179042339325, "learning_rate": 8.695210007118886e-05, "loss": 1.1036, "step": 5564 }, { "epoch": 1.1313275055905672, "grad_norm": 0.14725075662136078, "learning_rate": 8.693176039865759e-05, "loss": 1.0159, "step": 5565 }, { "epoch": 1.1315307989428747, "grad_norm": 0.15641410648822784, "learning_rate": 8.691142072612631e-05, "loss": 1.11, "step": 5566 }, { "epoch": 1.131734092295182, "grad_norm": 0.12851740419864655, "learning_rate": 8.689108105359503e-05, "loss": 0.8651, "step": 5567 }, { "epoch": 1.1319373856474892, "grad_norm": 0.15197080373764038, "learning_rate": 8.687074138106377e-05, "loss": 0.9718, "step": 5568 }, { "epoch": 1.1321406789997968, "grad_norm": 0.1637222021818161, "learning_rate": 8.68504017085325e-05, "loss": 1.2166, "step": 5569 }, { "epoch": 1.132343972352104, "grad_norm": 0.1471412032842636, "learning_rate": 8.683006203600122e-05, "loss": 1.0772, "step": 5570 }, { "epoch": 1.1325472657044116, "grad_norm": 0.1441943347454071, "learning_rate": 8.680972236346995e-05, "loss": 1.0778, "step": 5571 }, { "epoch": 1.1327505590567188, "grad_norm": 0.13848277926445007, "learning_rate": 8.678938269093868e-05, "loss": 0.9954, "step": 5572 }, { "epoch": 1.1329538524090261, "grad_norm": 0.15798182785511017, "learning_rate": 8.676904301840741e-05, "loss": 1.1491, "step": 5573 }, { "epoch": 1.1331571457613336, "grad_norm": 0.1413598209619522, "learning_rate": 8.674870334587613e-05, "loss": 1.0427, "step": 5574 }, { "epoch": 1.133360439113641, "grad_norm": 0.1488219052553177, "learning_rate": 8.672836367334486e-05, "loss": 1.0375, "step": 5575 }, { "epoch": 1.1335637324659484, "grad_norm": 0.16564448177814484, "learning_rate": 8.67080240008136e-05, "loss": 1.2393, "step": 5576 }, { "epoch": 1.1337670258182557, "grad_norm": 0.13589560985565186, "learning_rate": 8.668768432828232e-05, "loss": 0.8928, "step": 5577 }, { "epoch": 1.133970319170563, "grad_norm": 0.140016570687294, "learning_rate": 8.666734465575104e-05, "loss": 0.9078, "step": 5578 }, { "epoch": 1.1341736125228705, "grad_norm": 0.1660967916250229, "learning_rate": 8.664700498321977e-05, "loss": 1.1746, "step": 5579 }, { "epoch": 1.1343769058751778, "grad_norm": 0.14648735523223877, "learning_rate": 8.66266653106885e-05, "loss": 1.0231, "step": 5580 }, { "epoch": 1.1345801992274853, "grad_norm": 0.15857502818107605, "learning_rate": 8.660632563815723e-05, "loss": 1.1395, "step": 5581 }, { "epoch": 1.1347834925797926, "grad_norm": 0.15594516694545746, "learning_rate": 8.658598596562596e-05, "loss": 1.0801, "step": 5582 }, { "epoch": 1.1349867859321001, "grad_norm": 0.1439754217863083, "learning_rate": 8.656564629309468e-05, "loss": 0.9435, "step": 5583 }, { "epoch": 1.1351900792844074, "grad_norm": 0.1530708372592926, "learning_rate": 8.654530662056342e-05, "loss": 1.0227, "step": 5584 }, { "epoch": 1.1353933726367147, "grad_norm": 0.15302684903144836, "learning_rate": 8.652496694803214e-05, "loss": 1.0522, "step": 5585 }, { "epoch": 1.1355966659890222, "grad_norm": 0.15681840479373932, "learning_rate": 8.650462727550087e-05, "loss": 1.117, "step": 5586 }, { "epoch": 1.1357999593413295, "grad_norm": 0.14197994768619537, "learning_rate": 8.648428760296959e-05, "loss": 0.9114, "step": 5587 }, { "epoch": 1.136003252693637, "grad_norm": 0.15581487119197845, "learning_rate": 8.646394793043833e-05, "loss": 1.0896, "step": 5588 }, { "epoch": 1.1362065460459443, "grad_norm": 0.15578950941562653, "learning_rate": 8.644360825790705e-05, "loss": 1.0771, "step": 5589 }, { "epoch": 1.1364098393982518, "grad_norm": 0.15608197450637817, "learning_rate": 8.642326858537578e-05, "loss": 1.0018, "step": 5590 }, { "epoch": 1.136613132750559, "grad_norm": 0.14285367727279663, "learning_rate": 8.64029289128445e-05, "loss": 0.9629, "step": 5591 }, { "epoch": 1.1368164261028664, "grad_norm": 0.14336615800857544, "learning_rate": 8.638258924031324e-05, "loss": 1.0679, "step": 5592 }, { "epoch": 1.1370197194551739, "grad_norm": 0.13040810823440552, "learning_rate": 8.636224956778197e-05, "loss": 0.898, "step": 5593 }, { "epoch": 1.1372230128074812, "grad_norm": 0.14586962759494781, "learning_rate": 8.634190989525069e-05, "loss": 1.0594, "step": 5594 }, { "epoch": 1.1374263061597887, "grad_norm": 0.15765921771526337, "learning_rate": 8.632157022271941e-05, "loss": 1.0844, "step": 5595 }, { "epoch": 1.137629599512096, "grad_norm": 0.14407019317150116, "learning_rate": 8.630123055018815e-05, "loss": 1.0996, "step": 5596 }, { "epoch": 1.1378328928644033, "grad_norm": 0.16062206029891968, "learning_rate": 8.628089087765688e-05, "loss": 1.1719, "step": 5597 }, { "epoch": 1.1380361862167108, "grad_norm": 0.17504040896892548, "learning_rate": 8.62605512051256e-05, "loss": 1.1719, "step": 5598 }, { "epoch": 1.138239479569018, "grad_norm": 0.15141957998275757, "learning_rate": 8.624021153259433e-05, "loss": 1.064, "step": 5599 }, { "epoch": 1.1384427729213256, "grad_norm": 0.15264716744422913, "learning_rate": 8.621987186006306e-05, "loss": 1.0841, "step": 5600 }, { "epoch": 1.1386460662736329, "grad_norm": 0.16055592894554138, "learning_rate": 8.619953218753179e-05, "loss": 1.1176, "step": 5601 }, { "epoch": 1.1388493596259401, "grad_norm": 0.15006980299949646, "learning_rate": 8.617919251500051e-05, "loss": 1.161, "step": 5602 }, { "epoch": 1.1390526529782476, "grad_norm": 0.17045535147190094, "learning_rate": 8.615885284246924e-05, "loss": 1.2455, "step": 5603 }, { "epoch": 1.139255946330555, "grad_norm": 0.12832573056221008, "learning_rate": 8.613851316993797e-05, "loss": 0.8092, "step": 5604 }, { "epoch": 1.1394592396828624, "grad_norm": 0.13745355606079102, "learning_rate": 8.61181734974067e-05, "loss": 0.9675, "step": 5605 }, { "epoch": 1.1396625330351697, "grad_norm": 0.14315581321716309, "learning_rate": 8.609783382487542e-05, "loss": 0.9764, "step": 5606 }, { "epoch": 1.139865826387477, "grad_norm": 0.1444985270500183, "learning_rate": 8.607749415234415e-05, "loss": 0.8869, "step": 5607 }, { "epoch": 1.1400691197397845, "grad_norm": 0.1422191560268402, "learning_rate": 8.605715447981287e-05, "loss": 0.963, "step": 5608 }, { "epoch": 1.1402724130920918, "grad_norm": 0.1547440141439438, "learning_rate": 8.603681480728161e-05, "loss": 1.0648, "step": 5609 }, { "epoch": 1.1404757064443993, "grad_norm": 0.14438459277153015, "learning_rate": 8.601647513475034e-05, "loss": 0.9473, "step": 5610 }, { "epoch": 1.1406789997967066, "grad_norm": 0.15834081172943115, "learning_rate": 8.599613546221906e-05, "loss": 1.1726, "step": 5611 }, { "epoch": 1.140882293149014, "grad_norm": 0.15683820843696594, "learning_rate": 8.597579578968778e-05, "loss": 1.1011, "step": 5612 }, { "epoch": 1.1410855865013214, "grad_norm": 0.15861444175243378, "learning_rate": 8.595545611715652e-05, "loss": 1.1875, "step": 5613 }, { "epoch": 1.1412888798536287, "grad_norm": 0.14514437317848206, "learning_rate": 8.593511644462525e-05, "loss": 0.9358, "step": 5614 }, { "epoch": 1.1414921732059362, "grad_norm": 0.14084678888320923, "learning_rate": 8.591477677209397e-05, "loss": 0.9222, "step": 5615 }, { "epoch": 1.1416954665582435, "grad_norm": 0.15795862674713135, "learning_rate": 8.58944370995627e-05, "loss": 0.8645, "step": 5616 }, { "epoch": 1.141898759910551, "grad_norm": 0.16617043316364288, "learning_rate": 8.587409742703143e-05, "loss": 1.0558, "step": 5617 }, { "epoch": 1.1421020532628583, "grad_norm": 0.1414622664451599, "learning_rate": 8.585375775450016e-05, "loss": 0.9047, "step": 5618 }, { "epoch": 1.1423053466151658, "grad_norm": 0.12412548065185547, "learning_rate": 8.583341808196888e-05, "loss": 0.9193, "step": 5619 }, { "epoch": 1.142508639967473, "grad_norm": 0.14441967010498047, "learning_rate": 8.581307840943761e-05, "loss": 1.0402, "step": 5620 }, { "epoch": 1.1427119333197804, "grad_norm": 0.11869372427463531, "learning_rate": 8.579273873690634e-05, "loss": 0.6884, "step": 5621 }, { "epoch": 1.142915226672088, "grad_norm": 0.14870472252368927, "learning_rate": 8.577239906437507e-05, "loss": 1.0158, "step": 5622 }, { "epoch": 1.1431185200243952, "grad_norm": 0.1534339189529419, "learning_rate": 8.57520593918438e-05, "loss": 1.0615, "step": 5623 }, { "epoch": 1.1433218133767027, "grad_norm": 0.15304234623908997, "learning_rate": 8.573171971931252e-05, "loss": 1.0745, "step": 5624 }, { "epoch": 1.14352510672901, "grad_norm": 0.13723568618297577, "learning_rate": 8.571138004678126e-05, "loss": 0.9799, "step": 5625 }, { "epoch": 1.1437284000813173, "grad_norm": 0.1478075534105301, "learning_rate": 8.569104037424998e-05, "loss": 0.9989, "step": 5626 }, { "epoch": 1.1439316934336248, "grad_norm": 0.1592124104499817, "learning_rate": 8.56707007017187e-05, "loss": 1.1198, "step": 5627 }, { "epoch": 1.144134986785932, "grad_norm": 0.14707691967487335, "learning_rate": 8.565036102918743e-05, "loss": 1.0284, "step": 5628 }, { "epoch": 1.1443382801382396, "grad_norm": 0.16647355258464813, "learning_rate": 8.563002135665617e-05, "loss": 1.1762, "step": 5629 }, { "epoch": 1.1445415734905469, "grad_norm": 0.13492952287197113, "learning_rate": 8.560968168412489e-05, "loss": 0.8634, "step": 5630 }, { "epoch": 1.1447448668428541, "grad_norm": 0.13840839266777039, "learning_rate": 8.558934201159362e-05, "loss": 0.9339, "step": 5631 }, { "epoch": 1.1449481601951617, "grad_norm": 0.1501016616821289, "learning_rate": 8.556900233906234e-05, "loss": 0.9829, "step": 5632 }, { "epoch": 1.145151453547469, "grad_norm": 0.16703301668167114, "learning_rate": 8.554866266653108e-05, "loss": 1.0938, "step": 5633 }, { "epoch": 1.1453547468997765, "grad_norm": 0.14363522827625275, "learning_rate": 8.55283229939998e-05, "loss": 1.0352, "step": 5634 }, { "epoch": 1.1455580402520837, "grad_norm": 0.13370376825332642, "learning_rate": 8.550798332146853e-05, "loss": 0.8742, "step": 5635 }, { "epoch": 1.145761333604391, "grad_norm": 0.1378611922264099, "learning_rate": 8.548764364893725e-05, "loss": 0.9481, "step": 5636 }, { "epoch": 1.1459646269566985, "grad_norm": 0.14585551619529724, "learning_rate": 8.546730397640599e-05, "loss": 0.9109, "step": 5637 }, { "epoch": 1.1461679203090058, "grad_norm": 0.1484040468931198, "learning_rate": 8.544696430387471e-05, "loss": 1.1809, "step": 5638 }, { "epoch": 1.1463712136613133, "grad_norm": 0.14454567432403564, "learning_rate": 8.542662463134344e-05, "loss": 0.9458, "step": 5639 }, { "epoch": 1.1465745070136206, "grad_norm": 0.16539864242076874, "learning_rate": 8.540628495881216e-05, "loss": 1.1604, "step": 5640 }, { "epoch": 1.146777800365928, "grad_norm": 0.14397001266479492, "learning_rate": 8.53859452862809e-05, "loss": 1.0053, "step": 5641 }, { "epoch": 1.1469810937182354, "grad_norm": 0.1477411836385727, "learning_rate": 8.536560561374963e-05, "loss": 0.8505, "step": 5642 }, { "epoch": 1.1471843870705427, "grad_norm": 0.14993907511234283, "learning_rate": 8.534526594121835e-05, "loss": 1.0681, "step": 5643 }, { "epoch": 1.1473876804228502, "grad_norm": 0.1390916258096695, "learning_rate": 8.532492626868708e-05, "loss": 1.0582, "step": 5644 }, { "epoch": 1.1475909737751575, "grad_norm": 0.14765296876430511, "learning_rate": 8.530458659615581e-05, "loss": 0.9839, "step": 5645 }, { "epoch": 1.147794267127465, "grad_norm": 0.14012466371059418, "learning_rate": 8.528424692362454e-05, "loss": 0.8695, "step": 5646 }, { "epoch": 1.1479975604797723, "grad_norm": 0.16302555799484253, "learning_rate": 8.526390725109326e-05, "loss": 1.0865, "step": 5647 }, { "epoch": 1.1482008538320798, "grad_norm": 0.14376793801784515, "learning_rate": 8.524356757856199e-05, "loss": 0.9831, "step": 5648 }, { "epoch": 1.148404147184387, "grad_norm": 0.13375428318977356, "learning_rate": 8.522322790603071e-05, "loss": 0.9391, "step": 5649 }, { "epoch": 1.1486074405366944, "grad_norm": 0.16969521343708038, "learning_rate": 8.520288823349945e-05, "loss": 1.0134, "step": 5650 }, { "epoch": 1.148810733889002, "grad_norm": 0.1528272181749344, "learning_rate": 8.518254856096817e-05, "loss": 1.0294, "step": 5651 }, { "epoch": 1.1490140272413092, "grad_norm": 0.1412121057510376, "learning_rate": 8.51622088884369e-05, "loss": 1.0031, "step": 5652 }, { "epoch": 1.1492173205936167, "grad_norm": 0.15225495398044586, "learning_rate": 8.514186921590562e-05, "loss": 1.1583, "step": 5653 }, { "epoch": 1.149420613945924, "grad_norm": 0.15183210372924805, "learning_rate": 8.512152954337436e-05, "loss": 1.0097, "step": 5654 }, { "epoch": 1.1496239072982313, "grad_norm": 0.15771012008190155, "learning_rate": 8.510118987084308e-05, "loss": 0.9539, "step": 5655 }, { "epoch": 1.1498272006505388, "grad_norm": 0.13775765895843506, "learning_rate": 8.508085019831181e-05, "loss": 0.8771, "step": 5656 }, { "epoch": 1.150030494002846, "grad_norm": 0.15493398904800415, "learning_rate": 8.506051052578053e-05, "loss": 1.0152, "step": 5657 }, { "epoch": 1.1502337873551536, "grad_norm": 0.15068112313747406, "learning_rate": 8.504017085324927e-05, "loss": 1.0763, "step": 5658 }, { "epoch": 1.1504370807074609, "grad_norm": 0.17684713006019592, "learning_rate": 8.5019831180718e-05, "loss": 1.1555, "step": 5659 }, { "epoch": 1.1506403740597682, "grad_norm": 0.15432146191596985, "learning_rate": 8.499949150818672e-05, "loss": 1.0223, "step": 5660 }, { "epoch": 1.1508436674120757, "grad_norm": 0.1458262950181961, "learning_rate": 8.497915183565545e-05, "loss": 1.1111, "step": 5661 }, { "epoch": 1.151046960764383, "grad_norm": 0.14266814291477203, "learning_rate": 8.495881216312418e-05, "loss": 1.058, "step": 5662 }, { "epoch": 1.1512502541166905, "grad_norm": 0.14124521613121033, "learning_rate": 8.493847249059291e-05, "loss": 1.0221, "step": 5663 }, { "epoch": 1.1514535474689978, "grad_norm": 0.13366392254829407, "learning_rate": 8.491813281806163e-05, "loss": 0.8048, "step": 5664 }, { "epoch": 1.151656840821305, "grad_norm": 0.14957614243030548, "learning_rate": 8.489779314553036e-05, "loss": 1.0168, "step": 5665 }, { "epoch": 1.1518601341736125, "grad_norm": 0.15111826360225677, "learning_rate": 8.48774534729991e-05, "loss": 1.0378, "step": 5666 }, { "epoch": 1.1520634275259198, "grad_norm": 0.14935816824436188, "learning_rate": 8.485711380046782e-05, "loss": 1.0891, "step": 5667 }, { "epoch": 1.1522667208782273, "grad_norm": 0.14815278351306915, "learning_rate": 8.483677412793654e-05, "loss": 0.9746, "step": 5668 }, { "epoch": 1.1524700142305346, "grad_norm": 0.14705118536949158, "learning_rate": 8.481643445540527e-05, "loss": 1.1137, "step": 5669 }, { "epoch": 1.152673307582842, "grad_norm": 0.1314670443534851, "learning_rate": 8.4796094782874e-05, "loss": 0.951, "step": 5670 }, { "epoch": 1.1528766009351494, "grad_norm": 0.14406251907348633, "learning_rate": 8.477575511034273e-05, "loss": 1.0297, "step": 5671 }, { "epoch": 1.1530798942874567, "grad_norm": 0.14774449169635773, "learning_rate": 8.475541543781146e-05, "loss": 1.0561, "step": 5672 }, { "epoch": 1.1532831876397642, "grad_norm": 0.1500570923089981, "learning_rate": 8.473507576528018e-05, "loss": 0.8946, "step": 5673 }, { "epoch": 1.1534864809920715, "grad_norm": 0.1514650285243988, "learning_rate": 8.471473609274892e-05, "loss": 1.1588, "step": 5674 }, { "epoch": 1.153689774344379, "grad_norm": 0.14556598663330078, "learning_rate": 8.469439642021764e-05, "loss": 1.0184, "step": 5675 }, { "epoch": 1.1538930676966863, "grad_norm": 0.14652854204177856, "learning_rate": 8.467405674768637e-05, "loss": 0.9221, "step": 5676 }, { "epoch": 1.1540963610489936, "grad_norm": 0.15506261587142944, "learning_rate": 8.465371707515509e-05, "loss": 1.14, "step": 5677 }, { "epoch": 1.1542996544013011, "grad_norm": 0.14113929867744446, "learning_rate": 8.463337740262383e-05, "loss": 0.9412, "step": 5678 }, { "epoch": 1.1545029477536084, "grad_norm": 0.14070037007331848, "learning_rate": 8.461303773009255e-05, "loss": 1.0308, "step": 5679 }, { "epoch": 1.154706241105916, "grad_norm": 0.1333095133304596, "learning_rate": 8.459269805756128e-05, "loss": 0.9542, "step": 5680 }, { "epoch": 1.1549095344582232, "grad_norm": 0.15399445593357086, "learning_rate": 8.457235838503e-05, "loss": 0.9656, "step": 5681 }, { "epoch": 1.1551128278105307, "grad_norm": 0.14313603937625885, "learning_rate": 8.455201871249874e-05, "loss": 1.0213, "step": 5682 }, { "epoch": 1.155316121162838, "grad_norm": 0.1357993334531784, "learning_rate": 8.453167903996746e-05, "loss": 0.8993, "step": 5683 }, { "epoch": 1.1555194145151453, "grad_norm": 0.14041830599308014, "learning_rate": 8.451133936743619e-05, "loss": 0.9714, "step": 5684 }, { "epoch": 1.1557227078674528, "grad_norm": 0.13149768114089966, "learning_rate": 8.449099969490491e-05, "loss": 0.8474, "step": 5685 }, { "epoch": 1.15592600121976, "grad_norm": 0.15044553577899933, "learning_rate": 8.447066002237365e-05, "loss": 1.0609, "step": 5686 }, { "epoch": 1.1561292945720676, "grad_norm": 0.1325349062681198, "learning_rate": 8.445032034984238e-05, "loss": 0.9539, "step": 5687 }, { "epoch": 1.1563325879243749, "grad_norm": 0.1530589908361435, "learning_rate": 8.44299806773111e-05, "loss": 1.1166, "step": 5688 }, { "epoch": 1.1565358812766822, "grad_norm": 0.14407488703727722, "learning_rate": 8.440964100477983e-05, "loss": 0.9873, "step": 5689 }, { "epoch": 1.1567391746289897, "grad_norm": 0.15614476799964905, "learning_rate": 8.438930133224856e-05, "loss": 1.1084, "step": 5690 }, { "epoch": 1.156942467981297, "grad_norm": 0.15673379600048065, "learning_rate": 8.436896165971729e-05, "loss": 1.1468, "step": 5691 }, { "epoch": 1.1571457613336045, "grad_norm": 0.16642117500305176, "learning_rate": 8.4348621987186e-05, "loss": 1.1427, "step": 5692 }, { "epoch": 1.1573490546859118, "grad_norm": 0.11349500715732574, "learning_rate": 8.432828231465474e-05, "loss": 0.7629, "step": 5693 }, { "epoch": 1.157552348038219, "grad_norm": 0.15668603777885437, "learning_rate": 8.430794264212346e-05, "loss": 1.1667, "step": 5694 }, { "epoch": 1.1577556413905266, "grad_norm": 0.13904212415218353, "learning_rate": 8.42876029695922e-05, "loss": 0.9752, "step": 5695 }, { "epoch": 1.1579589347428338, "grad_norm": 0.15101003646850586, "learning_rate": 8.426726329706091e-05, "loss": 0.9314, "step": 5696 }, { "epoch": 1.1581622280951414, "grad_norm": 0.1544020175933838, "learning_rate": 8.424692362452965e-05, "loss": 1.0759, "step": 5697 }, { "epoch": 1.1583655214474486, "grad_norm": 0.15711095929145813, "learning_rate": 8.422658395199837e-05, "loss": 1.0702, "step": 5698 }, { "epoch": 1.158568814799756, "grad_norm": 0.1422794610261917, "learning_rate": 8.420624427946711e-05, "loss": 0.9433, "step": 5699 }, { "epoch": 1.1587721081520634, "grad_norm": 0.15745525062084198, "learning_rate": 8.418590460693583e-05, "loss": 1.0318, "step": 5700 }, { "epoch": 1.1589754015043707, "grad_norm": 0.1667427271604538, "learning_rate": 8.416556493440456e-05, "loss": 1.2325, "step": 5701 }, { "epoch": 1.1591786948566782, "grad_norm": 0.16718730330467224, "learning_rate": 8.414522526187328e-05, "loss": 1.259, "step": 5702 }, { "epoch": 1.1593819882089855, "grad_norm": 0.15154454112052917, "learning_rate": 8.412488558934202e-05, "loss": 1.1235, "step": 5703 }, { "epoch": 1.159585281561293, "grad_norm": 0.13686643540859222, "learning_rate": 8.410454591681075e-05, "loss": 0.9764, "step": 5704 }, { "epoch": 1.1597885749136003, "grad_norm": 0.1651173233985901, "learning_rate": 8.408420624427947e-05, "loss": 1.2071, "step": 5705 }, { "epoch": 1.1599918682659076, "grad_norm": 0.15027135610580444, "learning_rate": 8.40638665717482e-05, "loss": 0.999, "step": 5706 }, { "epoch": 1.1601951616182151, "grad_norm": 0.153062641620636, "learning_rate": 8.404352689921693e-05, "loss": 1.0434, "step": 5707 }, { "epoch": 1.1603984549705224, "grad_norm": 0.1480168104171753, "learning_rate": 8.402318722668566e-05, "loss": 1.042, "step": 5708 }, { "epoch": 1.16060174832283, "grad_norm": 0.14577078819274902, "learning_rate": 8.400284755415438e-05, "loss": 0.9825, "step": 5709 }, { "epoch": 1.1608050416751372, "grad_norm": 0.1533634513616562, "learning_rate": 8.39825078816231e-05, "loss": 1.0282, "step": 5710 }, { "epoch": 1.1610083350274447, "grad_norm": 0.16153539717197418, "learning_rate": 8.396216820909184e-05, "loss": 1.0728, "step": 5711 }, { "epoch": 1.161211628379752, "grad_norm": 0.17193733155727386, "learning_rate": 8.394182853656057e-05, "loss": 1.0292, "step": 5712 }, { "epoch": 1.1614149217320593, "grad_norm": 0.142516627907753, "learning_rate": 8.39214888640293e-05, "loss": 0.892, "step": 5713 }, { "epoch": 1.1616182150843668, "grad_norm": 0.1397552192211151, "learning_rate": 8.390114919149802e-05, "loss": 0.9634, "step": 5714 }, { "epoch": 1.161821508436674, "grad_norm": 0.15920226275920868, "learning_rate": 8.388080951896676e-05, "loss": 1.124, "step": 5715 }, { "epoch": 1.1620248017889816, "grad_norm": 0.15637387335300446, "learning_rate": 8.386046984643548e-05, "loss": 1.0682, "step": 5716 }, { "epoch": 1.1622280951412889, "grad_norm": 0.136688694357872, "learning_rate": 8.38401301739042e-05, "loss": 0.902, "step": 5717 }, { "epoch": 1.1624313884935962, "grad_norm": 0.15799644589424133, "learning_rate": 8.381979050137293e-05, "loss": 1.1659, "step": 5718 }, { "epoch": 1.1626346818459037, "grad_norm": 0.12915347516536713, "learning_rate": 8.379945082884167e-05, "loss": 0.7916, "step": 5719 }, { "epoch": 1.162837975198211, "grad_norm": 0.1522664725780487, "learning_rate": 8.377911115631039e-05, "loss": 1.1851, "step": 5720 }, { "epoch": 1.1630412685505185, "grad_norm": 0.16952194273471832, "learning_rate": 8.375877148377912e-05, "loss": 1.1392, "step": 5721 }, { "epoch": 1.1632445619028258, "grad_norm": 0.14386901259422302, "learning_rate": 8.373843181124784e-05, "loss": 1.0165, "step": 5722 }, { "epoch": 1.163447855255133, "grad_norm": 0.15671277046203613, "learning_rate": 8.371809213871658e-05, "loss": 1.1116, "step": 5723 }, { "epoch": 1.1636511486074406, "grad_norm": 0.1588602215051651, "learning_rate": 8.36977524661853e-05, "loss": 1.1571, "step": 5724 }, { "epoch": 1.1638544419597479, "grad_norm": 0.14656004309654236, "learning_rate": 8.367741279365403e-05, "loss": 0.9233, "step": 5725 }, { "epoch": 1.1640577353120554, "grad_norm": 0.1547871232032776, "learning_rate": 8.365707312112275e-05, "loss": 1.0689, "step": 5726 }, { "epoch": 1.1642610286643627, "grad_norm": 0.15510883927345276, "learning_rate": 8.363673344859149e-05, "loss": 1.0671, "step": 5727 }, { "epoch": 1.16446432201667, "grad_norm": 0.17547018826007843, "learning_rate": 8.361639377606021e-05, "loss": 1.2643, "step": 5728 }, { "epoch": 1.1646676153689774, "grad_norm": 0.12577450275421143, "learning_rate": 8.359605410352894e-05, "loss": 0.7717, "step": 5729 }, { "epoch": 1.1648709087212847, "grad_norm": 0.15457884967327118, "learning_rate": 8.357571443099766e-05, "loss": 1.0971, "step": 5730 }, { "epoch": 1.1650742020735922, "grad_norm": 0.14000065624713898, "learning_rate": 8.35553747584664e-05, "loss": 1.0287, "step": 5731 }, { "epoch": 1.1652774954258995, "grad_norm": 0.14675964415073395, "learning_rate": 8.353503508593513e-05, "loss": 1.1408, "step": 5732 }, { "epoch": 1.165480788778207, "grad_norm": 0.15442916750907898, "learning_rate": 8.351469541340384e-05, "loss": 1.0237, "step": 5733 }, { "epoch": 1.1656840821305143, "grad_norm": 0.13658323884010315, "learning_rate": 8.349435574087257e-05, "loss": 0.8282, "step": 5734 }, { "epoch": 1.1658873754828216, "grad_norm": 0.15540479123592377, "learning_rate": 8.34740160683413e-05, "loss": 1.0655, "step": 5735 }, { "epoch": 1.1660906688351291, "grad_norm": 0.16539782285690308, "learning_rate": 8.345367639581004e-05, "loss": 1.0761, "step": 5736 }, { "epoch": 1.1662939621874364, "grad_norm": 0.1480647623538971, "learning_rate": 8.343333672327875e-05, "loss": 0.9653, "step": 5737 }, { "epoch": 1.166497255539744, "grad_norm": 0.15932734310626984, "learning_rate": 8.341299705074749e-05, "loss": 1.0883, "step": 5738 }, { "epoch": 1.1667005488920512, "grad_norm": 0.1572417914867401, "learning_rate": 8.339265737821621e-05, "loss": 1.1278, "step": 5739 }, { "epoch": 1.1669038422443587, "grad_norm": 0.14322857558727264, "learning_rate": 8.337231770568495e-05, "loss": 1.0414, "step": 5740 }, { "epoch": 1.167107135596666, "grad_norm": 0.13125763833522797, "learning_rate": 8.335197803315366e-05, "loss": 0.9526, "step": 5741 }, { "epoch": 1.1673104289489733, "grad_norm": 0.15495732426643372, "learning_rate": 8.33316383606224e-05, "loss": 1.1722, "step": 5742 }, { "epoch": 1.1675137223012808, "grad_norm": 0.13582062721252441, "learning_rate": 8.331129868809112e-05, "loss": 0.9187, "step": 5743 }, { "epoch": 1.167717015653588, "grad_norm": 0.1391330063343048, "learning_rate": 8.329095901555986e-05, "loss": 0.9192, "step": 5744 }, { "epoch": 1.1679203090058956, "grad_norm": 0.13935451209545135, "learning_rate": 8.327061934302857e-05, "loss": 0.9838, "step": 5745 }, { "epoch": 1.168123602358203, "grad_norm": 0.14375537633895874, "learning_rate": 8.325027967049731e-05, "loss": 1.0435, "step": 5746 }, { "epoch": 1.1683268957105102, "grad_norm": 0.15256935358047485, "learning_rate": 8.322993999796603e-05, "loss": 1.095, "step": 5747 }, { "epoch": 1.1685301890628177, "grad_norm": 0.15851211547851562, "learning_rate": 8.320960032543477e-05, "loss": 1.0523, "step": 5748 }, { "epoch": 1.168733482415125, "grad_norm": 0.14433102309703827, "learning_rate": 8.318926065290348e-05, "loss": 0.9839, "step": 5749 }, { "epoch": 1.1689367757674325, "grad_norm": 0.1549297720193863, "learning_rate": 8.316892098037222e-05, "loss": 1.0124, "step": 5750 }, { "epoch": 1.1691400691197398, "grad_norm": 0.15880125761032104, "learning_rate": 8.314858130784094e-05, "loss": 1.1537, "step": 5751 }, { "epoch": 1.169343362472047, "grad_norm": 0.15532851219177246, "learning_rate": 8.312824163530968e-05, "loss": 0.9927, "step": 5752 }, { "epoch": 1.1695466558243546, "grad_norm": 0.16933661699295044, "learning_rate": 8.31079019627784e-05, "loss": 1.1238, "step": 5753 }, { "epoch": 1.1697499491766619, "grad_norm": 0.1438983678817749, "learning_rate": 8.308756229024713e-05, "loss": 1.1148, "step": 5754 }, { "epoch": 1.1699532425289694, "grad_norm": 0.14921659231185913, "learning_rate": 8.306722261771586e-05, "loss": 0.9964, "step": 5755 }, { "epoch": 1.1701565358812767, "grad_norm": 0.14099189639091492, "learning_rate": 8.30468829451846e-05, "loss": 0.9867, "step": 5756 }, { "epoch": 1.170359829233584, "grad_norm": 0.14442093670368195, "learning_rate": 8.302654327265332e-05, "loss": 0.9026, "step": 5757 }, { "epoch": 1.1705631225858915, "grad_norm": 0.1563616544008255, "learning_rate": 8.300620360012204e-05, "loss": 1.0916, "step": 5758 }, { "epoch": 1.1707664159381987, "grad_norm": 0.16063082218170166, "learning_rate": 8.298586392759077e-05, "loss": 1.0803, "step": 5759 }, { "epoch": 1.1709697092905063, "grad_norm": 0.13853336870670319, "learning_rate": 8.29655242550595e-05, "loss": 0.8939, "step": 5760 }, { "epoch": 1.1711730026428135, "grad_norm": 0.14909055829048157, "learning_rate": 8.294518458252823e-05, "loss": 1.0755, "step": 5761 }, { "epoch": 1.171376295995121, "grad_norm": 0.1274290829896927, "learning_rate": 8.292484490999695e-05, "loss": 0.8844, "step": 5762 }, { "epoch": 1.1715795893474283, "grad_norm": 0.144126296043396, "learning_rate": 8.290450523746568e-05, "loss": 0.9573, "step": 5763 }, { "epoch": 1.1717828826997356, "grad_norm": 0.16232167184352875, "learning_rate": 8.288416556493442e-05, "loss": 1.1019, "step": 5764 }, { "epoch": 1.1719861760520431, "grad_norm": 0.14691004157066345, "learning_rate": 8.286382589240314e-05, "loss": 1.0463, "step": 5765 }, { "epoch": 1.1721894694043504, "grad_norm": 0.148517444729805, "learning_rate": 8.284348621987187e-05, "loss": 1.115, "step": 5766 }, { "epoch": 1.172392762756658, "grad_norm": 0.1558620035648346, "learning_rate": 8.282314654734059e-05, "loss": 1.1398, "step": 5767 }, { "epoch": 1.1725960561089652, "grad_norm": 0.1335366815328598, "learning_rate": 8.280280687480933e-05, "loss": 0.8479, "step": 5768 }, { "epoch": 1.1727993494612727, "grad_norm": 0.16315288841724396, "learning_rate": 8.278246720227805e-05, "loss": 1.0577, "step": 5769 }, { "epoch": 1.17300264281358, "grad_norm": 0.15037906169891357, "learning_rate": 8.276212752974678e-05, "loss": 1.0101, "step": 5770 }, { "epoch": 1.1732059361658873, "grad_norm": 0.14638791978359222, "learning_rate": 8.27417878572155e-05, "loss": 1.003, "step": 5771 }, { "epoch": 1.1734092295181948, "grad_norm": 0.1514301598072052, "learning_rate": 8.272144818468424e-05, "loss": 0.9576, "step": 5772 }, { "epoch": 1.173612522870502, "grad_norm": 0.13897733390331268, "learning_rate": 8.270110851215296e-05, "loss": 1.0559, "step": 5773 }, { "epoch": 1.1738158162228096, "grad_norm": 0.15974655747413635, "learning_rate": 8.268076883962168e-05, "loss": 1.0193, "step": 5774 }, { "epoch": 1.174019109575117, "grad_norm": 0.15432977676391602, "learning_rate": 8.266042916709041e-05, "loss": 1.0128, "step": 5775 }, { "epoch": 1.1742224029274242, "grad_norm": 0.15510208904743195, "learning_rate": 8.264008949455914e-05, "loss": 1.105, "step": 5776 }, { "epoch": 1.1744256962797317, "grad_norm": 0.14643555879592896, "learning_rate": 8.261974982202788e-05, "loss": 1.0362, "step": 5777 }, { "epoch": 1.174628989632039, "grad_norm": 0.1518491804599762, "learning_rate": 8.259941014949659e-05, "loss": 0.9863, "step": 5778 }, { "epoch": 1.1748322829843465, "grad_norm": 0.16607214510440826, "learning_rate": 8.257907047696532e-05, "loss": 1.1544, "step": 5779 }, { "epoch": 1.1750355763366538, "grad_norm": 0.14509828388690948, "learning_rate": 8.255873080443405e-05, "loss": 0.9395, "step": 5780 }, { "epoch": 1.175238869688961, "grad_norm": 0.1378895491361618, "learning_rate": 8.253839113190279e-05, "loss": 0.9349, "step": 5781 }, { "epoch": 1.1754421630412686, "grad_norm": 0.14478927850723267, "learning_rate": 8.25180514593715e-05, "loss": 1.0171, "step": 5782 }, { "epoch": 1.1756454563935759, "grad_norm": 0.1440618932247162, "learning_rate": 8.249771178684024e-05, "loss": 1.0032, "step": 5783 }, { "epoch": 1.1758487497458834, "grad_norm": 0.15416118502616882, "learning_rate": 8.247737211430896e-05, "loss": 1.0134, "step": 5784 }, { "epoch": 1.1760520430981907, "grad_norm": 0.1631413996219635, "learning_rate": 8.24570324417777e-05, "loss": 1.0369, "step": 5785 }, { "epoch": 1.176255336450498, "grad_norm": 0.1420244723558426, "learning_rate": 8.243669276924641e-05, "loss": 1.0298, "step": 5786 }, { "epoch": 1.1764586298028055, "grad_norm": 0.14719687402248383, "learning_rate": 8.241635309671515e-05, "loss": 1.0469, "step": 5787 }, { "epoch": 1.1766619231551128, "grad_norm": 0.1576554775238037, "learning_rate": 8.239601342418387e-05, "loss": 1.0751, "step": 5788 }, { "epoch": 1.1768652165074203, "grad_norm": 0.14992351830005646, "learning_rate": 8.237567375165261e-05, "loss": 1.0858, "step": 5789 }, { "epoch": 1.1770685098597276, "grad_norm": 0.1365818977355957, "learning_rate": 8.235533407912132e-05, "loss": 0.9918, "step": 5790 }, { "epoch": 1.177271803212035, "grad_norm": 0.1521523892879486, "learning_rate": 8.233499440659006e-05, "loss": 1.1572, "step": 5791 }, { "epoch": 1.1774750965643423, "grad_norm": 0.15335099399089813, "learning_rate": 8.231465473405878e-05, "loss": 1.1044, "step": 5792 }, { "epoch": 1.1776783899166496, "grad_norm": 0.15510833263397217, "learning_rate": 8.229431506152752e-05, "loss": 1.014, "step": 5793 }, { "epoch": 1.1778816832689571, "grad_norm": 0.15188954770565033, "learning_rate": 8.227397538899623e-05, "loss": 1.0646, "step": 5794 }, { "epoch": 1.1780849766212644, "grad_norm": 0.14498646557331085, "learning_rate": 8.225363571646497e-05, "loss": 0.8946, "step": 5795 }, { "epoch": 1.178288269973572, "grad_norm": 0.16170433163642883, "learning_rate": 8.22332960439337e-05, "loss": 1.1044, "step": 5796 }, { "epoch": 1.1784915633258792, "grad_norm": 0.14234782755374908, "learning_rate": 8.221295637140243e-05, "loss": 1.0112, "step": 5797 }, { "epoch": 1.1786948566781867, "grad_norm": 0.15361133217811584, "learning_rate": 8.219261669887114e-05, "loss": 1.163, "step": 5798 }, { "epoch": 1.178898150030494, "grad_norm": 0.1423853635787964, "learning_rate": 8.217227702633988e-05, "loss": 1.0082, "step": 5799 }, { "epoch": 1.1791014433828013, "grad_norm": 0.14732852578163147, "learning_rate": 8.21519373538086e-05, "loss": 1.0438, "step": 5800 }, { "epoch": 1.1793047367351088, "grad_norm": 0.15599720180034637, "learning_rate": 8.213159768127734e-05, "loss": 1.1768, "step": 5801 }, { "epoch": 1.1795080300874161, "grad_norm": 0.14181433618068695, "learning_rate": 8.211125800874606e-05, "loss": 1.0371, "step": 5802 }, { "epoch": 1.1797113234397236, "grad_norm": 0.14503152668476105, "learning_rate": 8.209091833621479e-05, "loss": 0.9382, "step": 5803 }, { "epoch": 1.179914616792031, "grad_norm": 0.1486697643995285, "learning_rate": 8.207057866368352e-05, "loss": 1.0448, "step": 5804 }, { "epoch": 1.1801179101443382, "grad_norm": 0.13623777031898499, "learning_rate": 8.205023899115226e-05, "loss": 0.8852, "step": 5805 }, { "epoch": 1.1803212034966457, "grad_norm": 0.16604407131671906, "learning_rate": 8.202989931862097e-05, "loss": 1.1318, "step": 5806 }, { "epoch": 1.180524496848953, "grad_norm": 0.14802932739257812, "learning_rate": 8.20095596460897e-05, "loss": 0.9212, "step": 5807 }, { "epoch": 1.1807277902012605, "grad_norm": 0.1431393027305603, "learning_rate": 8.198921997355843e-05, "loss": 0.994, "step": 5808 }, { "epoch": 1.1809310835535678, "grad_norm": 0.164002925157547, "learning_rate": 8.196888030102717e-05, "loss": 1.1154, "step": 5809 }, { "epoch": 1.181134376905875, "grad_norm": 0.15111534297466278, "learning_rate": 8.194854062849588e-05, "loss": 1.0558, "step": 5810 }, { "epoch": 1.1813376702581826, "grad_norm": 0.1695423573255539, "learning_rate": 8.192820095596462e-05, "loss": 1.1482, "step": 5811 }, { "epoch": 1.1815409636104899, "grad_norm": 0.13451817631721497, "learning_rate": 8.190786128343334e-05, "loss": 0.976, "step": 5812 }, { "epoch": 1.1817442569627974, "grad_norm": 0.14814557135105133, "learning_rate": 8.188752161090208e-05, "loss": 1.0552, "step": 5813 }, { "epoch": 1.1819475503151047, "grad_norm": 0.1545884907245636, "learning_rate": 8.18671819383708e-05, "loss": 1.0192, "step": 5814 }, { "epoch": 1.182150843667412, "grad_norm": 0.14275769889354706, "learning_rate": 8.184684226583951e-05, "loss": 0.9805, "step": 5815 }, { "epoch": 1.1823541370197195, "grad_norm": 0.1415022313594818, "learning_rate": 8.182650259330825e-05, "loss": 0.9754, "step": 5816 }, { "epoch": 1.1825574303720268, "grad_norm": 0.1614035665988922, "learning_rate": 8.180616292077698e-05, "loss": 1.1702, "step": 5817 }, { "epoch": 1.1827607237243343, "grad_norm": 0.17317655682563782, "learning_rate": 8.178582324824571e-05, "loss": 1.2551, "step": 5818 }, { "epoch": 1.1829640170766416, "grad_norm": 0.1425011307001114, "learning_rate": 8.176548357571443e-05, "loss": 0.9335, "step": 5819 }, { "epoch": 1.1831673104289488, "grad_norm": 0.14078962802886963, "learning_rate": 8.174514390318316e-05, "loss": 0.9349, "step": 5820 }, { "epoch": 1.1833706037812564, "grad_norm": 0.1340751200914383, "learning_rate": 8.172480423065189e-05, "loss": 0.8411, "step": 5821 }, { "epoch": 1.1835738971335636, "grad_norm": 0.151747927069664, "learning_rate": 8.170446455812063e-05, "loss": 0.9972, "step": 5822 }, { "epoch": 1.1837771904858712, "grad_norm": 0.1423603892326355, "learning_rate": 8.168412488558934e-05, "loss": 0.9361, "step": 5823 }, { "epoch": 1.1839804838381784, "grad_norm": 0.14710086584091187, "learning_rate": 8.166378521305807e-05, "loss": 0.9928, "step": 5824 }, { "epoch": 1.184183777190486, "grad_norm": 0.15306688845157623, "learning_rate": 8.16434455405268e-05, "loss": 1.0885, "step": 5825 }, { "epoch": 1.1843870705427932, "grad_norm": 0.13832566142082214, "learning_rate": 8.162310586799554e-05, "loss": 0.9198, "step": 5826 }, { "epoch": 1.1845903638951008, "grad_norm": 0.15482375025749207, "learning_rate": 8.160276619546425e-05, "loss": 0.9793, "step": 5827 }, { "epoch": 1.184793657247408, "grad_norm": 0.15048684179782867, "learning_rate": 8.158242652293299e-05, "loss": 0.9304, "step": 5828 }, { "epoch": 1.1849969505997153, "grad_norm": 0.13934911787509918, "learning_rate": 8.156208685040171e-05, "loss": 0.8829, "step": 5829 }, { "epoch": 1.1852002439520228, "grad_norm": 0.132803812623024, "learning_rate": 8.154174717787045e-05, "loss": 0.9299, "step": 5830 }, { "epoch": 1.1854035373043301, "grad_norm": 0.15293893218040466, "learning_rate": 8.152140750533916e-05, "loss": 0.9339, "step": 5831 }, { "epoch": 1.1856068306566376, "grad_norm": 0.1575455665588379, "learning_rate": 8.15010678328079e-05, "loss": 1.1541, "step": 5832 }, { "epoch": 1.185810124008945, "grad_norm": 0.15015073120594025, "learning_rate": 8.148072816027662e-05, "loss": 0.9863, "step": 5833 }, { "epoch": 1.1860134173612522, "grad_norm": 0.1547766476869583, "learning_rate": 8.146038848774536e-05, "loss": 1.0668, "step": 5834 }, { "epoch": 1.1862167107135597, "grad_norm": 0.1677473932504654, "learning_rate": 8.144004881521407e-05, "loss": 1.1909, "step": 5835 }, { "epoch": 1.186420004065867, "grad_norm": 0.15054230391979218, "learning_rate": 8.141970914268281e-05, "loss": 0.9628, "step": 5836 }, { "epoch": 1.1866232974181745, "grad_norm": 0.14739026129245758, "learning_rate": 8.139936947015153e-05, "loss": 1.0336, "step": 5837 }, { "epoch": 1.1868265907704818, "grad_norm": 0.14510677754878998, "learning_rate": 8.137902979762027e-05, "loss": 1.0433, "step": 5838 }, { "epoch": 1.187029884122789, "grad_norm": 0.15220728516578674, "learning_rate": 8.135869012508898e-05, "loss": 0.98, "step": 5839 }, { "epoch": 1.1872331774750966, "grad_norm": 0.1537727415561676, "learning_rate": 8.133835045255772e-05, "loss": 1.0428, "step": 5840 }, { "epoch": 1.187436470827404, "grad_norm": 0.14347486197948456, "learning_rate": 8.131801078002644e-05, "loss": 0.9355, "step": 5841 }, { "epoch": 1.1876397641797114, "grad_norm": 0.15678545832633972, "learning_rate": 8.129767110749518e-05, "loss": 1.1894, "step": 5842 }, { "epoch": 1.1878430575320187, "grad_norm": 0.14617744088172913, "learning_rate": 8.12773314349639e-05, "loss": 0.9334, "step": 5843 }, { "epoch": 1.188046350884326, "grad_norm": 0.13615253567695618, "learning_rate": 8.125699176243263e-05, "loss": 0.8091, "step": 5844 }, { "epoch": 1.1882496442366335, "grad_norm": 0.14368019998073578, "learning_rate": 8.123665208990136e-05, "loss": 0.8983, "step": 5845 }, { "epoch": 1.1884529375889408, "grad_norm": 0.15359970927238464, "learning_rate": 8.12163124173701e-05, "loss": 1.0781, "step": 5846 }, { "epoch": 1.1886562309412483, "grad_norm": 0.15584875643253326, "learning_rate": 8.11959727448388e-05, "loss": 1.1816, "step": 5847 }, { "epoch": 1.1888595242935556, "grad_norm": 0.14251692593097687, "learning_rate": 8.117563307230754e-05, "loss": 0.9314, "step": 5848 }, { "epoch": 1.1890628176458629, "grad_norm": 0.14834555983543396, "learning_rate": 8.115529339977627e-05, "loss": 1.004, "step": 5849 }, { "epoch": 1.1892661109981704, "grad_norm": 0.1568392813205719, "learning_rate": 8.1134953727245e-05, "loss": 1.1264, "step": 5850 }, { "epoch": 1.1894694043504777, "grad_norm": 0.16257062554359436, "learning_rate": 8.111461405471372e-05, "loss": 1.1795, "step": 5851 }, { "epoch": 1.1896726977027852, "grad_norm": 0.15817001461982727, "learning_rate": 8.109427438218245e-05, "loss": 1.1681, "step": 5852 }, { "epoch": 1.1898759910550925, "grad_norm": 0.14932581782341003, "learning_rate": 8.107393470965118e-05, "loss": 1.1543, "step": 5853 }, { "epoch": 1.1900792844074, "grad_norm": 0.15669305622577667, "learning_rate": 8.105359503711992e-05, "loss": 1.1768, "step": 5854 }, { "epoch": 1.1902825777597072, "grad_norm": 0.14542458951473236, "learning_rate": 8.103325536458863e-05, "loss": 0.9986, "step": 5855 }, { "epoch": 1.1904858711120148, "grad_norm": 0.17203249037265778, "learning_rate": 8.101291569205737e-05, "loss": 1.1656, "step": 5856 }, { "epoch": 1.190689164464322, "grad_norm": 0.14532865583896637, "learning_rate": 8.099257601952609e-05, "loss": 1.0212, "step": 5857 }, { "epoch": 1.1908924578166293, "grad_norm": 0.1520928293466568, "learning_rate": 8.097223634699481e-05, "loss": 1.1934, "step": 5858 }, { "epoch": 1.1910957511689368, "grad_norm": 0.14724013209342957, "learning_rate": 8.095189667446354e-05, "loss": 0.9037, "step": 5859 }, { "epoch": 1.1912990445212441, "grad_norm": 0.13861322402954102, "learning_rate": 8.093155700193226e-05, "loss": 0.9378, "step": 5860 }, { "epoch": 1.1915023378735516, "grad_norm": 0.15509451925754547, "learning_rate": 8.0911217329401e-05, "loss": 1.1247, "step": 5861 }, { "epoch": 1.191705631225859, "grad_norm": 0.1269523799419403, "learning_rate": 8.089087765686973e-05, "loss": 0.829, "step": 5862 }, { "epoch": 1.1919089245781662, "grad_norm": 0.15172962844371796, "learning_rate": 8.087053798433845e-05, "loss": 0.9655, "step": 5863 }, { "epoch": 1.1921122179304737, "grad_norm": 0.14990444481372833, "learning_rate": 8.085019831180717e-05, "loss": 1.0249, "step": 5864 }, { "epoch": 1.192315511282781, "grad_norm": 0.14400093257427216, "learning_rate": 8.082985863927591e-05, "loss": 0.9218, "step": 5865 }, { "epoch": 1.1925188046350885, "grad_norm": 0.12698093056678772, "learning_rate": 8.080951896674464e-05, "loss": 0.8124, "step": 5866 }, { "epoch": 1.1927220979873958, "grad_norm": 0.13535602390766144, "learning_rate": 8.078917929421336e-05, "loss": 0.9892, "step": 5867 }, { "epoch": 1.192925391339703, "grad_norm": 0.1540592759847641, "learning_rate": 8.076883962168209e-05, "loss": 1.0578, "step": 5868 }, { "epoch": 1.1931286846920106, "grad_norm": 0.1440640687942505, "learning_rate": 8.074849994915082e-05, "loss": 1.0616, "step": 5869 }, { "epoch": 1.193331978044318, "grad_norm": 0.16932588815689087, "learning_rate": 8.072816027661955e-05, "loss": 1.1621, "step": 5870 }, { "epoch": 1.1935352713966254, "grad_norm": 0.1639503538608551, "learning_rate": 8.070782060408827e-05, "loss": 1.1424, "step": 5871 }, { "epoch": 1.1937385647489327, "grad_norm": 0.15571148693561554, "learning_rate": 8.0687480931557e-05, "loss": 1.1034, "step": 5872 }, { "epoch": 1.19394185810124, "grad_norm": 0.1500549167394638, "learning_rate": 8.066714125902574e-05, "loss": 0.921, "step": 5873 }, { "epoch": 1.1941451514535475, "grad_norm": 0.13628308475017548, "learning_rate": 8.064680158649446e-05, "loss": 0.8746, "step": 5874 }, { "epoch": 1.1943484448058548, "grad_norm": 0.14379121363162994, "learning_rate": 8.06264619139632e-05, "loss": 1.0226, "step": 5875 }, { "epoch": 1.1945517381581623, "grad_norm": 0.1455121785402298, "learning_rate": 8.060612224143191e-05, "loss": 1.0618, "step": 5876 }, { "epoch": 1.1947550315104696, "grad_norm": 0.14893971383571625, "learning_rate": 8.058578256890065e-05, "loss": 1.0479, "step": 5877 }, { "epoch": 1.1949583248627769, "grad_norm": 0.17038215696811676, "learning_rate": 8.056544289636937e-05, "loss": 1.1333, "step": 5878 }, { "epoch": 1.1951616182150844, "grad_norm": 0.16060739755630493, "learning_rate": 8.054510322383811e-05, "loss": 1.0709, "step": 5879 }, { "epoch": 1.1953649115673917, "grad_norm": 0.14657901227474213, "learning_rate": 8.052476355130682e-05, "loss": 0.9916, "step": 5880 }, { "epoch": 1.1955682049196992, "grad_norm": 0.1535673588514328, "learning_rate": 8.050442387877556e-05, "loss": 1.0064, "step": 5881 }, { "epoch": 1.1957714982720065, "grad_norm": 0.14547181129455566, "learning_rate": 8.048408420624428e-05, "loss": 1.0492, "step": 5882 }, { "epoch": 1.195974791624314, "grad_norm": 0.14238569140434265, "learning_rate": 8.046374453371302e-05, "loss": 0.9422, "step": 5883 }, { "epoch": 1.1961780849766213, "grad_norm": 0.1523994505405426, "learning_rate": 8.044340486118173e-05, "loss": 1.0175, "step": 5884 }, { "epoch": 1.1963813783289288, "grad_norm": 0.15338194370269775, "learning_rate": 8.042306518865047e-05, "loss": 1.0815, "step": 5885 }, { "epoch": 1.196584671681236, "grad_norm": 0.15962374210357666, "learning_rate": 8.04027255161192e-05, "loss": 1.0765, "step": 5886 }, { "epoch": 1.1967879650335433, "grad_norm": 0.13959115743637085, "learning_rate": 8.038238584358793e-05, "loss": 0.9235, "step": 5887 }, { "epoch": 1.1969912583858509, "grad_norm": 0.15868176519870758, "learning_rate": 8.036204617105664e-05, "loss": 1.0158, "step": 5888 }, { "epoch": 1.1971945517381581, "grad_norm": 0.1576426476240158, "learning_rate": 8.034170649852538e-05, "loss": 1.0851, "step": 5889 }, { "epoch": 1.1973978450904657, "grad_norm": 0.1631205826997757, "learning_rate": 8.03213668259941e-05, "loss": 1.1783, "step": 5890 }, { "epoch": 1.197601138442773, "grad_norm": 0.16545897722244263, "learning_rate": 8.030102715346284e-05, "loss": 1.1077, "step": 5891 }, { "epoch": 1.1978044317950802, "grad_norm": 0.1491439938545227, "learning_rate": 8.028068748093155e-05, "loss": 1.0368, "step": 5892 }, { "epoch": 1.1980077251473877, "grad_norm": 0.15724484622478485, "learning_rate": 8.026034780840029e-05, "loss": 1.0844, "step": 5893 }, { "epoch": 1.198211018499695, "grad_norm": 0.15338502824306488, "learning_rate": 8.024000813586902e-05, "loss": 1.0589, "step": 5894 }, { "epoch": 1.1984143118520025, "grad_norm": 0.1540132313966751, "learning_rate": 8.021966846333776e-05, "loss": 1.0517, "step": 5895 }, { "epoch": 1.1986176052043098, "grad_norm": 0.13505342602729797, "learning_rate": 8.019932879080647e-05, "loss": 0.9513, "step": 5896 }, { "epoch": 1.198820898556617, "grad_norm": 0.1482662409543991, "learning_rate": 8.01789891182752e-05, "loss": 1.0653, "step": 5897 }, { "epoch": 1.1990241919089246, "grad_norm": 0.15476658940315247, "learning_rate": 8.015864944574393e-05, "loss": 1.0626, "step": 5898 }, { "epoch": 1.199227485261232, "grad_norm": 0.14526322484016418, "learning_rate": 8.013830977321265e-05, "loss": 0.9032, "step": 5899 }, { "epoch": 1.1994307786135394, "grad_norm": 0.15483404695987701, "learning_rate": 8.011797010068138e-05, "loss": 1.091, "step": 5900 }, { "epoch": 1.1996340719658467, "grad_norm": 0.15213781595230103, "learning_rate": 8.00976304281501e-05, "loss": 0.9738, "step": 5901 }, { "epoch": 1.199837365318154, "grad_norm": 0.15703366696834564, "learning_rate": 8.007729075561884e-05, "loss": 1.1734, "step": 5902 }, { "epoch": 1.2000406586704615, "grad_norm": 0.14850756525993347, "learning_rate": 8.005695108308756e-05, "loss": 1.0282, "step": 5903 }, { "epoch": 1.2002439520227688, "grad_norm": 0.134627103805542, "learning_rate": 8.003661141055629e-05, "loss": 0.8677, "step": 5904 }, { "epoch": 1.2004472453750763, "grad_norm": 0.16509543359279633, "learning_rate": 8.001627173802501e-05, "loss": 1.1819, "step": 5905 }, { "epoch": 1.2006505387273836, "grad_norm": 0.15316465497016907, "learning_rate": 7.999593206549375e-05, "loss": 1.2094, "step": 5906 }, { "epoch": 1.2008538320796909, "grad_norm": 0.1590406894683838, "learning_rate": 7.997559239296248e-05, "loss": 1.0965, "step": 5907 }, { "epoch": 1.2010571254319984, "grad_norm": 0.13455016911029816, "learning_rate": 7.99552527204312e-05, "loss": 0.9488, "step": 5908 }, { "epoch": 1.2012604187843057, "grad_norm": 0.13477842509746552, "learning_rate": 7.993491304789992e-05, "loss": 0.9218, "step": 5909 }, { "epoch": 1.2014637121366132, "grad_norm": 0.14440806210041046, "learning_rate": 7.991457337536866e-05, "loss": 0.9193, "step": 5910 }, { "epoch": 1.2016670054889205, "grad_norm": 0.15229295194149017, "learning_rate": 7.989423370283739e-05, "loss": 1.0363, "step": 5911 }, { "epoch": 1.201870298841228, "grad_norm": 0.15386514365673065, "learning_rate": 7.987389403030611e-05, "loss": 1.0401, "step": 5912 }, { "epoch": 1.2020735921935353, "grad_norm": 0.14064916968345642, "learning_rate": 7.985355435777484e-05, "loss": 0.944, "step": 5913 }, { "epoch": 1.2022768855458426, "grad_norm": 0.14266358315944672, "learning_rate": 7.983321468524357e-05, "loss": 0.9544, "step": 5914 }, { "epoch": 1.20248017889815, "grad_norm": 0.13964015245437622, "learning_rate": 7.98128750127123e-05, "loss": 0.9036, "step": 5915 }, { "epoch": 1.2026834722504574, "grad_norm": 0.15103323757648468, "learning_rate": 7.979253534018102e-05, "loss": 1.051, "step": 5916 }, { "epoch": 1.2028867656027649, "grad_norm": 0.14747385680675507, "learning_rate": 7.977219566764975e-05, "loss": 0.8971, "step": 5917 }, { "epoch": 1.2030900589550722, "grad_norm": 0.14294147491455078, "learning_rate": 7.975185599511849e-05, "loss": 0.9279, "step": 5918 }, { "epoch": 1.2032933523073797, "grad_norm": 0.13962894678115845, "learning_rate": 7.973151632258721e-05, "loss": 0.9662, "step": 5919 }, { "epoch": 1.203496645659687, "grad_norm": 0.16897960007190704, "learning_rate": 7.971117665005593e-05, "loss": 1.2553, "step": 5920 }, { "epoch": 1.2036999390119942, "grad_norm": 0.1562163382768631, "learning_rate": 7.969083697752466e-05, "loss": 1.1168, "step": 5921 }, { "epoch": 1.2039032323643017, "grad_norm": 0.13830183446407318, "learning_rate": 7.96704973049934e-05, "loss": 0.8403, "step": 5922 }, { "epoch": 1.204106525716609, "grad_norm": 0.13156530261039734, "learning_rate": 7.965015763246212e-05, "loss": 0.952, "step": 5923 }, { "epoch": 1.2043098190689165, "grad_norm": 0.1500665247440338, "learning_rate": 7.962981795993085e-05, "loss": 1.0058, "step": 5924 }, { "epoch": 1.2045131124212238, "grad_norm": 0.14364342391490936, "learning_rate": 7.960947828739957e-05, "loss": 0.9573, "step": 5925 }, { "epoch": 1.2047164057735311, "grad_norm": 0.1456497609615326, "learning_rate": 7.958913861486831e-05, "loss": 0.9619, "step": 5926 }, { "epoch": 1.2049196991258386, "grad_norm": 0.13306300342082977, "learning_rate": 7.956879894233703e-05, "loss": 1.0108, "step": 5927 }, { "epoch": 1.205122992478146, "grad_norm": 0.16582590341567993, "learning_rate": 7.954845926980576e-05, "loss": 1.1483, "step": 5928 }, { "epoch": 1.2053262858304534, "grad_norm": 0.14681321382522583, "learning_rate": 7.952811959727448e-05, "loss": 0.9306, "step": 5929 }, { "epoch": 1.2055295791827607, "grad_norm": 0.1417584866285324, "learning_rate": 7.950777992474322e-05, "loss": 0.872, "step": 5930 }, { "epoch": 1.205732872535068, "grad_norm": 0.14118684828281403, "learning_rate": 7.948744025221194e-05, "loss": 0.9267, "step": 5931 }, { "epoch": 1.2059361658873755, "grad_norm": 0.13047218322753906, "learning_rate": 7.946710057968068e-05, "loss": 0.8934, "step": 5932 }, { "epoch": 1.2061394592396828, "grad_norm": 0.15511366724967957, "learning_rate": 7.944676090714939e-05, "loss": 1.1041, "step": 5933 }, { "epoch": 1.2063427525919903, "grad_norm": 0.14702750742435455, "learning_rate": 7.942642123461813e-05, "loss": 0.9171, "step": 5934 }, { "epoch": 1.2065460459442976, "grad_norm": 0.16108551621437073, "learning_rate": 7.940608156208686e-05, "loss": 0.9684, "step": 5935 }, { "epoch": 1.2067493392966049, "grad_norm": 0.15319964289665222, "learning_rate": 7.93857418895556e-05, "loss": 1.0696, "step": 5936 }, { "epoch": 1.2069526326489124, "grad_norm": 0.15028350055217743, "learning_rate": 7.93654022170243e-05, "loss": 0.9735, "step": 5937 }, { "epoch": 1.2071559260012197, "grad_norm": 0.15127158164978027, "learning_rate": 7.934506254449304e-05, "loss": 1.0975, "step": 5938 }, { "epoch": 1.2073592193535272, "grad_norm": 0.16552859544754028, "learning_rate": 7.932472287196177e-05, "loss": 1.0935, "step": 5939 }, { "epoch": 1.2075625127058345, "grad_norm": 0.14383484423160553, "learning_rate": 7.930438319943049e-05, "loss": 1.0105, "step": 5940 }, { "epoch": 1.207765806058142, "grad_norm": 0.15808679163455963, "learning_rate": 7.928404352689922e-05, "loss": 1.124, "step": 5941 }, { "epoch": 1.2079690994104493, "grad_norm": 0.1643362194299698, "learning_rate": 7.926370385436794e-05, "loss": 1.2667, "step": 5942 }, { "epoch": 1.2081723927627566, "grad_norm": 0.1681181788444519, "learning_rate": 7.924336418183668e-05, "loss": 1.132, "step": 5943 }, { "epoch": 1.208375686115064, "grad_norm": 0.17151467502117157, "learning_rate": 7.92230245093054e-05, "loss": 1.0677, "step": 5944 }, { "epoch": 1.2085789794673714, "grad_norm": 0.13617511093616486, "learning_rate": 7.920268483677413e-05, "loss": 0.8566, "step": 5945 }, { "epoch": 1.2087822728196789, "grad_norm": 0.1504543572664261, "learning_rate": 7.918234516424285e-05, "loss": 0.9746, "step": 5946 }, { "epoch": 1.2089855661719862, "grad_norm": 0.1469312161207199, "learning_rate": 7.916200549171159e-05, "loss": 0.9503, "step": 5947 }, { "epoch": 1.2091888595242937, "grad_norm": 0.1474432349205017, "learning_rate": 7.914166581918031e-05, "loss": 0.9862, "step": 5948 }, { "epoch": 1.209392152876601, "grad_norm": 0.1602153331041336, "learning_rate": 7.912132614664904e-05, "loss": 1.0523, "step": 5949 }, { "epoch": 1.2095954462289082, "grad_norm": 0.13873906433582306, "learning_rate": 7.910098647411776e-05, "loss": 0.8833, "step": 5950 }, { "epoch": 1.2097987395812158, "grad_norm": 0.18601791560649872, "learning_rate": 7.90806468015865e-05, "loss": 1.258, "step": 5951 }, { "epoch": 1.210002032933523, "grad_norm": 0.15273120999336243, "learning_rate": 7.906030712905523e-05, "loss": 1.0081, "step": 5952 }, { "epoch": 1.2102053262858306, "grad_norm": 0.16047626733779907, "learning_rate": 7.903996745652395e-05, "loss": 1.1502, "step": 5953 }, { "epoch": 1.2104086196381378, "grad_norm": 0.1480061113834381, "learning_rate": 7.901962778399267e-05, "loss": 1.0017, "step": 5954 }, { "epoch": 1.2106119129904451, "grad_norm": 0.16537447273731232, "learning_rate": 7.899928811146141e-05, "loss": 1.0887, "step": 5955 }, { "epoch": 1.2108152063427526, "grad_norm": 0.16818997263908386, "learning_rate": 7.897894843893014e-05, "loss": 1.0813, "step": 5956 }, { "epoch": 1.21101849969506, "grad_norm": 0.14905446767807007, "learning_rate": 7.895860876639886e-05, "loss": 1.1452, "step": 5957 }, { "epoch": 1.2112217930473674, "grad_norm": 0.16087594628334045, "learning_rate": 7.893826909386759e-05, "loss": 1.0707, "step": 5958 }, { "epoch": 1.2114250863996747, "grad_norm": 0.145945206284523, "learning_rate": 7.891792942133632e-05, "loss": 1.0248, "step": 5959 }, { "epoch": 1.211628379751982, "grad_norm": 0.151754230260849, "learning_rate": 7.889758974880505e-05, "loss": 1.0147, "step": 5960 }, { "epoch": 1.2118316731042895, "grad_norm": 0.139401376247406, "learning_rate": 7.887725007627377e-05, "loss": 0.9278, "step": 5961 }, { "epoch": 1.2120349664565968, "grad_norm": 0.1569097489118576, "learning_rate": 7.88569104037425e-05, "loss": 1.0747, "step": 5962 }, { "epoch": 1.2122382598089043, "grad_norm": 0.14463907480239868, "learning_rate": 7.883657073121124e-05, "loss": 0.9767, "step": 5963 }, { "epoch": 1.2124415531612116, "grad_norm": 0.1596505045890808, "learning_rate": 7.881623105867996e-05, "loss": 1.1514, "step": 5964 }, { "epoch": 1.212644846513519, "grad_norm": 0.16433337330818176, "learning_rate": 7.879589138614868e-05, "loss": 1.1582, "step": 5965 }, { "epoch": 1.2128481398658264, "grad_norm": 0.14704091846942902, "learning_rate": 7.877555171361741e-05, "loss": 0.9008, "step": 5966 }, { "epoch": 1.2130514332181337, "grad_norm": 0.16440390050411224, "learning_rate": 7.875521204108615e-05, "loss": 1.0331, "step": 5967 }, { "epoch": 1.2132547265704412, "grad_norm": 0.14984196424484253, "learning_rate": 7.873487236855487e-05, "loss": 1.0201, "step": 5968 }, { "epoch": 1.2134580199227485, "grad_norm": 0.13804790377616882, "learning_rate": 7.87145326960236e-05, "loss": 0.8756, "step": 5969 }, { "epoch": 1.213661313275056, "grad_norm": 0.15829437971115112, "learning_rate": 7.869419302349232e-05, "loss": 1.2565, "step": 5970 }, { "epoch": 1.2138646066273633, "grad_norm": 0.16005225479602814, "learning_rate": 7.867385335096106e-05, "loss": 1.0816, "step": 5971 }, { "epoch": 1.2140678999796706, "grad_norm": 0.14023050665855408, "learning_rate": 7.865351367842978e-05, "loss": 1.0056, "step": 5972 }, { "epoch": 1.214271193331978, "grad_norm": 0.1558290719985962, "learning_rate": 7.863317400589851e-05, "loss": 0.9293, "step": 5973 }, { "epoch": 1.2144744866842854, "grad_norm": 0.16956967115402222, "learning_rate": 7.861283433336723e-05, "loss": 1.0609, "step": 5974 }, { "epoch": 1.2146777800365929, "grad_norm": 0.13711321353912354, "learning_rate": 7.859249466083597e-05, "loss": 1.0161, "step": 5975 }, { "epoch": 1.2148810733889002, "grad_norm": 0.15096637606620789, "learning_rate": 7.85721549883047e-05, "loss": 0.9326, "step": 5976 }, { "epoch": 1.2150843667412077, "grad_norm": 0.15365126729011536, "learning_rate": 7.855181531577342e-05, "loss": 0.9785, "step": 5977 }, { "epoch": 1.215287660093515, "grad_norm": 0.1664636880159378, "learning_rate": 7.853147564324214e-05, "loss": 1.1646, "step": 5978 }, { "epoch": 1.2154909534458223, "grad_norm": 0.14389696717262268, "learning_rate": 7.851113597071088e-05, "loss": 1.0031, "step": 5979 }, { "epoch": 1.2156942467981298, "grad_norm": 0.1665438562631607, "learning_rate": 7.84907962981796e-05, "loss": 1.1069, "step": 5980 }, { "epoch": 1.215897540150437, "grad_norm": 0.15469186007976532, "learning_rate": 7.847045662564833e-05, "loss": 1.0227, "step": 5981 }, { "epoch": 1.2161008335027446, "grad_norm": 0.16661310195922852, "learning_rate": 7.845011695311705e-05, "loss": 1.1753, "step": 5982 }, { "epoch": 1.2163041268550518, "grad_norm": 0.13519755005836487, "learning_rate": 7.842977728058578e-05, "loss": 0.9268, "step": 5983 }, { "epoch": 1.2165074202073591, "grad_norm": 0.15839457511901855, "learning_rate": 7.840943760805452e-05, "loss": 1.1744, "step": 5984 }, { "epoch": 1.2167107135596666, "grad_norm": 0.15252776443958282, "learning_rate": 7.838909793552324e-05, "loss": 1.0487, "step": 5985 }, { "epoch": 1.216914006911974, "grad_norm": 0.15469424426555634, "learning_rate": 7.836875826299197e-05, "loss": 1.0426, "step": 5986 }, { "epoch": 1.2171173002642814, "grad_norm": 0.13677896559238434, "learning_rate": 7.834841859046069e-05, "loss": 0.8818, "step": 5987 }, { "epoch": 1.2173205936165887, "grad_norm": 0.13671749830245972, "learning_rate": 7.832807891792943e-05, "loss": 0.867, "step": 5988 }, { "epoch": 1.217523886968896, "grad_norm": 0.14619436860084534, "learning_rate": 7.830773924539815e-05, "loss": 0.9299, "step": 5989 }, { "epoch": 1.2177271803212035, "grad_norm": 0.15766948461532593, "learning_rate": 7.828739957286688e-05, "loss": 0.9758, "step": 5990 }, { "epoch": 1.2179304736735108, "grad_norm": 0.14414988458156586, "learning_rate": 7.82670599003356e-05, "loss": 1.0246, "step": 5991 }, { "epoch": 1.2181337670258183, "grad_norm": 0.1571853905916214, "learning_rate": 7.824672022780434e-05, "loss": 1.0623, "step": 5992 }, { "epoch": 1.2183370603781256, "grad_norm": 0.158509761095047, "learning_rate": 7.822638055527306e-05, "loss": 1.0383, "step": 5993 }, { "epoch": 1.218540353730433, "grad_norm": 0.16249847412109375, "learning_rate": 7.820604088274179e-05, "loss": 1.1868, "step": 5994 }, { "epoch": 1.2187436470827404, "grad_norm": 0.15063871443271637, "learning_rate": 7.818570121021051e-05, "loss": 1.0497, "step": 5995 }, { "epoch": 1.2189469404350477, "grad_norm": 0.15637479722499847, "learning_rate": 7.816536153767925e-05, "loss": 1.11, "step": 5996 }, { "epoch": 1.2191502337873552, "grad_norm": 0.12896780669689178, "learning_rate": 7.814502186514798e-05, "loss": 0.8355, "step": 5997 }, { "epoch": 1.2193535271396625, "grad_norm": 0.1477995067834854, "learning_rate": 7.81246821926167e-05, "loss": 1.0115, "step": 5998 }, { "epoch": 1.21955682049197, "grad_norm": 0.1346842646598816, "learning_rate": 7.810434252008542e-05, "loss": 0.8145, "step": 5999 }, { "epoch": 1.2197601138442773, "grad_norm": 0.16976101696491241, "learning_rate": 7.808400284755416e-05, "loss": 1.2802, "step": 6000 }, { "epoch": 1.2199634071965846, "grad_norm": 0.1319088190793991, "learning_rate": 7.806366317502289e-05, "loss": 0.8274, "step": 6001 }, { "epoch": 1.220166700548892, "grad_norm": 0.13133682310581207, "learning_rate": 7.804332350249161e-05, "loss": 0.8474, "step": 6002 }, { "epoch": 1.2203699939011994, "grad_norm": 0.1556311994791031, "learning_rate": 7.802298382996034e-05, "loss": 1.0508, "step": 6003 }, { "epoch": 1.2205732872535069, "grad_norm": 0.15803690254688263, "learning_rate": 7.800264415742907e-05, "loss": 1.1573, "step": 6004 }, { "epoch": 1.2207765806058142, "grad_norm": 0.1543922871351242, "learning_rate": 7.79823044848978e-05, "loss": 1.1319, "step": 6005 }, { "epoch": 1.2209798739581217, "grad_norm": 0.14866527915000916, "learning_rate": 7.796196481236652e-05, "loss": 0.949, "step": 6006 }, { "epoch": 1.221183167310429, "grad_norm": 0.14351116120815277, "learning_rate": 7.794162513983525e-05, "loss": 0.9472, "step": 6007 }, { "epoch": 1.2213864606627363, "grad_norm": 0.14699599146842957, "learning_rate": 7.792128546730399e-05, "loss": 0.9011, "step": 6008 }, { "epoch": 1.2215897540150438, "grad_norm": 0.17406338453292847, "learning_rate": 7.790094579477271e-05, "loss": 1.2005, "step": 6009 }, { "epoch": 1.221793047367351, "grad_norm": 0.15509524941444397, "learning_rate": 7.788060612224143e-05, "loss": 0.9375, "step": 6010 }, { "epoch": 1.2219963407196586, "grad_norm": 0.13967086374759674, "learning_rate": 7.786026644971016e-05, "loss": 0.9858, "step": 6011 }, { "epoch": 1.2221996340719659, "grad_norm": 0.1699836403131485, "learning_rate": 7.78399267771789e-05, "loss": 1.0934, "step": 6012 }, { "epoch": 1.2224029274242731, "grad_norm": 0.14084777235984802, "learning_rate": 7.781958710464762e-05, "loss": 0.8928, "step": 6013 }, { "epoch": 1.2226062207765807, "grad_norm": 0.1351868063211441, "learning_rate": 7.779924743211635e-05, "loss": 0.9211, "step": 6014 }, { "epoch": 1.222809514128888, "grad_norm": 0.1394532471895218, "learning_rate": 7.777890775958507e-05, "loss": 0.9834, "step": 6015 }, { "epoch": 1.2230128074811955, "grad_norm": 0.1532890498638153, "learning_rate": 7.775856808705381e-05, "loss": 1.07, "step": 6016 }, { "epoch": 1.2232161008335027, "grad_norm": 0.15940631926059723, "learning_rate": 7.773822841452253e-05, "loss": 0.9896, "step": 6017 }, { "epoch": 1.22341939418581, "grad_norm": 0.141846165060997, "learning_rate": 7.771788874199126e-05, "loss": 0.8771, "step": 6018 }, { "epoch": 1.2236226875381175, "grad_norm": 0.16429439187049866, "learning_rate": 7.769754906945998e-05, "loss": 1.1372, "step": 6019 }, { "epoch": 1.2238259808904248, "grad_norm": 0.1476045548915863, "learning_rate": 7.767720939692872e-05, "loss": 0.9792, "step": 6020 }, { "epoch": 1.2240292742427323, "grad_norm": 0.13393555581569672, "learning_rate": 7.765686972439744e-05, "loss": 0.9564, "step": 6021 }, { "epoch": 1.2242325675950396, "grad_norm": 0.15074948966503143, "learning_rate": 7.763653005186617e-05, "loss": 0.9082, "step": 6022 }, { "epoch": 1.224435860947347, "grad_norm": 0.14458337426185608, "learning_rate": 7.761619037933489e-05, "loss": 0.9419, "step": 6023 }, { "epoch": 1.2246391542996544, "grad_norm": 0.12875951826572418, "learning_rate": 7.759585070680362e-05, "loss": 0.8965, "step": 6024 }, { "epoch": 1.2248424476519617, "grad_norm": 0.16608628630638123, "learning_rate": 7.757551103427236e-05, "loss": 1.0796, "step": 6025 }, { "epoch": 1.2250457410042692, "grad_norm": 0.16221550107002258, "learning_rate": 7.755517136174108e-05, "loss": 1.0232, "step": 6026 }, { "epoch": 1.2252490343565765, "grad_norm": 0.1537492722272873, "learning_rate": 7.75348316892098e-05, "loss": 0.9851, "step": 6027 }, { "epoch": 1.225452327708884, "grad_norm": 0.15429674088954926, "learning_rate": 7.751449201667853e-05, "loss": 1.035, "step": 6028 }, { "epoch": 1.2256556210611913, "grad_norm": 0.15351472795009613, "learning_rate": 7.749415234414727e-05, "loss": 1.0984, "step": 6029 }, { "epoch": 1.2258589144134986, "grad_norm": 0.16499385237693787, "learning_rate": 7.747381267161599e-05, "loss": 1.1478, "step": 6030 }, { "epoch": 1.226062207765806, "grad_norm": 0.16109612584114075, "learning_rate": 7.745347299908472e-05, "loss": 1.1764, "step": 6031 }, { "epoch": 1.2262655011181134, "grad_norm": 0.14844362437725067, "learning_rate": 7.743313332655344e-05, "loss": 0.9723, "step": 6032 }, { "epoch": 1.226468794470421, "grad_norm": 0.142217755317688, "learning_rate": 7.741279365402218e-05, "loss": 0.8857, "step": 6033 }, { "epoch": 1.2266720878227282, "grad_norm": 0.14022211730480194, "learning_rate": 7.73924539814909e-05, "loss": 0.9567, "step": 6034 }, { "epoch": 1.2268753811750357, "grad_norm": 0.15379805862903595, "learning_rate": 7.737211430895963e-05, "loss": 1.0376, "step": 6035 }, { "epoch": 1.227078674527343, "grad_norm": 0.1771107167005539, "learning_rate": 7.735177463642835e-05, "loss": 1.1723, "step": 6036 }, { "epoch": 1.2272819678796503, "grad_norm": 0.14327488839626312, "learning_rate": 7.733143496389709e-05, "loss": 0.9283, "step": 6037 }, { "epoch": 1.2274852612319578, "grad_norm": 0.14578698575496674, "learning_rate": 7.731109529136581e-05, "loss": 0.9931, "step": 6038 }, { "epoch": 1.227688554584265, "grad_norm": 0.15075939893722534, "learning_rate": 7.729075561883454e-05, "loss": 1.0464, "step": 6039 }, { "epoch": 1.2278918479365726, "grad_norm": 0.16097469627857208, "learning_rate": 7.727041594630326e-05, "loss": 1.0914, "step": 6040 }, { "epoch": 1.2280951412888799, "grad_norm": 0.15317900478839874, "learning_rate": 7.7250076273772e-05, "loss": 0.9143, "step": 6041 }, { "epoch": 1.2282984346411872, "grad_norm": 0.17345957458019257, "learning_rate": 7.722973660124073e-05, "loss": 1.2072, "step": 6042 }, { "epoch": 1.2285017279934947, "grad_norm": 0.15269114077091217, "learning_rate": 7.720939692870945e-05, "loss": 1.1181, "step": 6043 }, { "epoch": 1.228705021345802, "grad_norm": 0.15105299651622772, "learning_rate": 7.718905725617817e-05, "loss": 1.0661, "step": 6044 }, { "epoch": 1.2289083146981095, "grad_norm": 0.17497089505195618, "learning_rate": 7.716871758364691e-05, "loss": 1.2526, "step": 6045 }, { "epoch": 1.2291116080504167, "grad_norm": 0.14528773725032806, "learning_rate": 7.714837791111564e-05, "loss": 0.9004, "step": 6046 }, { "epoch": 1.229314901402724, "grad_norm": 0.1562442183494568, "learning_rate": 7.712803823858436e-05, "loss": 1.0503, "step": 6047 }, { "epoch": 1.2295181947550315, "grad_norm": 0.15427474677562714, "learning_rate": 7.710769856605309e-05, "loss": 1.0865, "step": 6048 }, { "epoch": 1.2297214881073388, "grad_norm": 0.15478043258190155, "learning_rate": 7.708735889352182e-05, "loss": 1.0287, "step": 6049 }, { "epoch": 1.2299247814596463, "grad_norm": 0.16193340718746185, "learning_rate": 7.706701922099055e-05, "loss": 1.0488, "step": 6050 }, { "epoch": 1.2301280748119536, "grad_norm": 0.14678221940994263, "learning_rate": 7.704667954845927e-05, "loss": 1.0299, "step": 6051 }, { "epoch": 1.230331368164261, "grad_norm": 0.14202548563480377, "learning_rate": 7.7026339875928e-05, "loss": 0.8996, "step": 6052 }, { "epoch": 1.2305346615165684, "grad_norm": 0.15006931126117706, "learning_rate": 7.700600020339674e-05, "loss": 1.0583, "step": 6053 }, { "epoch": 1.2307379548688757, "grad_norm": 0.15628725290298462, "learning_rate": 7.698566053086546e-05, "loss": 1.0701, "step": 6054 }, { "epoch": 1.2309412482211832, "grad_norm": 0.13345082104206085, "learning_rate": 7.696532085833418e-05, "loss": 0.8766, "step": 6055 }, { "epoch": 1.2311445415734905, "grad_norm": 0.1641533076763153, "learning_rate": 7.694498118580291e-05, "loss": 1.0282, "step": 6056 }, { "epoch": 1.2313478349257978, "grad_norm": 0.16435573995113373, "learning_rate": 7.692464151327165e-05, "loss": 1.0209, "step": 6057 }, { "epoch": 1.2315511282781053, "grad_norm": 0.16523127257823944, "learning_rate": 7.690430184074037e-05, "loss": 1.1916, "step": 6058 }, { "epoch": 1.2317544216304126, "grad_norm": 0.15313848853111267, "learning_rate": 7.68839621682091e-05, "loss": 1.0295, "step": 6059 }, { "epoch": 1.23195771498272, "grad_norm": 0.15203092992305756, "learning_rate": 7.686362249567782e-05, "loss": 0.921, "step": 6060 }, { "epoch": 1.2321610083350274, "grad_norm": 0.1742718368768692, "learning_rate": 7.684328282314656e-05, "loss": 1.178, "step": 6061 }, { "epoch": 1.232364301687335, "grad_norm": 0.1357606202363968, "learning_rate": 7.682294315061528e-05, "loss": 0.8214, "step": 6062 }, { "epoch": 1.2325675950396422, "grad_norm": 0.1643107533454895, "learning_rate": 7.680260347808401e-05, "loss": 1.2033, "step": 6063 }, { "epoch": 1.2327708883919497, "grad_norm": 0.14524690806865692, "learning_rate": 7.678226380555273e-05, "loss": 0.9755, "step": 6064 }, { "epoch": 1.232974181744257, "grad_norm": 0.1577269285917282, "learning_rate": 7.676192413302146e-05, "loss": 1.0717, "step": 6065 }, { "epoch": 1.2331774750965643, "grad_norm": 0.1647750288248062, "learning_rate": 7.67415844604902e-05, "loss": 1.1883, "step": 6066 }, { "epoch": 1.2333807684488718, "grad_norm": 0.15586699545383453, "learning_rate": 7.672124478795892e-05, "loss": 1.1943, "step": 6067 }, { "epoch": 1.233584061801179, "grad_norm": 0.15125897526741028, "learning_rate": 7.670090511542764e-05, "loss": 0.9812, "step": 6068 }, { "epoch": 1.2337873551534866, "grad_norm": 0.15304671227931976, "learning_rate": 7.668056544289637e-05, "loss": 1.087, "step": 6069 }, { "epoch": 1.2339906485057939, "grad_norm": 0.16718313097953796, "learning_rate": 7.66602257703651e-05, "loss": 1.1206, "step": 6070 }, { "epoch": 1.2341939418581012, "grad_norm": 0.13378532230854034, "learning_rate": 7.663988609783383e-05, "loss": 0.8837, "step": 6071 }, { "epoch": 1.2343972352104087, "grad_norm": 0.15577539801597595, "learning_rate": 7.661954642530255e-05, "loss": 0.9845, "step": 6072 }, { "epoch": 1.234600528562716, "grad_norm": 0.15913674235343933, "learning_rate": 7.659920675277128e-05, "loss": 1.02, "step": 6073 }, { "epoch": 1.2348038219150235, "grad_norm": 0.159415602684021, "learning_rate": 7.657886708024002e-05, "loss": 1.1367, "step": 6074 }, { "epoch": 1.2350071152673308, "grad_norm": 0.16024009883403778, "learning_rate": 7.655852740770874e-05, "loss": 1.1639, "step": 6075 }, { "epoch": 1.235210408619638, "grad_norm": 0.14657573401927948, "learning_rate": 7.653818773517747e-05, "loss": 0.9429, "step": 6076 }, { "epoch": 1.2354137019719456, "grad_norm": 0.1571348011493683, "learning_rate": 7.651784806264619e-05, "loss": 1.1514, "step": 6077 }, { "epoch": 1.2356169953242528, "grad_norm": 0.14193733036518097, "learning_rate": 7.649750839011493e-05, "loss": 0.9404, "step": 6078 }, { "epoch": 1.2358202886765604, "grad_norm": 0.1605028212070465, "learning_rate": 7.647716871758365e-05, "loss": 1.0891, "step": 6079 }, { "epoch": 1.2360235820288676, "grad_norm": 0.14684657752513885, "learning_rate": 7.645682904505238e-05, "loss": 1.0208, "step": 6080 }, { "epoch": 1.236226875381175, "grad_norm": 0.14104479551315308, "learning_rate": 7.64364893725211e-05, "loss": 0.9516, "step": 6081 }, { "epoch": 1.2364301687334824, "grad_norm": 0.15257036685943604, "learning_rate": 7.641614969998984e-05, "loss": 1.1801, "step": 6082 }, { "epoch": 1.2366334620857897, "grad_norm": 0.14425641298294067, "learning_rate": 7.639581002745856e-05, "loss": 0.9674, "step": 6083 }, { "epoch": 1.2368367554380972, "grad_norm": 0.13775646686553955, "learning_rate": 7.637547035492729e-05, "loss": 0.9788, "step": 6084 }, { "epoch": 1.2370400487904045, "grad_norm": 0.17104454338550568, "learning_rate": 7.635513068239601e-05, "loss": 1.0818, "step": 6085 }, { "epoch": 1.2372433421427118, "grad_norm": 0.15287339687347412, "learning_rate": 7.633479100986475e-05, "loss": 1.0966, "step": 6086 }, { "epoch": 1.2374466354950193, "grad_norm": 0.17102481424808502, "learning_rate": 7.631445133733348e-05, "loss": 1.2057, "step": 6087 }, { "epoch": 1.2376499288473266, "grad_norm": 0.14067615568637848, "learning_rate": 7.62941116648022e-05, "loss": 0.8911, "step": 6088 }, { "epoch": 1.2378532221996341, "grad_norm": 0.13696187734603882, "learning_rate": 7.627377199227092e-05, "loss": 1.0254, "step": 6089 }, { "epoch": 1.2380565155519414, "grad_norm": 0.1611851453781128, "learning_rate": 7.625343231973966e-05, "loss": 1.1413, "step": 6090 }, { "epoch": 1.238259808904249, "grad_norm": 0.1464037150144577, "learning_rate": 7.623309264720839e-05, "loss": 0.9052, "step": 6091 }, { "epoch": 1.2384631022565562, "grad_norm": 0.16069039702415466, "learning_rate": 7.621275297467711e-05, "loss": 1.0247, "step": 6092 }, { "epoch": 1.2386663956088637, "grad_norm": 0.1654343605041504, "learning_rate": 7.619241330214584e-05, "loss": 0.9899, "step": 6093 }, { "epoch": 1.238869688961171, "grad_norm": 0.1397971659898758, "learning_rate": 7.617207362961457e-05, "loss": 1.0895, "step": 6094 }, { "epoch": 1.2390729823134783, "grad_norm": 0.14901459217071533, "learning_rate": 7.61517339570833e-05, "loss": 0.9465, "step": 6095 }, { "epoch": 1.2392762756657858, "grad_norm": 0.15366655588150024, "learning_rate": 7.613139428455202e-05, "loss": 1.1269, "step": 6096 }, { "epoch": 1.239479569018093, "grad_norm": 0.15471157431602478, "learning_rate": 7.611105461202075e-05, "loss": 0.9961, "step": 6097 }, { "epoch": 1.2396828623704006, "grad_norm": 0.1551191508769989, "learning_rate": 7.609071493948948e-05, "loss": 1.0627, "step": 6098 }, { "epoch": 1.2398861557227079, "grad_norm": 0.15324456989765167, "learning_rate": 7.607037526695821e-05, "loss": 0.9308, "step": 6099 }, { "epoch": 1.2400894490750152, "grad_norm": 0.14543670415878296, "learning_rate": 7.605003559442693e-05, "loss": 1.0393, "step": 6100 }, { "epoch": 1.2402927424273227, "grad_norm": 0.16721047461032867, "learning_rate": 7.602969592189566e-05, "loss": 1.0621, "step": 6101 }, { "epoch": 1.24049603577963, "grad_norm": 0.17093773186206818, "learning_rate": 7.60093562493644e-05, "loss": 1.0743, "step": 6102 }, { "epoch": 1.2406993291319375, "grad_norm": 0.17696061730384827, "learning_rate": 7.598901657683312e-05, "loss": 0.9628, "step": 6103 }, { "epoch": 1.2409026224842448, "grad_norm": 0.14208592474460602, "learning_rate": 7.596867690430185e-05, "loss": 0.9654, "step": 6104 }, { "epoch": 1.241105915836552, "grad_norm": 0.15835708379745483, "learning_rate": 7.594833723177057e-05, "loss": 1.1394, "step": 6105 }, { "epoch": 1.2413092091888596, "grad_norm": 0.1430591642856598, "learning_rate": 7.59279975592393e-05, "loss": 0.9112, "step": 6106 }, { "epoch": 1.2415125025411669, "grad_norm": 0.1534785032272339, "learning_rate": 7.590765788670803e-05, "loss": 0.9188, "step": 6107 }, { "epoch": 1.2417157958934744, "grad_norm": 0.15656499564647675, "learning_rate": 7.588731821417676e-05, "loss": 0.979, "step": 6108 }, { "epoch": 1.2419190892457816, "grad_norm": 0.1518164575099945, "learning_rate": 7.586697854164548e-05, "loss": 1.1044, "step": 6109 }, { "epoch": 1.242122382598089, "grad_norm": 0.14292199909687042, "learning_rate": 7.58466388691142e-05, "loss": 0.8677, "step": 6110 }, { "epoch": 1.2423256759503964, "grad_norm": 0.16828525066375732, "learning_rate": 7.582629919658294e-05, "loss": 1.1678, "step": 6111 }, { "epoch": 1.2425289693027037, "grad_norm": 0.1284974068403244, "learning_rate": 7.580595952405167e-05, "loss": 0.8286, "step": 6112 }, { "epoch": 1.2427322626550112, "grad_norm": 0.14029505848884583, "learning_rate": 7.578561985152039e-05, "loss": 0.9423, "step": 6113 }, { "epoch": 1.2429355560073185, "grad_norm": 0.1682073324918747, "learning_rate": 7.576528017898912e-05, "loss": 1.1954, "step": 6114 }, { "epoch": 1.2431388493596258, "grad_norm": 0.1521628051996231, "learning_rate": 7.574494050645786e-05, "loss": 1.0827, "step": 6115 }, { "epoch": 1.2433421427119333, "grad_norm": 0.15839159488677979, "learning_rate": 7.572460083392658e-05, "loss": 1.2435, "step": 6116 }, { "epoch": 1.2435454360642406, "grad_norm": 0.14326351881027222, "learning_rate": 7.57042611613953e-05, "loss": 0.9765, "step": 6117 }, { "epoch": 1.2437487294165481, "grad_norm": 0.15199799835681915, "learning_rate": 7.568392148886403e-05, "loss": 0.9618, "step": 6118 }, { "epoch": 1.2439520227688554, "grad_norm": 0.14783890545368195, "learning_rate": 7.566358181633277e-05, "loss": 1.0208, "step": 6119 }, { "epoch": 1.244155316121163, "grad_norm": 0.15681447088718414, "learning_rate": 7.564324214380149e-05, "loss": 1.0856, "step": 6120 }, { "epoch": 1.2443586094734702, "grad_norm": 0.1497943103313446, "learning_rate": 7.562290247127022e-05, "loss": 0.9668, "step": 6121 }, { "epoch": 1.2445619028257777, "grad_norm": 0.14835524559020996, "learning_rate": 7.560256279873894e-05, "loss": 0.9293, "step": 6122 }, { "epoch": 1.244765196178085, "grad_norm": 0.13807836174964905, "learning_rate": 7.558222312620768e-05, "loss": 0.95, "step": 6123 }, { "epoch": 1.2449684895303923, "grad_norm": 0.14951051771640778, "learning_rate": 7.55618834536764e-05, "loss": 0.9511, "step": 6124 }, { "epoch": 1.2451717828826998, "grad_norm": 0.16398605704307556, "learning_rate": 7.554154378114513e-05, "loss": 1.008, "step": 6125 }, { "epoch": 1.245375076235007, "grad_norm": 0.12974978983402252, "learning_rate": 7.552120410861385e-05, "loss": 0.8797, "step": 6126 }, { "epoch": 1.2455783695873146, "grad_norm": 0.15278978645801544, "learning_rate": 7.550086443608259e-05, "loss": 1.0084, "step": 6127 }, { "epoch": 1.245781662939622, "grad_norm": 0.14859604835510254, "learning_rate": 7.548052476355131e-05, "loss": 0.9819, "step": 6128 }, { "epoch": 1.2459849562919292, "grad_norm": 0.13911408185958862, "learning_rate": 7.546018509102004e-05, "loss": 0.9986, "step": 6129 }, { "epoch": 1.2461882496442367, "grad_norm": 0.1627921313047409, "learning_rate": 7.543984541848876e-05, "loss": 1.0073, "step": 6130 }, { "epoch": 1.246391542996544, "grad_norm": 0.14569565653800964, "learning_rate": 7.54195057459575e-05, "loss": 0.9272, "step": 6131 }, { "epoch": 1.2465948363488515, "grad_norm": 0.15063758194446564, "learning_rate": 7.539916607342623e-05, "loss": 1.0539, "step": 6132 }, { "epoch": 1.2467981297011588, "grad_norm": 0.15409833192825317, "learning_rate": 7.537882640089495e-05, "loss": 1.0715, "step": 6133 }, { "epoch": 1.247001423053466, "grad_norm": 0.15179461240768433, "learning_rate": 7.535848672836367e-05, "loss": 0.9781, "step": 6134 }, { "epoch": 1.2472047164057736, "grad_norm": 0.14283648133277893, "learning_rate": 7.533814705583241e-05, "loss": 0.9186, "step": 6135 }, { "epoch": 1.2474080097580809, "grad_norm": 0.16481667757034302, "learning_rate": 7.531780738330114e-05, "loss": 1.1317, "step": 6136 }, { "epoch": 1.2476113031103884, "grad_norm": 0.15260998904705048, "learning_rate": 7.529746771076986e-05, "loss": 1.029, "step": 6137 }, { "epoch": 1.2478145964626957, "grad_norm": 0.1735289990901947, "learning_rate": 7.527712803823859e-05, "loss": 1.1954, "step": 6138 }, { "epoch": 1.248017889815003, "grad_norm": 0.15304701030254364, "learning_rate": 7.525678836570732e-05, "loss": 0.9608, "step": 6139 }, { "epoch": 1.2482211831673105, "grad_norm": 0.14913487434387207, "learning_rate": 7.523644869317605e-05, "loss": 0.9169, "step": 6140 }, { "epoch": 1.2484244765196177, "grad_norm": 0.15657873451709747, "learning_rate": 7.521610902064477e-05, "loss": 1.0074, "step": 6141 }, { "epoch": 1.2486277698719253, "grad_norm": 0.1431896835565567, "learning_rate": 7.51957693481135e-05, "loss": 0.9779, "step": 6142 }, { "epoch": 1.2488310632242325, "grad_norm": 0.1579836755990982, "learning_rate": 7.517542967558223e-05, "loss": 1.0718, "step": 6143 }, { "epoch": 1.2490343565765398, "grad_norm": 0.1540554016828537, "learning_rate": 7.515509000305096e-05, "loss": 1.003, "step": 6144 }, { "epoch": 1.2492376499288473, "grad_norm": 0.1616765558719635, "learning_rate": 7.513475033051968e-05, "loss": 1.1288, "step": 6145 }, { "epoch": 1.2494409432811546, "grad_norm": 0.12108495086431503, "learning_rate": 7.511441065798841e-05, "loss": 0.8396, "step": 6146 }, { "epoch": 1.2496442366334621, "grad_norm": 0.1616901010274887, "learning_rate": 7.509407098545713e-05, "loss": 1.0475, "step": 6147 }, { "epoch": 1.2498475299857694, "grad_norm": 0.15256421267986298, "learning_rate": 7.507373131292587e-05, "loss": 0.988, "step": 6148 }, { "epoch": 1.2500508233380767, "grad_norm": 0.161456897854805, "learning_rate": 7.505339164039458e-05, "loss": 1.0345, "step": 6149 }, { "epoch": 1.2502541166903842, "grad_norm": 0.15116725862026215, "learning_rate": 7.503305196786332e-05, "loss": 0.9795, "step": 6150 }, { "epoch": 1.2504574100426917, "grad_norm": 0.15002092719078064, "learning_rate": 7.501271229533204e-05, "loss": 0.9661, "step": 6151 }, { "epoch": 1.250660703394999, "grad_norm": 0.14170213043689728, "learning_rate": 7.499237262280078e-05, "loss": 1.055, "step": 6152 }, { "epoch": 1.2508639967473063, "grad_norm": 0.15835924446582794, "learning_rate": 7.497203295026949e-05, "loss": 1.0649, "step": 6153 }, { "epoch": 1.2510672900996138, "grad_norm": 0.14524368941783905, "learning_rate": 7.495169327773823e-05, "loss": 0.9768, "step": 6154 }, { "epoch": 1.251270583451921, "grad_norm": 0.13836443424224854, "learning_rate": 7.493135360520696e-05, "loss": 0.9321, "step": 6155 }, { "epoch": 1.2514738768042286, "grad_norm": 0.14955168962478638, "learning_rate": 7.49110139326757e-05, "loss": 1.1414, "step": 6156 }, { "epoch": 1.251677170156536, "grad_norm": 0.14756670594215393, "learning_rate": 7.489067426014442e-05, "loss": 1.0315, "step": 6157 }, { "epoch": 1.2518804635088432, "grad_norm": 0.1563825011253357, "learning_rate": 7.487033458761314e-05, "loss": 1.1096, "step": 6158 }, { "epoch": 1.2520837568611507, "grad_norm": 0.1456524133682251, "learning_rate": 7.484999491508187e-05, "loss": 1.0566, "step": 6159 }, { "epoch": 1.252287050213458, "grad_norm": 0.1605212390422821, "learning_rate": 7.48296552425506e-05, "loss": 1.1749, "step": 6160 }, { "epoch": 1.2524903435657655, "grad_norm": 0.15637800097465515, "learning_rate": 7.480931557001933e-05, "loss": 1.0765, "step": 6161 }, { "epoch": 1.2526936369180728, "grad_norm": 0.1442786306142807, "learning_rate": 7.478897589748805e-05, "loss": 0.9542, "step": 6162 }, { "epoch": 1.25289693027038, "grad_norm": 0.12865842878818512, "learning_rate": 7.476863622495678e-05, "loss": 0.8635, "step": 6163 }, { "epoch": 1.2531002236226876, "grad_norm": 0.14644062519073486, "learning_rate": 7.474829655242552e-05, "loss": 1.0271, "step": 6164 }, { "epoch": 1.2533035169749949, "grad_norm": 0.14869025349617004, "learning_rate": 7.472795687989424e-05, "loss": 1.0252, "step": 6165 }, { "epoch": 1.2535068103273024, "grad_norm": 0.1454823762178421, "learning_rate": 7.470761720736297e-05, "loss": 1.0052, "step": 6166 }, { "epoch": 1.2537101036796097, "grad_norm": 0.15685810148715973, "learning_rate": 7.468727753483169e-05, "loss": 1.0575, "step": 6167 }, { "epoch": 1.253913397031917, "grad_norm": 0.16611304879188538, "learning_rate": 7.466693786230043e-05, "loss": 1.242, "step": 6168 }, { "epoch": 1.2541166903842245, "grad_norm": 0.15010212361812592, "learning_rate": 7.464659818976915e-05, "loss": 1.0678, "step": 6169 }, { "epoch": 1.2543199837365318, "grad_norm": 0.14836570620536804, "learning_rate": 7.462625851723788e-05, "loss": 1.0183, "step": 6170 }, { "epoch": 1.2545232770888393, "grad_norm": 0.14831973612308502, "learning_rate": 7.46059188447066e-05, "loss": 0.9672, "step": 6171 }, { "epoch": 1.2547265704411465, "grad_norm": 0.15523511171340942, "learning_rate": 7.458557917217534e-05, "loss": 1.0253, "step": 6172 }, { "epoch": 1.2549298637934538, "grad_norm": 0.1539629101753235, "learning_rate": 7.456523949964406e-05, "loss": 0.9151, "step": 6173 }, { "epoch": 1.2551331571457613, "grad_norm": 0.17283432185649872, "learning_rate": 7.454489982711279e-05, "loss": 1.0992, "step": 6174 }, { "epoch": 1.2553364504980686, "grad_norm": 0.13191667199134827, "learning_rate": 7.452456015458151e-05, "loss": 0.8772, "step": 6175 }, { "epoch": 1.2555397438503761, "grad_norm": 0.16160672903060913, "learning_rate": 7.450422048205025e-05, "loss": 1.0877, "step": 6176 }, { "epoch": 1.2557430372026834, "grad_norm": 0.13833656907081604, "learning_rate": 7.448388080951897e-05, "loss": 0.9369, "step": 6177 }, { "epoch": 1.2559463305549907, "grad_norm": 0.1532142162322998, "learning_rate": 7.44635411369877e-05, "loss": 0.977, "step": 6178 }, { "epoch": 1.2561496239072982, "grad_norm": 0.14689487218856812, "learning_rate": 7.444320146445642e-05, "loss": 0.9764, "step": 6179 }, { "epoch": 1.2563529172596057, "grad_norm": 0.1418536752462387, "learning_rate": 7.442286179192516e-05, "loss": 1.0234, "step": 6180 }, { "epoch": 1.256556210611913, "grad_norm": 0.14861689507961273, "learning_rate": 7.440252211939389e-05, "loss": 1.07, "step": 6181 }, { "epoch": 1.2567595039642203, "grad_norm": 0.1540762186050415, "learning_rate": 7.438218244686261e-05, "loss": 1.0956, "step": 6182 }, { "epoch": 1.2569627973165278, "grad_norm": 0.16416653990745544, "learning_rate": 7.436184277433134e-05, "loss": 1.1411, "step": 6183 }, { "epoch": 1.2571660906688351, "grad_norm": 0.14933699369430542, "learning_rate": 7.434150310180007e-05, "loss": 1.008, "step": 6184 }, { "epoch": 1.2573693840211426, "grad_norm": 0.14696922898292542, "learning_rate": 7.43211634292688e-05, "loss": 0.9848, "step": 6185 }, { "epoch": 1.25757267737345, "grad_norm": 0.142256960272789, "learning_rate": 7.430082375673752e-05, "loss": 0.9146, "step": 6186 }, { "epoch": 1.2577759707257572, "grad_norm": 0.1598552167415619, "learning_rate": 7.428048408420625e-05, "loss": 1.1008, "step": 6187 }, { "epoch": 1.2579792640780647, "grad_norm": 0.1336602419614792, "learning_rate": 7.426014441167497e-05, "loss": 0.8115, "step": 6188 }, { "epoch": 1.258182557430372, "grad_norm": 0.1413356214761734, "learning_rate": 7.423980473914371e-05, "loss": 0.9462, "step": 6189 }, { "epoch": 1.2583858507826795, "grad_norm": 0.15689724683761597, "learning_rate": 7.421946506661242e-05, "loss": 1.0426, "step": 6190 }, { "epoch": 1.2585891441349868, "grad_norm": 0.14870372414588928, "learning_rate": 7.419912539408116e-05, "loss": 1.0125, "step": 6191 }, { "epoch": 1.258792437487294, "grad_norm": 0.1621605008840561, "learning_rate": 7.417878572154988e-05, "loss": 1.098, "step": 6192 }, { "epoch": 1.2589957308396016, "grad_norm": 0.1651010513305664, "learning_rate": 7.415844604901862e-05, "loss": 1.2495, "step": 6193 }, { "epoch": 1.2591990241919089, "grad_norm": 0.16045209765434265, "learning_rate": 7.413810637648733e-05, "loss": 1.0258, "step": 6194 }, { "epoch": 1.2594023175442164, "grad_norm": 0.1479748636484146, "learning_rate": 7.411776670395607e-05, "loss": 0.9255, "step": 6195 }, { "epoch": 1.2596056108965237, "grad_norm": 0.14603246748447418, "learning_rate": 7.40974270314248e-05, "loss": 0.9924, "step": 6196 }, { "epoch": 1.259808904248831, "grad_norm": 0.14301127195358276, "learning_rate": 7.407708735889353e-05, "loss": 0.9358, "step": 6197 }, { "epoch": 1.2600121976011385, "grad_norm": 0.14495980739593506, "learning_rate": 7.405674768636224e-05, "loss": 1.071, "step": 6198 }, { "epoch": 1.2602154909534458, "grad_norm": 0.14776700735092163, "learning_rate": 7.403640801383098e-05, "loss": 1.1025, "step": 6199 }, { "epoch": 1.2604187843057533, "grad_norm": 0.14632262289524078, "learning_rate": 7.40160683412997e-05, "loss": 1.027, "step": 6200 }, { "epoch": 1.2606220776580606, "grad_norm": 0.16701005399227142, "learning_rate": 7.399572866876844e-05, "loss": 1.2313, "step": 6201 }, { "epoch": 1.2608253710103678, "grad_norm": 0.15396632254123688, "learning_rate": 7.397538899623715e-05, "loss": 1.1548, "step": 6202 }, { "epoch": 1.2610286643626754, "grad_norm": 0.15287038683891296, "learning_rate": 7.395504932370589e-05, "loss": 1.0077, "step": 6203 }, { "epoch": 1.2612319577149826, "grad_norm": 0.12379388511180878, "learning_rate": 7.393470965117462e-05, "loss": 0.7992, "step": 6204 }, { "epoch": 1.2614352510672902, "grad_norm": 0.1505446881055832, "learning_rate": 7.391436997864335e-05, "loss": 0.9166, "step": 6205 }, { "epoch": 1.2616385444195974, "grad_norm": 0.14362283051013947, "learning_rate": 7.389403030611207e-05, "loss": 0.9532, "step": 6206 }, { "epoch": 1.2618418377719047, "grad_norm": 0.14815200865268707, "learning_rate": 7.38736906335808e-05, "loss": 1.0684, "step": 6207 }, { "epoch": 1.2620451311242122, "grad_norm": 0.15640223026275635, "learning_rate": 7.385335096104953e-05, "loss": 1.2277, "step": 6208 }, { "epoch": 1.2622484244765197, "grad_norm": 0.1339944303035736, "learning_rate": 7.383301128851827e-05, "loss": 0.9633, "step": 6209 }, { "epoch": 1.262451717828827, "grad_norm": 0.15692713856697083, "learning_rate": 7.381267161598698e-05, "loss": 0.9958, "step": 6210 }, { "epoch": 1.2626550111811343, "grad_norm": 0.14405904710292816, "learning_rate": 7.379233194345571e-05, "loss": 1.0001, "step": 6211 }, { "epoch": 1.2628583045334418, "grad_norm": 0.1682267040014267, "learning_rate": 7.377199227092444e-05, "loss": 1.1671, "step": 6212 }, { "epoch": 1.2630615978857491, "grad_norm": 0.1584296077489853, "learning_rate": 7.375165259839318e-05, "loss": 0.9898, "step": 6213 }, { "epoch": 1.2632648912380566, "grad_norm": 0.14520923793315887, "learning_rate": 7.373131292586189e-05, "loss": 1.0861, "step": 6214 }, { "epoch": 1.263468184590364, "grad_norm": 0.1591317057609558, "learning_rate": 7.371097325333063e-05, "loss": 1.0768, "step": 6215 }, { "epoch": 1.2636714779426712, "grad_norm": 0.12823057174682617, "learning_rate": 7.369063358079935e-05, "loss": 0.8455, "step": 6216 }, { "epoch": 1.2638747712949787, "grad_norm": 0.15325266122817993, "learning_rate": 7.367029390826809e-05, "loss": 0.938, "step": 6217 }, { "epoch": 1.264078064647286, "grad_norm": 0.1660403162240982, "learning_rate": 7.364995423573681e-05, "loss": 1.1529, "step": 6218 }, { "epoch": 1.2642813579995935, "grad_norm": 0.13842836022377014, "learning_rate": 7.362961456320554e-05, "loss": 0.9513, "step": 6219 }, { "epoch": 1.2644846513519008, "grad_norm": 0.13178154826164246, "learning_rate": 7.360927489067426e-05, "loss": 0.8728, "step": 6220 }, { "epoch": 1.264687944704208, "grad_norm": 0.1772044599056244, "learning_rate": 7.3588935218143e-05, "loss": 1.1937, "step": 6221 }, { "epoch": 1.2648912380565156, "grad_norm": 0.1507595181465149, "learning_rate": 7.356859554561172e-05, "loss": 0.9797, "step": 6222 }, { "epoch": 1.2650945314088229, "grad_norm": 0.16947486996650696, "learning_rate": 7.354825587308045e-05, "loss": 1.0339, "step": 6223 }, { "epoch": 1.2652978247611304, "grad_norm": 0.15474039316177368, "learning_rate": 7.352791620054917e-05, "loss": 0.9519, "step": 6224 }, { "epoch": 1.2655011181134377, "grad_norm": 0.14558108150959015, "learning_rate": 7.350757652801791e-05, "loss": 0.9138, "step": 6225 }, { "epoch": 1.265704411465745, "grad_norm": 0.14644969999790192, "learning_rate": 7.348723685548664e-05, "loss": 1.0068, "step": 6226 }, { "epoch": 1.2659077048180525, "grad_norm": 0.17006921768188477, "learning_rate": 7.346689718295536e-05, "loss": 1.0676, "step": 6227 }, { "epoch": 1.2661109981703598, "grad_norm": 0.16292281448841095, "learning_rate": 7.344655751042409e-05, "loss": 1.0636, "step": 6228 }, { "epoch": 1.2663142915226673, "grad_norm": 0.15299753844738007, "learning_rate": 7.342621783789282e-05, "loss": 1.1362, "step": 6229 }, { "epoch": 1.2665175848749746, "grad_norm": 0.14997923374176025, "learning_rate": 7.340587816536155e-05, "loss": 0.9706, "step": 6230 }, { "epoch": 1.2667208782272819, "grad_norm": 0.14713294804096222, "learning_rate": 7.338553849283026e-05, "loss": 0.9487, "step": 6231 }, { "epoch": 1.2669241715795894, "grad_norm": 0.1373048573732376, "learning_rate": 7.3365198820299e-05, "loss": 0.8719, "step": 6232 }, { "epoch": 1.2671274649318967, "grad_norm": 0.14754988253116608, "learning_rate": 7.334485914776772e-05, "loss": 1.0212, "step": 6233 }, { "epoch": 1.2673307582842042, "grad_norm": 0.15924742817878723, "learning_rate": 7.332451947523646e-05, "loss": 1.0609, "step": 6234 }, { "epoch": 1.2675340516365114, "grad_norm": 0.1677163541316986, "learning_rate": 7.330417980270517e-05, "loss": 1.1928, "step": 6235 }, { "epoch": 1.2677373449888187, "grad_norm": 0.1397400200366974, "learning_rate": 7.328384013017391e-05, "loss": 0.8587, "step": 6236 }, { "epoch": 1.2679406383411262, "grad_norm": 0.14857067167758942, "learning_rate": 7.326350045764263e-05, "loss": 0.9723, "step": 6237 }, { "epoch": 1.2681439316934338, "grad_norm": 0.15431994199752808, "learning_rate": 7.324316078511137e-05, "loss": 1.2133, "step": 6238 }, { "epoch": 1.268347225045741, "grad_norm": 0.14655791223049164, "learning_rate": 7.322282111258008e-05, "loss": 0.9676, "step": 6239 }, { "epoch": 1.2685505183980483, "grad_norm": 0.16156570613384247, "learning_rate": 7.320248144004882e-05, "loss": 1.1429, "step": 6240 }, { "epoch": 1.2687538117503558, "grad_norm": 0.1661580204963684, "learning_rate": 7.318214176751754e-05, "loss": 1.1261, "step": 6241 }, { "epoch": 1.2689571051026631, "grad_norm": 0.15335386991500854, "learning_rate": 7.316180209498628e-05, "loss": 0.9301, "step": 6242 }, { "epoch": 1.2691603984549706, "grad_norm": 0.17967920005321503, "learning_rate": 7.314146242245499e-05, "loss": 1.1584, "step": 6243 }, { "epoch": 1.269363691807278, "grad_norm": 0.15790143609046936, "learning_rate": 7.312112274992373e-05, "loss": 1.0314, "step": 6244 }, { "epoch": 1.2695669851595852, "grad_norm": 0.17167986929416656, "learning_rate": 7.310078307739246e-05, "loss": 1.0303, "step": 6245 }, { "epoch": 1.2697702785118927, "grad_norm": 0.13613730669021606, "learning_rate": 7.308044340486119e-05, "loss": 0.9468, "step": 6246 }, { "epoch": 1.2699735718642, "grad_norm": 0.15060873329639435, "learning_rate": 7.30601037323299e-05, "loss": 1.0411, "step": 6247 }, { "epoch": 1.2701768652165075, "grad_norm": 0.14653441309928894, "learning_rate": 7.303976405979864e-05, "loss": 0.9397, "step": 6248 }, { "epoch": 1.2703801585688148, "grad_norm": 0.17043618857860565, "learning_rate": 7.301942438726737e-05, "loss": 1.04, "step": 6249 }, { "epoch": 1.270583451921122, "grad_norm": 0.1663060486316681, "learning_rate": 7.29990847147361e-05, "loss": 1.1406, "step": 6250 }, { "epoch": 1.2707867452734296, "grad_norm": 0.1324852705001831, "learning_rate": 7.297874504220482e-05, "loss": 0.8765, "step": 6251 }, { "epoch": 1.270990038625737, "grad_norm": 0.14403606951236725, "learning_rate": 7.295840536967355e-05, "loss": 0.8504, "step": 6252 }, { "epoch": 1.2711933319780444, "grad_norm": 0.1579236090183258, "learning_rate": 7.293806569714228e-05, "loss": 0.9858, "step": 6253 }, { "epoch": 1.2713966253303517, "grad_norm": 0.14210145175457, "learning_rate": 7.291772602461102e-05, "loss": 0.9863, "step": 6254 }, { "epoch": 1.271599918682659, "grad_norm": 0.1411530077457428, "learning_rate": 7.289738635207973e-05, "loss": 0.9379, "step": 6255 }, { "epoch": 1.2718032120349665, "grad_norm": 0.13940487802028656, "learning_rate": 7.287704667954846e-05, "loss": 0.9373, "step": 6256 }, { "epoch": 1.2720065053872738, "grad_norm": 0.16028715670108795, "learning_rate": 7.285670700701719e-05, "loss": 1.1488, "step": 6257 }, { "epoch": 1.2722097987395813, "grad_norm": 0.15219241380691528, "learning_rate": 7.283636733448593e-05, "loss": 0.9849, "step": 6258 }, { "epoch": 1.2724130920918886, "grad_norm": 0.14046461880207062, "learning_rate": 7.281602766195464e-05, "loss": 0.9298, "step": 6259 }, { "epoch": 1.2726163854441959, "grad_norm": 0.14675304293632507, "learning_rate": 7.279568798942338e-05, "loss": 1.0084, "step": 6260 }, { "epoch": 1.2728196787965034, "grad_norm": 0.13606025278568268, "learning_rate": 7.27753483168921e-05, "loss": 0.7989, "step": 6261 }, { "epoch": 1.2730229721488107, "grad_norm": 0.156595841050148, "learning_rate": 7.275500864436084e-05, "loss": 0.9788, "step": 6262 }, { "epoch": 1.2732262655011182, "grad_norm": 0.15707367658615112, "learning_rate": 7.273466897182955e-05, "loss": 1.047, "step": 6263 }, { "epoch": 1.2734295588534255, "grad_norm": 0.15577323734760284, "learning_rate": 7.271432929929829e-05, "loss": 1.0125, "step": 6264 }, { "epoch": 1.2736328522057327, "grad_norm": 0.16746199131011963, "learning_rate": 7.269398962676701e-05, "loss": 1.0778, "step": 6265 }, { "epoch": 1.2738361455580403, "grad_norm": 0.1447450965642929, "learning_rate": 7.267364995423575e-05, "loss": 0.9396, "step": 6266 }, { "epoch": 1.2740394389103478, "grad_norm": 0.13501743972301483, "learning_rate": 7.265331028170446e-05, "loss": 0.8996, "step": 6267 }, { "epoch": 1.274242732262655, "grad_norm": 0.15393932163715363, "learning_rate": 7.26329706091732e-05, "loss": 1.0001, "step": 6268 }, { "epoch": 1.2744460256149623, "grad_norm": 0.1617959439754486, "learning_rate": 7.261263093664192e-05, "loss": 1.0903, "step": 6269 }, { "epoch": 1.2746493189672699, "grad_norm": 0.15818221867084503, "learning_rate": 7.259229126411066e-05, "loss": 1.1434, "step": 6270 }, { "epoch": 1.2748526123195771, "grad_norm": 0.15312804281711578, "learning_rate": 7.257195159157937e-05, "loss": 0.9829, "step": 6271 }, { "epoch": 1.2750559056718846, "grad_norm": 0.1665237545967102, "learning_rate": 7.25516119190481e-05, "loss": 1.1018, "step": 6272 }, { "epoch": 1.275259199024192, "grad_norm": 0.14515459537506104, "learning_rate": 7.253127224651683e-05, "loss": 0.9767, "step": 6273 }, { "epoch": 1.2754624923764992, "grad_norm": 0.14783386886119843, "learning_rate": 7.251093257398556e-05, "loss": 0.9554, "step": 6274 }, { "epoch": 1.2756657857288067, "grad_norm": 0.15919606387615204, "learning_rate": 7.24905929014543e-05, "loss": 1.1356, "step": 6275 }, { "epoch": 1.275869079081114, "grad_norm": 0.1607399433851242, "learning_rate": 7.247025322892301e-05, "loss": 1.0811, "step": 6276 }, { "epoch": 1.2760723724334215, "grad_norm": 0.14537842571735382, "learning_rate": 7.244991355639175e-05, "loss": 0.9406, "step": 6277 }, { "epoch": 1.2762756657857288, "grad_norm": 0.16683556139469147, "learning_rate": 7.242957388386047e-05, "loss": 1.1229, "step": 6278 }, { "epoch": 1.276478959138036, "grad_norm": 0.15231481194496155, "learning_rate": 7.240923421132921e-05, "loss": 1.0703, "step": 6279 }, { "epoch": 1.2766822524903436, "grad_norm": 0.1400855928659439, "learning_rate": 7.238889453879792e-05, "loss": 0.9923, "step": 6280 }, { "epoch": 1.276885545842651, "grad_norm": 0.14501504600048065, "learning_rate": 7.236855486626666e-05, "loss": 0.8628, "step": 6281 }, { "epoch": 1.2770888391949584, "grad_norm": 0.14576661586761475, "learning_rate": 7.234821519373538e-05, "loss": 1.0154, "step": 6282 }, { "epoch": 1.2772921325472657, "grad_norm": 0.1263686865568161, "learning_rate": 7.232787552120412e-05, "loss": 0.8553, "step": 6283 }, { "epoch": 1.277495425899573, "grad_norm": 0.13566042482852936, "learning_rate": 7.230753584867283e-05, "loss": 0.8437, "step": 6284 }, { "epoch": 1.2776987192518805, "grad_norm": 0.14391617476940155, "learning_rate": 7.228719617614157e-05, "loss": 0.9109, "step": 6285 }, { "epoch": 1.2779020126041878, "grad_norm": 0.1513233482837677, "learning_rate": 7.22668565036103e-05, "loss": 1.0576, "step": 6286 }, { "epoch": 1.2781053059564953, "grad_norm": 0.16668738424777985, "learning_rate": 7.224651683107903e-05, "loss": 1.1339, "step": 6287 }, { "epoch": 1.2783085993088026, "grad_norm": 0.1570490002632141, "learning_rate": 7.222617715854774e-05, "loss": 1.0722, "step": 6288 }, { "epoch": 1.2785118926611099, "grad_norm": 0.14549487829208374, "learning_rate": 7.220583748601648e-05, "loss": 1.0256, "step": 6289 }, { "epoch": 1.2787151860134174, "grad_norm": 0.15154211223125458, "learning_rate": 7.21854978134852e-05, "loss": 0.9687, "step": 6290 }, { "epoch": 1.2789184793657247, "grad_norm": 0.14945222437381744, "learning_rate": 7.216515814095394e-05, "loss": 1.0218, "step": 6291 }, { "epoch": 1.2791217727180322, "grad_norm": 0.14871571958065033, "learning_rate": 7.214481846842265e-05, "loss": 1.057, "step": 6292 }, { "epoch": 1.2793250660703395, "grad_norm": 0.1632535755634308, "learning_rate": 7.212447879589139e-05, "loss": 1.1433, "step": 6293 }, { "epoch": 1.2795283594226468, "grad_norm": 0.1654619425535202, "learning_rate": 7.210413912336012e-05, "loss": 1.0672, "step": 6294 }, { "epoch": 1.2797316527749543, "grad_norm": 0.14849208295345306, "learning_rate": 7.208379945082885e-05, "loss": 0.9965, "step": 6295 }, { "epoch": 1.2799349461272618, "grad_norm": 0.141541987657547, "learning_rate": 7.206345977829757e-05, "loss": 0.9438, "step": 6296 }, { "epoch": 1.280138239479569, "grad_norm": 0.13782508671283722, "learning_rate": 7.20431201057663e-05, "loss": 0.8736, "step": 6297 }, { "epoch": 1.2803415328318763, "grad_norm": 0.14343823492527008, "learning_rate": 7.202278043323503e-05, "loss": 0.9404, "step": 6298 }, { "epoch": 1.2805448261841839, "grad_norm": 0.16715972125530243, "learning_rate": 7.200244076070377e-05, "loss": 0.998, "step": 6299 }, { "epoch": 1.2807481195364911, "grad_norm": 0.14622175693511963, "learning_rate": 7.198210108817248e-05, "loss": 0.9514, "step": 6300 }, { "epoch": 1.2809514128887987, "grad_norm": 0.13806544244289398, "learning_rate": 7.196176141564121e-05, "loss": 0.8665, "step": 6301 }, { "epoch": 1.281154706241106, "grad_norm": 0.1409773975610733, "learning_rate": 7.194142174310994e-05, "loss": 0.8885, "step": 6302 }, { "epoch": 1.2813579995934132, "grad_norm": 0.1428183764219284, "learning_rate": 7.192108207057868e-05, "loss": 1.0111, "step": 6303 }, { "epoch": 1.2815612929457207, "grad_norm": 0.1585017442703247, "learning_rate": 7.190074239804739e-05, "loss": 1.1361, "step": 6304 }, { "epoch": 1.281764586298028, "grad_norm": 0.15969137847423553, "learning_rate": 7.188040272551613e-05, "loss": 1.104, "step": 6305 }, { "epoch": 1.2819678796503355, "grad_norm": 0.13520383834838867, "learning_rate": 7.186006305298485e-05, "loss": 0.8027, "step": 6306 }, { "epoch": 1.2821711730026428, "grad_norm": 0.17720559239387512, "learning_rate": 7.183972338045359e-05, "loss": 1.1919, "step": 6307 }, { "epoch": 1.2823744663549501, "grad_norm": 0.15073487162590027, "learning_rate": 7.18193837079223e-05, "loss": 0.9968, "step": 6308 }, { "epoch": 1.2825777597072576, "grad_norm": 0.15003164112567902, "learning_rate": 7.179904403539104e-05, "loss": 0.9668, "step": 6309 }, { "epoch": 1.282781053059565, "grad_norm": 0.17337696254253387, "learning_rate": 7.177870436285976e-05, "loss": 1.2393, "step": 6310 }, { "epoch": 1.2829843464118724, "grad_norm": 0.14511317014694214, "learning_rate": 7.17583646903285e-05, "loss": 1.0078, "step": 6311 }, { "epoch": 1.2831876397641797, "grad_norm": 0.15251484513282776, "learning_rate": 7.173802501779721e-05, "loss": 1.0664, "step": 6312 }, { "epoch": 1.283390933116487, "grad_norm": 0.14497815072536469, "learning_rate": 7.171768534526594e-05, "loss": 0.9364, "step": 6313 }, { "epoch": 1.2835942264687945, "grad_norm": 0.15960972011089325, "learning_rate": 7.169734567273467e-05, "loss": 1.1536, "step": 6314 }, { "epoch": 1.2837975198211018, "grad_norm": 0.1687343269586563, "learning_rate": 7.16770060002034e-05, "loss": 1.168, "step": 6315 }, { "epoch": 1.2840008131734093, "grad_norm": 0.14428341388702393, "learning_rate": 7.165666632767212e-05, "loss": 0.8795, "step": 6316 }, { "epoch": 1.2842041065257166, "grad_norm": 0.15656180679798126, "learning_rate": 7.163632665514085e-05, "loss": 0.9864, "step": 6317 }, { "epoch": 1.2844073998780239, "grad_norm": 0.14219792187213898, "learning_rate": 7.161598698260958e-05, "loss": 1.0347, "step": 6318 }, { "epoch": 1.2846106932303314, "grad_norm": 0.1517137736082077, "learning_rate": 7.159564731007831e-05, "loss": 1.1361, "step": 6319 }, { "epoch": 1.2848139865826387, "grad_norm": 0.16079369187355042, "learning_rate": 7.157530763754703e-05, "loss": 1.0762, "step": 6320 }, { "epoch": 1.2850172799349462, "grad_norm": 0.1493234634399414, "learning_rate": 7.155496796501576e-05, "loss": 0.8898, "step": 6321 }, { "epoch": 1.2852205732872535, "grad_norm": 0.16232939064502716, "learning_rate": 7.15346282924845e-05, "loss": 0.8951, "step": 6322 }, { "epoch": 1.2854238666395608, "grad_norm": 0.1398724615573883, "learning_rate": 7.151428861995322e-05, "loss": 0.8304, "step": 6323 }, { "epoch": 1.2856271599918683, "grad_norm": 0.13646571338176727, "learning_rate": 7.149394894742195e-05, "loss": 0.8362, "step": 6324 }, { "epoch": 1.2858304533441758, "grad_norm": 0.14222213625907898, "learning_rate": 7.147360927489067e-05, "loss": 1.0308, "step": 6325 }, { "epoch": 1.286033746696483, "grad_norm": 0.1417984813451767, "learning_rate": 7.145326960235941e-05, "loss": 0.9775, "step": 6326 }, { "epoch": 1.2862370400487904, "grad_norm": 0.1756143420934677, "learning_rate": 7.143292992982813e-05, "loss": 1.041, "step": 6327 }, { "epoch": 1.2864403334010976, "grad_norm": 0.15248575806617737, "learning_rate": 7.141259025729686e-05, "loss": 0.9245, "step": 6328 }, { "epoch": 1.2866436267534052, "grad_norm": 0.14683492481708527, "learning_rate": 7.139225058476558e-05, "loss": 0.957, "step": 6329 }, { "epoch": 1.2868469201057127, "grad_norm": 0.16053462028503418, "learning_rate": 7.137191091223432e-05, "loss": 1.1298, "step": 6330 }, { "epoch": 1.28705021345802, "grad_norm": 0.16956062614917755, "learning_rate": 7.135157123970304e-05, "loss": 1.0127, "step": 6331 }, { "epoch": 1.2872535068103272, "grad_norm": 0.14983762800693512, "learning_rate": 7.133123156717177e-05, "loss": 0.9301, "step": 6332 }, { "epoch": 1.2874568001626348, "grad_norm": 0.15941564738750458, "learning_rate": 7.131089189464049e-05, "loss": 1.1836, "step": 6333 }, { "epoch": 1.287660093514942, "grad_norm": 0.15132249891757965, "learning_rate": 7.129055222210923e-05, "loss": 1.0463, "step": 6334 }, { "epoch": 1.2878633868672495, "grad_norm": 0.15944363176822662, "learning_rate": 7.127021254957795e-05, "loss": 0.9344, "step": 6335 }, { "epoch": 1.2880666802195568, "grad_norm": 0.15733520686626434, "learning_rate": 7.124987287704669e-05, "loss": 1.0877, "step": 6336 }, { "epoch": 1.2882699735718641, "grad_norm": 0.1465538740158081, "learning_rate": 7.12295332045154e-05, "loss": 0.8985, "step": 6337 }, { "epoch": 1.2884732669241716, "grad_norm": 0.16172672808170319, "learning_rate": 7.120919353198414e-05, "loss": 0.9402, "step": 6338 }, { "epoch": 1.288676560276479, "grad_norm": 0.14572155475616455, "learning_rate": 7.118885385945287e-05, "loss": 0.9993, "step": 6339 }, { "epoch": 1.2888798536287864, "grad_norm": 0.15885762870311737, "learning_rate": 7.11685141869216e-05, "loss": 1.1093, "step": 6340 }, { "epoch": 1.2890831469810937, "grad_norm": 0.15180082619190216, "learning_rate": 7.114817451439032e-05, "loss": 1.0909, "step": 6341 }, { "epoch": 1.289286440333401, "grad_norm": 0.13964922726154327, "learning_rate": 7.112783484185905e-05, "loss": 0.8004, "step": 6342 }, { "epoch": 1.2894897336857085, "grad_norm": 0.16804420948028564, "learning_rate": 7.110749516932778e-05, "loss": 0.9914, "step": 6343 }, { "epoch": 1.2896930270380158, "grad_norm": 0.16537639498710632, "learning_rate": 7.108715549679652e-05, "loss": 1.1608, "step": 6344 }, { "epoch": 1.2898963203903233, "grad_norm": 0.14818671345710754, "learning_rate": 7.106681582426523e-05, "loss": 0.9843, "step": 6345 }, { "epoch": 1.2900996137426306, "grad_norm": 0.17077666521072388, "learning_rate": 7.104647615173396e-05, "loss": 1.2554, "step": 6346 }, { "epoch": 1.290302907094938, "grad_norm": 0.1623799055814743, "learning_rate": 7.102613647920269e-05, "loss": 1.0618, "step": 6347 }, { "epoch": 1.2905062004472454, "grad_norm": 0.1475660651922226, "learning_rate": 7.100579680667143e-05, "loss": 1.0486, "step": 6348 }, { "epoch": 1.2907094937995527, "grad_norm": 0.14870400726795197, "learning_rate": 7.098545713414014e-05, "loss": 0.9235, "step": 6349 }, { "epoch": 1.2909127871518602, "grad_norm": 0.1594904661178589, "learning_rate": 7.096511746160888e-05, "loss": 1.097, "step": 6350 }, { "epoch": 1.2911160805041675, "grad_norm": 0.13609673082828522, "learning_rate": 7.09447777890776e-05, "loss": 0.8692, "step": 6351 }, { "epoch": 1.2913193738564748, "grad_norm": 0.16340984404087067, "learning_rate": 7.092443811654634e-05, "loss": 1.1089, "step": 6352 }, { "epoch": 1.2915226672087823, "grad_norm": 0.13281512260437012, "learning_rate": 7.090409844401505e-05, "loss": 0.8938, "step": 6353 }, { "epoch": 1.2917259605610896, "grad_norm": 0.16493913531303406, "learning_rate": 7.088375877148377e-05, "loss": 1.0575, "step": 6354 }, { "epoch": 1.291929253913397, "grad_norm": 0.14465326070785522, "learning_rate": 7.086341909895251e-05, "loss": 0.938, "step": 6355 }, { "epoch": 1.2921325472657044, "grad_norm": 0.16361325979232788, "learning_rate": 7.084307942642124e-05, "loss": 1.0142, "step": 6356 }, { "epoch": 1.2923358406180117, "grad_norm": 0.14788828790187836, "learning_rate": 7.082273975388996e-05, "loss": 1.0733, "step": 6357 }, { "epoch": 1.2925391339703192, "grad_norm": 0.15104612708091736, "learning_rate": 7.080240008135869e-05, "loss": 0.8354, "step": 6358 }, { "epoch": 1.2927424273226267, "grad_norm": 0.1521586775779724, "learning_rate": 7.078206040882742e-05, "loss": 0.967, "step": 6359 }, { "epoch": 1.292945720674934, "grad_norm": 0.1353432983160019, "learning_rate": 7.076172073629615e-05, "loss": 0.8673, "step": 6360 }, { "epoch": 1.2931490140272413, "grad_norm": 0.14922721683979034, "learning_rate": 7.074138106376487e-05, "loss": 0.9632, "step": 6361 }, { "epoch": 1.2933523073795488, "grad_norm": 0.16160540282726288, "learning_rate": 7.07210413912336e-05, "loss": 1.0371, "step": 6362 }, { "epoch": 1.293555600731856, "grad_norm": 0.1590534895658493, "learning_rate": 7.070070171870233e-05, "loss": 1.1161, "step": 6363 }, { "epoch": 1.2937588940841636, "grad_norm": 0.15857507288455963, "learning_rate": 7.068036204617106e-05, "loss": 1.0829, "step": 6364 }, { "epoch": 1.2939621874364708, "grad_norm": 0.15210936963558197, "learning_rate": 7.066002237363978e-05, "loss": 0.9997, "step": 6365 }, { "epoch": 1.2941654807887781, "grad_norm": 0.17027819156646729, "learning_rate": 7.063968270110851e-05, "loss": 1.1079, "step": 6366 }, { "epoch": 1.2943687741410856, "grad_norm": 0.15695655345916748, "learning_rate": 7.061934302857725e-05, "loss": 1.0675, "step": 6367 }, { "epoch": 1.294572067493393, "grad_norm": 0.15736332535743713, "learning_rate": 7.059900335604597e-05, "loss": 1.0686, "step": 6368 }, { "epoch": 1.2947753608457004, "grad_norm": 0.15472574532032013, "learning_rate": 7.05786636835147e-05, "loss": 1.0212, "step": 6369 }, { "epoch": 1.2949786541980077, "grad_norm": 0.17373047769069672, "learning_rate": 7.055832401098342e-05, "loss": 1.133, "step": 6370 }, { "epoch": 1.295181947550315, "grad_norm": 0.17195232212543488, "learning_rate": 7.053798433845216e-05, "loss": 1.1432, "step": 6371 }, { "epoch": 1.2953852409026225, "grad_norm": 0.14202889800071716, "learning_rate": 7.051764466592088e-05, "loss": 0.934, "step": 6372 }, { "epoch": 1.2955885342549298, "grad_norm": 0.1472562849521637, "learning_rate": 7.04973049933896e-05, "loss": 0.8657, "step": 6373 }, { "epoch": 1.2957918276072373, "grad_norm": 0.1462807059288025, "learning_rate": 7.047696532085833e-05, "loss": 1.0438, "step": 6374 }, { "epoch": 1.2959951209595446, "grad_norm": 0.16334788501262665, "learning_rate": 7.045662564832707e-05, "loss": 1.084, "step": 6375 }, { "epoch": 1.296198414311852, "grad_norm": 0.14090010523796082, "learning_rate": 7.043628597579579e-05, "loss": 0.9945, "step": 6376 }, { "epoch": 1.2964017076641594, "grad_norm": 0.13263094425201416, "learning_rate": 7.041594630326452e-05, "loss": 0.8053, "step": 6377 }, { "epoch": 1.2966050010164667, "grad_norm": 0.12848879396915436, "learning_rate": 7.039560663073324e-05, "loss": 0.8986, "step": 6378 }, { "epoch": 1.2968082943687742, "grad_norm": 0.16026438772678375, "learning_rate": 7.037526695820198e-05, "loss": 0.9383, "step": 6379 }, { "epoch": 1.2970115877210815, "grad_norm": 0.14478953182697296, "learning_rate": 7.03549272856707e-05, "loss": 0.9431, "step": 6380 }, { "epoch": 1.2972148810733888, "grad_norm": 0.16707251965999603, "learning_rate": 7.033458761313943e-05, "loss": 1.0886, "step": 6381 }, { "epoch": 1.2974181744256963, "grad_norm": 0.12309828400611877, "learning_rate": 7.031424794060815e-05, "loss": 0.8204, "step": 6382 }, { "epoch": 1.2976214677780036, "grad_norm": 0.17878857254981995, "learning_rate": 7.029390826807689e-05, "loss": 1.1639, "step": 6383 }, { "epoch": 1.297824761130311, "grad_norm": 0.15972448885440826, "learning_rate": 7.027356859554562e-05, "loss": 0.9845, "step": 6384 }, { "epoch": 1.2980280544826184, "grad_norm": 0.15551124513149261, "learning_rate": 7.025322892301434e-05, "loss": 0.9243, "step": 6385 }, { "epoch": 1.2982313478349257, "grad_norm": 0.1571023315191269, "learning_rate": 7.023288925048306e-05, "loss": 1.0748, "step": 6386 }, { "epoch": 1.2984346411872332, "grad_norm": 0.14938150346279144, "learning_rate": 7.02125495779518e-05, "loss": 1.0755, "step": 6387 }, { "epoch": 1.2986379345395407, "grad_norm": 0.14772380888462067, "learning_rate": 7.019220990542053e-05, "loss": 1.0526, "step": 6388 }, { "epoch": 1.298841227891848, "grad_norm": 0.16789360344409943, "learning_rate": 7.017187023288925e-05, "loss": 1.2418, "step": 6389 }, { "epoch": 1.2990445212441553, "grad_norm": 0.1547492891550064, "learning_rate": 7.015153056035798e-05, "loss": 1.2118, "step": 6390 }, { "epoch": 1.2992478145964628, "grad_norm": 0.16406840085983276, "learning_rate": 7.013119088782671e-05, "loss": 1.1584, "step": 6391 }, { "epoch": 1.29945110794877, "grad_norm": 0.15339142084121704, "learning_rate": 7.011085121529544e-05, "loss": 1.0863, "step": 6392 }, { "epoch": 1.2996544013010776, "grad_norm": 0.151136577129364, "learning_rate": 7.009051154276418e-05, "loss": 1.152, "step": 6393 }, { "epoch": 1.2998576946533849, "grad_norm": 0.16153016686439514, "learning_rate": 7.007017187023289e-05, "loss": 1.0774, "step": 6394 }, { "epoch": 1.3000609880056921, "grad_norm": 0.15656860172748566, "learning_rate": 7.004983219770163e-05, "loss": 1.0618, "step": 6395 }, { "epoch": 1.3002642813579997, "grad_norm": 0.1635272204875946, "learning_rate": 7.002949252517035e-05, "loss": 1.0452, "step": 6396 }, { "epoch": 1.300467574710307, "grad_norm": 0.14776968955993652, "learning_rate": 7.000915285263907e-05, "loss": 0.8822, "step": 6397 }, { "epoch": 1.3006708680626144, "grad_norm": 0.14170758426189423, "learning_rate": 6.99888131801078e-05, "loss": 1.0016, "step": 6398 }, { "epoch": 1.3008741614149217, "grad_norm": 0.14958837628364563, "learning_rate": 6.996847350757652e-05, "loss": 1.1093, "step": 6399 }, { "epoch": 1.301077454767229, "grad_norm": 0.1530427783727646, "learning_rate": 6.994813383504526e-05, "loss": 0.9991, "step": 6400 }, { "epoch": 1.3012807481195365, "grad_norm": 0.1651686280965805, "learning_rate": 6.992779416251399e-05, "loss": 1.0096, "step": 6401 }, { "epoch": 1.3014840414718438, "grad_norm": 0.14131589233875275, "learning_rate": 6.990745448998271e-05, "loss": 0.9807, "step": 6402 }, { "epoch": 1.3016873348241513, "grad_norm": 0.16334564983844757, "learning_rate": 6.988711481745143e-05, "loss": 0.9987, "step": 6403 }, { "epoch": 1.3018906281764586, "grad_norm": 0.16743913292884827, "learning_rate": 6.986677514492017e-05, "loss": 1.1994, "step": 6404 }, { "epoch": 1.302093921528766, "grad_norm": 0.15577523410320282, "learning_rate": 6.98464354723889e-05, "loss": 0.9693, "step": 6405 }, { "epoch": 1.3022972148810734, "grad_norm": 0.16516022384166718, "learning_rate": 6.982609579985762e-05, "loss": 1.102, "step": 6406 }, { "epoch": 1.3025005082333807, "grad_norm": 0.13962024450302124, "learning_rate": 6.980575612732635e-05, "loss": 0.885, "step": 6407 }, { "epoch": 1.3027038015856882, "grad_norm": 0.1331530511379242, "learning_rate": 6.978541645479508e-05, "loss": 0.9392, "step": 6408 }, { "epoch": 1.3029070949379955, "grad_norm": 0.15082453191280365, "learning_rate": 6.976507678226381e-05, "loss": 0.9806, "step": 6409 }, { "epoch": 1.3031103882903028, "grad_norm": 0.15130026638507843, "learning_rate": 6.974473710973253e-05, "loss": 0.9098, "step": 6410 }, { "epoch": 1.3033136816426103, "grad_norm": 0.1474311500787735, "learning_rate": 6.972439743720126e-05, "loss": 0.9963, "step": 6411 }, { "epoch": 1.3035169749949176, "grad_norm": 0.143365278840065, "learning_rate": 6.970405776467e-05, "loss": 0.9287, "step": 6412 }, { "epoch": 1.303720268347225, "grad_norm": 0.1591702550649643, "learning_rate": 6.968371809213872e-05, "loss": 1.0042, "step": 6413 }, { "epoch": 1.3039235616995324, "grad_norm": 0.14767907559871674, "learning_rate": 6.966337841960744e-05, "loss": 1.0521, "step": 6414 }, { "epoch": 1.3041268550518397, "grad_norm": 0.165874645113945, "learning_rate": 6.964303874707617e-05, "loss": 1.0874, "step": 6415 }, { "epoch": 1.3043301484041472, "grad_norm": 0.137615367770195, "learning_rate": 6.962269907454491e-05, "loss": 0.8331, "step": 6416 }, { "epoch": 1.3045334417564547, "grad_norm": 0.15427331626415253, "learning_rate": 6.960235940201363e-05, "loss": 1.0074, "step": 6417 }, { "epoch": 1.304736735108762, "grad_norm": 0.1635177880525589, "learning_rate": 6.958201972948236e-05, "loss": 1.1218, "step": 6418 }, { "epoch": 1.3049400284610693, "grad_norm": 0.15335600078105927, "learning_rate": 6.956168005695108e-05, "loss": 1.0503, "step": 6419 }, { "epoch": 1.3051433218133768, "grad_norm": 0.1703837513923645, "learning_rate": 6.954134038441982e-05, "loss": 1.1265, "step": 6420 }, { "epoch": 1.305346615165684, "grad_norm": 0.1549668163061142, "learning_rate": 6.952100071188854e-05, "loss": 0.9123, "step": 6421 }, { "epoch": 1.3055499085179916, "grad_norm": 0.17017358541488647, "learning_rate": 6.950066103935727e-05, "loss": 1.1507, "step": 6422 }, { "epoch": 1.3057532018702989, "grad_norm": 0.1454373002052307, "learning_rate": 6.948032136682599e-05, "loss": 0.9948, "step": 6423 }, { "epoch": 1.3059564952226062, "grad_norm": 0.16227105259895325, "learning_rate": 6.945998169429473e-05, "loss": 1.183, "step": 6424 }, { "epoch": 1.3061597885749137, "grad_norm": 0.1732262670993805, "learning_rate": 6.943964202176345e-05, "loss": 1.2444, "step": 6425 }, { "epoch": 1.306363081927221, "grad_norm": 0.1399673968553543, "learning_rate": 6.941930234923218e-05, "loss": 0.9059, "step": 6426 }, { "epoch": 1.3065663752795285, "grad_norm": 0.15425364673137665, "learning_rate": 6.93989626767009e-05, "loss": 1.0113, "step": 6427 }, { "epoch": 1.3067696686318357, "grad_norm": 0.16954706609249115, "learning_rate": 6.937862300416964e-05, "loss": 1.0581, "step": 6428 }, { "epoch": 1.306972961984143, "grad_norm": 0.17263741791248322, "learning_rate": 6.935828333163837e-05, "loss": 1.2699, "step": 6429 }, { "epoch": 1.3071762553364505, "grad_norm": 0.1669914275407791, "learning_rate": 6.933794365910709e-05, "loss": 1.0944, "step": 6430 }, { "epoch": 1.3073795486887578, "grad_norm": 0.1626417487859726, "learning_rate": 6.931760398657581e-05, "loss": 1.0985, "step": 6431 }, { "epoch": 1.3075828420410653, "grad_norm": 0.15837189555168152, "learning_rate": 6.929726431404455e-05, "loss": 0.9834, "step": 6432 }, { "epoch": 1.3077861353933726, "grad_norm": 0.14864079654216766, "learning_rate": 6.927692464151328e-05, "loss": 1.0147, "step": 6433 }, { "epoch": 1.30798942874568, "grad_norm": 0.13315309584140778, "learning_rate": 6.9256584968982e-05, "loss": 0.9718, "step": 6434 }, { "epoch": 1.3081927220979874, "grad_norm": 0.14859481155872345, "learning_rate": 6.923624529645073e-05, "loss": 0.9433, "step": 6435 }, { "epoch": 1.3083960154502947, "grad_norm": 0.1559152603149414, "learning_rate": 6.921590562391946e-05, "loss": 1.0366, "step": 6436 }, { "epoch": 1.3085993088026022, "grad_norm": 0.1377761960029602, "learning_rate": 6.919556595138819e-05, "loss": 0.8622, "step": 6437 }, { "epoch": 1.3088026021549095, "grad_norm": 0.15666405856609344, "learning_rate": 6.917522627885691e-05, "loss": 1.0464, "step": 6438 }, { "epoch": 1.3090058955072168, "grad_norm": 0.1416643261909485, "learning_rate": 6.915488660632564e-05, "loss": 1.0275, "step": 6439 }, { "epoch": 1.3092091888595243, "grad_norm": 0.15593840181827545, "learning_rate": 6.913454693379436e-05, "loss": 0.9657, "step": 6440 }, { "epoch": 1.3094124822118316, "grad_norm": 0.1602342128753662, "learning_rate": 6.91142072612631e-05, "loss": 1.0536, "step": 6441 }, { "epoch": 1.309615775564139, "grad_norm": 0.14962245523929596, "learning_rate": 6.909386758873182e-05, "loss": 1.0307, "step": 6442 }, { "epoch": 1.3098190689164464, "grad_norm": 0.1534949541091919, "learning_rate": 6.907352791620055e-05, "loss": 1.0847, "step": 6443 }, { "epoch": 1.3100223622687537, "grad_norm": 0.1267688274383545, "learning_rate": 6.905318824366927e-05, "loss": 0.8466, "step": 6444 }, { "epoch": 1.3102256556210612, "grad_norm": 0.14033158123493195, "learning_rate": 6.903284857113801e-05, "loss": 0.9265, "step": 6445 }, { "epoch": 1.3104289489733687, "grad_norm": 0.15192466974258423, "learning_rate": 6.901250889860674e-05, "loss": 0.931, "step": 6446 }, { "epoch": 1.310632242325676, "grad_norm": 0.15281356871128082, "learning_rate": 6.899216922607546e-05, "loss": 1.0464, "step": 6447 }, { "epoch": 1.3108355356779833, "grad_norm": 0.15561819076538086, "learning_rate": 6.897182955354418e-05, "loss": 1.0109, "step": 6448 }, { "epoch": 1.3110388290302908, "grad_norm": 0.13406097888946533, "learning_rate": 6.895148988101292e-05, "loss": 0.828, "step": 6449 }, { "epoch": 1.311242122382598, "grad_norm": 0.13384568691253662, "learning_rate": 6.893115020848165e-05, "loss": 0.9232, "step": 6450 }, { "epoch": 1.3114454157349056, "grad_norm": 0.16319598257541656, "learning_rate": 6.891081053595037e-05, "loss": 1.0027, "step": 6451 }, { "epoch": 1.3116487090872129, "grad_norm": 0.15781641006469727, "learning_rate": 6.88904708634191e-05, "loss": 1.1818, "step": 6452 }, { "epoch": 1.3118520024395202, "grad_norm": 0.16732369363307953, "learning_rate": 6.887013119088783e-05, "loss": 1.0994, "step": 6453 }, { "epoch": 1.3120552957918277, "grad_norm": 0.14292466640472412, "learning_rate": 6.884979151835656e-05, "loss": 0.8309, "step": 6454 }, { "epoch": 1.312258589144135, "grad_norm": 0.15096823871135712, "learning_rate": 6.882945184582528e-05, "loss": 0.9477, "step": 6455 }, { "epoch": 1.3124618824964425, "grad_norm": 0.14904417097568512, "learning_rate": 6.880911217329401e-05, "loss": 1.0092, "step": 6456 }, { "epoch": 1.3126651758487498, "grad_norm": 0.1489609032869339, "learning_rate": 6.878877250076275e-05, "loss": 0.9442, "step": 6457 }, { "epoch": 1.312868469201057, "grad_norm": 0.14513355493545532, "learning_rate": 6.876843282823147e-05, "loss": 0.8775, "step": 6458 }, { "epoch": 1.3130717625533646, "grad_norm": 0.15297158062458038, "learning_rate": 6.87480931557002e-05, "loss": 0.9901, "step": 6459 }, { "epoch": 1.3132750559056718, "grad_norm": 0.15674805641174316, "learning_rate": 6.872775348316892e-05, "loss": 1.0478, "step": 6460 }, { "epoch": 1.3134783492579793, "grad_norm": 0.132054403424263, "learning_rate": 6.870741381063766e-05, "loss": 0.8865, "step": 6461 }, { "epoch": 1.3136816426102866, "grad_norm": 0.1640552580356598, "learning_rate": 6.868707413810638e-05, "loss": 1.1175, "step": 6462 }, { "epoch": 1.313884935962594, "grad_norm": 0.13734549283981323, "learning_rate": 6.86667344655751e-05, "loss": 0.9978, "step": 6463 }, { "epoch": 1.3140882293149014, "grad_norm": 0.12565961480140686, "learning_rate": 6.864639479304383e-05, "loss": 0.8271, "step": 6464 }, { "epoch": 1.3142915226672087, "grad_norm": 0.14138418436050415, "learning_rate": 6.862605512051257e-05, "loss": 0.9234, "step": 6465 }, { "epoch": 1.3144948160195162, "grad_norm": 0.1520802527666092, "learning_rate": 6.860571544798129e-05, "loss": 1.0683, "step": 6466 }, { "epoch": 1.3146981093718235, "grad_norm": 0.1719176024198532, "learning_rate": 6.858537577545002e-05, "loss": 1.0938, "step": 6467 }, { "epoch": 1.3149014027241308, "grad_norm": 0.15719756484031677, "learning_rate": 6.856503610291874e-05, "loss": 1.1215, "step": 6468 }, { "epoch": 1.3151046960764383, "grad_norm": 0.15599600970745087, "learning_rate": 6.854469643038748e-05, "loss": 0.9656, "step": 6469 }, { "epoch": 1.3153079894287456, "grad_norm": 0.15448269248008728, "learning_rate": 6.85243567578562e-05, "loss": 1.0572, "step": 6470 }, { "epoch": 1.3155112827810531, "grad_norm": 0.1604229360818863, "learning_rate": 6.850401708532493e-05, "loss": 1.1076, "step": 6471 }, { "epoch": 1.3157145761333604, "grad_norm": 0.13794146478176117, "learning_rate": 6.848367741279365e-05, "loss": 0.8873, "step": 6472 }, { "epoch": 1.3159178694856677, "grad_norm": 0.16124792397022247, "learning_rate": 6.846333774026239e-05, "loss": 1.1082, "step": 6473 }, { "epoch": 1.3161211628379752, "grad_norm": 0.14321772754192352, "learning_rate": 6.844299806773112e-05, "loss": 0.9342, "step": 6474 }, { "epoch": 1.3163244561902827, "grad_norm": 0.14759552478790283, "learning_rate": 6.842265839519984e-05, "loss": 0.9947, "step": 6475 }, { "epoch": 1.31652774954259, "grad_norm": 0.1335640698671341, "learning_rate": 6.840231872266856e-05, "loss": 0.9202, "step": 6476 }, { "epoch": 1.3167310428948973, "grad_norm": 0.14602088928222656, "learning_rate": 6.83819790501373e-05, "loss": 0.8976, "step": 6477 }, { "epoch": 1.3169343362472048, "grad_norm": 0.15512679517269135, "learning_rate": 6.836163937760603e-05, "loss": 1.0567, "step": 6478 }, { "epoch": 1.317137629599512, "grad_norm": 0.14310981333255768, "learning_rate": 6.834129970507475e-05, "loss": 0.9974, "step": 6479 }, { "epoch": 1.3173409229518196, "grad_norm": 0.1316903829574585, "learning_rate": 6.832096003254348e-05, "loss": 0.9084, "step": 6480 }, { "epoch": 1.3175442163041269, "grad_norm": 0.15756045281887054, "learning_rate": 6.83006203600122e-05, "loss": 1.0742, "step": 6481 }, { "epoch": 1.3177475096564342, "grad_norm": 0.1692349910736084, "learning_rate": 6.828028068748094e-05, "loss": 1.2026, "step": 6482 }, { "epoch": 1.3179508030087417, "grad_norm": 0.1590135246515274, "learning_rate": 6.825994101494966e-05, "loss": 1.0709, "step": 6483 }, { "epoch": 1.318154096361049, "grad_norm": 0.15329840779304504, "learning_rate": 6.823960134241839e-05, "loss": 0.9894, "step": 6484 }, { "epoch": 1.3183573897133565, "grad_norm": 0.17357125878334045, "learning_rate": 6.821926166988711e-05, "loss": 1.1774, "step": 6485 }, { "epoch": 1.3185606830656638, "grad_norm": 0.15878167748451233, "learning_rate": 6.819892199735585e-05, "loss": 0.9951, "step": 6486 }, { "epoch": 1.318763976417971, "grad_norm": 0.14225813746452332, "learning_rate": 6.817858232482457e-05, "loss": 0.8056, "step": 6487 }, { "epoch": 1.3189672697702786, "grad_norm": 0.15075825154781342, "learning_rate": 6.81582426522933e-05, "loss": 1.0133, "step": 6488 }, { "epoch": 1.3191705631225858, "grad_norm": 0.1345609575510025, "learning_rate": 6.813790297976202e-05, "loss": 0.818, "step": 6489 }, { "epoch": 1.3193738564748934, "grad_norm": 0.147307887673378, "learning_rate": 6.811756330723076e-05, "loss": 1.0098, "step": 6490 }, { "epoch": 1.3195771498272006, "grad_norm": 0.17987053096294403, "learning_rate": 6.809722363469949e-05, "loss": 1.1921, "step": 6491 }, { "epoch": 1.319780443179508, "grad_norm": 0.13262401521205902, "learning_rate": 6.807688396216821e-05, "loss": 0.8263, "step": 6492 }, { "epoch": 1.3199837365318154, "grad_norm": 0.15621908009052277, "learning_rate": 6.805654428963693e-05, "loss": 1.0055, "step": 6493 }, { "epoch": 1.3201870298841227, "grad_norm": 0.1636243760585785, "learning_rate": 6.803620461710567e-05, "loss": 1.1082, "step": 6494 }, { "epoch": 1.3203903232364302, "grad_norm": 0.14250467717647552, "learning_rate": 6.80158649445744e-05, "loss": 0.9626, "step": 6495 }, { "epoch": 1.3205936165887375, "grad_norm": 0.15674500167369843, "learning_rate": 6.799552527204312e-05, "loss": 1.0997, "step": 6496 }, { "epoch": 1.3207969099410448, "grad_norm": 0.1689048558473587, "learning_rate": 6.797518559951185e-05, "loss": 1.1284, "step": 6497 }, { "epoch": 1.3210002032933523, "grad_norm": 0.15494292974472046, "learning_rate": 6.795484592698058e-05, "loss": 1.0416, "step": 6498 }, { "epoch": 1.3212034966456596, "grad_norm": 0.1549694538116455, "learning_rate": 6.793450625444931e-05, "loss": 0.9951, "step": 6499 }, { "epoch": 1.3214067899979671, "grad_norm": 0.16461819410324097, "learning_rate": 6.791416658191803e-05, "loss": 1.1806, "step": 6500 }, { "epoch": 1.3216100833502744, "grad_norm": 0.171092689037323, "learning_rate": 6.789382690938676e-05, "loss": 1.2561, "step": 6501 }, { "epoch": 1.3218133767025817, "grad_norm": 0.1471683531999588, "learning_rate": 6.78734872368555e-05, "loss": 1.0088, "step": 6502 }, { "epoch": 1.3220166700548892, "grad_norm": 0.14350157976150513, "learning_rate": 6.785314756432422e-05, "loss": 0.9284, "step": 6503 }, { "epoch": 1.3222199634071967, "grad_norm": 0.1558438092470169, "learning_rate": 6.783280789179294e-05, "loss": 1.0972, "step": 6504 }, { "epoch": 1.322423256759504, "grad_norm": 0.15591087937355042, "learning_rate": 6.781246821926167e-05, "loss": 1.1083, "step": 6505 }, { "epoch": 1.3226265501118113, "grad_norm": 0.14936377108097076, "learning_rate": 6.779212854673041e-05, "loss": 0.8758, "step": 6506 }, { "epoch": 1.3228298434641188, "grad_norm": 0.1665930598974228, "learning_rate": 6.777178887419913e-05, "loss": 1.0902, "step": 6507 }, { "epoch": 1.323033136816426, "grad_norm": 0.1540941298007965, "learning_rate": 6.775144920166786e-05, "loss": 1.1394, "step": 6508 }, { "epoch": 1.3232364301687336, "grad_norm": 0.1532563716173172, "learning_rate": 6.773110952913658e-05, "loss": 0.9367, "step": 6509 }, { "epoch": 1.323439723521041, "grad_norm": 0.1737111210823059, "learning_rate": 6.771076985660532e-05, "loss": 1.0788, "step": 6510 }, { "epoch": 1.3236430168733482, "grad_norm": 0.15493230521678925, "learning_rate": 6.769043018407404e-05, "loss": 1.0181, "step": 6511 }, { "epoch": 1.3238463102256557, "grad_norm": 0.14142099022865295, "learning_rate": 6.767009051154277e-05, "loss": 0.8269, "step": 6512 }, { "epoch": 1.324049603577963, "grad_norm": 0.15842103958129883, "learning_rate": 6.764975083901149e-05, "loss": 1.054, "step": 6513 }, { "epoch": 1.3242528969302705, "grad_norm": 0.15117564797401428, "learning_rate": 6.762941116648023e-05, "loss": 1.0109, "step": 6514 }, { "epoch": 1.3244561902825778, "grad_norm": 0.13780778646469116, "learning_rate": 6.760907149394895e-05, "loss": 0.8528, "step": 6515 }, { "epoch": 1.324659483634885, "grad_norm": 0.14925047755241394, "learning_rate": 6.758873182141768e-05, "loss": 1.0393, "step": 6516 }, { "epoch": 1.3248627769871926, "grad_norm": 0.1592148393392563, "learning_rate": 6.75683921488864e-05, "loss": 1.105, "step": 6517 }, { "epoch": 1.3250660703394999, "grad_norm": 0.14861708879470825, "learning_rate": 6.754805247635514e-05, "loss": 0.9957, "step": 6518 }, { "epoch": 1.3252693636918074, "grad_norm": 0.15767233073711395, "learning_rate": 6.752771280382387e-05, "loss": 0.9189, "step": 6519 }, { "epoch": 1.3254726570441147, "grad_norm": 0.1405247300863266, "learning_rate": 6.750737313129259e-05, "loss": 0.9857, "step": 6520 }, { "epoch": 1.325675950396422, "grad_norm": 0.1638457179069519, "learning_rate": 6.748703345876131e-05, "loss": 1.0637, "step": 6521 }, { "epoch": 1.3258792437487295, "grad_norm": 0.13822193443775177, "learning_rate": 6.746669378623004e-05, "loss": 0.9613, "step": 6522 }, { "epoch": 1.3260825371010367, "grad_norm": 0.16013990342617035, "learning_rate": 6.744635411369878e-05, "loss": 1.0393, "step": 6523 }, { "epoch": 1.3262858304533442, "grad_norm": 0.1428498774766922, "learning_rate": 6.74260144411675e-05, "loss": 0.98, "step": 6524 }, { "epoch": 1.3264891238056515, "grad_norm": 0.15162871778011322, "learning_rate": 6.740567476863623e-05, "loss": 0.9981, "step": 6525 }, { "epoch": 1.3266924171579588, "grad_norm": 0.14506307244300842, "learning_rate": 6.738533509610495e-05, "loss": 0.9618, "step": 6526 }, { "epoch": 1.3268957105102663, "grad_norm": 0.15427903831005096, "learning_rate": 6.736499542357369e-05, "loss": 0.9897, "step": 6527 }, { "epoch": 1.3270990038625736, "grad_norm": 0.1582299917936325, "learning_rate": 6.734465575104241e-05, "loss": 1.045, "step": 6528 }, { "epoch": 1.3273022972148811, "grad_norm": 0.15421707928180695, "learning_rate": 6.732431607851114e-05, "loss": 1.0482, "step": 6529 }, { "epoch": 1.3275055905671884, "grad_norm": 0.14663353562355042, "learning_rate": 6.730397640597986e-05, "loss": 0.9977, "step": 6530 }, { "epoch": 1.3277088839194957, "grad_norm": 0.16096587479114532, "learning_rate": 6.72836367334486e-05, "loss": 1.1346, "step": 6531 }, { "epoch": 1.3279121772718032, "grad_norm": 0.1554766148328781, "learning_rate": 6.726329706091732e-05, "loss": 0.9453, "step": 6532 }, { "epoch": 1.3281154706241107, "grad_norm": 0.1384490728378296, "learning_rate": 6.724295738838605e-05, "loss": 1.0076, "step": 6533 }, { "epoch": 1.328318763976418, "grad_norm": 0.16109387576580048, "learning_rate": 6.722261771585477e-05, "loss": 0.9302, "step": 6534 }, { "epoch": 1.3285220573287253, "grad_norm": 0.1677289754152298, "learning_rate": 6.720227804332351e-05, "loss": 1.0598, "step": 6535 }, { "epoch": 1.3287253506810328, "grad_norm": 0.16336236894130707, "learning_rate": 6.718193837079224e-05, "loss": 1.0249, "step": 6536 }, { "epoch": 1.32892864403334, "grad_norm": 0.14014337956905365, "learning_rate": 6.716159869826096e-05, "loss": 0.8409, "step": 6537 }, { "epoch": 1.3291319373856476, "grad_norm": 0.15761636197566986, "learning_rate": 6.714125902572968e-05, "loss": 1.0118, "step": 6538 }, { "epoch": 1.329335230737955, "grad_norm": 0.12533292174339294, "learning_rate": 6.712091935319842e-05, "loss": 0.8514, "step": 6539 }, { "epoch": 1.3295385240902622, "grad_norm": 0.1495424211025238, "learning_rate": 6.710057968066715e-05, "loss": 0.9738, "step": 6540 }, { "epoch": 1.3297418174425697, "grad_norm": 0.15687566995620728, "learning_rate": 6.708024000813587e-05, "loss": 0.9854, "step": 6541 }, { "epoch": 1.329945110794877, "grad_norm": 0.1550702452659607, "learning_rate": 6.70599003356046e-05, "loss": 1.1639, "step": 6542 }, { "epoch": 1.3301484041471845, "grad_norm": 0.1597202569246292, "learning_rate": 6.703956066307333e-05, "loss": 1.0971, "step": 6543 }, { "epoch": 1.3303516974994918, "grad_norm": 0.15224111080169678, "learning_rate": 6.701922099054206e-05, "loss": 1.0292, "step": 6544 }, { "epoch": 1.330554990851799, "grad_norm": 0.12689407169818878, "learning_rate": 6.699888131801078e-05, "loss": 0.8408, "step": 6545 }, { "epoch": 1.3307582842041066, "grad_norm": 0.14697900414466858, "learning_rate": 6.697854164547951e-05, "loss": 1.0648, "step": 6546 }, { "epoch": 1.3309615775564139, "grad_norm": 0.16403964161872864, "learning_rate": 6.695820197294825e-05, "loss": 1.1699, "step": 6547 }, { "epoch": 1.3311648709087214, "grad_norm": 0.14086556434631348, "learning_rate": 6.693786230041697e-05, "loss": 0.9051, "step": 6548 }, { "epoch": 1.3313681642610287, "grad_norm": 0.1466771811246872, "learning_rate": 6.69175226278857e-05, "loss": 1.022, "step": 6549 }, { "epoch": 1.331571457613336, "grad_norm": 0.16425777971744537, "learning_rate": 6.689718295535442e-05, "loss": 1.0438, "step": 6550 }, { "epoch": 1.3317747509656435, "grad_norm": 0.16799181699752808, "learning_rate": 6.687684328282316e-05, "loss": 1.1315, "step": 6551 }, { "epoch": 1.3319780443179507, "grad_norm": 0.14717894792556763, "learning_rate": 6.685650361029188e-05, "loss": 0.9153, "step": 6552 }, { "epoch": 1.3321813376702583, "grad_norm": 0.15937882661819458, "learning_rate": 6.68361639377606e-05, "loss": 1.1034, "step": 6553 }, { "epoch": 1.3323846310225655, "grad_norm": 0.14805994927883148, "learning_rate": 6.681582426522933e-05, "loss": 0.8675, "step": 6554 }, { "epoch": 1.3325879243748728, "grad_norm": 0.15249277651309967, "learning_rate": 6.679548459269807e-05, "loss": 1.135, "step": 6555 }, { "epoch": 1.3327912177271803, "grad_norm": 0.1499241143465042, "learning_rate": 6.677514492016679e-05, "loss": 0.9384, "step": 6556 }, { "epoch": 1.3329945110794876, "grad_norm": 0.16107714176177979, "learning_rate": 6.675480524763552e-05, "loss": 1.1863, "step": 6557 }, { "epoch": 1.3331978044317951, "grad_norm": 0.15378479659557343, "learning_rate": 6.673446557510424e-05, "loss": 0.9822, "step": 6558 }, { "epoch": 1.3334010977841024, "grad_norm": 0.13990168273448944, "learning_rate": 6.671412590257298e-05, "loss": 0.8935, "step": 6559 }, { "epoch": 1.3336043911364097, "grad_norm": 0.15115247666835785, "learning_rate": 6.66937862300417e-05, "loss": 0.9831, "step": 6560 }, { "epoch": 1.3338076844887172, "grad_norm": 0.17793822288513184, "learning_rate": 6.667344655751043e-05, "loss": 1.052, "step": 6561 }, { "epoch": 1.3340109778410247, "grad_norm": 0.16342271864414215, "learning_rate": 6.665310688497915e-05, "loss": 1.1521, "step": 6562 }, { "epoch": 1.334214271193332, "grad_norm": 0.171544149518013, "learning_rate": 6.663276721244788e-05, "loss": 1.1458, "step": 6563 }, { "epoch": 1.3344175645456393, "grad_norm": 0.17183944582939148, "learning_rate": 6.661242753991662e-05, "loss": 1.1076, "step": 6564 }, { "epoch": 1.3346208578979466, "grad_norm": 0.1565820276737213, "learning_rate": 6.659208786738534e-05, "loss": 0.9565, "step": 6565 }, { "epoch": 1.334824151250254, "grad_norm": 0.1608911156654358, "learning_rate": 6.657174819485406e-05, "loss": 1.0161, "step": 6566 }, { "epoch": 1.3350274446025616, "grad_norm": 0.15111416578292847, "learning_rate": 6.655140852232279e-05, "loss": 0.9956, "step": 6567 }, { "epoch": 1.335230737954869, "grad_norm": 0.15531237423419952, "learning_rate": 6.653106884979153e-05, "loss": 0.9907, "step": 6568 }, { "epoch": 1.3354340313071762, "grad_norm": 0.13924407958984375, "learning_rate": 6.651072917726025e-05, "loss": 0.9618, "step": 6569 }, { "epoch": 1.3356373246594837, "grad_norm": 0.16933512687683105, "learning_rate": 6.649038950472898e-05, "loss": 1.1476, "step": 6570 }, { "epoch": 1.335840618011791, "grad_norm": 0.1602686047554016, "learning_rate": 6.64700498321977e-05, "loss": 1.0173, "step": 6571 }, { "epoch": 1.3360439113640985, "grad_norm": 0.16032569110393524, "learning_rate": 6.644971015966644e-05, "loss": 1.0694, "step": 6572 }, { "epoch": 1.3362472047164058, "grad_norm": 0.15900370478630066, "learning_rate": 6.642937048713516e-05, "loss": 1.0537, "step": 6573 }, { "epoch": 1.336450498068713, "grad_norm": 0.16162076592445374, "learning_rate": 6.640903081460389e-05, "loss": 1.078, "step": 6574 }, { "epoch": 1.3366537914210206, "grad_norm": 0.18270519375801086, "learning_rate": 6.638869114207261e-05, "loss": 1.1913, "step": 6575 }, { "epoch": 1.3368570847733279, "grad_norm": 0.16038493812084198, "learning_rate": 6.636835146954135e-05, "loss": 1.0904, "step": 6576 }, { "epoch": 1.3370603781256354, "grad_norm": 0.1554563045501709, "learning_rate": 6.634801179701007e-05, "loss": 1.0685, "step": 6577 }, { "epoch": 1.3372636714779427, "grad_norm": 0.1518605649471283, "learning_rate": 6.63276721244788e-05, "loss": 0.9499, "step": 6578 }, { "epoch": 1.33746696483025, "grad_norm": 0.14802587032318115, "learning_rate": 6.630733245194752e-05, "loss": 0.9432, "step": 6579 }, { "epoch": 1.3376702581825575, "grad_norm": 0.14748792350292206, "learning_rate": 6.628699277941626e-05, "loss": 0.9841, "step": 6580 }, { "epoch": 1.3378735515348648, "grad_norm": 0.15375643968582153, "learning_rate": 6.626665310688499e-05, "loss": 1.1379, "step": 6581 }, { "epoch": 1.3380768448871723, "grad_norm": 0.13825255632400513, "learning_rate": 6.624631343435371e-05, "loss": 0.935, "step": 6582 }, { "epoch": 1.3382801382394796, "grad_norm": 0.16053850948810577, "learning_rate": 6.622597376182243e-05, "loss": 1.0313, "step": 6583 }, { "epoch": 1.3384834315917868, "grad_norm": 0.1293373554944992, "learning_rate": 6.620563408929117e-05, "loss": 0.8012, "step": 6584 }, { "epoch": 1.3386867249440944, "grad_norm": 0.14056281745433807, "learning_rate": 6.61852944167599e-05, "loss": 0.9578, "step": 6585 }, { "epoch": 1.3388900182964016, "grad_norm": 0.15911336243152618, "learning_rate": 6.616495474422862e-05, "loss": 1.1905, "step": 6586 }, { "epoch": 1.3390933116487092, "grad_norm": 0.16372515261173248, "learning_rate": 6.614461507169735e-05, "loss": 1.1501, "step": 6587 }, { "epoch": 1.3392966050010164, "grad_norm": 0.14411291480064392, "learning_rate": 6.612427539916608e-05, "loss": 0.9337, "step": 6588 }, { "epoch": 1.3394998983533237, "grad_norm": 0.15178608894348145, "learning_rate": 6.610393572663481e-05, "loss": 0.9428, "step": 6589 }, { "epoch": 1.3397031917056312, "grad_norm": 0.14842566847801208, "learning_rate": 6.608359605410353e-05, "loss": 0.9875, "step": 6590 }, { "epoch": 1.3399064850579385, "grad_norm": 0.14400069415569305, "learning_rate": 6.606325638157226e-05, "loss": 1.0277, "step": 6591 }, { "epoch": 1.340109778410246, "grad_norm": 0.14039835333824158, "learning_rate": 6.6042916709041e-05, "loss": 0.9858, "step": 6592 }, { "epoch": 1.3403130717625533, "grad_norm": 0.16195842623710632, "learning_rate": 6.602257703650972e-05, "loss": 1.1181, "step": 6593 }, { "epoch": 1.3405163651148606, "grad_norm": 0.15187060832977295, "learning_rate": 6.600223736397844e-05, "loss": 1.0089, "step": 6594 }, { "epoch": 1.3407196584671681, "grad_norm": 0.1642741560935974, "learning_rate": 6.598189769144717e-05, "loss": 1.0599, "step": 6595 }, { "epoch": 1.3409229518194756, "grad_norm": 0.16725251078605652, "learning_rate": 6.59615580189159e-05, "loss": 1.069, "step": 6596 }, { "epoch": 1.341126245171783, "grad_norm": 0.1582929641008377, "learning_rate": 6.594121834638463e-05, "loss": 0.8783, "step": 6597 }, { "epoch": 1.3413295385240902, "grad_norm": 0.149756520986557, "learning_rate": 6.592087867385336e-05, "loss": 0.9534, "step": 6598 }, { "epoch": 1.3415328318763977, "grad_norm": 0.13332435488700867, "learning_rate": 6.590053900132208e-05, "loss": 0.9478, "step": 6599 }, { "epoch": 1.341736125228705, "grad_norm": 0.1550353765487671, "learning_rate": 6.588019932879082e-05, "loss": 0.9224, "step": 6600 }, { "epoch": 1.3419394185810125, "grad_norm": 0.15263979136943817, "learning_rate": 6.585985965625954e-05, "loss": 0.9526, "step": 6601 }, { "epoch": 1.3421427119333198, "grad_norm": 0.1639474779367447, "learning_rate": 6.583951998372827e-05, "loss": 1.0998, "step": 6602 }, { "epoch": 1.342346005285627, "grad_norm": 0.15959811210632324, "learning_rate": 6.581918031119699e-05, "loss": 1.061, "step": 6603 }, { "epoch": 1.3425492986379346, "grad_norm": 0.1565035730600357, "learning_rate": 6.579884063866572e-05, "loss": 1.0391, "step": 6604 }, { "epoch": 1.3427525919902419, "grad_norm": 0.14973071217536926, "learning_rate": 6.577850096613445e-05, "loss": 1.0028, "step": 6605 }, { "epoch": 1.3429558853425494, "grad_norm": 0.14768008887767792, "learning_rate": 6.575816129360316e-05, "loss": 0.9624, "step": 6606 }, { "epoch": 1.3431591786948567, "grad_norm": 0.1557006537914276, "learning_rate": 6.57378216210719e-05, "loss": 1.0577, "step": 6607 }, { "epoch": 1.343362472047164, "grad_norm": 0.13694258034229279, "learning_rate": 6.571748194854063e-05, "loss": 0.9131, "step": 6608 }, { "epoch": 1.3435657653994715, "grad_norm": 0.16022591292858124, "learning_rate": 6.569714227600937e-05, "loss": 0.95, "step": 6609 }, { "epoch": 1.3437690587517788, "grad_norm": 0.15533234179019928, "learning_rate": 6.567680260347808e-05, "loss": 1.0009, "step": 6610 }, { "epoch": 1.3439723521040863, "grad_norm": 0.13041260838508606, "learning_rate": 6.565646293094681e-05, "loss": 0.748, "step": 6611 }, { "epoch": 1.3441756454563936, "grad_norm": 0.14454182982444763, "learning_rate": 6.563612325841554e-05, "loss": 0.8943, "step": 6612 }, { "epoch": 1.3443789388087009, "grad_norm": 0.14608418941497803, "learning_rate": 6.561578358588428e-05, "loss": 0.8877, "step": 6613 }, { "epoch": 1.3445822321610084, "grad_norm": 0.17098468542099, "learning_rate": 6.559544391335299e-05, "loss": 0.8842, "step": 6614 }, { "epoch": 1.3447855255133156, "grad_norm": 0.17818143963813782, "learning_rate": 6.557510424082173e-05, "loss": 1.073, "step": 6615 }, { "epoch": 1.3449888188656232, "grad_norm": 0.14222939312458038, "learning_rate": 6.555476456829045e-05, "loss": 0.9136, "step": 6616 }, { "epoch": 1.3451921122179304, "grad_norm": 0.1513351947069168, "learning_rate": 6.553442489575919e-05, "loss": 0.9578, "step": 6617 }, { "epoch": 1.3453954055702377, "grad_norm": 0.17002101242542267, "learning_rate": 6.551408522322791e-05, "loss": 1.1007, "step": 6618 }, { "epoch": 1.3455986989225452, "grad_norm": 0.15087753534317017, "learning_rate": 6.549374555069664e-05, "loss": 1.0228, "step": 6619 }, { "epoch": 1.3458019922748525, "grad_norm": 0.14262332022190094, "learning_rate": 6.547340587816536e-05, "loss": 0.8861, "step": 6620 }, { "epoch": 1.34600528562716, "grad_norm": 0.14896303415298462, "learning_rate": 6.54530662056341e-05, "loss": 1.0326, "step": 6621 }, { "epoch": 1.3462085789794673, "grad_norm": 0.1533699333667755, "learning_rate": 6.543272653310282e-05, "loss": 1.1764, "step": 6622 }, { "epoch": 1.3464118723317746, "grad_norm": 0.15011627972126007, "learning_rate": 6.541238686057155e-05, "loss": 1.0611, "step": 6623 }, { "epoch": 1.3466151656840821, "grad_norm": 0.1621440351009369, "learning_rate": 6.539204718804027e-05, "loss": 0.9875, "step": 6624 }, { "epoch": 1.3468184590363896, "grad_norm": 0.1371341049671173, "learning_rate": 6.537170751550901e-05, "loss": 0.9026, "step": 6625 }, { "epoch": 1.347021752388697, "grad_norm": 0.1659182608127594, "learning_rate": 6.535136784297774e-05, "loss": 1.1134, "step": 6626 }, { "epoch": 1.3472250457410042, "grad_norm": 0.14671935141086578, "learning_rate": 6.533102817044646e-05, "loss": 0.9337, "step": 6627 }, { "epoch": 1.3474283390933117, "grad_norm": 0.16731108725070953, "learning_rate": 6.531068849791518e-05, "loss": 1.297, "step": 6628 }, { "epoch": 1.347631632445619, "grad_norm": 0.1497422158718109, "learning_rate": 6.529034882538392e-05, "loss": 0.9333, "step": 6629 }, { "epoch": 1.3478349257979265, "grad_norm": 0.15278853476047516, "learning_rate": 6.527000915285265e-05, "loss": 0.8934, "step": 6630 }, { "epoch": 1.3480382191502338, "grad_norm": 0.14825202524662018, "learning_rate": 6.524966948032137e-05, "loss": 1.027, "step": 6631 }, { "epoch": 1.348241512502541, "grad_norm": 0.1355981081724167, "learning_rate": 6.52293298077901e-05, "loss": 0.8277, "step": 6632 }, { "epoch": 1.3484448058548486, "grad_norm": 0.15200775861740112, "learning_rate": 6.520899013525883e-05, "loss": 1.0864, "step": 6633 }, { "epoch": 1.348648099207156, "grad_norm": 0.15310457348823547, "learning_rate": 6.518865046272756e-05, "loss": 0.9925, "step": 6634 }, { "epoch": 1.3488513925594634, "grad_norm": 0.15348269045352936, "learning_rate": 6.516831079019628e-05, "loss": 1.0431, "step": 6635 }, { "epoch": 1.3490546859117707, "grad_norm": 0.17338477075099945, "learning_rate": 6.514797111766501e-05, "loss": 1.017, "step": 6636 }, { "epoch": 1.349257979264078, "grad_norm": 0.15221281349658966, "learning_rate": 6.512763144513374e-05, "loss": 1.0562, "step": 6637 }, { "epoch": 1.3494612726163855, "grad_norm": 0.13660123944282532, "learning_rate": 6.510729177260247e-05, "loss": 0.8976, "step": 6638 }, { "epoch": 1.3496645659686928, "grad_norm": 0.15344764292240143, "learning_rate": 6.50869521000712e-05, "loss": 1.0725, "step": 6639 }, { "epoch": 1.3498678593210003, "grad_norm": 0.13767392933368683, "learning_rate": 6.506661242753992e-05, "loss": 0.9795, "step": 6640 }, { "epoch": 1.3500711526733076, "grad_norm": 0.19037991762161255, "learning_rate": 6.504627275500866e-05, "loss": 1.3235, "step": 6641 }, { "epoch": 1.3502744460256149, "grad_norm": 0.153898686170578, "learning_rate": 6.502593308247738e-05, "loss": 0.9877, "step": 6642 }, { "epoch": 1.3504777393779224, "grad_norm": 0.13859529793262482, "learning_rate": 6.50055934099461e-05, "loss": 0.8863, "step": 6643 }, { "epoch": 1.3506810327302297, "grad_norm": 0.1579197347164154, "learning_rate": 6.498525373741483e-05, "loss": 1.0816, "step": 6644 }, { "epoch": 1.3508843260825372, "grad_norm": 0.1565553843975067, "learning_rate": 6.496491406488355e-05, "loss": 1.07, "step": 6645 }, { "epoch": 1.3510876194348445, "grad_norm": 0.14760999381542206, "learning_rate": 6.494457439235229e-05, "loss": 0.9851, "step": 6646 }, { "epoch": 1.3512909127871517, "grad_norm": 0.1595640629529953, "learning_rate": 6.4924234719821e-05, "loss": 1.109, "step": 6647 }, { "epoch": 1.3514942061394593, "grad_norm": 0.169783353805542, "learning_rate": 6.490389504728974e-05, "loss": 0.9721, "step": 6648 }, { "epoch": 1.3516974994917665, "grad_norm": 0.1549369841814041, "learning_rate": 6.488355537475847e-05, "loss": 1.0158, "step": 6649 }, { "epoch": 1.351900792844074, "grad_norm": 0.1654866486787796, "learning_rate": 6.48632157022272e-05, "loss": 1.2272, "step": 6650 }, { "epoch": 1.3521040861963813, "grad_norm": 0.16200962662696838, "learning_rate": 6.484287602969591e-05, "loss": 0.9346, "step": 6651 }, { "epoch": 1.3523073795486886, "grad_norm": 0.18851538002490997, "learning_rate": 6.482253635716465e-05, "loss": 1.1206, "step": 6652 }, { "epoch": 1.3525106729009961, "grad_norm": 0.16858980059623718, "learning_rate": 6.480219668463338e-05, "loss": 1.087, "step": 6653 }, { "epoch": 1.3527139662533036, "grad_norm": 0.17450284957885742, "learning_rate": 6.478185701210211e-05, "loss": 1.0501, "step": 6654 }, { "epoch": 1.352917259605611, "grad_norm": 0.1536058485507965, "learning_rate": 6.476151733957083e-05, "loss": 1.1982, "step": 6655 }, { "epoch": 1.3531205529579182, "grad_norm": 0.16767753660678864, "learning_rate": 6.474117766703956e-05, "loss": 1.1293, "step": 6656 }, { "epoch": 1.3533238463102257, "grad_norm": 0.1527155041694641, "learning_rate": 6.472083799450829e-05, "loss": 1.0746, "step": 6657 }, { "epoch": 1.353527139662533, "grad_norm": 0.13894875347614288, "learning_rate": 6.470049832197703e-05, "loss": 0.9043, "step": 6658 }, { "epoch": 1.3537304330148405, "grad_norm": 0.16773444414138794, "learning_rate": 6.468015864944574e-05, "loss": 1.1785, "step": 6659 }, { "epoch": 1.3539337263671478, "grad_norm": 0.1570262908935547, "learning_rate": 6.465981897691448e-05, "loss": 1.1152, "step": 6660 }, { "epoch": 1.354137019719455, "grad_norm": 0.16130763292312622, "learning_rate": 6.46394793043832e-05, "loss": 1.0144, "step": 6661 }, { "epoch": 1.3543403130717626, "grad_norm": 0.15057474374771118, "learning_rate": 6.461913963185194e-05, "loss": 1.0204, "step": 6662 }, { "epoch": 1.35454360642407, "grad_norm": 0.15316952764987946, "learning_rate": 6.459879995932065e-05, "loss": 1.0506, "step": 6663 }, { "epoch": 1.3547468997763774, "grad_norm": 0.1641097515821457, "learning_rate": 6.457846028678939e-05, "loss": 1.0777, "step": 6664 }, { "epoch": 1.3549501931286847, "grad_norm": 0.15480592846870422, "learning_rate": 6.455812061425811e-05, "loss": 1.0339, "step": 6665 }, { "epoch": 1.355153486480992, "grad_norm": 0.14954860508441925, "learning_rate": 6.453778094172685e-05, "loss": 0.9126, "step": 6666 }, { "epoch": 1.3553567798332995, "grad_norm": 0.17514324188232422, "learning_rate": 6.451744126919556e-05, "loss": 1.0698, "step": 6667 }, { "epoch": 1.3555600731856068, "grad_norm": 0.16481555998325348, "learning_rate": 6.44971015966643e-05, "loss": 1.1386, "step": 6668 }, { "epoch": 1.3557633665379143, "grad_norm": 0.15936695039272308, "learning_rate": 6.447676192413302e-05, "loss": 1.1154, "step": 6669 }, { "epoch": 1.3559666598902216, "grad_norm": 0.14329056441783905, "learning_rate": 6.445642225160176e-05, "loss": 0.9836, "step": 6670 }, { "epoch": 1.3561699532425289, "grad_norm": 0.1695948988199234, "learning_rate": 6.443608257907047e-05, "loss": 1.0999, "step": 6671 }, { "epoch": 1.3563732465948364, "grad_norm": 0.15652230381965637, "learning_rate": 6.441574290653921e-05, "loss": 0.992, "step": 6672 }, { "epoch": 1.3565765399471437, "grad_norm": 0.14709965884685516, "learning_rate": 6.439540323400793e-05, "loss": 1.0105, "step": 6673 }, { "epoch": 1.3567798332994512, "grad_norm": 0.14747577905654907, "learning_rate": 6.437506356147667e-05, "loss": 0.9413, "step": 6674 }, { "epoch": 1.3569831266517585, "grad_norm": 0.16515463590621948, "learning_rate": 6.435472388894538e-05, "loss": 0.9619, "step": 6675 }, { "epoch": 1.3571864200040658, "grad_norm": 0.1489870250225067, "learning_rate": 6.433438421641412e-05, "loss": 0.893, "step": 6676 }, { "epoch": 1.3573897133563733, "grad_norm": 0.1626134067773819, "learning_rate": 6.431404454388285e-05, "loss": 1.051, "step": 6677 }, { "epoch": 1.3575930067086805, "grad_norm": 0.16492238640785217, "learning_rate": 6.429370487135158e-05, "loss": 1.1629, "step": 6678 }, { "epoch": 1.357796300060988, "grad_norm": 0.1518096923828125, "learning_rate": 6.427336519882031e-05, "loss": 0.958, "step": 6679 }, { "epoch": 1.3579995934132953, "grad_norm": 0.14562292397022247, "learning_rate": 6.425302552628903e-05, "loss": 0.944, "step": 6680 }, { "epoch": 1.3582028867656026, "grad_norm": 0.17256395518779755, "learning_rate": 6.423268585375776e-05, "loss": 1.1921, "step": 6681 }, { "epoch": 1.3584061801179101, "grad_norm": 0.15304385125637054, "learning_rate": 6.42123461812265e-05, "loss": 1.0311, "step": 6682 }, { "epoch": 1.3586094734702177, "grad_norm": 0.159259632229805, "learning_rate": 6.419200650869522e-05, "loss": 1.0585, "step": 6683 }, { "epoch": 1.358812766822525, "grad_norm": 0.15427778661251068, "learning_rate": 6.417166683616394e-05, "loss": 1.0914, "step": 6684 }, { "epoch": 1.3590160601748322, "grad_norm": 0.14755503833293915, "learning_rate": 6.415132716363267e-05, "loss": 0.9637, "step": 6685 }, { "epoch": 1.3592193535271397, "grad_norm": 0.16953536868095398, "learning_rate": 6.413098749110139e-05, "loss": 1.0948, "step": 6686 }, { "epoch": 1.359422646879447, "grad_norm": 0.15146459639072418, "learning_rate": 6.411064781857013e-05, "loss": 0.8806, "step": 6687 }, { "epoch": 1.3596259402317545, "grad_norm": 0.14081160724163055, "learning_rate": 6.409030814603884e-05, "loss": 0.9432, "step": 6688 }, { "epoch": 1.3598292335840618, "grad_norm": 0.1574667990207672, "learning_rate": 6.406996847350758e-05, "loss": 1.1607, "step": 6689 }, { "epoch": 1.3600325269363691, "grad_norm": 0.14919863641262054, "learning_rate": 6.40496288009763e-05, "loss": 0.9258, "step": 6690 }, { "epoch": 1.3602358202886766, "grad_norm": 0.15582725405693054, "learning_rate": 6.402928912844504e-05, "loss": 0.9811, "step": 6691 }, { "epoch": 1.360439113640984, "grad_norm": 0.16567249596118927, "learning_rate": 6.400894945591375e-05, "loss": 1.1126, "step": 6692 }, { "epoch": 1.3606424069932914, "grad_norm": 0.1419053077697754, "learning_rate": 6.398860978338249e-05, "loss": 0.9852, "step": 6693 }, { "epoch": 1.3608457003455987, "grad_norm": 0.1396085023880005, "learning_rate": 6.396827011085122e-05, "loss": 0.9273, "step": 6694 }, { "epoch": 1.361048993697906, "grad_norm": 0.1635066717863083, "learning_rate": 6.394793043831995e-05, "loss": 1.1869, "step": 6695 }, { "epoch": 1.3612522870502135, "grad_norm": 0.14211490750312805, "learning_rate": 6.392759076578866e-05, "loss": 0.8805, "step": 6696 }, { "epoch": 1.3614555804025208, "grad_norm": 0.13765402138233185, "learning_rate": 6.39072510932574e-05, "loss": 0.8661, "step": 6697 }, { "epoch": 1.3616588737548283, "grad_norm": 0.15493972599506378, "learning_rate": 6.388691142072613e-05, "loss": 1.0499, "step": 6698 }, { "epoch": 1.3618621671071356, "grad_norm": 0.14159010350704193, "learning_rate": 6.386657174819486e-05, "loss": 0.9232, "step": 6699 }, { "epoch": 1.3620654604594429, "grad_norm": 0.14542338252067566, "learning_rate": 6.384623207566358e-05, "loss": 0.9365, "step": 6700 }, { "epoch": 1.3622687538117504, "grad_norm": 0.16629654169082642, "learning_rate": 6.382589240313231e-05, "loss": 1.1661, "step": 6701 }, { "epoch": 1.3624720471640577, "grad_norm": 0.15916363894939423, "learning_rate": 6.380555273060104e-05, "loss": 0.9942, "step": 6702 }, { "epoch": 1.3626753405163652, "grad_norm": 0.1526755392551422, "learning_rate": 6.378521305806978e-05, "loss": 1.049, "step": 6703 }, { "epoch": 1.3628786338686725, "grad_norm": 0.15990319848060608, "learning_rate": 6.376487338553849e-05, "loss": 1.0295, "step": 6704 }, { "epoch": 1.3630819272209798, "grad_norm": 0.1506752073764801, "learning_rate": 6.374453371300723e-05, "loss": 1.1331, "step": 6705 }, { "epoch": 1.3632852205732873, "grad_norm": 0.15335889160633087, "learning_rate": 6.372419404047595e-05, "loss": 1.1036, "step": 6706 }, { "epoch": 1.3634885139255946, "grad_norm": 0.15802709758281708, "learning_rate": 6.370385436794469e-05, "loss": 1.029, "step": 6707 }, { "epoch": 1.363691807277902, "grad_norm": 0.13966608047485352, "learning_rate": 6.36835146954134e-05, "loss": 0.9259, "step": 6708 }, { "epoch": 1.3638951006302094, "grad_norm": 0.12807948887348175, "learning_rate": 6.366317502288214e-05, "loss": 0.8966, "step": 6709 }, { "epoch": 1.3640983939825166, "grad_norm": 0.14886990189552307, "learning_rate": 6.364283535035086e-05, "loss": 0.9927, "step": 6710 }, { "epoch": 1.3643016873348242, "grad_norm": 0.13951446115970612, "learning_rate": 6.36224956778196e-05, "loss": 0.8846, "step": 6711 }, { "epoch": 1.3645049806871317, "grad_norm": 0.1629723608493805, "learning_rate": 6.360215600528831e-05, "loss": 1.147, "step": 6712 }, { "epoch": 1.364708274039439, "grad_norm": 0.1576232761144638, "learning_rate": 6.358181633275705e-05, "loss": 0.9147, "step": 6713 }, { "epoch": 1.3649115673917462, "grad_norm": 0.1496376097202301, "learning_rate": 6.356147666022577e-05, "loss": 0.9403, "step": 6714 }, { "epoch": 1.3651148607440537, "grad_norm": 0.144853413105011, "learning_rate": 6.354113698769451e-05, "loss": 0.9713, "step": 6715 }, { "epoch": 1.365318154096361, "grad_norm": 0.15076938271522522, "learning_rate": 6.352079731516322e-05, "loss": 0.9689, "step": 6716 }, { "epoch": 1.3655214474486685, "grad_norm": 0.15283893048763275, "learning_rate": 6.350045764263196e-05, "loss": 1.0111, "step": 6717 }, { "epoch": 1.3657247408009758, "grad_norm": 0.16923551261425018, "learning_rate": 6.348011797010068e-05, "loss": 1.1435, "step": 6718 }, { "epoch": 1.3659280341532831, "grad_norm": 0.16161029040813446, "learning_rate": 6.345977829756942e-05, "loss": 1.2014, "step": 6719 }, { "epoch": 1.3661313275055906, "grad_norm": 0.15895690023899078, "learning_rate": 6.343943862503813e-05, "loss": 1.1507, "step": 6720 }, { "epoch": 1.366334620857898, "grad_norm": 0.17195092141628265, "learning_rate": 6.341909895250687e-05, "loss": 1.1747, "step": 6721 }, { "epoch": 1.3665379142102054, "grad_norm": 0.14363127946853638, "learning_rate": 6.33987592799756e-05, "loss": 0.9227, "step": 6722 }, { "epoch": 1.3667412075625127, "grad_norm": 0.14739079773426056, "learning_rate": 6.337841960744433e-05, "loss": 0.9343, "step": 6723 }, { "epoch": 1.36694450091482, "grad_norm": 0.16694903373718262, "learning_rate": 6.335807993491304e-05, "loss": 1.1243, "step": 6724 }, { "epoch": 1.3671477942671275, "grad_norm": 0.15602125227451324, "learning_rate": 6.333774026238178e-05, "loss": 1.1501, "step": 6725 }, { "epoch": 1.3673510876194348, "grad_norm": 0.14906419813632965, "learning_rate": 6.33174005898505e-05, "loss": 1.0106, "step": 6726 }, { "epoch": 1.3675543809717423, "grad_norm": 0.17862831056118011, "learning_rate": 6.329706091731924e-05, "loss": 1.2313, "step": 6727 }, { "epoch": 1.3677576743240496, "grad_norm": 0.1424816995859146, "learning_rate": 6.327672124478796e-05, "loss": 1.1375, "step": 6728 }, { "epoch": 1.3679609676763569, "grad_norm": 0.15217390656471252, "learning_rate": 6.325638157225668e-05, "loss": 1.1057, "step": 6729 }, { "epoch": 1.3681642610286644, "grad_norm": 0.1580178588628769, "learning_rate": 6.323604189972542e-05, "loss": 0.9414, "step": 6730 }, { "epoch": 1.3683675543809717, "grad_norm": 0.14510442316532135, "learning_rate": 6.321570222719414e-05, "loss": 0.9276, "step": 6731 }, { "epoch": 1.3685708477332792, "grad_norm": 0.15985293686389923, "learning_rate": 6.319536255466287e-05, "loss": 1.2001, "step": 6732 }, { "epoch": 1.3687741410855865, "grad_norm": 0.16616447269916534, "learning_rate": 6.317502288213159e-05, "loss": 1.0798, "step": 6733 }, { "epoch": 1.3689774344378938, "grad_norm": 0.15168656408786774, "learning_rate": 6.315468320960033e-05, "loss": 1.0198, "step": 6734 }, { "epoch": 1.3691807277902013, "grad_norm": 0.15670320391654968, "learning_rate": 6.313434353706905e-05, "loss": 1.0823, "step": 6735 }, { "epoch": 1.3693840211425086, "grad_norm": 0.15181729197502136, "learning_rate": 6.311400386453779e-05, "loss": 1.0202, "step": 6736 }, { "epoch": 1.369587314494816, "grad_norm": 0.15091702342033386, "learning_rate": 6.30936641920065e-05, "loss": 0.9388, "step": 6737 }, { "epoch": 1.3697906078471234, "grad_norm": 0.1362704038619995, "learning_rate": 6.307332451947524e-05, "loss": 0.8708, "step": 6738 }, { "epoch": 1.3699939011994307, "grad_norm": 0.1704317331314087, "learning_rate": 6.305298484694397e-05, "loss": 1.1833, "step": 6739 }, { "epoch": 1.3701971945517382, "grad_norm": 0.14978265762329102, "learning_rate": 6.30326451744127e-05, "loss": 1.0367, "step": 6740 }, { "epoch": 1.3704004879040457, "grad_norm": 0.1564272940158844, "learning_rate": 6.301230550188141e-05, "loss": 1.0774, "step": 6741 }, { "epoch": 1.370603781256353, "grad_norm": 0.157321035861969, "learning_rate": 6.299196582935015e-05, "loss": 1.0637, "step": 6742 }, { "epoch": 1.3708070746086602, "grad_norm": 0.14078298211097717, "learning_rate": 6.297162615681888e-05, "loss": 1.0666, "step": 6743 }, { "epoch": 1.3710103679609678, "grad_norm": 0.17371569573879242, "learning_rate": 6.295128648428761e-05, "loss": 1.0625, "step": 6744 }, { "epoch": 1.371213661313275, "grad_norm": 0.15704642236232758, "learning_rate": 6.293094681175633e-05, "loss": 1.0261, "step": 6745 }, { "epoch": 1.3714169546655826, "grad_norm": 0.15526318550109863, "learning_rate": 6.291060713922506e-05, "loss": 1.0443, "step": 6746 }, { "epoch": 1.3716202480178898, "grad_norm": 0.16687656939029694, "learning_rate": 6.289026746669379e-05, "loss": 1.0815, "step": 6747 }, { "epoch": 1.3718235413701971, "grad_norm": 0.1629595309495926, "learning_rate": 6.286992779416253e-05, "loss": 1.0846, "step": 6748 }, { "epoch": 1.3720268347225046, "grad_norm": 0.1666935831308365, "learning_rate": 6.284958812163124e-05, "loss": 1.1281, "step": 6749 }, { "epoch": 1.372230128074812, "grad_norm": 0.1540839672088623, "learning_rate": 6.282924844909997e-05, "loss": 1.0241, "step": 6750 }, { "epoch": 1.3724334214271194, "grad_norm": 0.16391707956790924, "learning_rate": 6.28089087765687e-05, "loss": 1.0616, "step": 6751 }, { "epoch": 1.3726367147794267, "grad_norm": 0.18039560317993164, "learning_rate": 6.278856910403744e-05, "loss": 1.1968, "step": 6752 }, { "epoch": 1.372840008131734, "grad_norm": 0.15921954810619354, "learning_rate": 6.276822943150615e-05, "loss": 0.9092, "step": 6753 }, { "epoch": 1.3730433014840415, "grad_norm": 0.15045933425426483, "learning_rate": 6.274788975897489e-05, "loss": 1.0155, "step": 6754 }, { "epoch": 1.3732465948363488, "grad_norm": 0.14296181499958038, "learning_rate": 6.272755008644361e-05, "loss": 0.9719, "step": 6755 }, { "epoch": 1.3734498881886563, "grad_norm": 0.1567305028438568, "learning_rate": 6.270721041391235e-05, "loss": 1.1043, "step": 6756 }, { "epoch": 1.3736531815409636, "grad_norm": 0.17999158799648285, "learning_rate": 6.268687074138106e-05, "loss": 1.262, "step": 6757 }, { "epoch": 1.373856474893271, "grad_norm": 0.1612483412027359, "learning_rate": 6.26665310688498e-05, "loss": 1.0309, "step": 6758 }, { "epoch": 1.3740597682455784, "grad_norm": 0.1687174290418625, "learning_rate": 6.264619139631852e-05, "loss": 1.0872, "step": 6759 }, { "epoch": 1.3742630615978857, "grad_norm": 0.14612844586372375, "learning_rate": 6.262585172378726e-05, "loss": 0.9855, "step": 6760 }, { "epoch": 1.3744663549501932, "grad_norm": 0.15046149492263794, "learning_rate": 6.260551205125597e-05, "loss": 0.997, "step": 6761 }, { "epoch": 1.3746696483025005, "grad_norm": 0.16940894722938538, "learning_rate": 6.258517237872471e-05, "loss": 1.1318, "step": 6762 }, { "epoch": 1.3748729416548078, "grad_norm": 0.1634390503168106, "learning_rate": 6.256483270619343e-05, "loss": 1.1433, "step": 6763 }, { "epoch": 1.3750762350071153, "grad_norm": 0.14166627824306488, "learning_rate": 6.254449303366217e-05, "loss": 0.8849, "step": 6764 }, { "epoch": 1.3752795283594226, "grad_norm": 0.1518392264842987, "learning_rate": 6.252415336113088e-05, "loss": 0.9538, "step": 6765 }, { "epoch": 1.37548282171173, "grad_norm": 0.1509694904088974, "learning_rate": 6.250381368859962e-05, "loss": 1.137, "step": 6766 }, { "epoch": 1.3756861150640374, "grad_norm": 0.16514116525650024, "learning_rate": 6.248347401606835e-05, "loss": 0.894, "step": 6767 }, { "epoch": 1.3758894084163447, "grad_norm": 0.15788832306861877, "learning_rate": 6.246313434353708e-05, "loss": 0.9714, "step": 6768 }, { "epoch": 1.3760927017686522, "grad_norm": 0.1754407286643982, "learning_rate": 6.24427946710058e-05, "loss": 1.1633, "step": 6769 }, { "epoch": 1.3762959951209597, "grad_norm": 0.15434888005256653, "learning_rate": 6.242245499847452e-05, "loss": 1.0316, "step": 6770 }, { "epoch": 1.376499288473267, "grad_norm": 0.15279196202754974, "learning_rate": 6.240211532594326e-05, "loss": 0.9891, "step": 6771 }, { "epoch": 1.3767025818255743, "grad_norm": 0.15830421447753906, "learning_rate": 6.238177565341198e-05, "loss": 1.1078, "step": 6772 }, { "epoch": 1.3769058751778818, "grad_norm": 0.1486409604549408, "learning_rate": 6.23614359808807e-05, "loss": 1.0091, "step": 6773 }, { "epoch": 1.377109168530189, "grad_norm": 0.13369891047477722, "learning_rate": 6.234109630834943e-05, "loss": 0.9391, "step": 6774 }, { "epoch": 1.3773124618824966, "grad_norm": 0.15606249868869781, "learning_rate": 6.232075663581817e-05, "loss": 0.9228, "step": 6775 }, { "epoch": 1.3775157552348039, "grad_norm": 0.1541958600282669, "learning_rate": 6.230041696328689e-05, "loss": 0.9437, "step": 6776 }, { "epoch": 1.3777190485871111, "grad_norm": 0.14433227479457855, "learning_rate": 6.228007729075562e-05, "loss": 0.8677, "step": 6777 }, { "epoch": 1.3779223419394186, "grad_norm": 0.17673447728157043, "learning_rate": 6.225973761822434e-05, "loss": 1.1797, "step": 6778 }, { "epoch": 1.378125635291726, "grad_norm": 0.14402912557125092, "learning_rate": 6.223939794569308e-05, "loss": 1.0249, "step": 6779 }, { "epoch": 1.3783289286440334, "grad_norm": 0.17239032685756683, "learning_rate": 6.22190582731618e-05, "loss": 1.1306, "step": 6780 }, { "epoch": 1.3785322219963407, "grad_norm": 0.147936150431633, "learning_rate": 6.219871860063053e-05, "loss": 0.9927, "step": 6781 }, { "epoch": 1.378735515348648, "grad_norm": 0.16032187640666962, "learning_rate": 6.217837892809925e-05, "loss": 0.9473, "step": 6782 }, { "epoch": 1.3789388087009555, "grad_norm": 0.14192989468574524, "learning_rate": 6.215803925556799e-05, "loss": 0.8604, "step": 6783 }, { "epoch": 1.3791421020532628, "grad_norm": 0.16771048307418823, "learning_rate": 6.213769958303672e-05, "loss": 1.118, "step": 6784 }, { "epoch": 1.3793453954055703, "grad_norm": 0.14662449061870575, "learning_rate": 6.211735991050544e-05, "loss": 1.0081, "step": 6785 }, { "epoch": 1.3795486887578776, "grad_norm": 0.1614457666873932, "learning_rate": 6.209702023797416e-05, "loss": 1.2436, "step": 6786 }, { "epoch": 1.379751982110185, "grad_norm": 0.17378991842269897, "learning_rate": 6.20766805654429e-05, "loss": 1.1431, "step": 6787 }, { "epoch": 1.3799552754624924, "grad_norm": 0.1449557989835739, "learning_rate": 6.205634089291163e-05, "loss": 1.0458, "step": 6788 }, { "epoch": 1.3801585688147997, "grad_norm": 0.1508321315050125, "learning_rate": 6.203600122038035e-05, "loss": 0.9055, "step": 6789 }, { "epoch": 1.3803618621671072, "grad_norm": 0.14709822833538055, "learning_rate": 6.201566154784908e-05, "loss": 0.9653, "step": 6790 }, { "epoch": 1.3805651555194145, "grad_norm": 0.15812620520591736, "learning_rate": 6.199532187531781e-05, "loss": 1.0011, "step": 6791 }, { "epoch": 1.3807684488717218, "grad_norm": 0.135645791888237, "learning_rate": 6.197498220278654e-05, "loss": 0.8306, "step": 6792 }, { "epoch": 1.3809717422240293, "grad_norm": 0.15195050835609436, "learning_rate": 6.195464253025528e-05, "loss": 0.9038, "step": 6793 }, { "epoch": 1.3811750355763366, "grad_norm": 0.16135457158088684, "learning_rate": 6.193430285772399e-05, "loss": 1.1356, "step": 6794 }, { "epoch": 1.381378328928644, "grad_norm": 0.16234029829502106, "learning_rate": 6.191396318519272e-05, "loss": 0.9694, "step": 6795 }, { "epoch": 1.3815816222809514, "grad_norm": 0.16726230084896088, "learning_rate": 6.189362351266145e-05, "loss": 1.0675, "step": 6796 }, { "epoch": 1.3817849156332587, "grad_norm": 0.15473772585391998, "learning_rate": 6.187328384013019e-05, "loss": 1.0468, "step": 6797 }, { "epoch": 1.3819882089855662, "grad_norm": 0.1563412994146347, "learning_rate": 6.18529441675989e-05, "loss": 1.0272, "step": 6798 }, { "epoch": 1.3821915023378737, "grad_norm": 0.1610526293516159, "learning_rate": 6.183260449506764e-05, "loss": 1.1039, "step": 6799 }, { "epoch": 1.382394795690181, "grad_norm": 0.1570015698671341, "learning_rate": 6.181226482253636e-05, "loss": 1.0261, "step": 6800 }, { "epoch": 1.3825980890424883, "grad_norm": 0.16958126425743103, "learning_rate": 6.17919251500051e-05, "loss": 1.015, "step": 6801 }, { "epoch": 1.3828013823947956, "grad_norm": 0.15728716552257538, "learning_rate": 6.177158547747381e-05, "loss": 0.9877, "step": 6802 }, { "epoch": 1.383004675747103, "grad_norm": 0.15608420968055725, "learning_rate": 6.175124580494255e-05, "loss": 1.0438, "step": 6803 }, { "epoch": 1.3832079690994106, "grad_norm": 0.1442839503288269, "learning_rate": 6.173090613241127e-05, "loss": 0.9444, "step": 6804 }, { "epoch": 1.3834112624517179, "grad_norm": 0.15385085344314575, "learning_rate": 6.171056645988001e-05, "loss": 0.9501, "step": 6805 }, { "epoch": 1.3836145558040251, "grad_norm": 0.16265326738357544, "learning_rate": 6.169022678734872e-05, "loss": 1.0807, "step": 6806 }, { "epoch": 1.3838178491563327, "grad_norm": 0.1667504757642746, "learning_rate": 6.166988711481746e-05, "loss": 1.0506, "step": 6807 }, { "epoch": 1.38402114250864, "grad_norm": 0.132582888007164, "learning_rate": 6.164954744228618e-05, "loss": 0.9013, "step": 6808 }, { "epoch": 1.3842244358609475, "grad_norm": 0.1588151454925537, "learning_rate": 6.162920776975492e-05, "loss": 1.1206, "step": 6809 }, { "epoch": 1.3844277292132547, "grad_norm": 0.16293421387672424, "learning_rate": 6.160886809722363e-05, "loss": 1.1359, "step": 6810 }, { "epoch": 1.384631022565562, "grad_norm": 0.18560263514518738, "learning_rate": 6.158852842469236e-05, "loss": 1.1687, "step": 6811 }, { "epoch": 1.3848343159178695, "grad_norm": 0.14495523273944855, "learning_rate": 6.15681887521611e-05, "loss": 0.8765, "step": 6812 }, { "epoch": 1.3850376092701768, "grad_norm": 0.16819177567958832, "learning_rate": 6.154784907962982e-05, "loss": 1.1581, "step": 6813 }, { "epoch": 1.3852409026224843, "grad_norm": 0.15601789951324463, "learning_rate": 6.152750940709854e-05, "loss": 1.0977, "step": 6814 }, { "epoch": 1.3854441959747916, "grad_norm": 0.16021013259887695, "learning_rate": 6.150716973456727e-05, "loss": 0.9583, "step": 6815 }, { "epoch": 1.385647489327099, "grad_norm": 0.1587619185447693, "learning_rate": 6.1486830062036e-05, "loss": 1.0329, "step": 6816 }, { "epoch": 1.3858507826794064, "grad_norm": 0.1507323831319809, "learning_rate": 6.146649038950473e-05, "loss": 1.0919, "step": 6817 }, { "epoch": 1.3860540760317137, "grad_norm": 0.1662292331457138, "learning_rate": 6.144615071697346e-05, "loss": 1.0218, "step": 6818 }, { "epoch": 1.3862573693840212, "grad_norm": 0.13790364563465118, "learning_rate": 6.142581104444218e-05, "loss": 0.8317, "step": 6819 }, { "epoch": 1.3864606627363285, "grad_norm": 0.14748938381671906, "learning_rate": 6.140547137191092e-05, "loss": 0.9958, "step": 6820 }, { "epoch": 1.3866639560886358, "grad_norm": 0.14733365178108215, "learning_rate": 6.138513169937964e-05, "loss": 0.9832, "step": 6821 }, { "epoch": 1.3868672494409433, "grad_norm": 0.1622340977191925, "learning_rate": 6.136479202684837e-05, "loss": 1.1285, "step": 6822 }, { "epoch": 1.3870705427932506, "grad_norm": 0.15207748115062714, "learning_rate": 6.134445235431709e-05, "loss": 0.9082, "step": 6823 }, { "epoch": 1.387273836145558, "grad_norm": 0.14232246577739716, "learning_rate": 6.132411268178583e-05, "loss": 0.873, "step": 6824 }, { "epoch": 1.3874771294978654, "grad_norm": 0.1612071394920349, "learning_rate": 6.130377300925455e-05, "loss": 1.061, "step": 6825 }, { "epoch": 1.3876804228501727, "grad_norm": 0.16399390995502472, "learning_rate": 6.128343333672328e-05, "loss": 1.1603, "step": 6826 }, { "epoch": 1.3878837162024802, "grad_norm": 0.14788468182086945, "learning_rate": 6.1263093664192e-05, "loss": 0.9301, "step": 6827 }, { "epoch": 1.3880870095547875, "grad_norm": 0.17127063870429993, "learning_rate": 6.124275399166074e-05, "loss": 1.0749, "step": 6828 }, { "epoch": 1.388290302907095, "grad_norm": 0.15077459812164307, "learning_rate": 6.122241431912946e-05, "loss": 0.939, "step": 6829 }, { "epoch": 1.3884935962594023, "grad_norm": 0.16353972256183624, "learning_rate": 6.120207464659819e-05, "loss": 0.9913, "step": 6830 }, { "epoch": 1.3886968896117096, "grad_norm": 0.1791125386953354, "learning_rate": 6.118173497406691e-05, "loss": 1.2833, "step": 6831 }, { "epoch": 1.388900182964017, "grad_norm": 0.14946220815181732, "learning_rate": 6.116139530153565e-05, "loss": 1.0874, "step": 6832 }, { "epoch": 1.3891034763163246, "grad_norm": 0.1695471853017807, "learning_rate": 6.114105562900438e-05, "loss": 1.0632, "step": 6833 }, { "epoch": 1.3893067696686319, "grad_norm": 0.16645880043506622, "learning_rate": 6.11207159564731e-05, "loss": 1.1748, "step": 6834 }, { "epoch": 1.3895100630209392, "grad_norm": 0.1492735743522644, "learning_rate": 6.110037628394183e-05, "loss": 1.0108, "step": 6835 }, { "epoch": 1.3897133563732467, "grad_norm": 0.1486160010099411, "learning_rate": 6.108003661141056e-05, "loss": 1.0036, "step": 6836 }, { "epoch": 1.389916649725554, "grad_norm": 0.158762589097023, "learning_rate": 6.105969693887929e-05, "loss": 1.1261, "step": 6837 }, { "epoch": 1.3901199430778615, "grad_norm": 0.16165146231651306, "learning_rate": 6.103935726634801e-05, "loss": 1.0346, "step": 6838 }, { "epoch": 1.3903232364301688, "grad_norm": 0.147782564163208, "learning_rate": 6.101901759381674e-05, "loss": 1.0539, "step": 6839 }, { "epoch": 1.390526529782476, "grad_norm": 0.1514325588941574, "learning_rate": 6.0998677921285475e-05, "loss": 1.0151, "step": 6840 }, { "epoch": 1.3907298231347835, "grad_norm": 0.144688218832016, "learning_rate": 6.09783382487542e-05, "loss": 0.8587, "step": 6841 }, { "epoch": 1.3909331164870908, "grad_norm": 0.14820197224617004, "learning_rate": 6.095799857622293e-05, "loss": 0.9625, "step": 6842 }, { "epoch": 1.3911364098393983, "grad_norm": 0.16844014823436737, "learning_rate": 6.0937658903691655e-05, "loss": 1.0624, "step": 6843 }, { "epoch": 1.3913397031917056, "grad_norm": 0.1512283980846405, "learning_rate": 6.0917319231160386e-05, "loss": 1.0486, "step": 6844 }, { "epoch": 1.391542996544013, "grad_norm": 0.13112445175647736, "learning_rate": 6.089697955862911e-05, "loss": 0.8274, "step": 6845 }, { "epoch": 1.3917462898963204, "grad_norm": 0.16186438500881195, "learning_rate": 6.087663988609784e-05, "loss": 1.0592, "step": 6846 }, { "epoch": 1.3919495832486277, "grad_norm": 0.15068422257900238, "learning_rate": 6.0856300213566566e-05, "loss": 0.9117, "step": 6847 }, { "epoch": 1.3921528766009352, "grad_norm": 0.1424170434474945, "learning_rate": 6.08359605410353e-05, "loss": 0.887, "step": 6848 }, { "epoch": 1.3923561699532425, "grad_norm": 0.1514316201210022, "learning_rate": 6.081562086850402e-05, "loss": 0.9935, "step": 6849 }, { "epoch": 1.3925594633055498, "grad_norm": 0.16643765568733215, "learning_rate": 6.079528119597275e-05, "loss": 1.0625, "step": 6850 }, { "epoch": 1.3927627566578573, "grad_norm": 0.16870176792144775, "learning_rate": 6.077494152344148e-05, "loss": 1.1119, "step": 6851 }, { "epoch": 1.3929660500101646, "grad_norm": 0.16826272010803223, "learning_rate": 6.0754601850910195e-05, "loss": 0.9706, "step": 6852 }, { "epoch": 1.3931693433624721, "grad_norm": 0.15877872705459595, "learning_rate": 6.073426217837893e-05, "loss": 1.1201, "step": 6853 }, { "epoch": 1.3933726367147794, "grad_norm": 0.15222519636154175, "learning_rate": 6.071392250584765e-05, "loss": 1.0917, "step": 6854 }, { "epoch": 1.3935759300670867, "grad_norm": 0.15984229743480682, "learning_rate": 6.069358283331639e-05, "loss": 1.0069, "step": 6855 }, { "epoch": 1.3937792234193942, "grad_norm": 0.12499643117189407, "learning_rate": 6.067324316078511e-05, "loss": 0.8104, "step": 6856 }, { "epoch": 1.3939825167717015, "grad_norm": 0.1672174036502838, "learning_rate": 6.0652903488253845e-05, "loss": 1.0665, "step": 6857 }, { "epoch": 1.394185810124009, "grad_norm": 0.15274770557880402, "learning_rate": 6.063256381572256e-05, "loss": 1.0114, "step": 6858 }, { "epoch": 1.3943891034763163, "grad_norm": 0.1640719175338745, "learning_rate": 6.06122241431913e-05, "loss": 1.0346, "step": 6859 }, { "epoch": 1.3945923968286236, "grad_norm": 0.15316323935985565, "learning_rate": 6.059188447066002e-05, "loss": 1.0075, "step": 6860 }, { "epoch": 1.394795690180931, "grad_norm": 0.1497650146484375, "learning_rate": 6.0571544798128756e-05, "loss": 1.032, "step": 6861 }, { "epoch": 1.3949989835332386, "grad_norm": 0.15311315655708313, "learning_rate": 6.0551205125597474e-05, "loss": 0.9394, "step": 6862 }, { "epoch": 1.3952022768855459, "grad_norm": 0.16212143003940582, "learning_rate": 6.053086545306621e-05, "loss": 1.1373, "step": 6863 }, { "epoch": 1.3954055702378532, "grad_norm": 0.1610141098499298, "learning_rate": 6.051052578053493e-05, "loss": 1.068, "step": 6864 }, { "epoch": 1.3956088635901607, "grad_norm": 0.16300232708454132, "learning_rate": 6.049018610800367e-05, "loss": 1.027, "step": 6865 }, { "epoch": 1.395812156942468, "grad_norm": 0.16508182883262634, "learning_rate": 6.0469846435472385e-05, "loss": 1.0181, "step": 6866 }, { "epoch": 1.3960154502947755, "grad_norm": 0.16862276196479797, "learning_rate": 6.044950676294112e-05, "loss": 1.1144, "step": 6867 }, { "epoch": 1.3962187436470828, "grad_norm": 0.16415852308273315, "learning_rate": 6.042916709040984e-05, "loss": 1.1611, "step": 6868 }, { "epoch": 1.39642203699939, "grad_norm": 0.14976157248020172, "learning_rate": 6.040882741787858e-05, "loss": 0.9655, "step": 6869 }, { "epoch": 1.3966253303516976, "grad_norm": 0.15121661126613617, "learning_rate": 6.0388487745347297e-05, "loss": 1.0556, "step": 6870 }, { "epoch": 1.3968286237040048, "grad_norm": 0.14877311885356903, "learning_rate": 6.0368148072816035e-05, "loss": 1.1012, "step": 6871 }, { "epoch": 1.3970319170563124, "grad_norm": 0.17387987673282623, "learning_rate": 6.034780840028475e-05, "loss": 1.0203, "step": 6872 }, { "epoch": 1.3972352104086196, "grad_norm": 0.15571002662181854, "learning_rate": 6.032746872775349e-05, "loss": 0.9764, "step": 6873 }, { "epoch": 1.397438503760927, "grad_norm": 0.16663400828838348, "learning_rate": 6.030712905522221e-05, "loss": 1.0956, "step": 6874 }, { "epoch": 1.3976417971132344, "grad_norm": 0.17007339000701904, "learning_rate": 6.0286789382690946e-05, "loss": 1.0845, "step": 6875 }, { "epoch": 1.3978450904655417, "grad_norm": 0.1560770869255066, "learning_rate": 6.0266449710159664e-05, "loss": 1.0833, "step": 6876 }, { "epoch": 1.3980483838178492, "grad_norm": 0.16780199110507965, "learning_rate": 6.02461100376284e-05, "loss": 1.1923, "step": 6877 }, { "epoch": 1.3982516771701565, "grad_norm": 0.15513817965984344, "learning_rate": 6.0225770365097126e-05, "loss": 1.0929, "step": 6878 }, { "epoch": 1.3984549705224638, "grad_norm": 0.17190799117088318, "learning_rate": 6.020543069256586e-05, "loss": 1.1773, "step": 6879 }, { "epoch": 1.3986582638747713, "grad_norm": 0.14657704532146454, "learning_rate": 6.018509102003458e-05, "loss": 1.0549, "step": 6880 }, { "epoch": 1.3988615572270786, "grad_norm": 0.15406420826911926, "learning_rate": 6.016475134750331e-05, "loss": 1.0151, "step": 6881 }, { "epoch": 1.3990648505793861, "grad_norm": 0.16085892915725708, "learning_rate": 6.014441167497204e-05, "loss": 1.191, "step": 6882 }, { "epoch": 1.3992681439316934, "grad_norm": 0.1476157307624817, "learning_rate": 6.012407200244077e-05, "loss": 0.9507, "step": 6883 }, { "epoch": 1.3994714372840007, "grad_norm": 0.14898930490016937, "learning_rate": 6.010373232990949e-05, "loss": 0.8874, "step": 6884 }, { "epoch": 1.3996747306363082, "grad_norm": 0.13529084622859955, "learning_rate": 6.0083392657378224e-05, "loss": 0.9814, "step": 6885 }, { "epoch": 1.3998780239886155, "grad_norm": 0.16349592804908752, "learning_rate": 6.006305298484695e-05, "loss": 1.21, "step": 6886 }, { "epoch": 1.400081317340923, "grad_norm": 0.14734548330307007, "learning_rate": 6.004271331231568e-05, "loss": 0.9757, "step": 6887 }, { "epoch": 1.4002846106932303, "grad_norm": 0.1508965939283371, "learning_rate": 6.0022373639784405e-05, "loss": 0.9822, "step": 6888 }, { "epoch": 1.4004879040455376, "grad_norm": 0.144801065325737, "learning_rate": 6.0002033967253136e-05, "loss": 1.0354, "step": 6889 }, { "epoch": 1.400691197397845, "grad_norm": 0.1705603152513504, "learning_rate": 5.998169429472186e-05, "loss": 1.0624, "step": 6890 }, { "epoch": 1.4008944907501526, "grad_norm": 0.14195939898490906, "learning_rate": 5.996135462219059e-05, "loss": 0.8946, "step": 6891 }, { "epoch": 1.4010977841024599, "grad_norm": 0.14895643293857574, "learning_rate": 5.9941014949659316e-05, "loss": 1.0709, "step": 6892 }, { "epoch": 1.4013010774547672, "grad_norm": 0.15194135904312134, "learning_rate": 5.992067527712805e-05, "loss": 1.0716, "step": 6893 }, { "epoch": 1.4015043708070747, "grad_norm": 0.16107560694217682, "learning_rate": 5.990033560459677e-05, "loss": 1.0804, "step": 6894 }, { "epoch": 1.401707664159382, "grad_norm": 0.15506263077259064, "learning_rate": 5.987999593206549e-05, "loss": 1.0002, "step": 6895 }, { "epoch": 1.4019109575116895, "grad_norm": 0.14550381898880005, "learning_rate": 5.985965625953423e-05, "loss": 0.9334, "step": 6896 }, { "epoch": 1.4021142508639968, "grad_norm": 0.15243171155452728, "learning_rate": 5.9839316587002945e-05, "loss": 0.9551, "step": 6897 }, { "epoch": 1.402317544216304, "grad_norm": 0.1538599729537964, "learning_rate": 5.981897691447168e-05, "loss": 1.0155, "step": 6898 }, { "epoch": 1.4025208375686116, "grad_norm": 0.1565885841846466, "learning_rate": 5.97986372419404e-05, "loss": 1.0895, "step": 6899 }, { "epoch": 1.4027241309209189, "grad_norm": 0.16014187037944794, "learning_rate": 5.977829756940914e-05, "loss": 1.0441, "step": 6900 }, { "epoch": 1.4029274242732264, "grad_norm": 0.16669133305549622, "learning_rate": 5.9757957896877856e-05, "loss": 1.1586, "step": 6901 }, { "epoch": 1.4031307176255337, "grad_norm": 0.1303495317697525, "learning_rate": 5.9737618224346595e-05, "loss": 0.8572, "step": 6902 }, { "epoch": 1.403334010977841, "grad_norm": 0.14560671150684357, "learning_rate": 5.971727855181531e-05, "loss": 0.9101, "step": 6903 }, { "epoch": 1.4035373043301484, "grad_norm": 0.15325911343097687, "learning_rate": 5.969693887928405e-05, "loss": 1.002, "step": 6904 }, { "epoch": 1.4037405976824557, "grad_norm": 0.16374750435352325, "learning_rate": 5.967659920675277e-05, "loss": 1.0014, "step": 6905 }, { "epoch": 1.4039438910347632, "grad_norm": 0.16725143790245056, "learning_rate": 5.9656259534221506e-05, "loss": 1.1363, "step": 6906 }, { "epoch": 1.4041471843870705, "grad_norm": 0.15079721808433533, "learning_rate": 5.9635919861690224e-05, "loss": 1.085, "step": 6907 }, { "epoch": 1.4043504777393778, "grad_norm": 0.161165252327919, "learning_rate": 5.961558018915896e-05, "loss": 1.0947, "step": 6908 }, { "epoch": 1.4045537710916853, "grad_norm": 0.13901959359645844, "learning_rate": 5.959524051662768e-05, "loss": 0.9887, "step": 6909 }, { "epoch": 1.4047570644439926, "grad_norm": 0.17727665603160858, "learning_rate": 5.957490084409642e-05, "loss": 1.2254, "step": 6910 }, { "epoch": 1.4049603577963001, "grad_norm": 0.16449913382530212, "learning_rate": 5.9554561171565135e-05, "loss": 1.08, "step": 6911 }, { "epoch": 1.4051636511486074, "grad_norm": 0.1533275991678238, "learning_rate": 5.953422149903387e-05, "loss": 1.0263, "step": 6912 }, { "epoch": 1.4053669445009147, "grad_norm": 0.13713502883911133, "learning_rate": 5.951388182650259e-05, "loss": 0.8477, "step": 6913 }, { "epoch": 1.4055702378532222, "grad_norm": 0.15603326261043549, "learning_rate": 5.949354215397133e-05, "loss": 1.0248, "step": 6914 }, { "epoch": 1.4057735312055295, "grad_norm": 0.1701658070087433, "learning_rate": 5.9473202481440046e-05, "loss": 1.0635, "step": 6915 }, { "epoch": 1.405976824557837, "grad_norm": 0.15504860877990723, "learning_rate": 5.9452862808908784e-05, "loss": 0.9031, "step": 6916 }, { "epoch": 1.4061801179101443, "grad_norm": 0.15079347789287567, "learning_rate": 5.94325231363775e-05, "loss": 0.9189, "step": 6917 }, { "epoch": 1.4063834112624516, "grad_norm": 0.15121297538280487, "learning_rate": 5.941218346384624e-05, "loss": 0.9538, "step": 6918 }, { "epoch": 1.406586704614759, "grad_norm": 0.14441393315792084, "learning_rate": 5.939184379131496e-05, "loss": 0.9817, "step": 6919 }, { "epoch": 1.4067899979670666, "grad_norm": 0.15504005551338196, "learning_rate": 5.9371504118783696e-05, "loss": 0.917, "step": 6920 }, { "epoch": 1.406993291319374, "grad_norm": 0.1546851098537445, "learning_rate": 5.9351164446252413e-05, "loss": 1.056, "step": 6921 }, { "epoch": 1.4071965846716812, "grad_norm": 0.14573045074939728, "learning_rate": 5.933082477372115e-05, "loss": 0.8798, "step": 6922 }, { "epoch": 1.4073998780239887, "grad_norm": 0.14356198906898499, "learning_rate": 5.931048510118987e-05, "loss": 0.9946, "step": 6923 }, { "epoch": 1.407603171376296, "grad_norm": 0.15910224616527557, "learning_rate": 5.929014542865861e-05, "loss": 1.0241, "step": 6924 }, { "epoch": 1.4078064647286035, "grad_norm": 0.15715257823467255, "learning_rate": 5.9269805756127325e-05, "loss": 1.094, "step": 6925 }, { "epoch": 1.4080097580809108, "grad_norm": 0.15548628568649292, "learning_rate": 5.924946608359606e-05, "loss": 1.0977, "step": 6926 }, { "epoch": 1.408213051433218, "grad_norm": 0.15223243832588196, "learning_rate": 5.922912641106478e-05, "loss": 1.0118, "step": 6927 }, { "epoch": 1.4084163447855256, "grad_norm": 0.15407241880893707, "learning_rate": 5.920878673853352e-05, "loss": 0.998, "step": 6928 }, { "epoch": 1.4086196381378329, "grad_norm": 0.1569782793521881, "learning_rate": 5.9188447066002236e-05, "loss": 0.9734, "step": 6929 }, { "epoch": 1.4088229314901404, "grad_norm": 0.1538730412721634, "learning_rate": 5.9168107393470974e-05, "loss": 0.9054, "step": 6930 }, { "epoch": 1.4090262248424477, "grad_norm": 0.15854963660240173, "learning_rate": 5.914776772093969e-05, "loss": 0.9483, "step": 6931 }, { "epoch": 1.409229518194755, "grad_norm": 0.1683579981327057, "learning_rate": 5.912742804840843e-05, "loss": 1.0284, "step": 6932 }, { "epoch": 1.4094328115470625, "grad_norm": 0.16705630719661713, "learning_rate": 5.910708837587715e-05, "loss": 1.2061, "step": 6933 }, { "epoch": 1.4096361048993697, "grad_norm": 0.16468504071235657, "learning_rate": 5.9086748703345886e-05, "loss": 1.1124, "step": 6934 }, { "epoch": 1.4098393982516773, "grad_norm": 0.1535244882106781, "learning_rate": 5.90664090308146e-05, "loss": 1.0106, "step": 6935 }, { "epoch": 1.4100426916039845, "grad_norm": 0.16361945867538452, "learning_rate": 5.904606935828333e-05, "loss": 1.0341, "step": 6936 }, { "epoch": 1.4102459849562918, "grad_norm": 0.17501762509346008, "learning_rate": 5.9025729685752066e-05, "loss": 1.0759, "step": 6937 }, { "epoch": 1.4104492783085993, "grad_norm": 0.1443677693605423, "learning_rate": 5.9005390013220784e-05, "loss": 0.9883, "step": 6938 }, { "epoch": 1.4106525716609066, "grad_norm": 0.15969394147396088, "learning_rate": 5.898505034068952e-05, "loss": 1.0426, "step": 6939 }, { "epoch": 1.4108558650132141, "grad_norm": 0.14605602622032166, "learning_rate": 5.896471066815824e-05, "loss": 1.074, "step": 6940 }, { "epoch": 1.4110591583655214, "grad_norm": 0.15779732167720795, "learning_rate": 5.894437099562698e-05, "loss": 0.9753, "step": 6941 }, { "epoch": 1.4112624517178287, "grad_norm": 0.1594490110874176, "learning_rate": 5.8924031323095695e-05, "loss": 0.94, "step": 6942 }, { "epoch": 1.4114657450701362, "grad_norm": 0.15903176367282867, "learning_rate": 5.890369165056443e-05, "loss": 1.0057, "step": 6943 }, { "epoch": 1.4116690384224435, "grad_norm": 0.1548534482717514, "learning_rate": 5.888335197803315e-05, "loss": 0.9543, "step": 6944 }, { "epoch": 1.411872331774751, "grad_norm": 0.15342067182064056, "learning_rate": 5.886301230550189e-05, "loss": 0.9265, "step": 6945 }, { "epoch": 1.4120756251270583, "grad_norm": 0.1446494311094284, "learning_rate": 5.8842672632970606e-05, "loss": 0.777, "step": 6946 }, { "epoch": 1.4122789184793656, "grad_norm": 0.16386401653289795, "learning_rate": 5.8822332960439344e-05, "loss": 1.0756, "step": 6947 }, { "epoch": 1.412482211831673, "grad_norm": 0.16524311900138855, "learning_rate": 5.880199328790806e-05, "loss": 1.0776, "step": 6948 }, { "epoch": 1.4126855051839806, "grad_norm": 0.16265372931957245, "learning_rate": 5.87816536153768e-05, "loss": 1.0153, "step": 6949 }, { "epoch": 1.412888798536288, "grad_norm": 0.1490945667028427, "learning_rate": 5.876131394284552e-05, "loss": 1.0136, "step": 6950 }, { "epoch": 1.4130920918885952, "grad_norm": 0.17091575264930725, "learning_rate": 5.8740974270314256e-05, "loss": 1.1794, "step": 6951 }, { "epoch": 1.4132953852409027, "grad_norm": 0.17527081072330475, "learning_rate": 5.8720634597782973e-05, "loss": 1.1898, "step": 6952 }, { "epoch": 1.41349867859321, "grad_norm": 0.1660010814666748, "learning_rate": 5.870029492525171e-05, "loss": 1.1198, "step": 6953 }, { "epoch": 1.4137019719455175, "grad_norm": 0.17469993233680725, "learning_rate": 5.867995525272043e-05, "loss": 1.2353, "step": 6954 }, { "epoch": 1.4139052652978248, "grad_norm": 0.15284724533557892, "learning_rate": 5.865961558018917e-05, "loss": 1.0579, "step": 6955 }, { "epoch": 1.414108558650132, "grad_norm": 0.15508383512496948, "learning_rate": 5.8639275907657885e-05, "loss": 1.0113, "step": 6956 }, { "epoch": 1.4143118520024396, "grad_norm": 0.16307425498962402, "learning_rate": 5.861893623512662e-05, "loss": 0.9934, "step": 6957 }, { "epoch": 1.4145151453547469, "grad_norm": 0.1423729807138443, "learning_rate": 5.859859656259534e-05, "loss": 0.9477, "step": 6958 }, { "epoch": 1.4147184387070544, "grad_norm": 0.17191347479820251, "learning_rate": 5.857825689006408e-05, "loss": 1.1453, "step": 6959 }, { "epoch": 1.4149217320593617, "grad_norm": 0.14529524743556976, "learning_rate": 5.8557917217532796e-05, "loss": 1.06, "step": 6960 }, { "epoch": 1.415125025411669, "grad_norm": 0.16226617991924286, "learning_rate": 5.8537577545001534e-05, "loss": 1.085, "step": 6961 }, { "epoch": 1.4153283187639765, "grad_norm": 0.15359675884246826, "learning_rate": 5.851723787247025e-05, "loss": 1.0975, "step": 6962 }, { "epoch": 1.4155316121162838, "grad_norm": 0.1587379425764084, "learning_rate": 5.849689819993899e-05, "loss": 1.073, "step": 6963 }, { "epoch": 1.4157349054685913, "grad_norm": 0.14959818124771118, "learning_rate": 5.847655852740771e-05, "loss": 1.0038, "step": 6964 }, { "epoch": 1.4159381988208986, "grad_norm": 0.16169403493404388, "learning_rate": 5.8456218854876446e-05, "loss": 1.038, "step": 6965 }, { "epoch": 1.4161414921732058, "grad_norm": 0.16977471113204956, "learning_rate": 5.843587918234516e-05, "loss": 1.0902, "step": 6966 }, { "epoch": 1.4163447855255134, "grad_norm": 0.18036596477031708, "learning_rate": 5.84155395098139e-05, "loss": 1.3127, "step": 6967 }, { "epoch": 1.4165480788778206, "grad_norm": 0.15857172012329102, "learning_rate": 5.839519983728262e-05, "loss": 0.9297, "step": 6968 }, { "epoch": 1.4167513722301281, "grad_norm": 0.16160105168819427, "learning_rate": 5.837486016475136e-05, "loss": 0.9612, "step": 6969 }, { "epoch": 1.4169546655824354, "grad_norm": 0.15947425365447998, "learning_rate": 5.8354520492220075e-05, "loss": 1.0329, "step": 6970 }, { "epoch": 1.4171579589347427, "grad_norm": 0.15058821439743042, "learning_rate": 5.833418081968881e-05, "loss": 1.0494, "step": 6971 }, { "epoch": 1.4173612522870502, "grad_norm": 0.14137932658195496, "learning_rate": 5.831384114715753e-05, "loss": 0.8643, "step": 6972 }, { "epoch": 1.4175645456393575, "grad_norm": 0.15573449432849884, "learning_rate": 5.829350147462627e-05, "loss": 0.9866, "step": 6973 }, { "epoch": 1.417767838991665, "grad_norm": 0.17434577643871307, "learning_rate": 5.8273161802094986e-05, "loss": 1.0533, "step": 6974 }, { "epoch": 1.4179711323439723, "grad_norm": 0.14981883764266968, "learning_rate": 5.8252822129563724e-05, "loss": 0.9438, "step": 6975 }, { "epoch": 1.4181744256962796, "grad_norm": 0.17315039038658142, "learning_rate": 5.823248245703244e-05, "loss": 1.1614, "step": 6976 }, { "epoch": 1.4183777190485871, "grad_norm": 0.1493794322013855, "learning_rate": 5.8212142784501166e-05, "loss": 0.9655, "step": 6977 }, { "epoch": 1.4185810124008946, "grad_norm": 0.16317151486873627, "learning_rate": 5.81918031119699e-05, "loss": 1.0783, "step": 6978 }, { "epoch": 1.418784305753202, "grad_norm": 0.17804372310638428, "learning_rate": 5.817146343943862e-05, "loss": 1.1467, "step": 6979 }, { "epoch": 1.4189875991055092, "grad_norm": 0.14259546995162964, "learning_rate": 5.815112376690735e-05, "loss": 0.9369, "step": 6980 }, { "epoch": 1.4191908924578167, "grad_norm": 0.16736005246639252, "learning_rate": 5.813078409437608e-05, "loss": 1.0807, "step": 6981 }, { "epoch": 1.419394185810124, "grad_norm": 0.17183224856853485, "learning_rate": 5.811044442184481e-05, "loss": 1.1465, "step": 6982 }, { "epoch": 1.4195974791624315, "grad_norm": 0.16544954478740692, "learning_rate": 5.809010474931353e-05, "loss": 1.0671, "step": 6983 }, { "epoch": 1.4198007725147388, "grad_norm": 0.15119296312332153, "learning_rate": 5.8069765076782265e-05, "loss": 1.0459, "step": 6984 }, { "epoch": 1.420004065867046, "grad_norm": 0.14908432960510254, "learning_rate": 5.804942540425099e-05, "loss": 0.9562, "step": 6985 }, { "epoch": 1.4202073592193536, "grad_norm": 0.15488240122795105, "learning_rate": 5.802908573171972e-05, "loss": 1.0218, "step": 6986 }, { "epoch": 1.4204106525716609, "grad_norm": 0.16753670573234558, "learning_rate": 5.8008746059188445e-05, "loss": 1.0103, "step": 6987 }, { "epoch": 1.4206139459239684, "grad_norm": 0.15095356106758118, "learning_rate": 5.7988406386657176e-05, "loss": 0.9328, "step": 6988 }, { "epoch": 1.4208172392762757, "grad_norm": 0.15303654968738556, "learning_rate": 5.79680667141259e-05, "loss": 1.1052, "step": 6989 }, { "epoch": 1.421020532628583, "grad_norm": 0.15543197095394135, "learning_rate": 5.794772704159463e-05, "loss": 0.9239, "step": 6990 }, { "epoch": 1.4212238259808905, "grad_norm": 0.15274298191070557, "learning_rate": 5.7927387369063356e-05, "loss": 1.0457, "step": 6991 }, { "epoch": 1.4214271193331978, "grad_norm": 0.15703609585762024, "learning_rate": 5.790704769653209e-05, "loss": 0.9673, "step": 6992 }, { "epoch": 1.4216304126855053, "grad_norm": 0.1640772670507431, "learning_rate": 5.788670802400081e-05, "loss": 1.124, "step": 6993 }, { "epoch": 1.4218337060378126, "grad_norm": 0.16401225328445435, "learning_rate": 5.786636835146955e-05, "loss": 1.1243, "step": 6994 }, { "epoch": 1.4220369993901198, "grad_norm": 0.13957446813583374, "learning_rate": 5.784602867893827e-05, "loss": 0.9091, "step": 6995 }, { "epoch": 1.4222402927424274, "grad_norm": 0.14374911785125732, "learning_rate": 5.7825689006407006e-05, "loss": 0.9343, "step": 6996 }, { "epoch": 1.4224435860947346, "grad_norm": 0.1541910171508789, "learning_rate": 5.780534933387572e-05, "loss": 1.0193, "step": 6997 }, { "epoch": 1.4226468794470422, "grad_norm": 0.17299696803092957, "learning_rate": 5.778500966134446e-05, "loss": 1.1652, "step": 6998 }, { "epoch": 1.4228501727993494, "grad_norm": 0.15225568413734436, "learning_rate": 5.776466998881318e-05, "loss": 1.0108, "step": 6999 }, { "epoch": 1.4230534661516567, "grad_norm": 0.17570187151432037, "learning_rate": 5.774433031628192e-05, "loss": 1.2147, "step": 7000 }, { "epoch": 1.4232567595039642, "grad_norm": 0.16006618738174438, "learning_rate": 5.7723990643750635e-05, "loss": 1.044, "step": 7001 }, { "epoch": 1.4234600528562715, "grad_norm": 0.15728577971458435, "learning_rate": 5.770365097121937e-05, "loss": 1.1534, "step": 7002 }, { "epoch": 1.423663346208579, "grad_norm": 0.15255065262317657, "learning_rate": 5.768331129868809e-05, "loss": 0.9349, "step": 7003 }, { "epoch": 1.4238666395608863, "grad_norm": 0.14307983219623566, "learning_rate": 5.766297162615683e-05, "loss": 0.8236, "step": 7004 }, { "epoch": 1.4240699329131936, "grad_norm": 0.16135892271995544, "learning_rate": 5.7642631953625546e-05, "loss": 1.2023, "step": 7005 }, { "epoch": 1.4242732262655011, "grad_norm": 0.15913139283657074, "learning_rate": 5.7622292281094284e-05, "loss": 1.0975, "step": 7006 }, { "epoch": 1.4244765196178086, "grad_norm": 0.14146681129932404, "learning_rate": 5.7601952608563e-05, "loss": 0.9771, "step": 7007 }, { "epoch": 1.424679812970116, "grad_norm": 0.15485510230064392, "learning_rate": 5.758161293603174e-05, "loss": 1.0722, "step": 7008 }, { "epoch": 1.4248831063224232, "grad_norm": 0.1600322425365448, "learning_rate": 5.756127326350046e-05, "loss": 0.9445, "step": 7009 }, { "epoch": 1.4250863996747307, "grad_norm": 0.15899471938610077, "learning_rate": 5.7540933590969195e-05, "loss": 0.9792, "step": 7010 }, { "epoch": 1.425289693027038, "grad_norm": 0.16935110092163086, "learning_rate": 5.752059391843791e-05, "loss": 1.1119, "step": 7011 }, { "epoch": 1.4254929863793455, "grad_norm": 0.1555667668581009, "learning_rate": 5.750025424590665e-05, "loss": 1.01, "step": 7012 }, { "epoch": 1.4256962797316528, "grad_norm": 0.15003176033496857, "learning_rate": 5.747991457337537e-05, "loss": 0.9769, "step": 7013 }, { "epoch": 1.42589957308396, "grad_norm": 0.15961889922618866, "learning_rate": 5.745957490084411e-05, "loss": 0.9535, "step": 7014 }, { "epoch": 1.4261028664362676, "grad_norm": 0.1631239503622055, "learning_rate": 5.7439235228312825e-05, "loss": 0.9852, "step": 7015 }, { "epoch": 1.426306159788575, "grad_norm": 0.16456563770771027, "learning_rate": 5.741889555578156e-05, "loss": 1.0641, "step": 7016 }, { "epoch": 1.4265094531408824, "grad_norm": 0.15899543464183807, "learning_rate": 5.739855588325028e-05, "loss": 0.9772, "step": 7017 }, { "epoch": 1.4267127464931897, "grad_norm": 0.1519063413143158, "learning_rate": 5.7378216210719005e-05, "loss": 0.981, "step": 7018 }, { "epoch": 1.426916039845497, "grad_norm": 0.14312393963336945, "learning_rate": 5.7357876538187736e-05, "loss": 0.9407, "step": 7019 }, { "epoch": 1.4271193331978045, "grad_norm": 0.16814564168453217, "learning_rate": 5.733753686565646e-05, "loss": 1.1335, "step": 7020 }, { "epoch": 1.4273226265501118, "grad_norm": 0.17560997605323792, "learning_rate": 5.731719719312519e-05, "loss": 1.2545, "step": 7021 }, { "epoch": 1.4275259199024193, "grad_norm": 0.16009865701198578, "learning_rate": 5.7296857520593916e-05, "loss": 1.1073, "step": 7022 }, { "epoch": 1.4277292132547266, "grad_norm": 0.14616774022579193, "learning_rate": 5.727651784806265e-05, "loss": 0.9322, "step": 7023 }, { "epoch": 1.4279325066070339, "grad_norm": 0.15338778495788574, "learning_rate": 5.725617817553137e-05, "loss": 1.0607, "step": 7024 }, { "epoch": 1.4281357999593414, "grad_norm": 0.1624404937028885, "learning_rate": 5.72358385030001e-05, "loss": 1.2047, "step": 7025 }, { "epoch": 1.4283390933116487, "grad_norm": 0.1730973869562149, "learning_rate": 5.721549883046883e-05, "loss": 1.0891, "step": 7026 }, { "epoch": 1.4285423866639562, "grad_norm": 0.16130545735359192, "learning_rate": 5.719515915793756e-05, "loss": 1.1156, "step": 7027 }, { "epoch": 1.4287456800162635, "grad_norm": 0.1761913150548935, "learning_rate": 5.717481948540628e-05, "loss": 1.0594, "step": 7028 }, { "epoch": 1.4289489733685707, "grad_norm": 0.15182644128799438, "learning_rate": 5.7154479812875014e-05, "loss": 0.9237, "step": 7029 }, { "epoch": 1.4291522667208783, "grad_norm": 0.1545775681734085, "learning_rate": 5.713414014034374e-05, "loss": 1.0466, "step": 7030 }, { "epoch": 1.4293555600731855, "grad_norm": 0.15703696012496948, "learning_rate": 5.711380046781247e-05, "loss": 0.9723, "step": 7031 }, { "epoch": 1.429558853425493, "grad_norm": 0.15546222031116486, "learning_rate": 5.7093460795281195e-05, "loss": 0.9767, "step": 7032 }, { "epoch": 1.4297621467778003, "grad_norm": 0.16714654862880707, "learning_rate": 5.7073121122749926e-05, "loss": 1.0734, "step": 7033 }, { "epoch": 1.4299654401301076, "grad_norm": 0.15429727733135223, "learning_rate": 5.705278145021865e-05, "loss": 0.9735, "step": 7034 }, { "epoch": 1.4301687334824151, "grad_norm": 0.1618785411119461, "learning_rate": 5.703244177768738e-05, "loss": 1.1881, "step": 7035 }, { "epoch": 1.4303720268347226, "grad_norm": 0.16058021783828735, "learning_rate": 5.7012102105156106e-05, "loss": 1.0477, "step": 7036 }, { "epoch": 1.43057532018703, "grad_norm": 0.15133486688137054, "learning_rate": 5.699176243262484e-05, "loss": 0.9531, "step": 7037 }, { "epoch": 1.4307786135393372, "grad_norm": 0.15109995007514954, "learning_rate": 5.697142276009356e-05, "loss": 1.1346, "step": 7038 }, { "epoch": 1.4309819068916445, "grad_norm": 0.16786424815654755, "learning_rate": 5.695108308756229e-05, "loss": 1.0919, "step": 7039 }, { "epoch": 1.431185200243952, "grad_norm": 0.1384085714817047, "learning_rate": 5.693074341503102e-05, "loss": 0.8499, "step": 7040 }, { "epoch": 1.4313884935962595, "grad_norm": 0.14978425204753876, "learning_rate": 5.691040374249975e-05, "loss": 1.0893, "step": 7041 }, { "epoch": 1.4315917869485668, "grad_norm": 0.1528119444847107, "learning_rate": 5.689006406996847e-05, "loss": 1.0491, "step": 7042 }, { "epoch": 1.431795080300874, "grad_norm": 0.1478254795074463, "learning_rate": 5.6869724397437204e-05, "loss": 0.976, "step": 7043 }, { "epoch": 1.4319983736531816, "grad_norm": 0.16806158423423767, "learning_rate": 5.684938472490593e-05, "loss": 1.1223, "step": 7044 }, { "epoch": 1.432201667005489, "grad_norm": 0.16098909080028534, "learning_rate": 5.682904505237466e-05, "loss": 1.096, "step": 7045 }, { "epoch": 1.4324049603577964, "grad_norm": 0.15493112802505493, "learning_rate": 5.6808705379843384e-05, "loss": 0.9423, "step": 7046 }, { "epoch": 1.4326082537101037, "grad_norm": 0.16899438202381134, "learning_rate": 5.6788365707312116e-05, "loss": 1.1543, "step": 7047 }, { "epoch": 1.432811547062411, "grad_norm": 0.16474638879299164, "learning_rate": 5.676802603478084e-05, "loss": 1.1847, "step": 7048 }, { "epoch": 1.4330148404147185, "grad_norm": 0.15597054362297058, "learning_rate": 5.674768636224957e-05, "loss": 1.1128, "step": 7049 }, { "epoch": 1.4332181337670258, "grad_norm": 0.15467366576194763, "learning_rate": 5.6727346689718296e-05, "loss": 1.0166, "step": 7050 }, { "epoch": 1.4334214271193333, "grad_norm": 0.13610722124576569, "learning_rate": 5.670700701718703e-05, "loss": 0.8721, "step": 7051 }, { "epoch": 1.4336247204716406, "grad_norm": 0.17068330943584442, "learning_rate": 5.668666734465575e-05, "loss": 1.1137, "step": 7052 }, { "epoch": 1.4338280138239479, "grad_norm": 0.15879547595977783, "learning_rate": 5.666632767212449e-05, "loss": 0.9649, "step": 7053 }, { "epoch": 1.4340313071762554, "grad_norm": 0.14434680342674255, "learning_rate": 5.664598799959321e-05, "loss": 1.0354, "step": 7054 }, { "epoch": 1.4342346005285627, "grad_norm": 0.15887115895748138, "learning_rate": 5.6625648327061945e-05, "loss": 1.152, "step": 7055 }, { "epoch": 1.4344378938808702, "grad_norm": 0.15456242859363556, "learning_rate": 5.660530865453066e-05, "loss": 0.9352, "step": 7056 }, { "epoch": 1.4346411872331775, "grad_norm": 0.1485803872346878, "learning_rate": 5.65849689819994e-05, "loss": 1.0388, "step": 7057 }, { "epoch": 1.4348444805854847, "grad_norm": 0.14560282230377197, "learning_rate": 5.656462930946812e-05, "loss": 0.8804, "step": 7058 }, { "epoch": 1.4350477739377923, "grad_norm": 0.16780616343021393, "learning_rate": 5.654428963693686e-05, "loss": 1.2334, "step": 7059 }, { "epoch": 1.4352510672900995, "grad_norm": 0.16232897341251373, "learning_rate": 5.6523949964405574e-05, "loss": 1.0873, "step": 7060 }, { "epoch": 1.435454360642407, "grad_norm": 0.16009031236171722, "learning_rate": 5.65036102918743e-05, "loss": 1.043, "step": 7061 }, { "epoch": 1.4356576539947143, "grad_norm": 0.17730426788330078, "learning_rate": 5.648327061934303e-05, "loss": 1.0882, "step": 7062 }, { "epoch": 1.4358609473470216, "grad_norm": 0.16423147916793823, "learning_rate": 5.6462930946811755e-05, "loss": 1.1773, "step": 7063 }, { "epoch": 1.4360642406993291, "grad_norm": 0.14594976603984833, "learning_rate": 5.6442591274280486e-05, "loss": 0.8925, "step": 7064 }, { "epoch": 1.4362675340516364, "grad_norm": 0.1569969207048416, "learning_rate": 5.642225160174921e-05, "loss": 1.122, "step": 7065 }, { "epoch": 1.436470827403944, "grad_norm": 0.1664332002401352, "learning_rate": 5.640191192921794e-05, "loss": 1.2314, "step": 7066 }, { "epoch": 1.4366741207562512, "grad_norm": 0.1459050178527832, "learning_rate": 5.6381572256686666e-05, "loss": 1.0424, "step": 7067 }, { "epoch": 1.4368774141085585, "grad_norm": 0.14823487401008606, "learning_rate": 5.63612325841554e-05, "loss": 1.0088, "step": 7068 }, { "epoch": 1.437080707460866, "grad_norm": 0.15456153452396393, "learning_rate": 5.634089291162412e-05, "loss": 0.9902, "step": 7069 }, { "epoch": 1.4372840008131735, "grad_norm": 0.15063978731632233, "learning_rate": 5.632055323909285e-05, "loss": 0.9833, "step": 7070 }, { "epoch": 1.4374872941654808, "grad_norm": 0.1326543688774109, "learning_rate": 5.630021356656158e-05, "loss": 0.8563, "step": 7071 }, { "epoch": 1.437690587517788, "grad_norm": 0.1607581377029419, "learning_rate": 5.627987389403031e-05, "loss": 1.1066, "step": 7072 }, { "epoch": 1.4378938808700956, "grad_norm": 0.17142783105373383, "learning_rate": 5.625953422149903e-05, "loss": 1.0698, "step": 7073 }, { "epoch": 1.438097174222403, "grad_norm": 0.1687779724597931, "learning_rate": 5.6239194548967764e-05, "loss": 1.1368, "step": 7074 }, { "epoch": 1.4383004675747104, "grad_norm": 0.15616458654403687, "learning_rate": 5.621885487643649e-05, "loss": 0.9204, "step": 7075 }, { "epoch": 1.4385037609270177, "grad_norm": 0.14602220058441162, "learning_rate": 5.619851520390522e-05, "loss": 0.9276, "step": 7076 }, { "epoch": 1.438707054279325, "grad_norm": 0.14642690122127533, "learning_rate": 5.6178175531373944e-05, "loss": 1.0513, "step": 7077 }, { "epoch": 1.4389103476316325, "grad_norm": 0.14626696705818176, "learning_rate": 5.6157835858842676e-05, "loss": 0.9175, "step": 7078 }, { "epoch": 1.4391136409839398, "grad_norm": 0.14647045731544495, "learning_rate": 5.61374961863114e-05, "loss": 0.8942, "step": 7079 }, { "epoch": 1.4393169343362473, "grad_norm": 0.14955741167068481, "learning_rate": 5.611715651378013e-05, "loss": 0.8223, "step": 7080 }, { "epoch": 1.4395202276885546, "grad_norm": 0.1502169966697693, "learning_rate": 5.6096816841248856e-05, "loss": 0.8907, "step": 7081 }, { "epoch": 1.4397235210408619, "grad_norm": 0.15933147072792053, "learning_rate": 5.607647716871759e-05, "loss": 0.9065, "step": 7082 }, { "epoch": 1.4399268143931694, "grad_norm": 0.14342406392097473, "learning_rate": 5.605613749618631e-05, "loss": 0.8965, "step": 7083 }, { "epoch": 1.4401301077454767, "grad_norm": 0.14823979139328003, "learning_rate": 5.603579782365504e-05, "loss": 0.9602, "step": 7084 }, { "epoch": 1.4403334010977842, "grad_norm": 0.14995329082012177, "learning_rate": 5.601545815112377e-05, "loss": 0.9335, "step": 7085 }, { "epoch": 1.4405366944500915, "grad_norm": 0.15548937022686005, "learning_rate": 5.59951184785925e-05, "loss": 1.0812, "step": 7086 }, { "epoch": 1.4407399878023988, "grad_norm": 0.16731050610542297, "learning_rate": 5.597477880606122e-05, "loss": 1.098, "step": 7087 }, { "epoch": 1.4409432811547063, "grad_norm": 0.16047613322734833, "learning_rate": 5.5954439133529954e-05, "loss": 0.9614, "step": 7088 }, { "epoch": 1.4411465745070136, "grad_norm": 0.15521658957004547, "learning_rate": 5.593409946099868e-05, "loss": 0.9903, "step": 7089 }, { "epoch": 1.441349867859321, "grad_norm": 0.15141330659389496, "learning_rate": 5.591375978846741e-05, "loss": 1.0571, "step": 7090 }, { "epoch": 1.4415531612116284, "grad_norm": 0.16350041329860687, "learning_rate": 5.5893420115936134e-05, "loss": 1.0909, "step": 7091 }, { "epoch": 1.4417564545639356, "grad_norm": 0.15235590934753418, "learning_rate": 5.5873080443404866e-05, "loss": 1.0361, "step": 7092 }, { "epoch": 1.4419597479162432, "grad_norm": 0.16429497301578522, "learning_rate": 5.585274077087359e-05, "loss": 1.1117, "step": 7093 }, { "epoch": 1.4421630412685504, "grad_norm": 0.14635255932807922, "learning_rate": 5.583240109834232e-05, "loss": 0.8734, "step": 7094 }, { "epoch": 1.442366334620858, "grad_norm": 0.16316108405590057, "learning_rate": 5.5812061425811046e-05, "loss": 0.9637, "step": 7095 }, { "epoch": 1.4425696279731652, "grad_norm": 0.16636328399181366, "learning_rate": 5.579172175327978e-05, "loss": 1.2268, "step": 7096 }, { "epoch": 1.4427729213254725, "grad_norm": 0.1398182213306427, "learning_rate": 5.57713820807485e-05, "loss": 0.9425, "step": 7097 }, { "epoch": 1.44297621467778, "grad_norm": 0.17444168031215668, "learning_rate": 5.575104240821723e-05, "loss": 1.0043, "step": 7098 }, { "epoch": 1.4431795080300875, "grad_norm": 0.14904524385929108, "learning_rate": 5.573070273568596e-05, "loss": 0.9592, "step": 7099 }, { "epoch": 1.4433828013823948, "grad_norm": 0.17846673727035522, "learning_rate": 5.571036306315469e-05, "loss": 1.1698, "step": 7100 }, { "epoch": 1.4435860947347021, "grad_norm": 0.16818009316921234, "learning_rate": 5.569002339062341e-05, "loss": 1.0848, "step": 7101 }, { "epoch": 1.4437893880870096, "grad_norm": 0.1551516056060791, "learning_rate": 5.566968371809214e-05, "loss": 1.0045, "step": 7102 }, { "epoch": 1.443992681439317, "grad_norm": 0.17592954635620117, "learning_rate": 5.564934404556087e-05, "loss": 1.0643, "step": 7103 }, { "epoch": 1.4441959747916244, "grad_norm": 0.15838932991027832, "learning_rate": 5.562900437302959e-05, "loss": 1.1457, "step": 7104 }, { "epoch": 1.4443992681439317, "grad_norm": 0.17779286205768585, "learning_rate": 5.5608664700498324e-05, "loss": 1.0554, "step": 7105 }, { "epoch": 1.444602561496239, "grad_norm": 0.14753217995166779, "learning_rate": 5.558832502796705e-05, "loss": 0.9007, "step": 7106 }, { "epoch": 1.4448058548485465, "grad_norm": 0.15274490416049957, "learning_rate": 5.556798535543578e-05, "loss": 0.9968, "step": 7107 }, { "epoch": 1.4450091482008538, "grad_norm": 0.1444222331047058, "learning_rate": 5.5547645682904504e-05, "loss": 0.899, "step": 7108 }, { "epoch": 1.4452124415531613, "grad_norm": 0.16003580391407013, "learning_rate": 5.5527306010373236e-05, "loss": 1.0092, "step": 7109 }, { "epoch": 1.4454157349054686, "grad_norm": 0.16005735099315643, "learning_rate": 5.550696633784196e-05, "loss": 1.1286, "step": 7110 }, { "epoch": 1.4456190282577759, "grad_norm": 0.16826081275939941, "learning_rate": 5.548662666531069e-05, "loss": 1.034, "step": 7111 }, { "epoch": 1.4458223216100834, "grad_norm": 0.1600271761417389, "learning_rate": 5.5466286992779416e-05, "loss": 1.1682, "step": 7112 }, { "epoch": 1.4460256149623907, "grad_norm": 0.1603696346282959, "learning_rate": 5.544594732024815e-05, "loss": 1.04, "step": 7113 }, { "epoch": 1.4462289083146982, "grad_norm": 0.149916410446167, "learning_rate": 5.542560764771687e-05, "loss": 0.8601, "step": 7114 }, { "epoch": 1.4464322016670055, "grad_norm": 0.14548246562480927, "learning_rate": 5.54052679751856e-05, "loss": 0.9762, "step": 7115 }, { "epoch": 1.4466354950193128, "grad_norm": 0.16768839955329895, "learning_rate": 5.538492830265433e-05, "loss": 1.0964, "step": 7116 }, { "epoch": 1.4468387883716203, "grad_norm": 0.1450057178735733, "learning_rate": 5.536458863012306e-05, "loss": 0.8969, "step": 7117 }, { "epoch": 1.4470420817239276, "grad_norm": 0.15958139300346375, "learning_rate": 5.534424895759178e-05, "loss": 1.1208, "step": 7118 }, { "epoch": 1.447245375076235, "grad_norm": 0.13964813947677612, "learning_rate": 5.5323909285060514e-05, "loss": 0.8724, "step": 7119 }, { "epoch": 1.4474486684285424, "grad_norm": 0.14848533272743225, "learning_rate": 5.530356961252924e-05, "loss": 0.9282, "step": 7120 }, { "epoch": 1.4476519617808496, "grad_norm": 0.14798136055469513, "learning_rate": 5.528322993999797e-05, "loss": 1.087, "step": 7121 }, { "epoch": 1.4478552551331572, "grad_norm": 0.1584753543138504, "learning_rate": 5.5262890267466694e-05, "loss": 1.1091, "step": 7122 }, { "epoch": 1.4480585484854644, "grad_norm": 0.1623314768075943, "learning_rate": 5.5242550594935425e-05, "loss": 1.1573, "step": 7123 }, { "epoch": 1.448261841837772, "grad_norm": 0.15471680462360382, "learning_rate": 5.522221092240415e-05, "loss": 0.944, "step": 7124 }, { "epoch": 1.4484651351900792, "grad_norm": 0.1478460431098938, "learning_rate": 5.520187124987288e-05, "loss": 0.9436, "step": 7125 }, { "epoch": 1.4486684285423865, "grad_norm": 0.16853754222393036, "learning_rate": 5.5181531577341606e-05, "loss": 1.0439, "step": 7126 }, { "epoch": 1.448871721894694, "grad_norm": 0.16511282324790955, "learning_rate": 5.516119190481034e-05, "loss": 1.0378, "step": 7127 }, { "epoch": 1.4490750152470016, "grad_norm": 0.16189835965633392, "learning_rate": 5.514085223227906e-05, "loss": 1.0768, "step": 7128 }, { "epoch": 1.4492783085993088, "grad_norm": 0.16869410872459412, "learning_rate": 5.512051255974779e-05, "loss": 1.011, "step": 7129 }, { "epoch": 1.4494816019516161, "grad_norm": 0.15597373247146606, "learning_rate": 5.510017288721652e-05, "loss": 0.9521, "step": 7130 }, { "epoch": 1.4496848953039236, "grad_norm": 0.14260385930538177, "learning_rate": 5.507983321468525e-05, "loss": 0.8399, "step": 7131 }, { "epoch": 1.449888188656231, "grad_norm": 0.14267964661121368, "learning_rate": 5.505949354215397e-05, "loss": 0.925, "step": 7132 }, { "epoch": 1.4500914820085384, "grad_norm": 0.1477990448474884, "learning_rate": 5.5039153869622704e-05, "loss": 0.9636, "step": 7133 }, { "epoch": 1.4502947753608457, "grad_norm": 0.16236381232738495, "learning_rate": 5.501881419709143e-05, "loss": 1.1057, "step": 7134 }, { "epoch": 1.450498068713153, "grad_norm": 0.1601257473230362, "learning_rate": 5.499847452456016e-05, "loss": 1.1647, "step": 7135 }, { "epoch": 1.4507013620654605, "grad_norm": 0.15447348356246948, "learning_rate": 5.4978134852028884e-05, "loss": 1.0227, "step": 7136 }, { "epoch": 1.4509046554177678, "grad_norm": 0.15148869156837463, "learning_rate": 5.4957795179497615e-05, "loss": 0.9926, "step": 7137 }, { "epoch": 1.4511079487700753, "grad_norm": 0.1739932894706726, "learning_rate": 5.493745550696634e-05, "loss": 1.1065, "step": 7138 }, { "epoch": 1.4513112421223826, "grad_norm": 0.14690914750099182, "learning_rate": 5.491711583443507e-05, "loss": 0.9786, "step": 7139 }, { "epoch": 1.45151453547469, "grad_norm": 0.15842650830745697, "learning_rate": 5.4896776161903796e-05, "loss": 1.06, "step": 7140 }, { "epoch": 1.4517178288269974, "grad_norm": 0.1690451055765152, "learning_rate": 5.487643648937253e-05, "loss": 1.0769, "step": 7141 }, { "epoch": 1.4519211221793047, "grad_norm": 0.14513088762760162, "learning_rate": 5.485609681684125e-05, "loss": 0.9437, "step": 7142 }, { "epoch": 1.4521244155316122, "grad_norm": 0.15545345842838287, "learning_rate": 5.4835757144309976e-05, "loss": 0.8904, "step": 7143 }, { "epoch": 1.4523277088839195, "grad_norm": 0.1784391701221466, "learning_rate": 5.481541747177871e-05, "loss": 1.2087, "step": 7144 }, { "epoch": 1.4525310022362268, "grad_norm": 0.14300981163978577, "learning_rate": 5.479507779924743e-05, "loss": 0.9506, "step": 7145 }, { "epoch": 1.4527342955885343, "grad_norm": 0.18284142017364502, "learning_rate": 5.477473812671616e-05, "loss": 1.178, "step": 7146 }, { "epoch": 1.4529375889408416, "grad_norm": 0.16167539358139038, "learning_rate": 5.475439845418489e-05, "loss": 0.9989, "step": 7147 }, { "epoch": 1.453140882293149, "grad_norm": 0.17664743959903717, "learning_rate": 5.473405878165362e-05, "loss": 1.0911, "step": 7148 }, { "epoch": 1.4533441756454564, "grad_norm": 0.17195507884025574, "learning_rate": 5.471371910912234e-05, "loss": 1.2898, "step": 7149 }, { "epoch": 1.4535474689977637, "grad_norm": 0.15981166064739227, "learning_rate": 5.4693379436591074e-05, "loss": 1.0563, "step": 7150 }, { "epoch": 1.4537507623500712, "grad_norm": 0.16478121280670166, "learning_rate": 5.46730397640598e-05, "loss": 1.0171, "step": 7151 }, { "epoch": 1.4539540557023785, "grad_norm": 0.1612006425857544, "learning_rate": 5.465270009152853e-05, "loss": 0.9867, "step": 7152 }, { "epoch": 1.454157349054686, "grad_norm": 0.15977860987186432, "learning_rate": 5.4632360418997254e-05, "loss": 1.0997, "step": 7153 }, { "epoch": 1.4543606424069933, "grad_norm": 0.16326823830604553, "learning_rate": 5.4612020746465985e-05, "loss": 0.881, "step": 7154 }, { "epoch": 1.4545639357593005, "grad_norm": 0.13994139432907104, "learning_rate": 5.459168107393471e-05, "loss": 0.9196, "step": 7155 }, { "epoch": 1.454767229111608, "grad_norm": 0.1567743569612503, "learning_rate": 5.457134140140344e-05, "loss": 1.0773, "step": 7156 }, { "epoch": 1.4549705224639156, "grad_norm": 0.15339693427085876, "learning_rate": 5.4551001728872166e-05, "loss": 0.9771, "step": 7157 }, { "epoch": 1.4551738158162228, "grad_norm": 0.1575535386800766, "learning_rate": 5.45306620563409e-05, "loss": 1.0254, "step": 7158 }, { "epoch": 1.4553771091685301, "grad_norm": 0.14848507940769196, "learning_rate": 5.451032238380962e-05, "loss": 0.9372, "step": 7159 }, { "epoch": 1.4555804025208376, "grad_norm": 0.14759337902069092, "learning_rate": 5.448998271127835e-05, "loss": 1.083, "step": 7160 }, { "epoch": 1.455783695873145, "grad_norm": 0.15212364494800568, "learning_rate": 5.446964303874708e-05, "loss": 1.0207, "step": 7161 }, { "epoch": 1.4559869892254524, "grad_norm": 0.16381527483463287, "learning_rate": 5.444930336621581e-05, "loss": 1.056, "step": 7162 }, { "epoch": 1.4561902825777597, "grad_norm": 0.14709602296352386, "learning_rate": 5.442896369368453e-05, "loss": 1.0069, "step": 7163 }, { "epoch": 1.456393575930067, "grad_norm": 0.1730957329273224, "learning_rate": 5.4408624021153264e-05, "loss": 1.2098, "step": 7164 }, { "epoch": 1.4565968692823745, "grad_norm": 0.16779474914073944, "learning_rate": 5.438828434862199e-05, "loss": 1.025, "step": 7165 }, { "epoch": 1.4568001626346818, "grad_norm": 0.15644671022891998, "learning_rate": 5.436794467609072e-05, "loss": 1.1397, "step": 7166 }, { "epoch": 1.4570034559869893, "grad_norm": 0.16888047754764557, "learning_rate": 5.4347605003559444e-05, "loss": 1.0564, "step": 7167 }, { "epoch": 1.4572067493392966, "grad_norm": 0.14582239091396332, "learning_rate": 5.4327265331028175e-05, "loss": 1.0946, "step": 7168 }, { "epoch": 1.457410042691604, "grad_norm": 0.17389684915542603, "learning_rate": 5.43069256584969e-05, "loss": 1.1177, "step": 7169 }, { "epoch": 1.4576133360439114, "grad_norm": 0.15936750173568726, "learning_rate": 5.428658598596563e-05, "loss": 0.8842, "step": 7170 }, { "epoch": 1.4578166293962187, "grad_norm": 0.14537377655506134, "learning_rate": 5.4266246313434355e-05, "loss": 0.9604, "step": 7171 }, { "epoch": 1.4580199227485262, "grad_norm": 0.1616072803735733, "learning_rate": 5.424590664090309e-05, "loss": 1.1369, "step": 7172 }, { "epoch": 1.4582232161008335, "grad_norm": 0.1477457731962204, "learning_rate": 5.422556696837181e-05, "loss": 0.9956, "step": 7173 }, { "epoch": 1.4584265094531408, "grad_norm": 0.1592814177274704, "learning_rate": 5.420522729584054e-05, "loss": 1.0028, "step": 7174 }, { "epoch": 1.4586298028054483, "grad_norm": 0.1648184210062027, "learning_rate": 5.418488762330927e-05, "loss": 1.0978, "step": 7175 }, { "epoch": 1.4588330961577556, "grad_norm": 0.15443973243236542, "learning_rate": 5.4164547950778e-05, "loss": 1.0634, "step": 7176 }, { "epoch": 1.459036389510063, "grad_norm": 0.15998506546020508, "learning_rate": 5.414420827824672e-05, "loss": 1.0185, "step": 7177 }, { "epoch": 1.4592396828623704, "grad_norm": 0.14759159088134766, "learning_rate": 5.4123868605715454e-05, "loss": 0.9069, "step": 7178 }, { "epoch": 1.4594429762146777, "grad_norm": 0.15868420898914337, "learning_rate": 5.410352893318418e-05, "loss": 1.0571, "step": 7179 }, { "epoch": 1.4596462695669852, "grad_norm": 0.15032470226287842, "learning_rate": 5.408318926065291e-05, "loss": 0.9006, "step": 7180 }, { "epoch": 1.4598495629192925, "grad_norm": 0.16723573207855225, "learning_rate": 5.4062849588121634e-05, "loss": 1.0758, "step": 7181 }, { "epoch": 1.4600528562716, "grad_norm": 0.17118899524211884, "learning_rate": 5.4042509915590365e-05, "loss": 1.0273, "step": 7182 }, { "epoch": 1.4602561496239073, "grad_norm": 0.17768427729606628, "learning_rate": 5.402217024305909e-05, "loss": 1.1246, "step": 7183 }, { "epoch": 1.4604594429762146, "grad_norm": 0.14075639843940735, "learning_rate": 5.4001830570527814e-05, "loss": 0.874, "step": 7184 }, { "epoch": 1.460662736328522, "grad_norm": 0.15958434343338013, "learning_rate": 5.3981490897996545e-05, "loss": 1.0438, "step": 7185 }, { "epoch": 1.4608660296808296, "grad_norm": 0.14858020842075348, "learning_rate": 5.396115122546527e-05, "loss": 1.0044, "step": 7186 }, { "epoch": 1.4610693230331369, "grad_norm": 0.15487819910049438, "learning_rate": 5.3940811552934e-05, "loss": 1.0189, "step": 7187 }, { "epoch": 1.4612726163854441, "grad_norm": 0.14576295018196106, "learning_rate": 5.3920471880402726e-05, "loss": 0.998, "step": 7188 }, { "epoch": 1.4614759097377517, "grad_norm": 0.1598920375108719, "learning_rate": 5.390013220787146e-05, "loss": 1.0848, "step": 7189 }, { "epoch": 1.461679203090059, "grad_norm": 0.18071123957633972, "learning_rate": 5.387979253534018e-05, "loss": 1.274, "step": 7190 }, { "epoch": 1.4618824964423665, "grad_norm": 0.15929163992404938, "learning_rate": 5.385945286280891e-05, "loss": 1.0846, "step": 7191 }, { "epoch": 1.4620857897946737, "grad_norm": 0.17697520554065704, "learning_rate": 5.383911319027764e-05, "loss": 1.2681, "step": 7192 }, { "epoch": 1.462289083146981, "grad_norm": 0.1605558842420578, "learning_rate": 5.381877351774637e-05, "loss": 1.0593, "step": 7193 }, { "epoch": 1.4624923764992885, "grad_norm": 0.16298873722553253, "learning_rate": 5.379843384521509e-05, "loss": 1.0556, "step": 7194 }, { "epoch": 1.4626956698515958, "grad_norm": 0.1575620025396347, "learning_rate": 5.3778094172683824e-05, "loss": 0.9694, "step": 7195 }, { "epoch": 1.4628989632039033, "grad_norm": 0.14187492430210114, "learning_rate": 5.375775450015255e-05, "loss": 0.8606, "step": 7196 }, { "epoch": 1.4631022565562106, "grad_norm": 0.13117793202400208, "learning_rate": 5.373741482762128e-05, "loss": 0.9196, "step": 7197 }, { "epoch": 1.463305549908518, "grad_norm": 0.153340682387352, "learning_rate": 5.3717075155090004e-05, "loss": 0.9895, "step": 7198 }, { "epoch": 1.4635088432608254, "grad_norm": 0.15271683037281036, "learning_rate": 5.3696735482558735e-05, "loss": 1.0878, "step": 7199 }, { "epoch": 1.4637121366131327, "grad_norm": 0.1467808485031128, "learning_rate": 5.367639581002746e-05, "loss": 0.8338, "step": 7200 }, { "epoch": 1.4639154299654402, "grad_norm": 0.15968874096870422, "learning_rate": 5.365605613749619e-05, "loss": 0.853, "step": 7201 }, { "epoch": 1.4641187233177475, "grad_norm": 0.16309960186481476, "learning_rate": 5.3635716464964915e-05, "loss": 1.1147, "step": 7202 }, { "epoch": 1.4643220166700548, "grad_norm": 0.14612539112567902, "learning_rate": 5.361537679243365e-05, "loss": 0.9917, "step": 7203 }, { "epoch": 1.4645253100223623, "grad_norm": 0.15759453177452087, "learning_rate": 5.359503711990237e-05, "loss": 0.8709, "step": 7204 }, { "epoch": 1.4647286033746696, "grad_norm": 0.14185784757137299, "learning_rate": 5.35746974473711e-05, "loss": 0.828, "step": 7205 }, { "epoch": 1.464931896726977, "grad_norm": 0.15282003581523895, "learning_rate": 5.355435777483983e-05, "loss": 1.0519, "step": 7206 }, { "epoch": 1.4651351900792844, "grad_norm": 0.1477069854736328, "learning_rate": 5.353401810230856e-05, "loss": 0.8896, "step": 7207 }, { "epoch": 1.4653384834315917, "grad_norm": 0.14850087463855743, "learning_rate": 5.351367842977728e-05, "loss": 0.9474, "step": 7208 }, { "epoch": 1.4655417767838992, "grad_norm": 0.16145406663417816, "learning_rate": 5.3493338757246014e-05, "loss": 1.0498, "step": 7209 }, { "epoch": 1.4657450701362065, "grad_norm": 0.14072728157043457, "learning_rate": 5.347299908471474e-05, "loss": 0.9445, "step": 7210 }, { "epoch": 1.465948363488514, "grad_norm": 0.13681919872760773, "learning_rate": 5.345265941218347e-05, "loss": 0.8509, "step": 7211 }, { "epoch": 1.4661516568408213, "grad_norm": 0.16994976997375488, "learning_rate": 5.3432319739652194e-05, "loss": 1.093, "step": 7212 }, { "epoch": 1.4663549501931286, "grad_norm": 0.15965984761714935, "learning_rate": 5.3411980067120925e-05, "loss": 0.929, "step": 7213 }, { "epoch": 1.466558243545436, "grad_norm": 0.15343396365642548, "learning_rate": 5.339164039458965e-05, "loss": 0.9733, "step": 7214 }, { "epoch": 1.4667615368977436, "grad_norm": 0.1533103734254837, "learning_rate": 5.337130072205838e-05, "loss": 0.9694, "step": 7215 }, { "epoch": 1.4669648302500509, "grad_norm": 0.14872144162654877, "learning_rate": 5.3350961049527105e-05, "loss": 1.0066, "step": 7216 }, { "epoch": 1.4671681236023582, "grad_norm": 0.1545114368200302, "learning_rate": 5.3330621376995837e-05, "loss": 1.0491, "step": 7217 }, { "epoch": 1.4673714169546657, "grad_norm": 0.1492878943681717, "learning_rate": 5.331028170446456e-05, "loss": 0.9975, "step": 7218 }, { "epoch": 1.467574710306973, "grad_norm": 0.14209310710430145, "learning_rate": 5.328994203193329e-05, "loss": 0.9145, "step": 7219 }, { "epoch": 1.4677780036592805, "grad_norm": 0.14603732526302338, "learning_rate": 5.326960235940202e-05, "loss": 0.9558, "step": 7220 }, { "epoch": 1.4679812970115877, "grad_norm": 0.161206915974617, "learning_rate": 5.324926268687075e-05, "loss": 1.0212, "step": 7221 }, { "epoch": 1.468184590363895, "grad_norm": 0.18879006803035736, "learning_rate": 5.322892301433947e-05, "loss": 1.1686, "step": 7222 }, { "epoch": 1.4683878837162025, "grad_norm": 0.16243679821491241, "learning_rate": 5.3208583341808204e-05, "loss": 1.1121, "step": 7223 }, { "epoch": 1.4685911770685098, "grad_norm": 0.15002648532390594, "learning_rate": 5.318824366927693e-05, "loss": 0.9707, "step": 7224 }, { "epoch": 1.4687944704208173, "grad_norm": 0.167652428150177, "learning_rate": 5.316790399674566e-05, "loss": 1.2484, "step": 7225 }, { "epoch": 1.4689977637731246, "grad_norm": 0.15510743856430054, "learning_rate": 5.3147564324214384e-05, "loss": 1.0713, "step": 7226 }, { "epoch": 1.469201057125432, "grad_norm": 0.14573627710342407, "learning_rate": 5.312722465168311e-05, "loss": 0.9877, "step": 7227 }, { "epoch": 1.4694043504777394, "grad_norm": 0.15572385489940643, "learning_rate": 5.310688497915184e-05, "loss": 1.0008, "step": 7228 }, { "epoch": 1.4696076438300467, "grad_norm": 0.1423737108707428, "learning_rate": 5.3086545306620564e-05, "loss": 0.8672, "step": 7229 }, { "epoch": 1.4698109371823542, "grad_norm": 0.16984347999095917, "learning_rate": 5.3066205634089295e-05, "loss": 1.0749, "step": 7230 }, { "epoch": 1.4700142305346615, "grad_norm": 0.15581457316875458, "learning_rate": 5.304586596155802e-05, "loss": 0.9514, "step": 7231 }, { "epoch": 1.4702175238869688, "grad_norm": 0.16326789557933807, "learning_rate": 5.302552628902675e-05, "loss": 0.9513, "step": 7232 }, { "epoch": 1.4704208172392763, "grad_norm": 0.1650605946779251, "learning_rate": 5.3005186616495475e-05, "loss": 0.964, "step": 7233 }, { "epoch": 1.4706241105915836, "grad_norm": 0.17731869220733643, "learning_rate": 5.2984846943964207e-05, "loss": 1.0705, "step": 7234 }, { "epoch": 1.470827403943891, "grad_norm": 0.14582465589046478, "learning_rate": 5.296450727143293e-05, "loss": 0.9405, "step": 7235 }, { "epoch": 1.4710306972961984, "grad_norm": 0.158736914396286, "learning_rate": 5.294416759890166e-05, "loss": 1.0272, "step": 7236 }, { "epoch": 1.4712339906485057, "grad_norm": 0.13593992590904236, "learning_rate": 5.292382792637039e-05, "loss": 0.9333, "step": 7237 }, { "epoch": 1.4714372840008132, "grad_norm": 0.17388807237148285, "learning_rate": 5.290348825383912e-05, "loss": 1.0486, "step": 7238 }, { "epoch": 1.4716405773531205, "grad_norm": 0.14610832929611206, "learning_rate": 5.288314858130784e-05, "loss": 0.7977, "step": 7239 }, { "epoch": 1.471843870705428, "grad_norm": 0.16025376319885254, "learning_rate": 5.2862808908776574e-05, "loss": 0.9542, "step": 7240 }, { "epoch": 1.4720471640577353, "grad_norm": 0.15763407945632935, "learning_rate": 5.28424692362453e-05, "loss": 1.0019, "step": 7241 }, { "epoch": 1.4722504574100426, "grad_norm": 0.1741354763507843, "learning_rate": 5.282212956371403e-05, "loss": 1.1706, "step": 7242 }, { "epoch": 1.47245375076235, "grad_norm": 0.14979106187820435, "learning_rate": 5.2801789891182754e-05, "loss": 0.9875, "step": 7243 }, { "epoch": 1.4726570441146576, "grad_norm": 0.15838104486465454, "learning_rate": 5.2781450218651485e-05, "loss": 0.9881, "step": 7244 }, { "epoch": 1.4728603374669649, "grad_norm": 0.17783360183238983, "learning_rate": 5.276111054612021e-05, "loss": 1.1805, "step": 7245 }, { "epoch": 1.4730636308192722, "grad_norm": 0.15877732634544373, "learning_rate": 5.274077087358894e-05, "loss": 1.0377, "step": 7246 }, { "epoch": 1.4732669241715795, "grad_norm": 0.159766286611557, "learning_rate": 5.2720431201057665e-05, "loss": 1.0324, "step": 7247 }, { "epoch": 1.473470217523887, "grad_norm": 0.1405743509531021, "learning_rate": 5.2700091528526396e-05, "loss": 0.8722, "step": 7248 }, { "epoch": 1.4736735108761945, "grad_norm": 0.1469542533159256, "learning_rate": 5.267975185599512e-05, "loss": 1.0329, "step": 7249 }, { "epoch": 1.4738768042285018, "grad_norm": 0.14367324113845825, "learning_rate": 5.265941218346385e-05, "loss": 0.9612, "step": 7250 }, { "epoch": 1.474080097580809, "grad_norm": 0.17195919156074524, "learning_rate": 5.263907251093258e-05, "loss": 1.1384, "step": 7251 }, { "epoch": 1.4742833909331166, "grad_norm": 0.1590740978717804, "learning_rate": 5.261873283840131e-05, "loss": 1.0834, "step": 7252 }, { "epoch": 1.4744866842854238, "grad_norm": 0.17466309666633606, "learning_rate": 5.259839316587003e-05, "loss": 1.0824, "step": 7253 }, { "epoch": 1.4746899776377314, "grad_norm": 0.15977461636066437, "learning_rate": 5.2578053493338764e-05, "loss": 1.0415, "step": 7254 }, { "epoch": 1.4748932709900386, "grad_norm": 0.15965703129768372, "learning_rate": 5.255771382080749e-05, "loss": 1.0812, "step": 7255 }, { "epoch": 1.475096564342346, "grad_norm": 0.17108100652694702, "learning_rate": 5.253737414827622e-05, "loss": 1.2236, "step": 7256 }, { "epoch": 1.4752998576946534, "grad_norm": 0.1626596599817276, "learning_rate": 5.2517034475744944e-05, "loss": 1.1297, "step": 7257 }, { "epoch": 1.4755031510469607, "grad_norm": 0.16700440645217896, "learning_rate": 5.2496694803213675e-05, "loss": 1.1175, "step": 7258 }, { "epoch": 1.4757064443992682, "grad_norm": 0.15646840631961823, "learning_rate": 5.24763551306824e-05, "loss": 0.9831, "step": 7259 }, { "epoch": 1.4759097377515755, "grad_norm": 0.14843416213989258, "learning_rate": 5.245601545815113e-05, "loss": 0.9445, "step": 7260 }, { "epoch": 1.4761130311038828, "grad_norm": 0.13760924339294434, "learning_rate": 5.2435675785619855e-05, "loss": 0.9984, "step": 7261 }, { "epoch": 1.4763163244561903, "grad_norm": 0.16428524255752563, "learning_rate": 5.2415336113088586e-05, "loss": 1.0017, "step": 7262 }, { "epoch": 1.4765196178084976, "grad_norm": 0.14717566967010498, "learning_rate": 5.239499644055731e-05, "loss": 0.8459, "step": 7263 }, { "epoch": 1.4767229111608051, "grad_norm": 0.16615287959575653, "learning_rate": 5.237465676802604e-05, "loss": 1.0156, "step": 7264 }, { "epoch": 1.4769262045131124, "grad_norm": 0.14632081985473633, "learning_rate": 5.2354317095494767e-05, "loss": 0.9946, "step": 7265 }, { "epoch": 1.4771294978654197, "grad_norm": 0.1523721069097519, "learning_rate": 5.23339774229635e-05, "loss": 0.9279, "step": 7266 }, { "epoch": 1.4773327912177272, "grad_norm": 0.16455121338367462, "learning_rate": 5.231363775043222e-05, "loss": 1.1135, "step": 7267 }, { "epoch": 1.4775360845700345, "grad_norm": 0.1441405862569809, "learning_rate": 5.229329807790094e-05, "loss": 0.902, "step": 7268 }, { "epoch": 1.477739377922342, "grad_norm": 0.1584034264087677, "learning_rate": 5.227295840536968e-05, "loss": 1.0865, "step": 7269 }, { "epoch": 1.4779426712746493, "grad_norm": 0.14937792718410492, "learning_rate": 5.2252618732838396e-05, "loss": 1.0402, "step": 7270 }, { "epoch": 1.4781459646269566, "grad_norm": 0.15211902558803558, "learning_rate": 5.2232279060307134e-05, "loss": 1.1008, "step": 7271 }, { "epoch": 1.478349257979264, "grad_norm": 0.1474466472864151, "learning_rate": 5.221193938777585e-05, "loss": 0.9956, "step": 7272 }, { "epoch": 1.4785525513315716, "grad_norm": 0.1490325927734375, "learning_rate": 5.219159971524459e-05, "loss": 0.988, "step": 7273 }, { "epoch": 1.4787558446838789, "grad_norm": 0.18194523453712463, "learning_rate": 5.217126004271331e-05, "loss": 1.0924, "step": 7274 }, { "epoch": 1.4789591380361862, "grad_norm": 0.16092152893543243, "learning_rate": 5.2150920370182045e-05, "loss": 0.9684, "step": 7275 }, { "epoch": 1.4791624313884935, "grad_norm": 0.13924293220043182, "learning_rate": 5.213058069765076e-05, "loss": 0.8937, "step": 7276 }, { "epoch": 1.479365724740801, "grad_norm": 0.16983318328857422, "learning_rate": 5.21102410251195e-05, "loss": 1.0294, "step": 7277 }, { "epoch": 1.4795690180931085, "grad_norm": 0.1642337441444397, "learning_rate": 5.208990135258822e-05, "loss": 1.0405, "step": 7278 }, { "epoch": 1.4797723114454158, "grad_norm": 0.14544382691383362, "learning_rate": 5.2069561680056956e-05, "loss": 1.0347, "step": 7279 }, { "epoch": 1.479975604797723, "grad_norm": 0.16924284398555756, "learning_rate": 5.204922200752568e-05, "loss": 1.1748, "step": 7280 }, { "epoch": 1.4801788981500306, "grad_norm": 0.13653366267681122, "learning_rate": 5.202888233499441e-05, "loss": 0.8597, "step": 7281 }, { "epoch": 1.4803821915023379, "grad_norm": 0.16047342121601105, "learning_rate": 5.2008542662463137e-05, "loss": 0.9599, "step": 7282 }, { "epoch": 1.4805854848546454, "grad_norm": 0.16013282537460327, "learning_rate": 5.198820298993187e-05, "loss": 0.9445, "step": 7283 }, { "epoch": 1.4807887782069526, "grad_norm": 0.15885138511657715, "learning_rate": 5.196786331740059e-05, "loss": 0.9222, "step": 7284 }, { "epoch": 1.48099207155926, "grad_norm": 0.15349248051643372, "learning_rate": 5.1947523644869324e-05, "loss": 1.096, "step": 7285 }, { "epoch": 1.4811953649115674, "grad_norm": 0.165516197681427, "learning_rate": 5.192718397233805e-05, "loss": 1.0377, "step": 7286 }, { "epoch": 1.4813986582638747, "grad_norm": 0.13928279280662537, "learning_rate": 5.190684429980678e-05, "loss": 0.8888, "step": 7287 }, { "epoch": 1.4816019516161822, "grad_norm": 0.1546129435300827, "learning_rate": 5.1886504627275504e-05, "loss": 0.9901, "step": 7288 }, { "epoch": 1.4818052449684895, "grad_norm": 0.14182148873806, "learning_rate": 5.1866164954744235e-05, "loss": 0.873, "step": 7289 }, { "epoch": 1.4820085383207968, "grad_norm": 0.14797116816043854, "learning_rate": 5.184582528221296e-05, "loss": 0.9726, "step": 7290 }, { "epoch": 1.4822118316731043, "grad_norm": 0.16081807017326355, "learning_rate": 5.182548560968169e-05, "loss": 1.1054, "step": 7291 }, { "epoch": 1.4824151250254116, "grad_norm": 0.14762349426746368, "learning_rate": 5.1805145937150415e-05, "loss": 1.0579, "step": 7292 }, { "epoch": 1.4826184183777191, "grad_norm": 0.14200522005558014, "learning_rate": 5.1784806264619146e-05, "loss": 0.8272, "step": 7293 }, { "epoch": 1.4828217117300264, "grad_norm": 0.1498144418001175, "learning_rate": 5.176446659208787e-05, "loss": 1.0083, "step": 7294 }, { "epoch": 1.4830250050823337, "grad_norm": 0.13944904506206512, "learning_rate": 5.17441269195566e-05, "loss": 0.8614, "step": 7295 }, { "epoch": 1.4832282984346412, "grad_norm": 0.16318222880363464, "learning_rate": 5.1723787247025326e-05, "loss": 1.0711, "step": 7296 }, { "epoch": 1.4834315917869485, "grad_norm": 0.15789909660816193, "learning_rate": 5.170344757449406e-05, "loss": 0.9389, "step": 7297 }, { "epoch": 1.483634885139256, "grad_norm": 0.15837034583091736, "learning_rate": 5.168310790196278e-05, "loss": 1.0717, "step": 7298 }, { "epoch": 1.4838381784915633, "grad_norm": 0.1504717320203781, "learning_rate": 5.1662768229431513e-05, "loss": 1.033, "step": 7299 }, { "epoch": 1.4840414718438706, "grad_norm": 0.13542811572551727, "learning_rate": 5.164242855690024e-05, "loss": 0.9785, "step": 7300 }, { "epoch": 1.484244765196178, "grad_norm": 0.17283979058265686, "learning_rate": 5.162208888436897e-05, "loss": 1.0065, "step": 7301 }, { "epoch": 1.4844480585484854, "grad_norm": 0.14844317734241486, "learning_rate": 5.1601749211837694e-05, "loss": 0.9527, "step": 7302 }, { "epoch": 1.484651351900793, "grad_norm": 0.13952547311782837, "learning_rate": 5.1581409539306425e-05, "loss": 0.879, "step": 7303 }, { "epoch": 1.4848546452531002, "grad_norm": 0.1655363291501999, "learning_rate": 5.156106986677515e-05, "loss": 1.0691, "step": 7304 }, { "epoch": 1.4850579386054075, "grad_norm": 0.14126764237880707, "learning_rate": 5.154073019424388e-05, "loss": 0.9801, "step": 7305 }, { "epoch": 1.485261231957715, "grad_norm": 0.14697974920272827, "learning_rate": 5.1520390521712605e-05, "loss": 1.0006, "step": 7306 }, { "epoch": 1.4854645253100225, "grad_norm": 0.14936833083629608, "learning_rate": 5.1500050849181336e-05, "loss": 1.0641, "step": 7307 }, { "epoch": 1.4856678186623298, "grad_norm": 0.15351562201976776, "learning_rate": 5.147971117665006e-05, "loss": 0.8251, "step": 7308 }, { "epoch": 1.485871112014637, "grad_norm": 0.14514027535915375, "learning_rate": 5.145937150411878e-05, "loss": 0.9151, "step": 7309 }, { "epoch": 1.4860744053669446, "grad_norm": 0.16834424436092377, "learning_rate": 5.1439031831587516e-05, "loss": 1.1145, "step": 7310 }, { "epoch": 1.4862776987192519, "grad_norm": 0.16079841554164886, "learning_rate": 5.1418692159056234e-05, "loss": 1.1091, "step": 7311 }, { "epoch": 1.4864809920715594, "grad_norm": 0.16498316824436188, "learning_rate": 5.139835248652497e-05, "loss": 1.1002, "step": 7312 }, { "epoch": 1.4866842854238667, "grad_norm": 0.13437385857105255, "learning_rate": 5.137801281399369e-05, "loss": 0.9243, "step": 7313 }, { "epoch": 1.486887578776174, "grad_norm": 0.14202702045440674, "learning_rate": 5.135767314146243e-05, "loss": 0.9927, "step": 7314 }, { "epoch": 1.4870908721284815, "grad_norm": 0.1714879423379898, "learning_rate": 5.1337333468931145e-05, "loss": 0.922, "step": 7315 }, { "epoch": 1.4872941654807887, "grad_norm": 0.14939868450164795, "learning_rate": 5.1316993796399883e-05, "loss": 0.9081, "step": 7316 }, { "epoch": 1.4874974588330963, "grad_norm": 0.15223632752895355, "learning_rate": 5.12966541238686e-05, "loss": 0.9435, "step": 7317 }, { "epoch": 1.4877007521854035, "grad_norm": 0.1733781099319458, "learning_rate": 5.127631445133734e-05, "loss": 1.1178, "step": 7318 }, { "epoch": 1.4879040455377108, "grad_norm": 0.18424783647060394, "learning_rate": 5.125597477880606e-05, "loss": 1.1653, "step": 7319 }, { "epoch": 1.4881073388900183, "grad_norm": 0.16070927679538727, "learning_rate": 5.1235635106274795e-05, "loss": 1.1551, "step": 7320 }, { "epoch": 1.4883106322423256, "grad_norm": 0.1407182812690735, "learning_rate": 5.121529543374351e-05, "loss": 0.9149, "step": 7321 }, { "epoch": 1.4885139255946331, "grad_norm": 0.1472930908203125, "learning_rate": 5.119495576121225e-05, "loss": 1.0594, "step": 7322 }, { "epoch": 1.4887172189469404, "grad_norm": 0.17095035314559937, "learning_rate": 5.117461608868097e-05, "loss": 1.1867, "step": 7323 }, { "epoch": 1.4889205122992477, "grad_norm": 0.15441139042377472, "learning_rate": 5.1154276416149706e-05, "loss": 1.0417, "step": 7324 }, { "epoch": 1.4891238056515552, "grad_norm": 0.16108761727809906, "learning_rate": 5.1133936743618424e-05, "loss": 0.994, "step": 7325 }, { "epoch": 1.4893270990038625, "grad_norm": 0.1708153337240219, "learning_rate": 5.111359707108716e-05, "loss": 1.1945, "step": 7326 }, { "epoch": 1.48953039235617, "grad_norm": 0.15345723927021027, "learning_rate": 5.109325739855588e-05, "loss": 0.99, "step": 7327 }, { "epoch": 1.4897336857084773, "grad_norm": 0.19498048722743988, "learning_rate": 5.107291772602462e-05, "loss": 1.1735, "step": 7328 }, { "epoch": 1.4899369790607846, "grad_norm": 0.1828971654176712, "learning_rate": 5.1052578053493335e-05, "loss": 0.9769, "step": 7329 }, { "epoch": 1.490140272413092, "grad_norm": 0.15867459774017334, "learning_rate": 5.103223838096207e-05, "loss": 1.0853, "step": 7330 }, { "epoch": 1.4903435657653994, "grad_norm": 0.14858923852443695, "learning_rate": 5.101189870843079e-05, "loss": 0.9781, "step": 7331 }, { "epoch": 1.490546859117707, "grad_norm": 0.1680002361536026, "learning_rate": 5.099155903589953e-05, "loss": 1.1203, "step": 7332 }, { "epoch": 1.4907501524700142, "grad_norm": 0.1647680550813675, "learning_rate": 5.097121936336825e-05, "loss": 0.9581, "step": 7333 }, { "epoch": 1.4909534458223215, "grad_norm": 0.1701551079750061, "learning_rate": 5.0950879690836985e-05, "loss": 1.0304, "step": 7334 }, { "epoch": 1.491156739174629, "grad_norm": 0.14726072549819946, "learning_rate": 5.09305400183057e-05, "loss": 0.9659, "step": 7335 }, { "epoch": 1.4913600325269365, "grad_norm": 0.15449485182762146, "learning_rate": 5.091020034577444e-05, "loss": 1.0142, "step": 7336 }, { "epoch": 1.4915633258792438, "grad_norm": 0.17248129844665527, "learning_rate": 5.0889860673243165e-05, "loss": 1.1532, "step": 7337 }, { "epoch": 1.491766619231551, "grad_norm": 0.1599951684474945, "learning_rate": 5.0869521000711896e-05, "loss": 1.0177, "step": 7338 }, { "epoch": 1.4919699125838586, "grad_norm": 0.17572374641895294, "learning_rate": 5.084918132818062e-05, "loss": 1.1042, "step": 7339 }, { "epoch": 1.4921732059361659, "grad_norm": 0.16550900042057037, "learning_rate": 5.082884165564935e-05, "loss": 0.9186, "step": 7340 }, { "epoch": 1.4923764992884734, "grad_norm": 0.15446113049983978, "learning_rate": 5.0808501983118076e-05, "loss": 1.0586, "step": 7341 }, { "epoch": 1.4925797926407807, "grad_norm": 0.17314577102661133, "learning_rate": 5.078816231058681e-05, "loss": 1.0935, "step": 7342 }, { "epoch": 1.492783085993088, "grad_norm": 0.16413532197475433, "learning_rate": 5.076782263805553e-05, "loss": 1.1736, "step": 7343 }, { "epoch": 1.4929863793453955, "grad_norm": 0.1834377646446228, "learning_rate": 5.074748296552426e-05, "loss": 1.1242, "step": 7344 }, { "epoch": 1.4931896726977028, "grad_norm": 0.16163167357444763, "learning_rate": 5.072714329299299e-05, "loss": 1.1114, "step": 7345 }, { "epoch": 1.4933929660500103, "grad_norm": 0.14689160883426666, "learning_rate": 5.070680362046172e-05, "loss": 0.9022, "step": 7346 }, { "epoch": 1.4935962594023175, "grad_norm": 0.158969908952713, "learning_rate": 5.0686463947930443e-05, "loss": 1.069, "step": 7347 }, { "epoch": 1.4937995527546248, "grad_norm": 0.1602858155965805, "learning_rate": 5.0666124275399175e-05, "loss": 1.0554, "step": 7348 }, { "epoch": 1.4940028461069323, "grad_norm": 0.16024748980998993, "learning_rate": 5.06457846028679e-05, "loss": 1.0337, "step": 7349 }, { "epoch": 1.4942061394592396, "grad_norm": 0.14937053620815277, "learning_rate": 5.062544493033662e-05, "loss": 1.019, "step": 7350 }, { "epoch": 1.4944094328115471, "grad_norm": 0.16690638661384583, "learning_rate": 5.0605105257805355e-05, "loss": 1.1478, "step": 7351 }, { "epoch": 1.4946127261638544, "grad_norm": 0.162098228931427, "learning_rate": 5.058476558527407e-05, "loss": 1.026, "step": 7352 }, { "epoch": 1.4948160195161617, "grad_norm": 0.16602452099323273, "learning_rate": 5.056442591274281e-05, "loss": 0.9843, "step": 7353 }, { "epoch": 1.4950193128684692, "grad_norm": 0.15590983629226685, "learning_rate": 5.054408624021153e-05, "loss": 0.9234, "step": 7354 }, { "epoch": 1.4952226062207765, "grad_norm": 0.15621311962604523, "learning_rate": 5.0523746567680266e-05, "loss": 0.9961, "step": 7355 }, { "epoch": 1.495425899573084, "grad_norm": 0.14333868026733398, "learning_rate": 5.0503406895148984e-05, "loss": 0.9153, "step": 7356 }, { "epoch": 1.4956291929253913, "grad_norm": 0.19151797890663147, "learning_rate": 5.048306722261772e-05, "loss": 1.319, "step": 7357 }, { "epoch": 1.4958324862776986, "grad_norm": 0.15350469946861267, "learning_rate": 5.046272755008644e-05, "loss": 1.0148, "step": 7358 }, { "epoch": 1.4960357796300061, "grad_norm": 0.1540667861700058, "learning_rate": 5.044238787755518e-05, "loss": 0.9678, "step": 7359 }, { "epoch": 1.4962390729823134, "grad_norm": 0.1420007050037384, "learning_rate": 5.0422048205023895e-05, "loss": 1.0249, "step": 7360 }, { "epoch": 1.496442366334621, "grad_norm": 0.1667315810918808, "learning_rate": 5.040170853249263e-05, "loss": 1.1174, "step": 7361 }, { "epoch": 1.4966456596869282, "grad_norm": 0.13870370388031006, "learning_rate": 5.038136885996135e-05, "loss": 1.0738, "step": 7362 }, { "epoch": 1.4968489530392355, "grad_norm": 0.14800742268562317, "learning_rate": 5.036102918743009e-05, "loss": 0.9589, "step": 7363 }, { "epoch": 1.497052246391543, "grad_norm": 0.15649986267089844, "learning_rate": 5.034068951489881e-05, "loss": 0.9814, "step": 7364 }, { "epoch": 1.4972555397438505, "grad_norm": 0.15127186477184296, "learning_rate": 5.0320349842367545e-05, "loss": 0.9122, "step": 7365 }, { "epoch": 1.4974588330961578, "grad_norm": 0.14589205384254456, "learning_rate": 5.030001016983626e-05, "loss": 0.9516, "step": 7366 }, { "epoch": 1.497662126448465, "grad_norm": 0.14768987894058228, "learning_rate": 5.0279670497305e-05, "loss": 1.1394, "step": 7367 }, { "epoch": 1.4978654198007726, "grad_norm": 0.1471613049507141, "learning_rate": 5.025933082477372e-05, "loss": 0.8922, "step": 7368 }, { "epoch": 1.4980687131530799, "grad_norm": 0.15747645497322083, "learning_rate": 5.0238991152242456e-05, "loss": 1.0475, "step": 7369 }, { "epoch": 1.4982720065053874, "grad_norm": 0.16879907250404358, "learning_rate": 5.0218651479711174e-05, "loss": 1.2512, "step": 7370 }, { "epoch": 1.4984752998576947, "grad_norm": 0.1588255614042282, "learning_rate": 5.019831180717991e-05, "loss": 1.0016, "step": 7371 }, { "epoch": 1.498678593210002, "grad_norm": 0.1563660055398941, "learning_rate": 5.017797213464863e-05, "loss": 0.9547, "step": 7372 }, { "epoch": 1.4988818865623095, "grad_norm": 0.15730784833431244, "learning_rate": 5.015763246211737e-05, "loss": 0.9483, "step": 7373 }, { "epoch": 1.4990851799146168, "grad_norm": 0.14192761480808258, "learning_rate": 5.0137292789586085e-05, "loss": 0.9, "step": 7374 }, { "epoch": 1.4992884732669243, "grad_norm": 0.1614556461572647, "learning_rate": 5.011695311705482e-05, "loss": 1.0514, "step": 7375 }, { "epoch": 1.4994917666192316, "grad_norm": 0.133713498711586, "learning_rate": 5.009661344452354e-05, "loss": 0.9055, "step": 7376 }, { "epoch": 1.4996950599715388, "grad_norm": 0.16982313990592957, "learning_rate": 5.007627377199228e-05, "loss": 1.1643, "step": 7377 }, { "epoch": 1.4998983533238464, "grad_norm": 0.14745911955833435, "learning_rate": 5.0055934099460997e-05, "loss": 1.0721, "step": 7378 }, { "epoch": 1.5001016466761536, "grad_norm": 0.1416764110326767, "learning_rate": 5.0035594426929735e-05, "loss": 0.8848, "step": 7379 }, { "epoch": 1.5003049400284612, "grad_norm": 0.15031202137470245, "learning_rate": 5.001525475439845e-05, "loss": 0.9917, "step": 7380 }, { "epoch": 1.5005082333807684, "grad_norm": 0.1697656214237213, "learning_rate": 4.9994915081867184e-05, "loss": 1.0059, "step": 7381 }, { "epoch": 1.5007115267330757, "grad_norm": 0.16710081696510315, "learning_rate": 4.997457540933591e-05, "loss": 1.0506, "step": 7382 }, { "epoch": 1.5009148200853832, "grad_norm": 0.1541258841753006, "learning_rate": 4.995423573680464e-05, "loss": 1.0352, "step": 7383 }, { "epoch": 1.5011181134376907, "grad_norm": 0.1533016413450241, "learning_rate": 4.9933896064273364e-05, "loss": 1.0231, "step": 7384 }, { "epoch": 1.501321406789998, "grad_norm": 0.1787251979112625, "learning_rate": 4.9913556391742095e-05, "loss": 1.122, "step": 7385 }, { "epoch": 1.5015247001423053, "grad_norm": 0.15674886107444763, "learning_rate": 4.989321671921082e-05, "loss": 0.9164, "step": 7386 }, { "epoch": 1.5017279934946126, "grad_norm": 0.1377156525850296, "learning_rate": 4.987287704667955e-05, "loss": 0.8968, "step": 7387 }, { "epoch": 1.5019312868469201, "grad_norm": 0.1509624719619751, "learning_rate": 4.9852537374148275e-05, "loss": 0.9561, "step": 7388 }, { "epoch": 1.5021345801992276, "grad_norm": 0.1546659767627716, "learning_rate": 4.9832197701617006e-05, "loss": 0.9526, "step": 7389 }, { "epoch": 1.502337873551535, "grad_norm": 0.1705237329006195, "learning_rate": 4.981185802908573e-05, "loss": 1.1172, "step": 7390 }, { "epoch": 1.5025411669038422, "grad_norm": 0.16375771164894104, "learning_rate": 4.979151835655446e-05, "loss": 1.0827, "step": 7391 }, { "epoch": 1.5027444602561495, "grad_norm": 0.16147857904434204, "learning_rate": 4.9771178684023186e-05, "loss": 1.007, "step": 7392 }, { "epoch": 1.502947753608457, "grad_norm": 0.16066159307956696, "learning_rate": 4.975083901149192e-05, "loss": 1.1456, "step": 7393 }, { "epoch": 1.5031510469607645, "grad_norm": 0.15176479518413544, "learning_rate": 4.973049933896064e-05, "loss": 1.16, "step": 7394 }, { "epoch": 1.5033543403130718, "grad_norm": 0.19379957020282745, "learning_rate": 4.9710159666429373e-05, "loss": 1.0532, "step": 7395 }, { "epoch": 1.503557633665379, "grad_norm": 0.14722734689712524, "learning_rate": 4.9689819993898105e-05, "loss": 0.8578, "step": 7396 }, { "epoch": 1.5037609270176864, "grad_norm": 0.14202560484409332, "learning_rate": 4.966948032136683e-05, "loss": 0.8695, "step": 7397 }, { "epoch": 1.5039642203699939, "grad_norm": 0.14851757884025574, "learning_rate": 4.964914064883556e-05, "loss": 0.9655, "step": 7398 }, { "epoch": 1.5041675137223014, "grad_norm": 0.16248172521591187, "learning_rate": 4.9628800976304285e-05, "loss": 0.9906, "step": 7399 }, { "epoch": 1.5043708070746087, "grad_norm": 0.1500251442193985, "learning_rate": 4.9608461303773016e-05, "loss": 0.9284, "step": 7400 }, { "epoch": 1.504574100426916, "grad_norm": 0.17004640400409698, "learning_rate": 4.958812163124174e-05, "loss": 0.9729, "step": 7401 }, { "epoch": 1.5047773937792233, "grad_norm": 0.15960298478603363, "learning_rate": 4.956778195871047e-05, "loss": 1.0183, "step": 7402 }, { "epoch": 1.5049806871315308, "grad_norm": 0.15985259413719177, "learning_rate": 4.9547442286179196e-05, "loss": 0.9871, "step": 7403 }, { "epoch": 1.5051839804838383, "grad_norm": 0.12836353480815887, "learning_rate": 4.952710261364793e-05, "loss": 0.8302, "step": 7404 }, { "epoch": 1.5053872738361456, "grad_norm": 0.18904882669448853, "learning_rate": 4.950676294111665e-05, "loss": 1.2109, "step": 7405 }, { "epoch": 1.5055905671884529, "grad_norm": 0.16572581231594086, "learning_rate": 4.948642326858538e-05, "loss": 1.2165, "step": 7406 }, { "epoch": 1.5057938605407604, "grad_norm": 0.13275861740112305, "learning_rate": 4.946608359605411e-05, "loss": 0.8393, "step": 7407 }, { "epoch": 1.5059971538930677, "grad_norm": 0.15469948947429657, "learning_rate": 4.944574392352284e-05, "loss": 1.1301, "step": 7408 }, { "epoch": 1.5062004472453752, "grad_norm": 0.15814034640789032, "learning_rate": 4.942540425099156e-05, "loss": 1.0332, "step": 7409 }, { "epoch": 1.5064037405976825, "grad_norm": 0.14642979204654694, "learning_rate": 4.9405064578460295e-05, "loss": 0.9571, "step": 7410 }, { "epoch": 1.5066070339499897, "grad_norm": 0.13333429396152496, "learning_rate": 4.938472490592902e-05, "loss": 0.8927, "step": 7411 }, { "epoch": 1.5068103273022972, "grad_norm": 0.15922658145427704, "learning_rate": 4.9364385233397743e-05, "loss": 1.0756, "step": 7412 }, { "epoch": 1.5070136206546048, "grad_norm": 0.15627671778202057, "learning_rate": 4.934404556086647e-05, "loss": 0.8736, "step": 7413 }, { "epoch": 1.507216914006912, "grad_norm": 0.1477116048336029, "learning_rate": 4.93237058883352e-05, "loss": 0.9684, "step": 7414 }, { "epoch": 1.5074202073592193, "grad_norm": 0.15267114341259003, "learning_rate": 4.9303366215803924e-05, "loss": 0.9281, "step": 7415 }, { "epoch": 1.5076235007115266, "grad_norm": 0.14677409827709198, "learning_rate": 4.9283026543272655e-05, "loss": 1.0185, "step": 7416 }, { "epoch": 1.5078267940638341, "grad_norm": 0.1623264104127884, "learning_rate": 4.926268687074138e-05, "loss": 1.1089, "step": 7417 }, { "epoch": 1.5080300874161416, "grad_norm": 0.16942016780376434, "learning_rate": 4.924234719821011e-05, "loss": 1.2174, "step": 7418 }, { "epoch": 1.508233380768449, "grad_norm": 0.1675270050764084, "learning_rate": 4.9222007525678835e-05, "loss": 1.0502, "step": 7419 }, { "epoch": 1.5084366741207562, "grad_norm": 0.13707569241523743, "learning_rate": 4.9201667853147566e-05, "loss": 0.8527, "step": 7420 }, { "epoch": 1.5086399674730635, "grad_norm": 0.16163641214370728, "learning_rate": 4.918132818061629e-05, "loss": 1.0057, "step": 7421 }, { "epoch": 1.508843260825371, "grad_norm": 0.15098832547664642, "learning_rate": 4.916098850808502e-05, "loss": 0.9811, "step": 7422 }, { "epoch": 1.5090465541776785, "grad_norm": 0.16301870346069336, "learning_rate": 4.9140648835553746e-05, "loss": 1.1367, "step": 7423 }, { "epoch": 1.5092498475299858, "grad_norm": 0.16614048182964325, "learning_rate": 4.912030916302248e-05, "loss": 1.2153, "step": 7424 }, { "epoch": 1.509453140882293, "grad_norm": 0.14212031662464142, "learning_rate": 4.90999694904912e-05, "loss": 0.9264, "step": 7425 }, { "epoch": 1.5096564342346004, "grad_norm": 0.1504111886024475, "learning_rate": 4.907962981795993e-05, "loss": 0.928, "step": 7426 }, { "epoch": 1.509859727586908, "grad_norm": 0.1598103642463684, "learning_rate": 4.905929014542866e-05, "loss": 1.0198, "step": 7427 }, { "epoch": 1.5100630209392154, "grad_norm": 0.1484471559524536, "learning_rate": 4.903895047289739e-05, "loss": 0.9336, "step": 7428 }, { "epoch": 1.5102663142915227, "grad_norm": 0.14721804857254028, "learning_rate": 4.9018610800366114e-05, "loss": 0.8471, "step": 7429 }, { "epoch": 1.51046960764383, "grad_norm": 0.15284955501556396, "learning_rate": 4.8998271127834845e-05, "loss": 1.0969, "step": 7430 }, { "epoch": 1.5106729009961373, "grad_norm": 0.1515507698059082, "learning_rate": 4.897793145530357e-05, "loss": 1.0801, "step": 7431 }, { "epoch": 1.5108761943484448, "grad_norm": 0.15165692567825317, "learning_rate": 4.89575917827723e-05, "loss": 1.0073, "step": 7432 }, { "epoch": 1.5110794877007523, "grad_norm": 0.16193105280399323, "learning_rate": 4.8937252110241025e-05, "loss": 1.0088, "step": 7433 }, { "epoch": 1.5112827810530596, "grad_norm": 0.16101758182048798, "learning_rate": 4.8916912437709756e-05, "loss": 0.8903, "step": 7434 }, { "epoch": 1.5114860744053669, "grad_norm": 0.15236888825893402, "learning_rate": 4.889657276517848e-05, "loss": 0.9741, "step": 7435 }, { "epoch": 1.5116893677576744, "grad_norm": 0.15327027440071106, "learning_rate": 4.887623309264721e-05, "loss": 1.0023, "step": 7436 }, { "epoch": 1.5118926611099817, "grad_norm": 0.1660035252571106, "learning_rate": 4.8855893420115936e-05, "loss": 1.1632, "step": 7437 }, { "epoch": 1.5120959544622892, "grad_norm": 0.14389246702194214, "learning_rate": 4.883555374758467e-05, "loss": 0.8665, "step": 7438 }, { "epoch": 1.5122992478145965, "grad_norm": 0.15834391117095947, "learning_rate": 4.881521407505339e-05, "loss": 1.0066, "step": 7439 }, { "epoch": 1.5125025411669037, "grad_norm": 0.1547255963087082, "learning_rate": 4.879487440252212e-05, "loss": 0.9848, "step": 7440 }, { "epoch": 1.5127058345192113, "grad_norm": 0.1583252251148224, "learning_rate": 4.877453472999085e-05, "loss": 1.0239, "step": 7441 }, { "epoch": 1.5129091278715188, "grad_norm": 0.17128023505210876, "learning_rate": 4.875419505745958e-05, "loss": 1.0968, "step": 7442 }, { "epoch": 1.513112421223826, "grad_norm": 0.15349778532981873, "learning_rate": 4.8733855384928303e-05, "loss": 0.9062, "step": 7443 }, { "epoch": 1.5133157145761333, "grad_norm": 0.1570233255624771, "learning_rate": 4.8713515712397035e-05, "loss": 0.9631, "step": 7444 }, { "epoch": 1.5135190079284406, "grad_norm": 0.1659766435623169, "learning_rate": 4.869317603986576e-05, "loss": 1.077, "step": 7445 }, { "epoch": 1.5137223012807481, "grad_norm": 0.16281379759311676, "learning_rate": 4.867283636733449e-05, "loss": 1.0399, "step": 7446 }, { "epoch": 1.5139255946330556, "grad_norm": 0.16851572692394257, "learning_rate": 4.8652496694803215e-05, "loss": 1.0689, "step": 7447 }, { "epoch": 1.514128887985363, "grad_norm": 0.14893779158592224, "learning_rate": 4.8632157022271946e-05, "loss": 0.8254, "step": 7448 }, { "epoch": 1.5143321813376702, "grad_norm": 0.15788701176643372, "learning_rate": 4.861181734974067e-05, "loss": 0.9946, "step": 7449 }, { "epoch": 1.5145354746899775, "grad_norm": 0.14474782347679138, "learning_rate": 4.85914776772094e-05, "loss": 0.8394, "step": 7450 }, { "epoch": 1.514738768042285, "grad_norm": 0.15891660749912262, "learning_rate": 4.8571138004678126e-05, "loss": 0.9978, "step": 7451 }, { "epoch": 1.5149420613945925, "grad_norm": 0.15987549722194672, "learning_rate": 4.855079833214686e-05, "loss": 0.9478, "step": 7452 }, { "epoch": 1.5151453547468998, "grad_norm": 0.14981964230537415, "learning_rate": 4.853045865961558e-05, "loss": 1.0904, "step": 7453 }, { "epoch": 1.515348648099207, "grad_norm": 0.17206816375255585, "learning_rate": 4.8510118987084306e-05, "loss": 1.238, "step": 7454 }, { "epoch": 1.5155519414515144, "grad_norm": 0.17081889510154724, "learning_rate": 4.848977931455304e-05, "loss": 1.0376, "step": 7455 }, { "epoch": 1.515755234803822, "grad_norm": 0.15734606981277466, "learning_rate": 4.846943964202176e-05, "loss": 0.9347, "step": 7456 }, { "epoch": 1.5159585281561294, "grad_norm": 0.14848117530345917, "learning_rate": 4.844909996949049e-05, "loss": 1.103, "step": 7457 }, { "epoch": 1.5161618215084367, "grad_norm": 0.18724314868450165, "learning_rate": 4.842876029695922e-05, "loss": 1.1861, "step": 7458 }, { "epoch": 1.516365114860744, "grad_norm": 0.1609761267900467, "learning_rate": 4.840842062442795e-05, "loss": 1.0222, "step": 7459 }, { "epoch": 1.5165684082130513, "grad_norm": 0.14589814841747284, "learning_rate": 4.8388080951896673e-05, "loss": 0.909, "step": 7460 }, { "epoch": 1.5167717015653588, "grad_norm": 0.1623678058385849, "learning_rate": 4.8367741279365405e-05, "loss": 1.032, "step": 7461 }, { "epoch": 1.5169749949176663, "grad_norm": 0.15050582587718964, "learning_rate": 4.834740160683413e-05, "loss": 0.9827, "step": 7462 }, { "epoch": 1.5171782882699736, "grad_norm": 0.15027949213981628, "learning_rate": 4.832706193430286e-05, "loss": 0.9398, "step": 7463 }, { "epoch": 1.5173815816222809, "grad_norm": 0.18657010793685913, "learning_rate": 4.8306722261771585e-05, "loss": 1.3522, "step": 7464 }, { "epoch": 1.5175848749745884, "grad_norm": 0.16288447380065918, "learning_rate": 4.8286382589240316e-05, "loss": 1.0864, "step": 7465 }, { "epoch": 1.5177881683268957, "grad_norm": 0.14163288474082947, "learning_rate": 4.826604291670904e-05, "loss": 0.8257, "step": 7466 }, { "epoch": 1.5179914616792032, "grad_norm": 0.1605214625597, "learning_rate": 4.824570324417777e-05, "loss": 1.016, "step": 7467 }, { "epoch": 1.5181947550315105, "grad_norm": 0.15949556231498718, "learning_rate": 4.8225363571646496e-05, "loss": 1.0179, "step": 7468 }, { "epoch": 1.5183980483838178, "grad_norm": 0.1667870730161667, "learning_rate": 4.820502389911523e-05, "loss": 1.127, "step": 7469 }, { "epoch": 1.5186013417361253, "grad_norm": 0.1582157164812088, "learning_rate": 4.818468422658395e-05, "loss": 0.9873, "step": 7470 }, { "epoch": 1.5188046350884326, "grad_norm": 0.1699240505695343, "learning_rate": 4.816434455405268e-05, "loss": 1.0634, "step": 7471 }, { "epoch": 1.51900792844074, "grad_norm": 0.1658029556274414, "learning_rate": 4.814400488152141e-05, "loss": 1.1744, "step": 7472 }, { "epoch": 1.5192112217930474, "grad_norm": 0.16187456250190735, "learning_rate": 4.812366520899014e-05, "loss": 0.9219, "step": 7473 }, { "epoch": 1.5194145151453546, "grad_norm": 0.15792818367481232, "learning_rate": 4.810332553645886e-05, "loss": 1.0043, "step": 7474 }, { "epoch": 1.5196178084976621, "grad_norm": 0.16355274617671967, "learning_rate": 4.8082985863927595e-05, "loss": 1.1079, "step": 7475 }, { "epoch": 1.5198211018499697, "grad_norm": 0.15543818473815918, "learning_rate": 4.806264619139632e-05, "loss": 1.0714, "step": 7476 }, { "epoch": 1.520024395202277, "grad_norm": 0.14773859083652496, "learning_rate": 4.804230651886505e-05, "loss": 0.8847, "step": 7477 }, { "epoch": 1.5202276885545842, "grad_norm": 0.16230449080467224, "learning_rate": 4.8021966846333775e-05, "loss": 0.9657, "step": 7478 }, { "epoch": 1.5204309819068915, "grad_norm": 0.15943102538585663, "learning_rate": 4.8001627173802506e-05, "loss": 1.0591, "step": 7479 }, { "epoch": 1.520634275259199, "grad_norm": 0.12978902459144592, "learning_rate": 4.798128750127123e-05, "loss": 0.8861, "step": 7480 }, { "epoch": 1.5208375686115065, "grad_norm": 0.1726493388414383, "learning_rate": 4.796094782873996e-05, "loss": 1.0854, "step": 7481 }, { "epoch": 1.5210408619638138, "grad_norm": 0.15407046675682068, "learning_rate": 4.7940608156208686e-05, "loss": 1.0619, "step": 7482 }, { "epoch": 1.5212441553161211, "grad_norm": 0.1641497015953064, "learning_rate": 4.792026848367742e-05, "loss": 1.03, "step": 7483 }, { "epoch": 1.5214474486684284, "grad_norm": 0.15649664402008057, "learning_rate": 4.789992881114614e-05, "loss": 0.91, "step": 7484 }, { "epoch": 1.521650742020736, "grad_norm": 0.14974331855773926, "learning_rate": 4.787958913861487e-05, "loss": 0.9223, "step": 7485 }, { "epoch": 1.5218540353730434, "grad_norm": 0.16601666808128357, "learning_rate": 4.78592494660836e-05, "loss": 0.9536, "step": 7486 }, { "epoch": 1.5220573287253507, "grad_norm": 0.15077362954616547, "learning_rate": 4.783890979355233e-05, "loss": 1.0178, "step": 7487 }, { "epoch": 1.522260622077658, "grad_norm": 0.16418759524822235, "learning_rate": 4.781857012102105e-05, "loss": 1.0563, "step": 7488 }, { "epoch": 1.5224639154299653, "grad_norm": 0.1693541556596756, "learning_rate": 4.7798230448489784e-05, "loss": 1.1455, "step": 7489 }, { "epoch": 1.5226672087822728, "grad_norm": 0.1629193276166916, "learning_rate": 4.777789077595851e-05, "loss": 0.9781, "step": 7490 }, { "epoch": 1.5228705021345803, "grad_norm": 0.15155495703220367, "learning_rate": 4.775755110342724e-05, "loss": 0.9964, "step": 7491 }, { "epoch": 1.5230737954868876, "grad_norm": 0.167188361287117, "learning_rate": 4.7737211430895965e-05, "loss": 1.0437, "step": 7492 }, { "epoch": 1.5232770888391949, "grad_norm": 0.15585185587406158, "learning_rate": 4.7716871758364696e-05, "loss": 0.9516, "step": 7493 }, { "epoch": 1.5234803821915024, "grad_norm": 0.1460978090763092, "learning_rate": 4.769653208583342e-05, "loss": 0.9499, "step": 7494 }, { "epoch": 1.5236836755438097, "grad_norm": 0.1444913148880005, "learning_rate": 4.7676192413302145e-05, "loss": 0.9712, "step": 7495 }, { "epoch": 1.5238869688961172, "grad_norm": 0.15407614409923553, "learning_rate": 4.7655852740770876e-05, "loss": 1.0388, "step": 7496 }, { "epoch": 1.5240902622484245, "grad_norm": 0.14768806099891663, "learning_rate": 4.76355130682396e-05, "loss": 0.9901, "step": 7497 }, { "epoch": 1.5242935556007318, "grad_norm": 0.17865929007530212, "learning_rate": 4.761517339570833e-05, "loss": 1.1832, "step": 7498 }, { "epoch": 1.5244968489530393, "grad_norm": 0.16334161162376404, "learning_rate": 4.7594833723177056e-05, "loss": 1.2179, "step": 7499 }, { "epoch": 1.5247001423053466, "grad_norm": 0.14917631447315216, "learning_rate": 4.757449405064579e-05, "loss": 0.9683, "step": 7500 }, { "epoch": 1.524903435657654, "grad_norm": 0.16842851042747498, "learning_rate": 4.755415437811451e-05, "loss": 1.1311, "step": 7501 }, { "epoch": 1.5251067290099614, "grad_norm": 0.1710801124572754, "learning_rate": 4.753381470558324e-05, "loss": 1.051, "step": 7502 }, { "epoch": 1.5253100223622686, "grad_norm": 0.14032766222953796, "learning_rate": 4.751347503305197e-05, "loss": 0.886, "step": 7503 }, { "epoch": 1.5255133157145762, "grad_norm": 0.16021350026130676, "learning_rate": 4.74931353605207e-05, "loss": 1.1057, "step": 7504 }, { "epoch": 1.5257166090668837, "grad_norm": 0.13777956366539001, "learning_rate": 4.747279568798942e-05, "loss": 0.8405, "step": 7505 }, { "epoch": 1.525919902419191, "grad_norm": 0.15827859938144684, "learning_rate": 4.7452456015458155e-05, "loss": 0.9807, "step": 7506 }, { "epoch": 1.5261231957714982, "grad_norm": 0.14875048398971558, "learning_rate": 4.743211634292688e-05, "loss": 0.9142, "step": 7507 }, { "epoch": 1.5263264891238055, "grad_norm": 0.1471138298511505, "learning_rate": 4.741177667039561e-05, "loss": 0.9305, "step": 7508 }, { "epoch": 1.526529782476113, "grad_norm": 0.15858818590641022, "learning_rate": 4.7391436997864335e-05, "loss": 1.0326, "step": 7509 }, { "epoch": 1.5267330758284205, "grad_norm": 0.14757820963859558, "learning_rate": 4.7371097325333066e-05, "loss": 0.9423, "step": 7510 }, { "epoch": 1.5269363691807278, "grad_norm": 0.14671318233013153, "learning_rate": 4.735075765280179e-05, "loss": 0.9953, "step": 7511 }, { "epoch": 1.5271396625330351, "grad_norm": 0.16315753757953644, "learning_rate": 4.733041798027052e-05, "loss": 1.0674, "step": 7512 }, { "epoch": 1.5273429558853424, "grad_norm": 0.15158745646476746, "learning_rate": 4.7310078307739246e-05, "loss": 0.9416, "step": 7513 }, { "epoch": 1.52754624923765, "grad_norm": 0.1569458246231079, "learning_rate": 4.728973863520798e-05, "loss": 1.1336, "step": 7514 }, { "epoch": 1.5277495425899574, "grad_norm": 0.15651223063468933, "learning_rate": 4.72693989626767e-05, "loss": 0.9724, "step": 7515 }, { "epoch": 1.5279528359422647, "grad_norm": 0.1413620412349701, "learning_rate": 4.724905929014543e-05, "loss": 0.9164, "step": 7516 }, { "epoch": 1.528156129294572, "grad_norm": 0.14873231947422028, "learning_rate": 4.722871961761416e-05, "loss": 0.9904, "step": 7517 }, { "epoch": 1.5283594226468793, "grad_norm": 0.16632919013500214, "learning_rate": 4.720837994508289e-05, "loss": 1.0332, "step": 7518 }, { "epoch": 1.5285627159991868, "grad_norm": 0.1570500135421753, "learning_rate": 4.718804027255161e-05, "loss": 1.0491, "step": 7519 }, { "epoch": 1.5287660093514943, "grad_norm": 0.17738598585128784, "learning_rate": 4.7167700600020344e-05, "loss": 1.049, "step": 7520 }, { "epoch": 1.5289693027038016, "grad_norm": 0.1635134369134903, "learning_rate": 4.714736092748907e-05, "loss": 1.1412, "step": 7521 }, { "epoch": 1.529172596056109, "grad_norm": 0.16301599144935608, "learning_rate": 4.71270212549578e-05, "loss": 1.0967, "step": 7522 }, { "epoch": 1.5293758894084164, "grad_norm": 0.14440634846687317, "learning_rate": 4.7106681582426525e-05, "loss": 0.8357, "step": 7523 }, { "epoch": 1.5295791827607237, "grad_norm": 0.144694983959198, "learning_rate": 4.7086341909895256e-05, "loss": 0.8722, "step": 7524 }, { "epoch": 1.5297824761130312, "grad_norm": 0.14646680653095245, "learning_rate": 4.706600223736398e-05, "loss": 0.8988, "step": 7525 }, { "epoch": 1.5299857694653385, "grad_norm": 0.1644057333469391, "learning_rate": 4.704566256483271e-05, "loss": 1.1197, "step": 7526 }, { "epoch": 1.5301890628176458, "grad_norm": 0.1693752557039261, "learning_rate": 4.7025322892301436e-05, "loss": 1.0486, "step": 7527 }, { "epoch": 1.5303923561699533, "grad_norm": 0.1716986894607544, "learning_rate": 4.700498321977017e-05, "loss": 1.1087, "step": 7528 }, { "epoch": 1.5305956495222606, "grad_norm": 0.1740422248840332, "learning_rate": 4.698464354723889e-05, "loss": 1.0909, "step": 7529 }, { "epoch": 1.530798942874568, "grad_norm": 0.15906310081481934, "learning_rate": 4.696430387470762e-05, "loss": 1.0841, "step": 7530 }, { "epoch": 1.5310022362268754, "grad_norm": 0.14159859716892242, "learning_rate": 4.694396420217635e-05, "loss": 0.8766, "step": 7531 }, { "epoch": 1.5312055295791827, "grad_norm": 0.17096573114395142, "learning_rate": 4.692362452964508e-05, "loss": 1.1308, "step": 7532 }, { "epoch": 1.5314088229314902, "grad_norm": 0.16331720352172852, "learning_rate": 4.69032848571138e-05, "loss": 0.9884, "step": 7533 }, { "epoch": 1.5316121162837977, "grad_norm": 0.15612895786762238, "learning_rate": 4.6882945184582534e-05, "loss": 0.9737, "step": 7534 }, { "epoch": 1.531815409636105, "grad_norm": 0.1716272234916687, "learning_rate": 4.686260551205126e-05, "loss": 1.2049, "step": 7535 }, { "epoch": 1.5320187029884123, "grad_norm": 0.15378396213054657, "learning_rate": 4.684226583951999e-05, "loss": 1.0315, "step": 7536 }, { "epoch": 1.5322219963407195, "grad_norm": 0.16745533049106598, "learning_rate": 4.682192616698871e-05, "loss": 1.1749, "step": 7537 }, { "epoch": 1.532425289693027, "grad_norm": 0.16122505068778992, "learning_rate": 4.680158649445744e-05, "loss": 1.1481, "step": 7538 }, { "epoch": 1.5326285830453346, "grad_norm": 0.15753133594989777, "learning_rate": 4.678124682192617e-05, "loss": 1.0085, "step": 7539 }, { "epoch": 1.5328318763976418, "grad_norm": 0.15394344925880432, "learning_rate": 4.6760907149394895e-05, "loss": 0.9611, "step": 7540 }, { "epoch": 1.5330351697499491, "grad_norm": 0.1620665341615677, "learning_rate": 4.6740567476863626e-05, "loss": 0.9435, "step": 7541 }, { "epoch": 1.5332384631022564, "grad_norm": 0.15785206854343414, "learning_rate": 4.672022780433235e-05, "loss": 1.0311, "step": 7542 }, { "epoch": 1.533441756454564, "grad_norm": 0.15812784433364868, "learning_rate": 4.669988813180108e-05, "loss": 0.9304, "step": 7543 }, { "epoch": 1.5336450498068714, "grad_norm": 0.14829683303833008, "learning_rate": 4.6679548459269806e-05, "loss": 0.9735, "step": 7544 }, { "epoch": 1.5338483431591787, "grad_norm": 0.15306831896305084, "learning_rate": 4.665920878673854e-05, "loss": 0.9459, "step": 7545 }, { "epoch": 1.534051636511486, "grad_norm": 0.1524849534034729, "learning_rate": 4.663886911420726e-05, "loss": 0.9989, "step": 7546 }, { "epoch": 1.5342549298637933, "grad_norm": 0.1524866819381714, "learning_rate": 4.661852944167599e-05, "loss": 0.9516, "step": 7547 }, { "epoch": 1.5344582232161008, "grad_norm": 0.1561049073934555, "learning_rate": 4.659818976914472e-05, "loss": 0.9629, "step": 7548 }, { "epoch": 1.5346615165684083, "grad_norm": 0.15052708983421326, "learning_rate": 4.657785009661345e-05, "loss": 0.9709, "step": 7549 }, { "epoch": 1.5348648099207156, "grad_norm": 0.16317294538021088, "learning_rate": 4.655751042408217e-05, "loss": 1.0431, "step": 7550 }, { "epoch": 1.535068103273023, "grad_norm": 0.1577170193195343, "learning_rate": 4.6537170751550904e-05, "loss": 1.0794, "step": 7551 }, { "epoch": 1.5352713966253302, "grad_norm": 0.16741138696670532, "learning_rate": 4.651683107901963e-05, "loss": 1.2215, "step": 7552 }, { "epoch": 1.5354746899776377, "grad_norm": 0.1500609666109085, "learning_rate": 4.649649140648836e-05, "loss": 0.9439, "step": 7553 }, { "epoch": 1.5356779833299452, "grad_norm": 0.15758995711803436, "learning_rate": 4.6476151733957085e-05, "loss": 0.8848, "step": 7554 }, { "epoch": 1.5358812766822525, "grad_norm": 0.14967188239097595, "learning_rate": 4.6455812061425816e-05, "loss": 0.9408, "step": 7555 }, { "epoch": 1.5360845700345598, "grad_norm": 0.13587024807929993, "learning_rate": 4.643547238889454e-05, "loss": 0.9078, "step": 7556 }, { "epoch": 1.5362878633868673, "grad_norm": 0.1551710069179535, "learning_rate": 4.641513271636327e-05, "loss": 1.0213, "step": 7557 }, { "epoch": 1.5364911567391746, "grad_norm": 0.15696901082992554, "learning_rate": 4.6394793043831996e-05, "loss": 0.9854, "step": 7558 }, { "epoch": 1.536694450091482, "grad_norm": 0.14111942052841187, "learning_rate": 4.637445337130073e-05, "loss": 0.8104, "step": 7559 }, { "epoch": 1.5368977434437894, "grad_norm": 0.16344057023525238, "learning_rate": 4.635411369876945e-05, "loss": 1.0198, "step": 7560 }, { "epoch": 1.5371010367960967, "grad_norm": 0.15371447801589966, "learning_rate": 4.633377402623818e-05, "loss": 1.1088, "step": 7561 }, { "epoch": 1.5373043301484042, "grad_norm": 0.16405069828033447, "learning_rate": 4.631343435370691e-05, "loss": 1.1632, "step": 7562 }, { "epoch": 1.5375076235007117, "grad_norm": 0.17231358587741852, "learning_rate": 4.629309468117564e-05, "loss": 1.0729, "step": 7563 }, { "epoch": 1.537710916853019, "grad_norm": 0.16849292814731598, "learning_rate": 4.627275500864436e-05, "loss": 1.1287, "step": 7564 }, { "epoch": 1.5379142102053263, "grad_norm": 0.14124159514904022, "learning_rate": 4.6252415336113094e-05, "loss": 0.9726, "step": 7565 }, { "epoch": 1.5381175035576335, "grad_norm": 0.16582997143268585, "learning_rate": 4.623207566358182e-05, "loss": 1.048, "step": 7566 }, { "epoch": 1.538320796909941, "grad_norm": 0.15703178942203522, "learning_rate": 4.621173599105055e-05, "loss": 0.9917, "step": 7567 }, { "epoch": 1.5385240902622486, "grad_norm": 0.1521129459142685, "learning_rate": 4.6191396318519274e-05, "loss": 0.9314, "step": 7568 }, { "epoch": 1.5387273836145559, "grad_norm": 0.18239177763462067, "learning_rate": 4.6171056645988006e-05, "loss": 1.1309, "step": 7569 }, { "epoch": 1.5389306769668631, "grad_norm": 0.15608282387256622, "learning_rate": 4.615071697345673e-05, "loss": 0.9907, "step": 7570 }, { "epoch": 1.5391339703191704, "grad_norm": 0.14907321333885193, "learning_rate": 4.613037730092546e-05, "loss": 0.948, "step": 7571 }, { "epoch": 1.539337263671478, "grad_norm": 0.15870921313762665, "learning_rate": 4.6110037628394186e-05, "loss": 1.0293, "step": 7572 }, { "epoch": 1.5395405570237854, "grad_norm": 0.1471608281135559, "learning_rate": 4.608969795586292e-05, "loss": 0.9045, "step": 7573 }, { "epoch": 1.5397438503760927, "grad_norm": 0.1473323255777359, "learning_rate": 4.606935828333164e-05, "loss": 0.9773, "step": 7574 }, { "epoch": 1.5399471437284, "grad_norm": 0.15672756731510162, "learning_rate": 4.604901861080037e-05, "loss": 0.9564, "step": 7575 }, { "epoch": 1.5401504370807073, "grad_norm": 0.13355454802513123, "learning_rate": 4.60286789382691e-05, "loss": 0.9043, "step": 7576 }, { "epoch": 1.5403537304330148, "grad_norm": 0.16888266801834106, "learning_rate": 4.600833926573783e-05, "loss": 1.049, "step": 7577 }, { "epoch": 1.5405570237853223, "grad_norm": 0.14586526155471802, "learning_rate": 4.5987999593206546e-05, "loss": 0.991, "step": 7578 }, { "epoch": 1.5407603171376296, "grad_norm": 0.15697935223579407, "learning_rate": 4.596765992067528e-05, "loss": 0.9276, "step": 7579 }, { "epoch": 1.540963610489937, "grad_norm": 0.1606079787015915, "learning_rate": 4.5947320248144e-05, "loss": 1.1167, "step": 7580 }, { "epoch": 1.5411669038422442, "grad_norm": 0.14496320486068726, "learning_rate": 4.592698057561273e-05, "loss": 0.978, "step": 7581 }, { "epoch": 1.5413701971945517, "grad_norm": 0.1540028601884842, "learning_rate": 4.590664090308146e-05, "loss": 0.9584, "step": 7582 }, { "epoch": 1.5415734905468592, "grad_norm": 0.14273619651794434, "learning_rate": 4.588630123055019e-05, "loss": 0.9559, "step": 7583 }, { "epoch": 1.5417767838991665, "grad_norm": 0.15364350378513336, "learning_rate": 4.586596155801891e-05, "loss": 1.067, "step": 7584 }, { "epoch": 1.5419800772514738, "grad_norm": 0.15916843712329865, "learning_rate": 4.5845621885487644e-05, "loss": 0.9734, "step": 7585 }, { "epoch": 1.5421833706037813, "grad_norm": 0.16618654131889343, "learning_rate": 4.582528221295637e-05, "loss": 1.0467, "step": 7586 }, { "epoch": 1.5423866639560886, "grad_norm": 0.1487346738576889, "learning_rate": 4.58049425404251e-05, "loss": 0.9555, "step": 7587 }, { "epoch": 1.542589957308396, "grad_norm": 0.1543288677930832, "learning_rate": 4.5784602867893825e-05, "loss": 1.0203, "step": 7588 }, { "epoch": 1.5427932506607034, "grad_norm": 0.15385927259922028, "learning_rate": 4.5764263195362556e-05, "loss": 1.0728, "step": 7589 }, { "epoch": 1.5429965440130107, "grad_norm": 0.18972186744213104, "learning_rate": 4.574392352283128e-05, "loss": 1.1087, "step": 7590 }, { "epoch": 1.5431998373653182, "grad_norm": 0.17217358946800232, "learning_rate": 4.572358385030001e-05, "loss": 1.0384, "step": 7591 }, { "epoch": 1.5434031307176257, "grad_norm": 0.15717031061649323, "learning_rate": 4.5703244177768736e-05, "loss": 0.8593, "step": 7592 }, { "epoch": 1.543606424069933, "grad_norm": 0.17360135912895203, "learning_rate": 4.568290450523747e-05, "loss": 1.1914, "step": 7593 }, { "epoch": 1.5438097174222403, "grad_norm": 0.15492455661296844, "learning_rate": 4.566256483270619e-05, "loss": 0.9039, "step": 7594 }, { "epoch": 1.5440130107745476, "grad_norm": 0.15058903396129608, "learning_rate": 4.564222516017492e-05, "loss": 0.9224, "step": 7595 }, { "epoch": 1.544216304126855, "grad_norm": 0.16502228379249573, "learning_rate": 4.562188548764365e-05, "loss": 0.9956, "step": 7596 }, { "epoch": 1.5444195974791626, "grad_norm": 0.15759393572807312, "learning_rate": 4.560154581511238e-05, "loss": 1.0067, "step": 7597 }, { "epoch": 1.5446228908314699, "grad_norm": 0.1422048658132553, "learning_rate": 4.558120614258111e-05, "loss": 0.9564, "step": 7598 }, { "epoch": 1.5448261841837772, "grad_norm": 0.13447371125221252, "learning_rate": 4.5560866470049834e-05, "loss": 0.9567, "step": 7599 }, { "epoch": 1.5450294775360844, "grad_norm": 0.1465720683336258, "learning_rate": 4.5540526797518566e-05, "loss": 0.945, "step": 7600 }, { "epoch": 1.545232770888392, "grad_norm": 0.16487425565719604, "learning_rate": 4.552018712498729e-05, "loss": 1.0972, "step": 7601 }, { "epoch": 1.5454360642406995, "grad_norm": 0.15260903537273407, "learning_rate": 4.549984745245602e-05, "loss": 0.8845, "step": 7602 }, { "epoch": 1.5456393575930067, "grad_norm": 0.16015398502349854, "learning_rate": 4.5479507779924746e-05, "loss": 1.2366, "step": 7603 }, { "epoch": 1.545842650945314, "grad_norm": 0.15431946516036987, "learning_rate": 4.545916810739348e-05, "loss": 1.0195, "step": 7604 }, { "epoch": 1.5460459442976213, "grad_norm": 0.16399477422237396, "learning_rate": 4.54388284348622e-05, "loss": 1.1461, "step": 7605 }, { "epoch": 1.5462492376499288, "grad_norm": 0.15312154591083527, "learning_rate": 4.541848876233093e-05, "loss": 1.053, "step": 7606 }, { "epoch": 1.5464525310022363, "grad_norm": 0.14725275337696075, "learning_rate": 4.539814908979966e-05, "loss": 0.8423, "step": 7607 }, { "epoch": 1.5466558243545436, "grad_norm": 0.15515272319316864, "learning_rate": 4.537780941726839e-05, "loss": 1.0329, "step": 7608 }, { "epoch": 1.546859117706851, "grad_norm": 0.14504285156726837, "learning_rate": 4.535746974473711e-05, "loss": 0.8707, "step": 7609 }, { "epoch": 1.5470624110591582, "grad_norm": 0.17818818986415863, "learning_rate": 4.5337130072205844e-05, "loss": 1.0818, "step": 7610 }, { "epoch": 1.5472657044114657, "grad_norm": 0.15549889206886292, "learning_rate": 4.531679039967457e-05, "loss": 0.8258, "step": 7611 }, { "epoch": 1.5474689977637732, "grad_norm": 0.14544916152954102, "learning_rate": 4.52964507271433e-05, "loss": 0.9576, "step": 7612 }, { "epoch": 1.5476722911160805, "grad_norm": 0.16655772924423218, "learning_rate": 4.5276111054612024e-05, "loss": 0.9521, "step": 7613 }, { "epoch": 1.5478755844683878, "grad_norm": 0.15410354733467102, "learning_rate": 4.5255771382080755e-05, "loss": 0.968, "step": 7614 }, { "epoch": 1.5480788778206953, "grad_norm": 0.16960468888282776, "learning_rate": 4.523543170954948e-05, "loss": 0.9912, "step": 7615 }, { "epoch": 1.5482821711730026, "grad_norm": 0.15351277589797974, "learning_rate": 4.521509203701821e-05, "loss": 0.9764, "step": 7616 }, { "epoch": 1.54848546452531, "grad_norm": 0.15443667769432068, "learning_rate": 4.5194752364486936e-05, "loss": 0.9311, "step": 7617 }, { "epoch": 1.5486887578776174, "grad_norm": 0.20180290937423706, "learning_rate": 4.517441269195567e-05, "loss": 1.3573, "step": 7618 }, { "epoch": 1.5488920512299247, "grad_norm": 0.14903661608695984, "learning_rate": 4.515407301942439e-05, "loss": 1.0234, "step": 7619 }, { "epoch": 1.5490953445822322, "grad_norm": 0.15551035106182098, "learning_rate": 4.5133733346893116e-05, "loss": 0.9429, "step": 7620 }, { "epoch": 1.5492986379345397, "grad_norm": 0.14853742718696594, "learning_rate": 4.511339367436184e-05, "loss": 0.9374, "step": 7621 }, { "epoch": 1.549501931286847, "grad_norm": 0.14340229332447052, "learning_rate": 4.509305400183057e-05, "loss": 0.9859, "step": 7622 }, { "epoch": 1.5497052246391543, "grad_norm": 0.15460248291492462, "learning_rate": 4.5072714329299296e-05, "loss": 1.0937, "step": 7623 }, { "epoch": 1.5499085179914616, "grad_norm": 0.1452610045671463, "learning_rate": 4.505237465676803e-05, "loss": 0.9601, "step": 7624 }, { "epoch": 1.550111811343769, "grad_norm": 0.17821797728538513, "learning_rate": 4.503203498423675e-05, "loss": 1.127, "step": 7625 }, { "epoch": 1.5503151046960766, "grad_norm": 0.14965933561325073, "learning_rate": 4.501169531170548e-05, "loss": 1.026, "step": 7626 }, { "epoch": 1.5505183980483839, "grad_norm": 0.15064261853694916, "learning_rate": 4.499135563917421e-05, "loss": 1.0008, "step": 7627 }, { "epoch": 1.5507216914006912, "grad_norm": 0.16965916752815247, "learning_rate": 4.497101596664294e-05, "loss": 1.0721, "step": 7628 }, { "epoch": 1.5509249847529984, "grad_norm": 0.15175755321979523, "learning_rate": 4.495067629411166e-05, "loss": 0.997, "step": 7629 }, { "epoch": 1.551128278105306, "grad_norm": 0.14904138445854187, "learning_rate": 4.4930336621580394e-05, "loss": 0.9649, "step": 7630 }, { "epoch": 1.5513315714576135, "grad_norm": 0.14472688734531403, "learning_rate": 4.490999694904912e-05, "loss": 0.8556, "step": 7631 }, { "epoch": 1.5515348648099208, "grad_norm": 0.16380110383033752, "learning_rate": 4.488965727651785e-05, "loss": 1.0693, "step": 7632 }, { "epoch": 1.551738158162228, "grad_norm": 0.138839453458786, "learning_rate": 4.4869317603986574e-05, "loss": 0.9683, "step": 7633 }, { "epoch": 1.5519414515145353, "grad_norm": 0.17433138191699982, "learning_rate": 4.4848977931455306e-05, "loss": 1.0886, "step": 7634 }, { "epoch": 1.5521447448668428, "grad_norm": 0.14090728759765625, "learning_rate": 4.482863825892403e-05, "loss": 0.8745, "step": 7635 }, { "epoch": 1.5523480382191504, "grad_norm": 0.16489115357398987, "learning_rate": 4.480829858639276e-05, "loss": 0.9927, "step": 7636 }, { "epoch": 1.5525513315714576, "grad_norm": 0.1592523604631424, "learning_rate": 4.4787958913861486e-05, "loss": 1.0042, "step": 7637 }, { "epoch": 1.552754624923765, "grad_norm": 0.16787073016166687, "learning_rate": 4.476761924133022e-05, "loss": 1.0864, "step": 7638 }, { "epoch": 1.5529579182760722, "grad_norm": 0.1466791182756424, "learning_rate": 4.474727956879894e-05, "loss": 0.9175, "step": 7639 }, { "epoch": 1.5531612116283797, "grad_norm": 0.15505526959896088, "learning_rate": 4.472693989626767e-05, "loss": 1.0617, "step": 7640 }, { "epoch": 1.5533645049806872, "grad_norm": 0.14803843200206757, "learning_rate": 4.47066002237364e-05, "loss": 1.0052, "step": 7641 }, { "epoch": 1.5535677983329945, "grad_norm": 0.16014695167541504, "learning_rate": 4.468626055120513e-05, "loss": 1.0093, "step": 7642 }, { "epoch": 1.5537710916853018, "grad_norm": 0.17316266894340515, "learning_rate": 4.466592087867385e-05, "loss": 1.0647, "step": 7643 }, { "epoch": 1.5539743850376093, "grad_norm": 0.15057261288166046, "learning_rate": 4.4645581206142584e-05, "loss": 0.9674, "step": 7644 }, { "epoch": 1.5541776783899166, "grad_norm": 0.14511555433273315, "learning_rate": 4.462524153361131e-05, "loss": 0.9249, "step": 7645 }, { "epoch": 1.5543809717422241, "grad_norm": 0.14440345764160156, "learning_rate": 4.460490186108004e-05, "loss": 1.0341, "step": 7646 }, { "epoch": 1.5545842650945314, "grad_norm": 0.1603761613368988, "learning_rate": 4.4584562188548764e-05, "loss": 1.0671, "step": 7647 }, { "epoch": 1.5547875584468387, "grad_norm": 0.15424849092960358, "learning_rate": 4.4564222516017496e-05, "loss": 1.0338, "step": 7648 }, { "epoch": 1.5549908517991462, "grad_norm": 0.15445691347122192, "learning_rate": 4.454388284348622e-05, "loss": 1.05, "step": 7649 }, { "epoch": 1.5551941451514537, "grad_norm": 0.1586342453956604, "learning_rate": 4.452354317095495e-05, "loss": 1.1008, "step": 7650 }, { "epoch": 1.555397438503761, "grad_norm": 0.15313756465911865, "learning_rate": 4.4503203498423676e-05, "loss": 0.909, "step": 7651 }, { "epoch": 1.5556007318560683, "grad_norm": 0.15157462656497955, "learning_rate": 4.448286382589241e-05, "loss": 1.0709, "step": 7652 }, { "epoch": 1.5558040252083756, "grad_norm": 0.14759200811386108, "learning_rate": 4.446252415336113e-05, "loss": 0.9982, "step": 7653 }, { "epoch": 1.556007318560683, "grad_norm": 0.17173556983470917, "learning_rate": 4.444218448082986e-05, "loss": 1.0587, "step": 7654 }, { "epoch": 1.5562106119129906, "grad_norm": 0.16383096575737, "learning_rate": 4.4421844808298594e-05, "loss": 1.0011, "step": 7655 }, { "epoch": 1.5564139052652979, "grad_norm": 0.14271312952041626, "learning_rate": 4.440150513576732e-05, "loss": 0.9044, "step": 7656 }, { "epoch": 1.5566171986176052, "grad_norm": 0.14554372429847717, "learning_rate": 4.438116546323605e-05, "loss": 0.942, "step": 7657 }, { "epoch": 1.5568204919699125, "grad_norm": 0.15020722150802612, "learning_rate": 4.4360825790704774e-05, "loss": 0.9441, "step": 7658 }, { "epoch": 1.55702378532222, "grad_norm": 0.171416237950325, "learning_rate": 4.4340486118173505e-05, "loss": 1.0565, "step": 7659 }, { "epoch": 1.5572270786745275, "grad_norm": 0.14867867529392242, "learning_rate": 4.432014644564223e-05, "loss": 0.8566, "step": 7660 }, { "epoch": 1.5574303720268348, "grad_norm": 0.14643310010433197, "learning_rate": 4.4299806773110954e-05, "loss": 0.9876, "step": 7661 }, { "epoch": 1.557633665379142, "grad_norm": 0.16952738165855408, "learning_rate": 4.427946710057968e-05, "loss": 1.1274, "step": 7662 }, { "epoch": 1.5578369587314493, "grad_norm": 0.1604606658220291, "learning_rate": 4.425912742804841e-05, "loss": 0.9567, "step": 7663 }, { "epoch": 1.5580402520837568, "grad_norm": 0.15354478359222412, "learning_rate": 4.4238787755517134e-05, "loss": 1.0133, "step": 7664 }, { "epoch": 1.5582435454360644, "grad_norm": 0.15854056179523468, "learning_rate": 4.4218448082985866e-05, "loss": 1.0082, "step": 7665 }, { "epoch": 1.5584468387883716, "grad_norm": 0.1458478420972824, "learning_rate": 4.419810841045459e-05, "loss": 0.9526, "step": 7666 }, { "epoch": 1.558650132140679, "grad_norm": 0.16254277527332306, "learning_rate": 4.417776873792332e-05, "loss": 1.1872, "step": 7667 }, { "epoch": 1.5588534254929862, "grad_norm": 0.14747852087020874, "learning_rate": 4.4157429065392046e-05, "loss": 0.8988, "step": 7668 }, { "epoch": 1.5590567188452937, "grad_norm": 0.14321334660053253, "learning_rate": 4.413708939286078e-05, "loss": 0.8893, "step": 7669 }, { "epoch": 1.5592600121976012, "grad_norm": 0.16480861604213715, "learning_rate": 4.41167497203295e-05, "loss": 1.0142, "step": 7670 }, { "epoch": 1.5594633055499085, "grad_norm": 0.16135086119174957, "learning_rate": 4.409641004779823e-05, "loss": 1.0068, "step": 7671 }, { "epoch": 1.5596665989022158, "grad_norm": 0.17008133232593536, "learning_rate": 4.407607037526696e-05, "loss": 0.9766, "step": 7672 }, { "epoch": 1.5598698922545233, "grad_norm": 0.172557070851326, "learning_rate": 4.405573070273569e-05, "loss": 1.1709, "step": 7673 }, { "epoch": 1.5600731856068306, "grad_norm": 0.16424237191677094, "learning_rate": 4.403539103020441e-05, "loss": 1.1869, "step": 7674 }, { "epoch": 1.5602764789591381, "grad_norm": 0.1462833434343338, "learning_rate": 4.4015051357673144e-05, "loss": 0.9837, "step": 7675 }, { "epoch": 1.5604797723114454, "grad_norm": 0.18880817294120789, "learning_rate": 4.399471168514187e-05, "loss": 1.1953, "step": 7676 }, { "epoch": 1.5606830656637527, "grad_norm": 0.17090368270874023, "learning_rate": 4.39743720126106e-05, "loss": 1.0998, "step": 7677 }, { "epoch": 1.5608863590160602, "grad_norm": 0.14799486100673676, "learning_rate": 4.3954032340079324e-05, "loss": 0.8731, "step": 7678 }, { "epoch": 1.5610896523683677, "grad_norm": 0.14751887321472168, "learning_rate": 4.3933692667548056e-05, "loss": 0.8774, "step": 7679 }, { "epoch": 1.561292945720675, "grad_norm": 0.16796378791332245, "learning_rate": 4.391335299501678e-05, "loss": 1.0867, "step": 7680 }, { "epoch": 1.5614962390729823, "grad_norm": 0.14488856494426727, "learning_rate": 4.389301332248551e-05, "loss": 0.8721, "step": 7681 }, { "epoch": 1.5616995324252896, "grad_norm": 0.1752883642911911, "learning_rate": 4.3872673649954236e-05, "loss": 1.0811, "step": 7682 }, { "epoch": 1.561902825777597, "grad_norm": 0.15939146280288696, "learning_rate": 4.385233397742297e-05, "loss": 1.0488, "step": 7683 }, { "epoch": 1.5621061191299046, "grad_norm": 0.15976473689079285, "learning_rate": 4.383199430489169e-05, "loss": 1.1246, "step": 7684 }, { "epoch": 1.562309412482212, "grad_norm": 0.16183441877365112, "learning_rate": 4.381165463236042e-05, "loss": 1.0593, "step": 7685 }, { "epoch": 1.5625127058345192, "grad_norm": 0.16328324377536774, "learning_rate": 4.379131495982915e-05, "loss": 1.1389, "step": 7686 }, { "epoch": 1.5627159991868265, "grad_norm": 0.15026815235614777, "learning_rate": 4.377097528729788e-05, "loss": 1.1026, "step": 7687 }, { "epoch": 1.562919292539134, "grad_norm": 0.1744913011789322, "learning_rate": 4.37506356147666e-05, "loss": 1.0864, "step": 7688 }, { "epoch": 1.5631225858914415, "grad_norm": 0.16448760032653809, "learning_rate": 4.3730295942235334e-05, "loss": 1.1082, "step": 7689 }, { "epoch": 1.5633258792437488, "grad_norm": 0.1707904189825058, "learning_rate": 4.370995626970406e-05, "loss": 1.176, "step": 7690 }, { "epoch": 1.563529172596056, "grad_norm": 0.16118161380290985, "learning_rate": 4.368961659717279e-05, "loss": 0.8718, "step": 7691 }, { "epoch": 1.5637324659483633, "grad_norm": 0.13948865234851837, "learning_rate": 4.3669276924641514e-05, "loss": 0.9046, "step": 7692 }, { "epoch": 1.5639357593006709, "grad_norm": 0.15104864537715912, "learning_rate": 4.3648937252110245e-05, "loss": 0.8764, "step": 7693 }, { "epoch": 1.5641390526529784, "grad_norm": 0.16980136930942535, "learning_rate": 4.362859757957897e-05, "loss": 1.035, "step": 7694 }, { "epoch": 1.5643423460052857, "grad_norm": 0.14382198452949524, "learning_rate": 4.36082579070477e-05, "loss": 0.8941, "step": 7695 }, { "epoch": 1.564545639357593, "grad_norm": 0.18109475076198578, "learning_rate": 4.3587918234516426e-05, "loss": 1.2929, "step": 7696 }, { "epoch": 1.5647489327099002, "grad_norm": 0.16553789377212524, "learning_rate": 4.356757856198516e-05, "loss": 1.0793, "step": 7697 }, { "epoch": 1.5649522260622077, "grad_norm": 0.15958553552627563, "learning_rate": 4.354723888945388e-05, "loss": 1.0562, "step": 7698 }, { "epoch": 1.5651555194145153, "grad_norm": 0.16595548391342163, "learning_rate": 4.352689921692261e-05, "loss": 0.8996, "step": 7699 }, { "epoch": 1.5653588127668225, "grad_norm": 0.14418596029281616, "learning_rate": 4.350655954439134e-05, "loss": 0.8692, "step": 7700 }, { "epoch": 1.5655621061191298, "grad_norm": 0.16741935908794403, "learning_rate": 4.348621987186007e-05, "loss": 1.0466, "step": 7701 }, { "epoch": 1.5657653994714373, "grad_norm": 0.15922079980373383, "learning_rate": 4.346588019932879e-05, "loss": 1.0183, "step": 7702 }, { "epoch": 1.5659686928237446, "grad_norm": 0.1578623652458191, "learning_rate": 4.344554052679752e-05, "loss": 0.9868, "step": 7703 }, { "epoch": 1.5661719861760521, "grad_norm": 0.15064865350723267, "learning_rate": 4.342520085426625e-05, "loss": 0.8935, "step": 7704 }, { "epoch": 1.5663752795283594, "grad_norm": 0.14492620527744293, "learning_rate": 4.340486118173497e-05, "loss": 0.8639, "step": 7705 }, { "epoch": 1.5665785728806667, "grad_norm": 0.16936084628105164, "learning_rate": 4.3384521509203704e-05, "loss": 1.1313, "step": 7706 }, { "epoch": 1.5667818662329742, "grad_norm": 0.15651068091392517, "learning_rate": 4.336418183667243e-05, "loss": 1.0447, "step": 7707 }, { "epoch": 1.5669851595852815, "grad_norm": 0.17043067514896393, "learning_rate": 4.334384216414116e-05, "loss": 1.0261, "step": 7708 }, { "epoch": 1.567188452937589, "grad_norm": 0.17802274227142334, "learning_rate": 4.3323502491609884e-05, "loss": 1.2795, "step": 7709 }, { "epoch": 1.5673917462898963, "grad_norm": 0.15010526776313782, "learning_rate": 4.3303162819078615e-05, "loss": 1.0665, "step": 7710 }, { "epoch": 1.5675950396422036, "grad_norm": 0.16469570994377136, "learning_rate": 4.328282314654734e-05, "loss": 1.024, "step": 7711 }, { "epoch": 1.567798332994511, "grad_norm": 0.14826124906539917, "learning_rate": 4.326248347401607e-05, "loss": 0.9124, "step": 7712 }, { "epoch": 1.5680016263468186, "grad_norm": 0.1755347102880478, "learning_rate": 4.3242143801484796e-05, "loss": 1.3094, "step": 7713 }, { "epoch": 1.568204919699126, "grad_norm": 0.1440453827381134, "learning_rate": 4.322180412895353e-05, "loss": 0.9519, "step": 7714 }, { "epoch": 1.5684082130514332, "grad_norm": 0.13474537432193756, "learning_rate": 4.320146445642225e-05, "loss": 0.8384, "step": 7715 }, { "epoch": 1.5686115064037405, "grad_norm": 0.17004506289958954, "learning_rate": 4.318112478389098e-05, "loss": 1.096, "step": 7716 }, { "epoch": 1.568814799756048, "grad_norm": 0.16405999660491943, "learning_rate": 4.316078511135971e-05, "loss": 1.253, "step": 7717 }, { "epoch": 1.5690180931083555, "grad_norm": 0.15710042417049408, "learning_rate": 4.314044543882844e-05, "loss": 1.0042, "step": 7718 }, { "epoch": 1.5692213864606628, "grad_norm": 0.16247068345546722, "learning_rate": 4.312010576629716e-05, "loss": 1.0507, "step": 7719 }, { "epoch": 1.56942467981297, "grad_norm": 0.15843328833580017, "learning_rate": 4.3099766093765894e-05, "loss": 1.0994, "step": 7720 }, { "epoch": 1.5696279731652774, "grad_norm": 0.17074021697044373, "learning_rate": 4.307942642123462e-05, "loss": 1.1717, "step": 7721 }, { "epoch": 1.5698312665175849, "grad_norm": 0.15466167032718658, "learning_rate": 4.305908674870335e-05, "loss": 0.9424, "step": 7722 }, { "epoch": 1.5700345598698924, "grad_norm": 0.1710813045501709, "learning_rate": 4.3038747076172074e-05, "loss": 1.0844, "step": 7723 }, { "epoch": 1.5702378532221997, "grad_norm": 0.158400759100914, "learning_rate": 4.3018407403640805e-05, "loss": 0.9608, "step": 7724 }, { "epoch": 1.570441146574507, "grad_norm": 0.16074223816394806, "learning_rate": 4.299806773110953e-05, "loss": 1.019, "step": 7725 }, { "epoch": 1.5706444399268142, "grad_norm": 0.15291184186935425, "learning_rate": 4.297772805857826e-05, "loss": 0.9826, "step": 7726 }, { "epoch": 1.5708477332791217, "grad_norm": 0.16289396584033966, "learning_rate": 4.2957388386046985e-05, "loss": 1.0473, "step": 7727 }, { "epoch": 1.5710510266314293, "grad_norm": 0.16666078567504883, "learning_rate": 4.293704871351572e-05, "loss": 1.0867, "step": 7728 }, { "epoch": 1.5712543199837365, "grad_norm": 0.1670365035533905, "learning_rate": 4.291670904098444e-05, "loss": 1.1198, "step": 7729 }, { "epoch": 1.5714576133360438, "grad_norm": 0.14713126420974731, "learning_rate": 4.289636936845317e-05, "loss": 0.8788, "step": 7730 }, { "epoch": 1.5716609066883513, "grad_norm": 0.15655474364757538, "learning_rate": 4.28760296959219e-05, "loss": 0.9751, "step": 7731 }, { "epoch": 1.5718642000406586, "grad_norm": 0.14843833446502686, "learning_rate": 4.285569002339063e-05, "loss": 0.8692, "step": 7732 }, { "epoch": 1.5720674933929661, "grad_norm": 0.17071795463562012, "learning_rate": 4.283535035085935e-05, "loss": 1.1169, "step": 7733 }, { "epoch": 1.5722707867452734, "grad_norm": 0.1631934642791748, "learning_rate": 4.2815010678328084e-05, "loss": 1.0695, "step": 7734 }, { "epoch": 1.5724740800975807, "grad_norm": 0.15543216466903687, "learning_rate": 4.279467100579681e-05, "loss": 0.9848, "step": 7735 }, { "epoch": 1.5726773734498882, "grad_norm": 0.146816223859787, "learning_rate": 4.277433133326554e-05, "loss": 0.9852, "step": 7736 }, { "epoch": 1.5728806668021955, "grad_norm": 0.15026618540287018, "learning_rate": 4.2753991660734264e-05, "loss": 1.0915, "step": 7737 }, { "epoch": 1.573083960154503, "grad_norm": 0.1689688116312027, "learning_rate": 4.2733651988202995e-05, "loss": 1.0266, "step": 7738 }, { "epoch": 1.5732872535068103, "grad_norm": 0.15777504444122314, "learning_rate": 4.271331231567172e-05, "loss": 0.9233, "step": 7739 }, { "epoch": 1.5734905468591176, "grad_norm": 0.15743210911750793, "learning_rate": 4.269297264314045e-05, "loss": 1.0239, "step": 7740 }, { "epoch": 1.573693840211425, "grad_norm": 0.17436560988426208, "learning_rate": 4.2672632970609175e-05, "loss": 1.1427, "step": 7741 }, { "epoch": 1.5738971335637326, "grad_norm": 0.15763743221759796, "learning_rate": 4.265229329807791e-05, "loss": 0.9741, "step": 7742 }, { "epoch": 1.57410042691604, "grad_norm": 0.16599921882152557, "learning_rate": 4.263195362554663e-05, "loss": 1.1221, "step": 7743 }, { "epoch": 1.5743037202683472, "grad_norm": 0.16034117341041565, "learning_rate": 4.2611613953015356e-05, "loss": 1.0118, "step": 7744 }, { "epoch": 1.5745070136206545, "grad_norm": 0.16392181813716888, "learning_rate": 4.259127428048409e-05, "loss": 1.0101, "step": 7745 }, { "epoch": 1.574710306972962, "grad_norm": 0.15004925429821014, "learning_rate": 4.257093460795281e-05, "loss": 0.9808, "step": 7746 }, { "epoch": 1.5749136003252695, "grad_norm": 0.15286050736904144, "learning_rate": 4.255059493542154e-05, "loss": 1.0109, "step": 7747 }, { "epoch": 1.5751168936775768, "grad_norm": 0.17249399423599243, "learning_rate": 4.253025526289027e-05, "loss": 1.0543, "step": 7748 }, { "epoch": 1.575320187029884, "grad_norm": 0.13769999146461487, "learning_rate": 4.2509915590359e-05, "loss": 0.9246, "step": 7749 }, { "epoch": 1.5755234803821914, "grad_norm": 0.1701616644859314, "learning_rate": 4.248957591782772e-05, "loss": 1.0894, "step": 7750 }, { "epoch": 1.5757267737344989, "grad_norm": 0.14783045649528503, "learning_rate": 4.2469236245296454e-05, "loss": 1.0035, "step": 7751 }, { "epoch": 1.5759300670868064, "grad_norm": 0.15491348505020142, "learning_rate": 4.244889657276518e-05, "loss": 0.9842, "step": 7752 }, { "epoch": 1.5761333604391137, "grad_norm": 0.1805831789970398, "learning_rate": 4.242855690023391e-05, "loss": 1.0547, "step": 7753 }, { "epoch": 1.576336653791421, "grad_norm": 0.15928557515144348, "learning_rate": 4.2408217227702634e-05, "loss": 0.9806, "step": 7754 }, { "epoch": 1.5765399471437282, "grad_norm": 0.16157962381839752, "learning_rate": 4.2387877555171365e-05, "loss": 1.0855, "step": 7755 }, { "epoch": 1.5767432404960358, "grad_norm": 0.1494404673576355, "learning_rate": 4.236753788264009e-05, "loss": 0.8701, "step": 7756 }, { "epoch": 1.5769465338483433, "grad_norm": 0.16292664408683777, "learning_rate": 4.234719821010882e-05, "loss": 1.1466, "step": 7757 }, { "epoch": 1.5771498272006506, "grad_norm": 0.14666306972503662, "learning_rate": 4.2326858537577545e-05, "loss": 0.9591, "step": 7758 }, { "epoch": 1.5773531205529578, "grad_norm": 0.17006602883338928, "learning_rate": 4.230651886504628e-05, "loss": 0.9636, "step": 7759 }, { "epoch": 1.5775564139052651, "grad_norm": 0.13627611100673676, "learning_rate": 4.2286179192515e-05, "loss": 0.8096, "step": 7760 }, { "epoch": 1.5777597072575726, "grad_norm": 0.16414345800876617, "learning_rate": 4.226583951998373e-05, "loss": 1.0992, "step": 7761 }, { "epoch": 1.5779630006098802, "grad_norm": 0.18041983246803284, "learning_rate": 4.224549984745246e-05, "loss": 1.0905, "step": 7762 }, { "epoch": 1.5781662939621874, "grad_norm": 0.14161814749240875, "learning_rate": 4.222516017492119e-05, "loss": 0.8214, "step": 7763 }, { "epoch": 1.5783695873144947, "grad_norm": 0.1440780609846115, "learning_rate": 4.220482050238991e-05, "loss": 0.9022, "step": 7764 }, { "epoch": 1.5785728806668022, "grad_norm": 0.15424734354019165, "learning_rate": 4.2184480829858644e-05, "loss": 0.9979, "step": 7765 }, { "epoch": 1.5787761740191095, "grad_norm": 0.15975473821163177, "learning_rate": 4.216414115732737e-05, "loss": 1.0316, "step": 7766 }, { "epoch": 1.578979467371417, "grad_norm": 0.16130958497524261, "learning_rate": 4.21438014847961e-05, "loss": 1.0393, "step": 7767 }, { "epoch": 1.5791827607237243, "grad_norm": 0.15411075949668884, "learning_rate": 4.2123461812264824e-05, "loss": 1.0221, "step": 7768 }, { "epoch": 1.5793860540760316, "grad_norm": 0.16490821540355682, "learning_rate": 4.2103122139733555e-05, "loss": 0.9589, "step": 7769 }, { "epoch": 1.5795893474283391, "grad_norm": 0.1558220535516739, "learning_rate": 4.208278246720228e-05, "loss": 1.0321, "step": 7770 }, { "epoch": 1.5797926407806466, "grad_norm": 0.15568438172340393, "learning_rate": 4.206244279467101e-05, "loss": 1.0994, "step": 7771 }, { "epoch": 1.579995934132954, "grad_norm": 0.1755283921957016, "learning_rate": 4.2042103122139735e-05, "loss": 1.175, "step": 7772 }, { "epoch": 1.5801992274852612, "grad_norm": 0.15807223320007324, "learning_rate": 4.2021763449608467e-05, "loss": 1.036, "step": 7773 }, { "epoch": 1.5804025208375685, "grad_norm": 0.15929797291755676, "learning_rate": 4.200142377707719e-05, "loss": 1.087, "step": 7774 }, { "epoch": 1.580605814189876, "grad_norm": 0.16737844049930573, "learning_rate": 4.198108410454592e-05, "loss": 1.0025, "step": 7775 }, { "epoch": 1.5808091075421835, "grad_norm": 0.1495136171579361, "learning_rate": 4.196074443201465e-05, "loss": 1.062, "step": 7776 }, { "epoch": 1.5810124008944908, "grad_norm": 0.14595991373062134, "learning_rate": 4.194040475948338e-05, "loss": 1.061, "step": 7777 }, { "epoch": 1.581215694246798, "grad_norm": 0.14944487810134888, "learning_rate": 4.19200650869521e-05, "loss": 0.9301, "step": 7778 }, { "epoch": 1.5814189875991054, "grad_norm": 0.15841343998908997, "learning_rate": 4.1899725414420834e-05, "loss": 1.0816, "step": 7779 }, { "epoch": 1.5816222809514129, "grad_norm": 0.15659207105636597, "learning_rate": 4.187938574188956e-05, "loss": 0.9721, "step": 7780 }, { "epoch": 1.5818255743037204, "grad_norm": 0.17073090374469757, "learning_rate": 4.185904606935829e-05, "loss": 1.2759, "step": 7781 }, { "epoch": 1.5820288676560277, "grad_norm": 0.16839954257011414, "learning_rate": 4.1838706396827014e-05, "loss": 1.1335, "step": 7782 }, { "epoch": 1.582232161008335, "grad_norm": 0.15720833837985992, "learning_rate": 4.1818366724295745e-05, "loss": 1.0261, "step": 7783 }, { "epoch": 1.5824354543606423, "grad_norm": 0.16031992435455322, "learning_rate": 4.179802705176447e-05, "loss": 1.0234, "step": 7784 }, { "epoch": 1.5826387477129498, "grad_norm": 0.1593085378408432, "learning_rate": 4.17776873792332e-05, "loss": 0.972, "step": 7785 }, { "epoch": 1.5828420410652573, "grad_norm": 0.15081000328063965, "learning_rate": 4.175734770670192e-05, "loss": 1.0116, "step": 7786 }, { "epoch": 1.5830453344175646, "grad_norm": 0.1444242298603058, "learning_rate": 4.173700803417065e-05, "loss": 0.8697, "step": 7787 }, { "epoch": 1.5832486277698719, "grad_norm": 0.1468704342842102, "learning_rate": 4.1716668361639374e-05, "loss": 0.8652, "step": 7788 }, { "epoch": 1.5834519211221791, "grad_norm": 0.14656901359558105, "learning_rate": 4.1696328689108105e-05, "loss": 0.9308, "step": 7789 }, { "epoch": 1.5836552144744866, "grad_norm": 0.17091327905654907, "learning_rate": 4.167598901657683e-05, "loss": 1.0946, "step": 7790 }, { "epoch": 1.5838585078267942, "grad_norm": 0.1493080109357834, "learning_rate": 4.165564934404556e-05, "loss": 1.0351, "step": 7791 }, { "epoch": 1.5840618011791014, "grad_norm": 0.14153340458869934, "learning_rate": 4.1635309671514286e-05, "loss": 0.9037, "step": 7792 }, { "epoch": 1.5842650945314087, "grad_norm": 0.16549350321292877, "learning_rate": 4.161496999898302e-05, "loss": 1.0766, "step": 7793 }, { "epoch": 1.5844683878837162, "grad_norm": 0.14851561188697815, "learning_rate": 4.159463032645174e-05, "loss": 0.9873, "step": 7794 }, { "epoch": 1.5846716812360235, "grad_norm": 0.1552695482969284, "learning_rate": 4.157429065392047e-05, "loss": 0.9272, "step": 7795 }, { "epoch": 1.584874974588331, "grad_norm": 0.15845531225204468, "learning_rate": 4.15539509813892e-05, "loss": 0.9872, "step": 7796 }, { "epoch": 1.5850782679406383, "grad_norm": 0.14166247844696045, "learning_rate": 4.153361130885793e-05, "loss": 0.8848, "step": 7797 }, { "epoch": 1.5852815612929456, "grad_norm": 0.15001831948757172, "learning_rate": 4.151327163632666e-05, "loss": 0.9333, "step": 7798 }, { "epoch": 1.5854848546452531, "grad_norm": 0.1301119327545166, "learning_rate": 4.1492931963795384e-05, "loss": 0.8278, "step": 7799 }, { "epoch": 1.5856881479975606, "grad_norm": 0.17601314187049866, "learning_rate": 4.1472592291264115e-05, "loss": 1.1347, "step": 7800 }, { "epoch": 1.585891441349868, "grad_norm": 0.16398315131664276, "learning_rate": 4.145225261873284e-05, "loss": 1.1043, "step": 7801 }, { "epoch": 1.5860947347021752, "grad_norm": 0.1604781299829483, "learning_rate": 4.143191294620157e-05, "loss": 0.9591, "step": 7802 }, { "epoch": 1.5862980280544825, "grad_norm": 0.1543802171945572, "learning_rate": 4.1411573273670295e-05, "loss": 0.9958, "step": 7803 }, { "epoch": 1.58650132140679, "grad_norm": 0.14899460971355438, "learning_rate": 4.1391233601139027e-05, "loss": 0.9451, "step": 7804 }, { "epoch": 1.5867046147590975, "grad_norm": 0.15172359347343445, "learning_rate": 4.137089392860775e-05, "loss": 0.9529, "step": 7805 }, { "epoch": 1.5869079081114048, "grad_norm": 0.16983261704444885, "learning_rate": 4.135055425607648e-05, "loss": 1.1318, "step": 7806 }, { "epoch": 1.587111201463712, "grad_norm": 0.1535707414150238, "learning_rate": 4.133021458354521e-05, "loss": 0.9344, "step": 7807 }, { "epoch": 1.5873144948160194, "grad_norm": 0.16929388046264648, "learning_rate": 4.130987491101394e-05, "loss": 1.1147, "step": 7808 }, { "epoch": 1.587517788168327, "grad_norm": 0.155885249376297, "learning_rate": 4.128953523848266e-05, "loss": 0.9085, "step": 7809 }, { "epoch": 1.5877210815206344, "grad_norm": 0.12913312017917633, "learning_rate": 4.1269195565951394e-05, "loss": 0.8055, "step": 7810 }, { "epoch": 1.5879243748729417, "grad_norm": 0.1531616449356079, "learning_rate": 4.124885589342012e-05, "loss": 0.9384, "step": 7811 }, { "epoch": 1.588127668225249, "grad_norm": 0.1643763780593872, "learning_rate": 4.122851622088885e-05, "loss": 1.0913, "step": 7812 }, { "epoch": 1.5883309615775563, "grad_norm": 0.18049438297748566, "learning_rate": 4.1208176548357574e-05, "loss": 1.2691, "step": 7813 }, { "epoch": 1.5885342549298638, "grad_norm": 0.15974503755569458, "learning_rate": 4.1187836875826305e-05, "loss": 1.069, "step": 7814 }, { "epoch": 1.5887375482821713, "grad_norm": 0.17281785607337952, "learning_rate": 4.116749720329503e-05, "loss": 1.0553, "step": 7815 }, { "epoch": 1.5889408416344786, "grad_norm": 0.16639898717403412, "learning_rate": 4.114715753076376e-05, "loss": 1.0811, "step": 7816 }, { "epoch": 1.5891441349867859, "grad_norm": 0.16777338087558746, "learning_rate": 4.1126817858232485e-05, "loss": 1.1234, "step": 7817 }, { "epoch": 1.5893474283390931, "grad_norm": 0.16005922853946686, "learning_rate": 4.1106478185701216e-05, "loss": 1.0027, "step": 7818 }, { "epoch": 1.5895507216914007, "grad_norm": 0.1881905049085617, "learning_rate": 4.108613851316994e-05, "loss": 1.2261, "step": 7819 }, { "epoch": 1.5897540150437082, "grad_norm": 0.16349971294403076, "learning_rate": 4.106579884063867e-05, "loss": 1.069, "step": 7820 }, { "epoch": 1.5899573083960155, "grad_norm": 0.15850189328193665, "learning_rate": 4.1045459168107397e-05, "loss": 1.0783, "step": 7821 }, { "epoch": 1.5901606017483227, "grad_norm": 0.16116869449615479, "learning_rate": 4.102511949557613e-05, "loss": 1.1343, "step": 7822 }, { "epoch": 1.5903638951006303, "grad_norm": 0.1571117639541626, "learning_rate": 4.100477982304485e-05, "loss": 0.8875, "step": 7823 }, { "epoch": 1.5905671884529375, "grad_norm": 0.15546950697898865, "learning_rate": 4.0984440150513583e-05, "loss": 1.0754, "step": 7824 }, { "epoch": 1.590770481805245, "grad_norm": 0.14929011464118958, "learning_rate": 4.096410047798231e-05, "loss": 0.9875, "step": 7825 }, { "epoch": 1.5909737751575523, "grad_norm": 0.18235322833061218, "learning_rate": 4.094376080545104e-05, "loss": 1.1838, "step": 7826 }, { "epoch": 1.5911770685098596, "grad_norm": 0.1628686487674713, "learning_rate": 4.092342113291976e-05, "loss": 1.1307, "step": 7827 }, { "epoch": 1.5913803618621671, "grad_norm": 0.17925037443637848, "learning_rate": 4.090308146038849e-05, "loss": 1.1828, "step": 7828 }, { "epoch": 1.5915836552144746, "grad_norm": 0.16033868491649628, "learning_rate": 4.088274178785721e-05, "loss": 0.8952, "step": 7829 }, { "epoch": 1.591786948566782, "grad_norm": 0.1401589959859848, "learning_rate": 4.0862402115325944e-05, "loss": 0.9508, "step": 7830 }, { "epoch": 1.5919902419190892, "grad_norm": 0.16175039112567902, "learning_rate": 4.084206244279467e-05, "loss": 1.0253, "step": 7831 }, { "epoch": 1.5921935352713965, "grad_norm": 0.1593538224697113, "learning_rate": 4.08217227702634e-05, "loss": 1.0188, "step": 7832 }, { "epoch": 1.592396828623704, "grad_norm": 0.1410837471485138, "learning_rate": 4.0801383097732124e-05, "loss": 0.8858, "step": 7833 }, { "epoch": 1.5926001219760115, "grad_norm": 0.1808803528547287, "learning_rate": 4.0781043425200855e-05, "loss": 1.1495, "step": 7834 }, { "epoch": 1.5928034153283188, "grad_norm": 0.1612858921289444, "learning_rate": 4.076070375266958e-05, "loss": 1.0378, "step": 7835 }, { "epoch": 1.593006708680626, "grad_norm": 0.16411302983760834, "learning_rate": 4.074036408013831e-05, "loss": 1.0684, "step": 7836 }, { "epoch": 1.5932100020329334, "grad_norm": 0.15776465833187103, "learning_rate": 4.0720024407607035e-05, "loss": 1.0325, "step": 7837 }, { "epoch": 1.593413295385241, "grad_norm": 0.15211142599582672, "learning_rate": 4.0699684735075767e-05, "loss": 1.1392, "step": 7838 }, { "epoch": 1.5936165887375484, "grad_norm": 0.16022664308547974, "learning_rate": 4.067934506254449e-05, "loss": 0.9728, "step": 7839 }, { "epoch": 1.5938198820898557, "grad_norm": 0.15542104840278625, "learning_rate": 4.065900539001322e-05, "loss": 1.0316, "step": 7840 }, { "epoch": 1.594023175442163, "grad_norm": 0.15578791499137878, "learning_rate": 4.063866571748195e-05, "loss": 1.0317, "step": 7841 }, { "epoch": 1.5942264687944703, "grad_norm": 0.15654177963733673, "learning_rate": 4.061832604495068e-05, "loss": 0.9775, "step": 7842 }, { "epoch": 1.5944297621467778, "grad_norm": 0.14073923230171204, "learning_rate": 4.05979863724194e-05, "loss": 0.8984, "step": 7843 }, { "epoch": 1.5946330554990853, "grad_norm": 0.16934826970100403, "learning_rate": 4.0577646699888134e-05, "loss": 0.9698, "step": 7844 }, { "epoch": 1.5948363488513926, "grad_norm": 0.16518600285053253, "learning_rate": 4.055730702735686e-05, "loss": 1.0986, "step": 7845 }, { "epoch": 1.5950396422036999, "grad_norm": 0.17020125687122345, "learning_rate": 4.053696735482559e-05, "loss": 0.9736, "step": 7846 }, { "epoch": 1.5952429355560072, "grad_norm": 0.1533995121717453, "learning_rate": 4.0516627682294314e-05, "loss": 0.9928, "step": 7847 }, { "epoch": 1.5954462289083147, "grad_norm": 0.15281830728054047, "learning_rate": 4.0496288009763045e-05, "loss": 1.1343, "step": 7848 }, { "epoch": 1.5956495222606222, "grad_norm": 0.17842935025691986, "learning_rate": 4.047594833723177e-05, "loss": 1.0334, "step": 7849 }, { "epoch": 1.5958528156129295, "grad_norm": 0.16060751676559448, "learning_rate": 4.04556086647005e-05, "loss": 1.0163, "step": 7850 }, { "epoch": 1.5960561089652368, "grad_norm": 0.18351519107818604, "learning_rate": 4.0435268992169225e-05, "loss": 1.0877, "step": 7851 }, { "epoch": 1.5962594023175443, "grad_norm": 0.160640150308609, "learning_rate": 4.0414929319637957e-05, "loss": 0.968, "step": 7852 }, { "epoch": 1.5964626956698516, "grad_norm": 0.163973867893219, "learning_rate": 4.039458964710668e-05, "loss": 1.1019, "step": 7853 }, { "epoch": 1.596665989022159, "grad_norm": 0.14241284132003784, "learning_rate": 4.037424997457541e-05, "loss": 1.0869, "step": 7854 }, { "epoch": 1.5968692823744663, "grad_norm": 0.14843124151229858, "learning_rate": 4.035391030204414e-05, "loss": 0.9404, "step": 7855 }, { "epoch": 1.5970725757267736, "grad_norm": 0.1504674255847931, "learning_rate": 4.033357062951287e-05, "loss": 0.9946, "step": 7856 }, { "epoch": 1.5972758690790811, "grad_norm": 0.14768657088279724, "learning_rate": 4.03132309569816e-05, "loss": 0.9993, "step": 7857 }, { "epoch": 1.5974791624313887, "grad_norm": 0.14613375067710876, "learning_rate": 4.0292891284450324e-05, "loss": 0.9806, "step": 7858 }, { "epoch": 1.597682455783696, "grad_norm": 0.16472722589969635, "learning_rate": 4.0272551611919055e-05, "loss": 0.9836, "step": 7859 }, { "epoch": 1.5978857491360032, "grad_norm": 0.17325203120708466, "learning_rate": 4.025221193938778e-05, "loss": 1.2605, "step": 7860 }, { "epoch": 1.5980890424883105, "grad_norm": 0.16163960099220276, "learning_rate": 4.023187226685651e-05, "loss": 1.0204, "step": 7861 }, { "epoch": 1.598292335840618, "grad_norm": 0.15421070158481598, "learning_rate": 4.0211532594325235e-05, "loss": 0.9384, "step": 7862 }, { "epoch": 1.5984956291929255, "grad_norm": 0.1609751284122467, "learning_rate": 4.0191192921793966e-05, "loss": 0.9622, "step": 7863 }, { "epoch": 1.5986989225452328, "grad_norm": 0.1648041009902954, "learning_rate": 4.017085324926269e-05, "loss": 1.046, "step": 7864 }, { "epoch": 1.5989022158975401, "grad_norm": 0.15743397176265717, "learning_rate": 4.015051357673142e-05, "loss": 0.9391, "step": 7865 }, { "epoch": 1.5991055092498474, "grad_norm": 0.18623369932174683, "learning_rate": 4.0130173904200146e-05, "loss": 1.0989, "step": 7866 }, { "epoch": 1.599308802602155, "grad_norm": 0.16265229880809784, "learning_rate": 4.010983423166888e-05, "loss": 1.0974, "step": 7867 }, { "epoch": 1.5995120959544624, "grad_norm": 0.16854321956634521, "learning_rate": 4.00894945591376e-05, "loss": 1.2044, "step": 7868 }, { "epoch": 1.5997153893067697, "grad_norm": 0.16372013092041016, "learning_rate": 4.0069154886606327e-05, "loss": 1.1691, "step": 7869 }, { "epoch": 1.599918682659077, "grad_norm": 0.1531476229429245, "learning_rate": 4.004881521407505e-05, "loss": 0.9601, "step": 7870 }, { "epoch": 1.6001219760113843, "grad_norm": 0.16468146443367004, "learning_rate": 4.002847554154378e-05, "loss": 0.971, "step": 7871 }, { "epoch": 1.6003252693636918, "grad_norm": 0.16373568773269653, "learning_rate": 4.000813586901251e-05, "loss": 0.9115, "step": 7872 }, { "epoch": 1.6005285627159993, "grad_norm": 0.1578449010848999, "learning_rate": 3.998779619648124e-05, "loss": 0.9907, "step": 7873 }, { "epoch": 1.6007318560683066, "grad_norm": 0.1387176215648651, "learning_rate": 3.996745652394996e-05, "loss": 0.9, "step": 7874 }, { "epoch": 1.6009351494206139, "grad_norm": 0.16164258122444153, "learning_rate": 3.9947116851418694e-05, "loss": 0.9879, "step": 7875 }, { "epoch": 1.6011384427729212, "grad_norm": 0.17625640332698822, "learning_rate": 3.992677717888742e-05, "loss": 0.9873, "step": 7876 }, { "epoch": 1.6013417361252287, "grad_norm": 0.14360585808753967, "learning_rate": 3.990643750635615e-05, "loss": 0.9814, "step": 7877 }, { "epoch": 1.6015450294775362, "grad_norm": 0.14609138667583466, "learning_rate": 3.9886097833824874e-05, "loss": 1.0166, "step": 7878 }, { "epoch": 1.6017483228298435, "grad_norm": 0.1621832549571991, "learning_rate": 3.9865758161293605e-05, "loss": 1.0113, "step": 7879 }, { "epoch": 1.6019516161821508, "grad_norm": 0.12803547084331512, "learning_rate": 3.984541848876233e-05, "loss": 0.882, "step": 7880 }, { "epoch": 1.6021549095344583, "grad_norm": 0.1509457230567932, "learning_rate": 3.982507881623106e-05, "loss": 0.9955, "step": 7881 }, { "epoch": 1.6023582028867656, "grad_norm": 0.1313645988702774, "learning_rate": 3.9804739143699785e-05, "loss": 0.8071, "step": 7882 }, { "epoch": 1.602561496239073, "grad_norm": 0.17041872441768646, "learning_rate": 3.9784399471168516e-05, "loss": 1.1131, "step": 7883 }, { "epoch": 1.6027647895913804, "grad_norm": 0.16551892459392548, "learning_rate": 3.976405979863724e-05, "loss": 1.0629, "step": 7884 }, { "epoch": 1.6029680829436876, "grad_norm": 0.17070813477039337, "learning_rate": 3.974372012610597e-05, "loss": 1.0618, "step": 7885 }, { "epoch": 1.6031713762959952, "grad_norm": 0.14103469252586365, "learning_rate": 3.9723380453574697e-05, "loss": 0.7861, "step": 7886 }, { "epoch": 1.6033746696483027, "grad_norm": 0.16543133556842804, "learning_rate": 3.970304078104343e-05, "loss": 1.0845, "step": 7887 }, { "epoch": 1.60357796300061, "grad_norm": 0.15007635951042175, "learning_rate": 3.968270110851215e-05, "loss": 0.8379, "step": 7888 }, { "epoch": 1.6037812563529172, "grad_norm": 0.1689690798521042, "learning_rate": 3.9662361435980884e-05, "loss": 1.0323, "step": 7889 }, { "epoch": 1.6039845497052245, "grad_norm": 0.16115900874137878, "learning_rate": 3.964202176344961e-05, "loss": 1.1273, "step": 7890 }, { "epoch": 1.604187843057532, "grad_norm": 0.16399486362934113, "learning_rate": 3.962168209091834e-05, "loss": 0.9796, "step": 7891 }, { "epoch": 1.6043911364098395, "grad_norm": 0.13806955516338348, "learning_rate": 3.9601342418387064e-05, "loss": 0.8675, "step": 7892 }, { "epoch": 1.6045944297621468, "grad_norm": 0.1680563986301422, "learning_rate": 3.9581002745855795e-05, "loss": 1.1427, "step": 7893 }, { "epoch": 1.6047977231144541, "grad_norm": 0.16438691318035126, "learning_rate": 3.956066307332452e-05, "loss": 1.0952, "step": 7894 }, { "epoch": 1.6050010164667614, "grad_norm": 0.16523820161819458, "learning_rate": 3.954032340079325e-05, "loss": 0.9865, "step": 7895 }, { "epoch": 1.605204309819069, "grad_norm": 0.1712988317012787, "learning_rate": 3.9519983728261975e-05, "loss": 1.1806, "step": 7896 }, { "epoch": 1.6054076031713764, "grad_norm": 0.16897419095039368, "learning_rate": 3.9499644055730706e-05, "loss": 1.0718, "step": 7897 }, { "epoch": 1.6056108965236837, "grad_norm": 0.15394331514835358, "learning_rate": 3.947930438319943e-05, "loss": 1.0028, "step": 7898 }, { "epoch": 1.605814189875991, "grad_norm": 0.15742948651313782, "learning_rate": 3.945896471066816e-05, "loss": 1.0539, "step": 7899 }, { "epoch": 1.6060174832282983, "grad_norm": 0.15809758007526398, "learning_rate": 3.9438625038136886e-05, "loss": 0.9196, "step": 7900 }, { "epoch": 1.6062207765806058, "grad_norm": 0.1602354645729065, "learning_rate": 3.941828536560562e-05, "loss": 0.9906, "step": 7901 }, { "epoch": 1.6064240699329133, "grad_norm": 0.15356197953224182, "learning_rate": 3.939794569307434e-05, "loss": 0.899, "step": 7902 }, { "epoch": 1.6066273632852206, "grad_norm": 0.13915377855300903, "learning_rate": 3.9377606020543073e-05, "loss": 0.9125, "step": 7903 }, { "epoch": 1.6068306566375279, "grad_norm": 0.16230888664722443, "learning_rate": 3.93572663480118e-05, "loss": 1.1311, "step": 7904 }, { "epoch": 1.6070339499898352, "grad_norm": 0.1681625247001648, "learning_rate": 3.933692667548053e-05, "loss": 1.1399, "step": 7905 }, { "epoch": 1.6072372433421427, "grad_norm": 0.16028131544589996, "learning_rate": 3.9316587002949254e-05, "loss": 1.036, "step": 7906 }, { "epoch": 1.6074405366944502, "grad_norm": 0.16155043244361877, "learning_rate": 3.9296247330417985e-05, "loss": 1.0183, "step": 7907 }, { "epoch": 1.6076438300467575, "grad_norm": 0.15950800478458405, "learning_rate": 3.927590765788671e-05, "loss": 0.9922, "step": 7908 }, { "epoch": 1.6078471233990648, "grad_norm": 0.15786461532115936, "learning_rate": 3.925556798535544e-05, "loss": 1.0742, "step": 7909 }, { "epoch": 1.6080504167513723, "grad_norm": 0.15854032337665558, "learning_rate": 3.9235228312824165e-05, "loss": 1.1184, "step": 7910 }, { "epoch": 1.6082537101036796, "grad_norm": 0.15670092403888702, "learning_rate": 3.921488864029289e-05, "loss": 0.9589, "step": 7911 }, { "epoch": 1.608457003455987, "grad_norm": 0.14891672134399414, "learning_rate": 3.919454896776162e-05, "loss": 0.9501, "step": 7912 }, { "epoch": 1.6086602968082944, "grad_norm": 0.16712558269500732, "learning_rate": 3.9174209295230345e-05, "loss": 1.0027, "step": 7913 }, { "epoch": 1.6088635901606017, "grad_norm": 0.17947891354560852, "learning_rate": 3.9153869622699076e-05, "loss": 1.2695, "step": 7914 }, { "epoch": 1.6090668835129092, "grad_norm": 0.15130610764026642, "learning_rate": 3.91335299501678e-05, "loss": 0.9322, "step": 7915 }, { "epoch": 1.6092701768652167, "grad_norm": 0.15961766242980957, "learning_rate": 3.911319027763653e-05, "loss": 1.0109, "step": 7916 }, { "epoch": 1.609473470217524, "grad_norm": 0.16595500707626343, "learning_rate": 3.9092850605105257e-05, "loss": 1.0695, "step": 7917 }, { "epoch": 1.6096767635698312, "grad_norm": 0.14795635640621185, "learning_rate": 3.907251093257399e-05, "loss": 0.9159, "step": 7918 }, { "epoch": 1.6098800569221385, "grad_norm": 0.15362434089183807, "learning_rate": 3.905217126004271e-05, "loss": 1.0065, "step": 7919 }, { "epoch": 1.610083350274446, "grad_norm": 0.14026470482349396, "learning_rate": 3.9031831587511443e-05, "loss": 0.8112, "step": 7920 }, { "epoch": 1.6102866436267536, "grad_norm": 0.14446981251239777, "learning_rate": 3.901149191498017e-05, "loss": 0.9424, "step": 7921 }, { "epoch": 1.6104899369790608, "grad_norm": 0.14863884449005127, "learning_rate": 3.89911522424489e-05, "loss": 0.9371, "step": 7922 }, { "epoch": 1.6106932303313681, "grad_norm": 0.14967021346092224, "learning_rate": 3.8970812569917624e-05, "loss": 0.9764, "step": 7923 }, { "epoch": 1.6108965236836754, "grad_norm": 0.16208118200302124, "learning_rate": 3.8950472897386355e-05, "loss": 1.0544, "step": 7924 }, { "epoch": 1.611099817035983, "grad_norm": 0.14206236600875854, "learning_rate": 3.893013322485508e-05, "loss": 0.8277, "step": 7925 }, { "epoch": 1.6113031103882904, "grad_norm": 0.14986830949783325, "learning_rate": 3.890979355232381e-05, "loss": 0.9289, "step": 7926 }, { "epoch": 1.6115064037405977, "grad_norm": 0.16842809319496155, "learning_rate": 3.8889453879792535e-05, "loss": 1.1183, "step": 7927 }, { "epoch": 1.611709697092905, "grad_norm": 0.15191489458084106, "learning_rate": 3.8869114207261266e-05, "loss": 0.8506, "step": 7928 }, { "epoch": 1.6119129904452123, "grad_norm": 0.15974675118923187, "learning_rate": 3.884877453472999e-05, "loss": 0.9915, "step": 7929 }, { "epoch": 1.6121162837975198, "grad_norm": 0.15879543125629425, "learning_rate": 3.882843486219872e-05, "loss": 1.0648, "step": 7930 }, { "epoch": 1.6123195771498273, "grad_norm": 0.1741628348827362, "learning_rate": 3.8808095189667446e-05, "loss": 1.2275, "step": 7931 }, { "epoch": 1.6125228705021346, "grad_norm": 0.1599300652742386, "learning_rate": 3.878775551713618e-05, "loss": 1.0137, "step": 7932 }, { "epoch": 1.612726163854442, "grad_norm": 0.14113320410251617, "learning_rate": 3.87674158446049e-05, "loss": 0.8713, "step": 7933 }, { "epoch": 1.6129294572067492, "grad_norm": 0.15863676369190216, "learning_rate": 3.874707617207363e-05, "loss": 0.9456, "step": 7934 }, { "epoch": 1.6131327505590567, "grad_norm": 0.17394308745861053, "learning_rate": 3.872673649954236e-05, "loss": 1.2606, "step": 7935 }, { "epoch": 1.6133360439113642, "grad_norm": 0.14900152385234833, "learning_rate": 3.870639682701109e-05, "loss": 1.0478, "step": 7936 }, { "epoch": 1.6135393372636715, "grad_norm": 0.17220915853977203, "learning_rate": 3.8686057154479814e-05, "loss": 1.1322, "step": 7937 }, { "epoch": 1.6137426306159788, "grad_norm": 0.1563771665096283, "learning_rate": 3.8665717481948545e-05, "loss": 0.97, "step": 7938 }, { "epoch": 1.6139459239682863, "grad_norm": 0.14680613577365875, "learning_rate": 3.864537780941727e-05, "loss": 1.051, "step": 7939 }, { "epoch": 1.6141492173205936, "grad_norm": 0.14794084429740906, "learning_rate": 3.8625038136886e-05, "loss": 1.0092, "step": 7940 }, { "epoch": 1.614352510672901, "grad_norm": 0.14808155596256256, "learning_rate": 3.8604698464354725e-05, "loss": 0.9683, "step": 7941 }, { "epoch": 1.6145558040252084, "grad_norm": 0.13791748881340027, "learning_rate": 3.8584358791823456e-05, "loss": 0.842, "step": 7942 }, { "epoch": 1.6147590973775157, "grad_norm": 0.15770351886749268, "learning_rate": 3.856401911929218e-05, "loss": 1.0819, "step": 7943 }, { "epoch": 1.6149623907298232, "grad_norm": 0.15158608555793762, "learning_rate": 3.854367944676091e-05, "loss": 0.8855, "step": 7944 }, { "epoch": 1.6151656840821305, "grad_norm": 0.15945862233638763, "learning_rate": 3.8523339774229636e-05, "loss": 0.9457, "step": 7945 }, { "epoch": 1.615368977434438, "grad_norm": 0.1494598239660263, "learning_rate": 3.850300010169837e-05, "loss": 0.937, "step": 7946 }, { "epoch": 1.6155722707867453, "grad_norm": 0.14226461946964264, "learning_rate": 3.848266042916709e-05, "loss": 0.9418, "step": 7947 }, { "epoch": 1.6157755641390525, "grad_norm": 0.18085266649723053, "learning_rate": 3.846232075663582e-05, "loss": 1.1923, "step": 7948 }, { "epoch": 1.61597885749136, "grad_norm": 0.16115468740463257, "learning_rate": 3.844198108410455e-05, "loss": 0.9304, "step": 7949 }, { "epoch": 1.6161821508436676, "grad_norm": 0.14617152512073517, "learning_rate": 3.842164141157328e-05, "loss": 0.8852, "step": 7950 }, { "epoch": 1.6163854441959749, "grad_norm": 0.18115727603435516, "learning_rate": 3.8401301739042003e-05, "loss": 1.0792, "step": 7951 }, { "epoch": 1.6165887375482821, "grad_norm": 0.16041114926338196, "learning_rate": 3.838096206651073e-05, "loss": 1.082, "step": 7952 }, { "epoch": 1.6167920309005894, "grad_norm": 0.1685435175895691, "learning_rate": 3.836062239397946e-05, "loss": 1.0182, "step": 7953 }, { "epoch": 1.616995324252897, "grad_norm": 0.19218194484710693, "learning_rate": 3.8340282721448184e-05, "loss": 1.2311, "step": 7954 }, { "epoch": 1.6171986176052044, "grad_norm": 0.16772547364234924, "learning_rate": 3.8319943048916915e-05, "loss": 1.1996, "step": 7955 }, { "epoch": 1.6174019109575117, "grad_norm": 0.16216479241847992, "learning_rate": 3.829960337638564e-05, "loss": 1.1126, "step": 7956 }, { "epoch": 1.617605204309819, "grad_norm": 0.1512410044670105, "learning_rate": 3.827926370385437e-05, "loss": 0.8919, "step": 7957 }, { "epoch": 1.6178084976621263, "grad_norm": 0.17002248764038086, "learning_rate": 3.8258924031323095e-05, "loss": 1.1493, "step": 7958 }, { "epoch": 1.6180117910144338, "grad_norm": 0.1534353643655777, "learning_rate": 3.8238584358791826e-05, "loss": 0.8484, "step": 7959 }, { "epoch": 1.6182150843667413, "grad_norm": 0.15800921618938446, "learning_rate": 3.821824468626055e-05, "loss": 0.9339, "step": 7960 }, { "epoch": 1.6184183777190486, "grad_norm": 0.1709788739681244, "learning_rate": 3.819790501372928e-05, "loss": 1.049, "step": 7961 }, { "epoch": 1.618621671071356, "grad_norm": 0.16946430504322052, "learning_rate": 3.8177565341198006e-05, "loss": 1.2993, "step": 7962 }, { "epoch": 1.6188249644236632, "grad_norm": 0.16733253002166748, "learning_rate": 3.815722566866674e-05, "loss": 1.1441, "step": 7963 }, { "epoch": 1.6190282577759707, "grad_norm": 0.15681183338165283, "learning_rate": 3.813688599613546e-05, "loss": 0.9289, "step": 7964 }, { "epoch": 1.6192315511282782, "grad_norm": 0.15534301102161407, "learning_rate": 3.811654632360419e-05, "loss": 1.1042, "step": 7965 }, { "epoch": 1.6194348444805855, "grad_norm": 0.1394682675600052, "learning_rate": 3.809620665107292e-05, "loss": 0.911, "step": 7966 }, { "epoch": 1.6196381378328928, "grad_norm": 0.16406503319740295, "learning_rate": 3.807586697854165e-05, "loss": 1.0233, "step": 7967 }, { "epoch": 1.6198414311852003, "grad_norm": 0.16757860779762268, "learning_rate": 3.8055527306010373e-05, "loss": 1.1248, "step": 7968 }, { "epoch": 1.6200447245375076, "grad_norm": 0.1492101550102234, "learning_rate": 3.8035187633479105e-05, "loss": 0.9446, "step": 7969 }, { "epoch": 1.620248017889815, "grad_norm": 0.15200115740299225, "learning_rate": 3.801484796094783e-05, "loss": 0.9732, "step": 7970 }, { "epoch": 1.6204513112421224, "grad_norm": 0.15554553270339966, "learning_rate": 3.799450828841656e-05, "loss": 0.917, "step": 7971 }, { "epoch": 1.6206546045944297, "grad_norm": 0.16840587556362152, "learning_rate": 3.7974168615885285e-05, "loss": 1.0727, "step": 7972 }, { "epoch": 1.6208578979467372, "grad_norm": 0.16213718056678772, "learning_rate": 3.7953828943354016e-05, "loss": 0.9955, "step": 7973 }, { "epoch": 1.6210611912990445, "grad_norm": 0.16267994046211243, "learning_rate": 3.793348927082274e-05, "loss": 1.18, "step": 7974 }, { "epoch": 1.621264484651352, "grad_norm": 0.15481674671173096, "learning_rate": 3.791314959829147e-05, "loss": 1.0469, "step": 7975 }, { "epoch": 1.6214677780036593, "grad_norm": 0.15402746200561523, "learning_rate": 3.7892809925760196e-05, "loss": 1.1399, "step": 7976 }, { "epoch": 1.6216710713559666, "grad_norm": 0.15608134865760803, "learning_rate": 3.787247025322893e-05, "loss": 0.9351, "step": 7977 }, { "epoch": 1.621874364708274, "grad_norm": 0.14963030815124512, "learning_rate": 3.785213058069765e-05, "loss": 1.0482, "step": 7978 }, { "epoch": 1.6220776580605816, "grad_norm": 0.17491912841796875, "learning_rate": 3.783179090816638e-05, "loss": 1.1366, "step": 7979 }, { "epoch": 1.6222809514128889, "grad_norm": 0.16306598484516144, "learning_rate": 3.781145123563511e-05, "loss": 0.9913, "step": 7980 }, { "epoch": 1.6224842447651961, "grad_norm": 0.15949946641921997, "learning_rate": 3.779111156310384e-05, "loss": 1.0118, "step": 7981 }, { "epoch": 1.6226875381175034, "grad_norm": 0.15290047228336334, "learning_rate": 3.777077189057256e-05, "loss": 1.0088, "step": 7982 }, { "epoch": 1.622890831469811, "grad_norm": 0.17453934252262115, "learning_rate": 3.7750432218041295e-05, "loss": 1.0716, "step": 7983 }, { "epoch": 1.6230941248221185, "grad_norm": 0.17223399877548218, "learning_rate": 3.773009254551002e-05, "loss": 1.102, "step": 7984 }, { "epoch": 1.6232974181744257, "grad_norm": 0.17039790749549866, "learning_rate": 3.770975287297875e-05, "loss": 1.0979, "step": 7985 }, { "epoch": 1.623500711526733, "grad_norm": 0.1488572359085083, "learning_rate": 3.7689413200447475e-05, "loss": 1.1012, "step": 7986 }, { "epoch": 1.6237040048790403, "grad_norm": 0.158070370554924, "learning_rate": 3.7669073527916206e-05, "loss": 1.0285, "step": 7987 }, { "epoch": 1.6239072982313478, "grad_norm": 0.1599297672510147, "learning_rate": 3.764873385538493e-05, "loss": 1.0156, "step": 7988 }, { "epoch": 1.6241105915836553, "grad_norm": 0.17132924497127533, "learning_rate": 3.762839418285366e-05, "loss": 1.1242, "step": 7989 }, { "epoch": 1.6243138849359626, "grad_norm": 0.16195577383041382, "learning_rate": 3.7608054510322386e-05, "loss": 1.0605, "step": 7990 }, { "epoch": 1.62451717828827, "grad_norm": 0.1512899249792099, "learning_rate": 3.758771483779112e-05, "loss": 0.9553, "step": 7991 }, { "epoch": 1.6247204716405772, "grad_norm": 0.16723550856113434, "learning_rate": 3.756737516525984e-05, "loss": 1.1625, "step": 7992 }, { "epoch": 1.6249237649928847, "grad_norm": 0.17061835527420044, "learning_rate": 3.7547035492728566e-05, "loss": 1.1109, "step": 7993 }, { "epoch": 1.6251270583451922, "grad_norm": 0.1567031443119049, "learning_rate": 3.752669582019729e-05, "loss": 1.0359, "step": 7994 }, { "epoch": 1.6253303516974995, "grad_norm": 0.1756792962551117, "learning_rate": 3.750635614766602e-05, "loss": 1.1771, "step": 7995 }, { "epoch": 1.6255336450498068, "grad_norm": 0.1696092188358307, "learning_rate": 3.7486016475134746e-05, "loss": 1.0773, "step": 7996 }, { "epoch": 1.625736938402114, "grad_norm": 0.16768652200698853, "learning_rate": 3.746567680260348e-05, "loss": 1.0147, "step": 7997 }, { "epoch": 1.6259402317544216, "grad_norm": 0.14477315545082092, "learning_rate": 3.744533713007221e-05, "loss": 0.932, "step": 7998 }, { "epoch": 1.626143525106729, "grad_norm": 0.17586012184619904, "learning_rate": 3.7424997457540933e-05, "loss": 1.1182, "step": 7999 }, { "epoch": 1.6263468184590364, "grad_norm": 0.16427867114543915, "learning_rate": 3.7404657785009665e-05, "loss": 0.9973, "step": 8000 }, { "epoch": 1.6265501118113437, "grad_norm": 0.17387059330940247, "learning_rate": 3.738431811247839e-05, "loss": 1.0541, "step": 8001 }, { "epoch": 1.6267534051636512, "grad_norm": 0.15927183628082275, "learning_rate": 3.736397843994712e-05, "loss": 1.0713, "step": 8002 }, { "epoch": 1.6269566985159585, "grad_norm": 0.17163364589214325, "learning_rate": 3.7343638767415845e-05, "loss": 1.1181, "step": 8003 }, { "epoch": 1.627159991868266, "grad_norm": 0.16909077763557434, "learning_rate": 3.7323299094884576e-05, "loss": 1.316, "step": 8004 }, { "epoch": 1.6273632852205733, "grad_norm": 0.15417928993701935, "learning_rate": 3.73029594223533e-05, "loss": 1.0184, "step": 8005 }, { "epoch": 1.6275665785728806, "grad_norm": 0.15921998023986816, "learning_rate": 3.728261974982203e-05, "loss": 1.0418, "step": 8006 }, { "epoch": 1.627769871925188, "grad_norm": 0.15805287659168243, "learning_rate": 3.7262280077290756e-05, "loss": 0.9744, "step": 8007 }, { "epoch": 1.6279731652774956, "grad_norm": 0.16373226046562195, "learning_rate": 3.724194040475949e-05, "loss": 1.0886, "step": 8008 }, { "epoch": 1.6281764586298029, "grad_norm": 0.17239722609519958, "learning_rate": 3.722160073222821e-05, "loss": 1.2586, "step": 8009 }, { "epoch": 1.6283797519821102, "grad_norm": 0.15971477329730988, "learning_rate": 3.720126105969694e-05, "loss": 0.9764, "step": 8010 }, { "epoch": 1.6285830453344174, "grad_norm": 0.17333199083805084, "learning_rate": 3.718092138716567e-05, "loss": 0.8852, "step": 8011 }, { "epoch": 1.628786338686725, "grad_norm": 0.16999101638793945, "learning_rate": 3.71605817146344e-05, "loss": 1.2504, "step": 8012 }, { "epoch": 1.6289896320390325, "grad_norm": 0.1401221752166748, "learning_rate": 3.714024204210312e-05, "loss": 0.8043, "step": 8013 }, { "epoch": 1.6291929253913398, "grad_norm": 0.15759146213531494, "learning_rate": 3.7119902369571855e-05, "loss": 0.9718, "step": 8014 }, { "epoch": 1.629396218743647, "grad_norm": 0.16269823908805847, "learning_rate": 3.709956269704058e-05, "loss": 1.0274, "step": 8015 }, { "epoch": 1.6295995120959543, "grad_norm": 0.15324591100215912, "learning_rate": 3.707922302450931e-05, "loss": 1.0174, "step": 8016 }, { "epoch": 1.6298028054482618, "grad_norm": 0.1714431643486023, "learning_rate": 3.7058883351978035e-05, "loss": 1.0673, "step": 8017 }, { "epoch": 1.6300060988005693, "grad_norm": 0.15541139245033264, "learning_rate": 3.7038543679446766e-05, "loss": 0.8619, "step": 8018 }, { "epoch": 1.6302093921528766, "grad_norm": 0.16794472932815552, "learning_rate": 3.701820400691549e-05, "loss": 0.9925, "step": 8019 }, { "epoch": 1.630412685505184, "grad_norm": 0.15092499554157257, "learning_rate": 3.699786433438422e-05, "loss": 1.0623, "step": 8020 }, { "epoch": 1.6306159788574912, "grad_norm": 0.1729954481124878, "learning_rate": 3.6977524661852946e-05, "loss": 1.2434, "step": 8021 }, { "epoch": 1.6308192722097987, "grad_norm": 0.17729045450687408, "learning_rate": 3.695718498932168e-05, "loss": 1.0661, "step": 8022 }, { "epoch": 1.6310225655621062, "grad_norm": 0.16118037700653076, "learning_rate": 3.69368453167904e-05, "loss": 1.1773, "step": 8023 }, { "epoch": 1.6312258589144135, "grad_norm": 0.17695970833301544, "learning_rate": 3.691650564425913e-05, "loss": 1.0619, "step": 8024 }, { "epoch": 1.6314291522667208, "grad_norm": 0.17084458470344543, "learning_rate": 3.689616597172786e-05, "loss": 1.1596, "step": 8025 }, { "epoch": 1.631632445619028, "grad_norm": 0.15691812336444855, "learning_rate": 3.687582629919659e-05, "loss": 0.9784, "step": 8026 }, { "epoch": 1.6318357389713356, "grad_norm": 0.1738874614238739, "learning_rate": 3.685548662666531e-05, "loss": 1.1159, "step": 8027 }, { "epoch": 1.6320390323236431, "grad_norm": 0.16877232491970062, "learning_rate": 3.6835146954134044e-05, "loss": 1.1092, "step": 8028 }, { "epoch": 1.6322423256759504, "grad_norm": 0.15862956643104553, "learning_rate": 3.681480728160277e-05, "loss": 0.925, "step": 8029 }, { "epoch": 1.6324456190282577, "grad_norm": 0.15145260095596313, "learning_rate": 3.67944676090715e-05, "loss": 0.9344, "step": 8030 }, { "epoch": 1.6326489123805652, "grad_norm": 0.14583422243595123, "learning_rate": 3.6774127936540225e-05, "loss": 0.8888, "step": 8031 }, { "epoch": 1.6328522057328725, "grad_norm": 0.163094624876976, "learning_rate": 3.6753788264008956e-05, "loss": 1.0331, "step": 8032 }, { "epoch": 1.63305549908518, "grad_norm": 0.1528819054365158, "learning_rate": 3.673344859147768e-05, "loss": 1.1097, "step": 8033 }, { "epoch": 1.6332587924374873, "grad_norm": 0.17181923985481262, "learning_rate": 3.671310891894641e-05, "loss": 1.0342, "step": 8034 }, { "epoch": 1.6334620857897946, "grad_norm": 0.13814687728881836, "learning_rate": 3.669276924641513e-05, "loss": 0.8469, "step": 8035 }, { "epoch": 1.633665379142102, "grad_norm": 0.1273750215768814, "learning_rate": 3.667242957388386e-05, "loss": 0.8547, "step": 8036 }, { "epoch": 1.6338686724944096, "grad_norm": 0.1615859419107437, "learning_rate": 3.6652089901352585e-05, "loss": 1.0183, "step": 8037 }, { "epoch": 1.6340719658467169, "grad_norm": 0.1491604745388031, "learning_rate": 3.6631750228821316e-05, "loss": 0.8522, "step": 8038 }, { "epoch": 1.6342752591990242, "grad_norm": 0.14688225090503693, "learning_rate": 3.661141055629004e-05, "loss": 0.9312, "step": 8039 }, { "epoch": 1.6344785525513315, "grad_norm": 0.16584450006484985, "learning_rate": 3.659107088375877e-05, "loss": 0.939, "step": 8040 }, { "epoch": 1.634681845903639, "grad_norm": 0.15862010419368744, "learning_rate": 3.6570731211227496e-05, "loss": 1.1083, "step": 8041 }, { "epoch": 1.6348851392559465, "grad_norm": 0.1356951743364334, "learning_rate": 3.655039153869623e-05, "loss": 0.8713, "step": 8042 }, { "epoch": 1.6350884326082538, "grad_norm": 0.1821877360343933, "learning_rate": 3.653005186616495e-05, "loss": 1.1237, "step": 8043 }, { "epoch": 1.635291725960561, "grad_norm": 0.15805569291114807, "learning_rate": 3.650971219363368e-05, "loss": 1.0393, "step": 8044 }, { "epoch": 1.6354950193128683, "grad_norm": 0.16079434752464294, "learning_rate": 3.648937252110241e-05, "loss": 0.9835, "step": 8045 }, { "epoch": 1.6356983126651758, "grad_norm": 0.14450377225875854, "learning_rate": 3.646903284857114e-05, "loss": 0.8792, "step": 8046 }, { "epoch": 1.6359016060174834, "grad_norm": 0.1777336746454239, "learning_rate": 3.6448693176039863e-05, "loss": 1.0516, "step": 8047 }, { "epoch": 1.6361048993697906, "grad_norm": 0.15600675344467163, "learning_rate": 3.6428353503508595e-05, "loss": 1.0868, "step": 8048 }, { "epoch": 1.636308192722098, "grad_norm": 0.16318489611148834, "learning_rate": 3.640801383097732e-05, "loss": 1.1323, "step": 8049 }, { "epoch": 1.6365114860744052, "grad_norm": 0.15596437454223633, "learning_rate": 3.638767415844605e-05, "loss": 0.9457, "step": 8050 }, { "epoch": 1.6367147794267127, "grad_norm": 0.14871814846992493, "learning_rate": 3.6367334485914775e-05, "loss": 0.9032, "step": 8051 }, { "epoch": 1.6369180727790202, "grad_norm": 0.1675577163696289, "learning_rate": 3.6346994813383506e-05, "loss": 1.0353, "step": 8052 }, { "epoch": 1.6371213661313275, "grad_norm": 0.15558558702468872, "learning_rate": 3.632665514085223e-05, "loss": 0.9969, "step": 8053 }, { "epoch": 1.6373246594836348, "grad_norm": 0.15018601715564728, "learning_rate": 3.630631546832096e-05, "loss": 0.9881, "step": 8054 }, { "epoch": 1.637527952835942, "grad_norm": 0.15155579149723053, "learning_rate": 3.6285975795789686e-05, "loss": 0.9954, "step": 8055 }, { "epoch": 1.6377312461882496, "grad_norm": 0.15577580034732819, "learning_rate": 3.626563612325842e-05, "loss": 1.0268, "step": 8056 }, { "epoch": 1.6379345395405571, "grad_norm": 0.17613066732883453, "learning_rate": 3.624529645072715e-05, "loss": 1.2132, "step": 8057 }, { "epoch": 1.6381378328928644, "grad_norm": 0.1630224734544754, "learning_rate": 3.622495677819587e-05, "loss": 1.2273, "step": 8058 }, { "epoch": 1.6383411262451717, "grad_norm": 0.15222543478012085, "learning_rate": 3.6204617105664604e-05, "loss": 1.0562, "step": 8059 }, { "epoch": 1.6385444195974792, "grad_norm": 0.16886858642101288, "learning_rate": 3.618427743313333e-05, "loss": 1.1734, "step": 8060 }, { "epoch": 1.6387477129497865, "grad_norm": 0.155827134847641, "learning_rate": 3.616393776060206e-05, "loss": 0.9629, "step": 8061 }, { "epoch": 1.638951006302094, "grad_norm": 0.16652485728263855, "learning_rate": 3.6143598088070785e-05, "loss": 1.1177, "step": 8062 }, { "epoch": 1.6391542996544013, "grad_norm": 0.15345072746276855, "learning_rate": 3.6123258415539516e-05, "loss": 0.9935, "step": 8063 }, { "epoch": 1.6393575930067086, "grad_norm": 0.15303950011730194, "learning_rate": 3.610291874300824e-05, "loss": 0.9658, "step": 8064 }, { "epoch": 1.639560886359016, "grad_norm": 0.17911545932292938, "learning_rate": 3.608257907047697e-05, "loss": 1.1169, "step": 8065 }, { "epoch": 1.6397641797113236, "grad_norm": 0.16155368089675903, "learning_rate": 3.6062239397945696e-05, "loss": 1.036, "step": 8066 }, { "epoch": 1.6399674730636309, "grad_norm": 0.13445566594600677, "learning_rate": 3.604189972541443e-05, "loss": 0.8348, "step": 8067 }, { "epoch": 1.6401707664159382, "grad_norm": 0.1601768285036087, "learning_rate": 3.602156005288315e-05, "loss": 1.1275, "step": 8068 }, { "epoch": 1.6403740597682455, "grad_norm": 0.1684492826461792, "learning_rate": 3.600122038035188e-05, "loss": 1.0041, "step": 8069 }, { "epoch": 1.640577353120553, "grad_norm": 0.1607998013496399, "learning_rate": 3.598088070782061e-05, "loss": 1.1982, "step": 8070 }, { "epoch": 1.6407806464728605, "grad_norm": 0.15837855637073517, "learning_rate": 3.596054103528934e-05, "loss": 0.8733, "step": 8071 }, { "epoch": 1.6409839398251678, "grad_norm": 0.15939421951770782, "learning_rate": 3.594020136275806e-05, "loss": 1.002, "step": 8072 }, { "epoch": 1.641187233177475, "grad_norm": 0.14361870288848877, "learning_rate": 3.5919861690226794e-05, "loss": 0.8926, "step": 8073 }, { "epoch": 1.6413905265297823, "grad_norm": 0.13794727623462677, "learning_rate": 3.589952201769552e-05, "loss": 0.8444, "step": 8074 }, { "epoch": 1.6415938198820899, "grad_norm": 0.16321788728237152, "learning_rate": 3.587918234516425e-05, "loss": 0.9826, "step": 8075 }, { "epoch": 1.6417971132343974, "grad_norm": 0.15810827910900116, "learning_rate": 3.585884267263297e-05, "loss": 1.151, "step": 8076 }, { "epoch": 1.6420004065867047, "grad_norm": 0.16769932210445404, "learning_rate": 3.58385030001017e-05, "loss": 1.095, "step": 8077 }, { "epoch": 1.642203699939012, "grad_norm": 0.1595483124256134, "learning_rate": 3.581816332757042e-05, "loss": 0.96, "step": 8078 }, { "epoch": 1.6424069932913192, "grad_norm": 0.18247397243976593, "learning_rate": 3.5797823655039155e-05, "loss": 1.1374, "step": 8079 }, { "epoch": 1.6426102866436267, "grad_norm": 0.1651366949081421, "learning_rate": 3.577748398250788e-05, "loss": 1.0132, "step": 8080 }, { "epoch": 1.6428135799959342, "grad_norm": 0.16135729849338531, "learning_rate": 3.575714430997661e-05, "loss": 1.0574, "step": 8081 }, { "epoch": 1.6430168733482415, "grad_norm": 0.15416669845581055, "learning_rate": 3.5736804637445335e-05, "loss": 1.0918, "step": 8082 }, { "epoch": 1.6432201667005488, "grad_norm": 0.1502210795879364, "learning_rate": 3.5716464964914066e-05, "loss": 0.9824, "step": 8083 }, { "epoch": 1.643423460052856, "grad_norm": 0.14438867568969727, "learning_rate": 3.569612529238279e-05, "loss": 0.9063, "step": 8084 }, { "epoch": 1.6436267534051636, "grad_norm": 0.18859823048114777, "learning_rate": 3.567578561985152e-05, "loss": 1.1704, "step": 8085 }, { "epoch": 1.6438300467574711, "grad_norm": 0.16827259957790375, "learning_rate": 3.5655445947320246e-05, "loss": 1.065, "step": 8086 }, { "epoch": 1.6440333401097784, "grad_norm": 0.1386031061410904, "learning_rate": 3.563510627478898e-05, "loss": 0.9343, "step": 8087 }, { "epoch": 1.6442366334620857, "grad_norm": 0.1561604142189026, "learning_rate": 3.56147666022577e-05, "loss": 1.0101, "step": 8088 }, { "epoch": 1.6444399268143932, "grad_norm": 0.16558410227298737, "learning_rate": 3.559442692972643e-05, "loss": 1.0733, "step": 8089 }, { "epoch": 1.6446432201667005, "grad_norm": 0.17633873224258423, "learning_rate": 3.557408725719516e-05, "loss": 1.2789, "step": 8090 }, { "epoch": 1.644846513519008, "grad_norm": 0.16376599669456482, "learning_rate": 3.555374758466389e-05, "loss": 1.2511, "step": 8091 }, { "epoch": 1.6450498068713153, "grad_norm": 0.1641642302274704, "learning_rate": 3.553340791213261e-05, "loss": 0.9553, "step": 8092 }, { "epoch": 1.6452531002236226, "grad_norm": 0.15936289727687836, "learning_rate": 3.5513068239601344e-05, "loss": 1.0167, "step": 8093 }, { "epoch": 1.64545639357593, "grad_norm": 0.1475653052330017, "learning_rate": 3.549272856707007e-05, "loss": 0.954, "step": 8094 }, { "epoch": 1.6456596869282376, "grad_norm": 0.14940956234931946, "learning_rate": 3.54723888945388e-05, "loss": 0.8779, "step": 8095 }, { "epoch": 1.645862980280545, "grad_norm": 0.15408417582511902, "learning_rate": 3.5452049222007525e-05, "loss": 1.1329, "step": 8096 }, { "epoch": 1.6460662736328522, "grad_norm": 0.1574806123971939, "learning_rate": 3.5431709549476256e-05, "loss": 1.018, "step": 8097 }, { "epoch": 1.6462695669851595, "grad_norm": 0.15124322474002838, "learning_rate": 3.541136987694498e-05, "loss": 1.0331, "step": 8098 }, { "epoch": 1.646472860337467, "grad_norm": 0.16560706496238708, "learning_rate": 3.539103020441371e-05, "loss": 1.0082, "step": 8099 }, { "epoch": 1.6466761536897745, "grad_norm": 0.16119523346424103, "learning_rate": 3.5370690531882436e-05, "loss": 0.9863, "step": 8100 }, { "epoch": 1.6468794470420818, "grad_norm": 0.1384766548871994, "learning_rate": 3.535035085935117e-05, "loss": 0.9272, "step": 8101 }, { "epoch": 1.647082740394389, "grad_norm": 0.15173615515232086, "learning_rate": 3.533001118681989e-05, "loss": 1.0343, "step": 8102 }, { "epoch": 1.6472860337466964, "grad_norm": 0.1631861925125122, "learning_rate": 3.530967151428862e-05, "loss": 1.0078, "step": 8103 }, { "epoch": 1.6474893270990039, "grad_norm": 0.14067046344280243, "learning_rate": 3.528933184175735e-05, "loss": 0.8624, "step": 8104 }, { "epoch": 1.6476926204513114, "grad_norm": 0.14784593880176544, "learning_rate": 3.526899216922608e-05, "loss": 1.0447, "step": 8105 }, { "epoch": 1.6478959138036187, "grad_norm": 0.1811901479959488, "learning_rate": 3.52486524966948e-05, "loss": 1.017, "step": 8106 }, { "epoch": 1.648099207155926, "grad_norm": 0.18699727952480316, "learning_rate": 3.5228312824163534e-05, "loss": 1.2849, "step": 8107 }, { "epoch": 1.6483025005082332, "grad_norm": 0.1428460031747818, "learning_rate": 3.520797315163226e-05, "loss": 0.9107, "step": 8108 }, { "epoch": 1.6485057938605407, "grad_norm": 0.15337370336055756, "learning_rate": 3.518763347910099e-05, "loss": 1.1027, "step": 8109 }, { "epoch": 1.6487090872128483, "grad_norm": 0.17217929661273956, "learning_rate": 3.5167293806569715e-05, "loss": 0.994, "step": 8110 }, { "epoch": 1.6489123805651555, "grad_norm": 0.16620087623596191, "learning_rate": 3.5146954134038446e-05, "loss": 1.0937, "step": 8111 }, { "epoch": 1.6491156739174628, "grad_norm": 0.1594901829957962, "learning_rate": 3.512661446150717e-05, "loss": 1.0179, "step": 8112 }, { "epoch": 1.6493189672697701, "grad_norm": 0.1666560173034668, "learning_rate": 3.51062747889759e-05, "loss": 1.0573, "step": 8113 }, { "epoch": 1.6495222606220776, "grad_norm": 0.14064495265483856, "learning_rate": 3.5085935116444626e-05, "loss": 0.8657, "step": 8114 }, { "epoch": 1.6497255539743851, "grad_norm": 0.1455429345369339, "learning_rate": 3.506559544391336e-05, "loss": 0.9457, "step": 8115 }, { "epoch": 1.6499288473266924, "grad_norm": 0.15727290511131287, "learning_rate": 3.504525577138209e-05, "loss": 0.8656, "step": 8116 }, { "epoch": 1.6501321406789997, "grad_norm": 0.17552879452705383, "learning_rate": 3.502491609885081e-05, "loss": 1.2354, "step": 8117 }, { "epoch": 1.6503354340313072, "grad_norm": 0.16221044957637787, "learning_rate": 3.500457642631954e-05, "loss": 0.9777, "step": 8118 }, { "epoch": 1.6505387273836145, "grad_norm": 0.15834292769432068, "learning_rate": 3.498423675378826e-05, "loss": 0.9961, "step": 8119 }, { "epoch": 1.650742020735922, "grad_norm": 0.1574869006872177, "learning_rate": 3.496389708125699e-05, "loss": 0.9951, "step": 8120 }, { "epoch": 1.6509453140882293, "grad_norm": 0.15078140795230865, "learning_rate": 3.494355740872572e-05, "loss": 0.9483, "step": 8121 }, { "epoch": 1.6511486074405366, "grad_norm": 0.17093241214752197, "learning_rate": 3.492321773619445e-05, "loss": 1.0966, "step": 8122 }, { "epoch": 1.651351900792844, "grad_norm": 0.15726056694984436, "learning_rate": 3.490287806366317e-05, "loss": 1.0237, "step": 8123 }, { "epoch": 1.6515551941451516, "grad_norm": 0.16391131281852722, "learning_rate": 3.4882538391131904e-05, "loss": 1.1201, "step": 8124 }, { "epoch": 1.651758487497459, "grad_norm": 0.1430152952671051, "learning_rate": 3.486219871860063e-05, "loss": 0.9279, "step": 8125 }, { "epoch": 1.6519617808497662, "grad_norm": 0.16593016684055328, "learning_rate": 3.484185904606936e-05, "loss": 0.9634, "step": 8126 }, { "epoch": 1.6521650742020735, "grad_norm": 0.1607755869626999, "learning_rate": 3.4821519373538085e-05, "loss": 1.0527, "step": 8127 }, { "epoch": 1.652368367554381, "grad_norm": 0.17133617401123047, "learning_rate": 3.4801179701006816e-05, "loss": 1.1826, "step": 8128 }, { "epoch": 1.6525716609066885, "grad_norm": 0.15653342008590698, "learning_rate": 3.478084002847554e-05, "loss": 1.0516, "step": 8129 }, { "epoch": 1.6527749542589958, "grad_norm": 0.15894630551338196, "learning_rate": 3.476050035594427e-05, "loss": 1.0225, "step": 8130 }, { "epoch": 1.652978247611303, "grad_norm": 0.1469620168209076, "learning_rate": 3.4740160683412996e-05, "loss": 0.8731, "step": 8131 }, { "epoch": 1.6531815409636104, "grad_norm": 0.15666627883911133, "learning_rate": 3.471982101088173e-05, "loss": 1.0584, "step": 8132 }, { "epoch": 1.6533848343159179, "grad_norm": 0.1571764498949051, "learning_rate": 3.469948133835045e-05, "loss": 1.0839, "step": 8133 }, { "epoch": 1.6535881276682254, "grad_norm": 0.1418662816286087, "learning_rate": 3.467914166581918e-05, "loss": 0.9333, "step": 8134 }, { "epoch": 1.6537914210205327, "grad_norm": 0.1447824090719223, "learning_rate": 3.465880199328791e-05, "loss": 0.8974, "step": 8135 }, { "epoch": 1.65399471437284, "grad_norm": 0.17725452780723572, "learning_rate": 3.463846232075664e-05, "loss": 1.0515, "step": 8136 }, { "epoch": 1.6541980077251472, "grad_norm": 0.159274160861969, "learning_rate": 3.461812264822536e-05, "loss": 1.1186, "step": 8137 }, { "epoch": 1.6544013010774548, "grad_norm": 0.15207655727863312, "learning_rate": 3.4597782975694094e-05, "loss": 1.144, "step": 8138 }, { "epoch": 1.6546045944297623, "grad_norm": 0.16574181616306305, "learning_rate": 3.457744330316282e-05, "loss": 1.1275, "step": 8139 }, { "epoch": 1.6548078877820696, "grad_norm": 0.14676649868488312, "learning_rate": 3.455710363063155e-05, "loss": 0.8919, "step": 8140 }, { "epoch": 1.6550111811343768, "grad_norm": 0.13786925375461578, "learning_rate": 3.4536763958100274e-05, "loss": 0.9018, "step": 8141 }, { "epoch": 1.6552144744866841, "grad_norm": 0.15587420761585236, "learning_rate": 3.4516424285569006e-05, "loss": 1.0176, "step": 8142 }, { "epoch": 1.6554177678389916, "grad_norm": 0.15045396983623505, "learning_rate": 3.449608461303773e-05, "loss": 1.0506, "step": 8143 }, { "epoch": 1.6556210611912991, "grad_norm": 0.15268725156784058, "learning_rate": 3.447574494050646e-05, "loss": 0.9137, "step": 8144 }, { "epoch": 1.6558243545436064, "grad_norm": 0.159629687666893, "learning_rate": 3.4455405267975186e-05, "loss": 1.1457, "step": 8145 }, { "epoch": 1.6560276478959137, "grad_norm": 0.14559295773506165, "learning_rate": 3.443506559544392e-05, "loss": 0.975, "step": 8146 }, { "epoch": 1.6562309412482212, "grad_norm": 0.17126677930355072, "learning_rate": 3.441472592291264e-05, "loss": 1.1515, "step": 8147 }, { "epoch": 1.6564342346005285, "grad_norm": 0.15481337904930115, "learning_rate": 3.439438625038137e-05, "loss": 1.0915, "step": 8148 }, { "epoch": 1.656637527952836, "grad_norm": 0.15098688006401062, "learning_rate": 3.43740465778501e-05, "loss": 0.8229, "step": 8149 }, { "epoch": 1.6568408213051433, "grad_norm": 0.14229868352413177, "learning_rate": 3.435370690531883e-05, "loss": 0.8869, "step": 8150 }, { "epoch": 1.6570441146574506, "grad_norm": 0.1660015732049942, "learning_rate": 3.433336723278755e-05, "loss": 0.9847, "step": 8151 }, { "epoch": 1.6572474080097581, "grad_norm": 0.15343834459781647, "learning_rate": 3.4313027560256284e-05, "loss": 0.9419, "step": 8152 }, { "epoch": 1.6574507013620656, "grad_norm": 0.16076374053955078, "learning_rate": 3.429268788772501e-05, "loss": 0.9908, "step": 8153 }, { "epoch": 1.657653994714373, "grad_norm": 0.1545952558517456, "learning_rate": 3.427234821519374e-05, "loss": 0.9547, "step": 8154 }, { "epoch": 1.6578572880666802, "grad_norm": 0.1620621234178543, "learning_rate": 3.4252008542662464e-05, "loss": 1.0628, "step": 8155 }, { "epoch": 1.6580605814189875, "grad_norm": 0.14856909215450287, "learning_rate": 3.4231668870131196e-05, "loss": 0.9337, "step": 8156 }, { "epoch": 1.658263874771295, "grad_norm": 0.15222874283790588, "learning_rate": 3.421132919759992e-05, "loss": 1.0522, "step": 8157 }, { "epoch": 1.6584671681236025, "grad_norm": 0.15242226421833038, "learning_rate": 3.419098952506865e-05, "loss": 0.9792, "step": 8158 }, { "epoch": 1.6586704614759098, "grad_norm": 0.14996498823165894, "learning_rate": 3.4170649852537376e-05, "loss": 0.9337, "step": 8159 }, { "epoch": 1.658873754828217, "grad_norm": 0.1503567099571228, "learning_rate": 3.41503101800061e-05, "loss": 0.978, "step": 8160 }, { "epoch": 1.6590770481805244, "grad_norm": 0.1745745986700058, "learning_rate": 3.412997050747483e-05, "loss": 1.0164, "step": 8161 }, { "epoch": 1.6592803415328319, "grad_norm": 0.16185620427131653, "learning_rate": 3.4109630834943556e-05, "loss": 1.0667, "step": 8162 }, { "epoch": 1.6594836348851394, "grad_norm": 0.1617034673690796, "learning_rate": 3.408929116241229e-05, "loss": 1.0909, "step": 8163 }, { "epoch": 1.6596869282374467, "grad_norm": 0.1608150750398636, "learning_rate": 3.406895148988101e-05, "loss": 1.0313, "step": 8164 }, { "epoch": 1.659890221589754, "grad_norm": 0.15898264944553375, "learning_rate": 3.404861181734974e-05, "loss": 1.0226, "step": 8165 }, { "epoch": 1.6600935149420613, "grad_norm": 0.15977314114570618, "learning_rate": 3.402827214481847e-05, "loss": 1.1185, "step": 8166 }, { "epoch": 1.6602968082943688, "grad_norm": 0.15889903903007507, "learning_rate": 3.40079324722872e-05, "loss": 1.0955, "step": 8167 }, { "epoch": 1.6605001016466763, "grad_norm": 0.1648290753364563, "learning_rate": 3.398759279975592e-05, "loss": 0.9548, "step": 8168 }, { "epoch": 1.6607033949989836, "grad_norm": 0.1553269475698471, "learning_rate": 3.3967253127224654e-05, "loss": 1.0261, "step": 8169 }, { "epoch": 1.6609066883512908, "grad_norm": 0.1603105664253235, "learning_rate": 3.394691345469338e-05, "loss": 1.0377, "step": 8170 }, { "epoch": 1.6611099817035981, "grad_norm": 0.1612885743379593, "learning_rate": 3.392657378216211e-05, "loss": 1.0967, "step": 8171 }, { "epoch": 1.6613132750559056, "grad_norm": 0.1672913134098053, "learning_rate": 3.3906234109630834e-05, "loss": 1.0558, "step": 8172 }, { "epoch": 1.6615165684082132, "grad_norm": 0.14767640829086304, "learning_rate": 3.3885894437099566e-05, "loss": 1.0189, "step": 8173 }, { "epoch": 1.6617198617605204, "grad_norm": 0.14969241619110107, "learning_rate": 3.386555476456829e-05, "loss": 0.9691, "step": 8174 }, { "epoch": 1.6619231551128277, "grad_norm": 0.15062014758586884, "learning_rate": 3.384521509203702e-05, "loss": 1.0036, "step": 8175 }, { "epoch": 1.6621264484651352, "grad_norm": 0.14859412610530853, "learning_rate": 3.3824875419505746e-05, "loss": 0.9816, "step": 8176 }, { "epoch": 1.6623297418174425, "grad_norm": 0.17306527495384216, "learning_rate": 3.380453574697448e-05, "loss": 1.2157, "step": 8177 }, { "epoch": 1.66253303516975, "grad_norm": 0.16912691295146942, "learning_rate": 3.37841960744432e-05, "loss": 1.118, "step": 8178 }, { "epoch": 1.6627363285220573, "grad_norm": 0.1556045114994049, "learning_rate": 3.376385640191193e-05, "loss": 0.9719, "step": 8179 }, { "epoch": 1.6629396218743646, "grad_norm": 0.17453686892986298, "learning_rate": 3.374351672938066e-05, "loss": 1.0896, "step": 8180 }, { "epoch": 1.6631429152266721, "grad_norm": 0.1661638766527176, "learning_rate": 3.372317705684939e-05, "loss": 1.1405, "step": 8181 }, { "epoch": 1.6633462085789794, "grad_norm": 0.1660672426223755, "learning_rate": 3.370283738431811e-05, "loss": 1.0092, "step": 8182 }, { "epoch": 1.663549501931287, "grad_norm": 0.1505102813243866, "learning_rate": 3.3682497711786844e-05, "loss": 0.987, "step": 8183 }, { "epoch": 1.6637527952835942, "grad_norm": 0.15693391859531403, "learning_rate": 3.366215803925557e-05, "loss": 0.9432, "step": 8184 }, { "epoch": 1.6639560886359015, "grad_norm": 0.17438814043998718, "learning_rate": 3.36418183667243e-05, "loss": 1.2763, "step": 8185 }, { "epoch": 1.664159381988209, "grad_norm": 0.16200709342956543, "learning_rate": 3.3621478694193024e-05, "loss": 0.9697, "step": 8186 }, { "epoch": 1.6643626753405165, "grad_norm": 0.1725626438856125, "learning_rate": 3.3601139021661756e-05, "loss": 1.1092, "step": 8187 }, { "epoch": 1.6645659686928238, "grad_norm": 0.17247982323169708, "learning_rate": 3.358079934913048e-05, "loss": 1.0986, "step": 8188 }, { "epoch": 1.664769262045131, "grad_norm": 0.1369389444589615, "learning_rate": 3.356045967659921e-05, "loss": 1.0207, "step": 8189 }, { "epoch": 1.6649725553974384, "grad_norm": 0.16126559674739838, "learning_rate": 3.3540120004067936e-05, "loss": 0.917, "step": 8190 }, { "epoch": 1.665175848749746, "grad_norm": 0.1521894633769989, "learning_rate": 3.351978033153667e-05, "loss": 1.0744, "step": 8191 }, { "epoch": 1.6653791421020534, "grad_norm": 0.1709037721157074, "learning_rate": 3.349944065900539e-05, "loss": 1.145, "step": 8192 }, { "epoch": 1.6655824354543607, "grad_norm": 0.18128395080566406, "learning_rate": 3.347910098647412e-05, "loss": 1.2441, "step": 8193 }, { "epoch": 1.665785728806668, "grad_norm": 0.16245591640472412, "learning_rate": 3.345876131394285e-05, "loss": 1.0459, "step": 8194 }, { "epoch": 1.6659890221589753, "grad_norm": 0.13943946361541748, "learning_rate": 3.343842164141158e-05, "loss": 0.9406, "step": 8195 }, { "epoch": 1.6661923155112828, "grad_norm": 0.15730147063732147, "learning_rate": 3.34180819688803e-05, "loss": 0.9859, "step": 8196 }, { "epoch": 1.6663956088635903, "grad_norm": 0.16129817068576813, "learning_rate": 3.3397742296349034e-05, "loss": 0.962, "step": 8197 }, { "epoch": 1.6665989022158976, "grad_norm": 0.1416439712047577, "learning_rate": 3.337740262381776e-05, "loss": 0.9637, "step": 8198 }, { "epoch": 1.6668021955682049, "grad_norm": 0.161407008767128, "learning_rate": 3.335706295128649e-05, "loss": 1.0454, "step": 8199 }, { "epoch": 1.6670054889205121, "grad_norm": 0.1587202250957489, "learning_rate": 3.3336723278755214e-05, "loss": 0.9297, "step": 8200 }, { "epoch": 1.6672087822728197, "grad_norm": 0.16926532983779907, "learning_rate": 3.331638360622394e-05, "loss": 1.21, "step": 8201 }, { "epoch": 1.6674120756251272, "grad_norm": 0.15299126505851746, "learning_rate": 3.329604393369267e-05, "loss": 1.0051, "step": 8202 }, { "epoch": 1.6676153689774345, "grad_norm": 0.13957419991493225, "learning_rate": 3.3275704261161394e-05, "loss": 0.893, "step": 8203 }, { "epoch": 1.6678186623297417, "grad_norm": 0.16063278913497925, "learning_rate": 3.3255364588630126e-05, "loss": 1.053, "step": 8204 }, { "epoch": 1.6680219556820493, "grad_norm": 0.16503667831420898, "learning_rate": 3.323502491609885e-05, "loss": 1.0465, "step": 8205 }, { "epoch": 1.6682252490343565, "grad_norm": 0.15685991942882538, "learning_rate": 3.321468524356758e-05, "loss": 0.8817, "step": 8206 }, { "epoch": 1.668428542386664, "grad_norm": 0.16033364832401276, "learning_rate": 3.3194345571036306e-05, "loss": 1.0615, "step": 8207 }, { "epoch": 1.6686318357389713, "grad_norm": 0.13920074701309204, "learning_rate": 3.317400589850504e-05, "loss": 0.945, "step": 8208 }, { "epoch": 1.6688351290912786, "grad_norm": 0.1429963856935501, "learning_rate": 3.315366622597376e-05, "loss": 0.7911, "step": 8209 }, { "epoch": 1.6690384224435861, "grad_norm": 0.1962098926305771, "learning_rate": 3.313332655344249e-05, "loss": 1.1785, "step": 8210 }, { "epoch": 1.6692417157958934, "grad_norm": 0.17582902312278748, "learning_rate": 3.311298688091122e-05, "loss": 1.0746, "step": 8211 }, { "epoch": 1.669445009148201, "grad_norm": 0.14084839820861816, "learning_rate": 3.309264720837995e-05, "loss": 0.8835, "step": 8212 }, { "epoch": 1.6696483025005082, "grad_norm": 0.15029963850975037, "learning_rate": 3.307230753584867e-05, "loss": 0.972, "step": 8213 }, { "epoch": 1.6698515958528155, "grad_norm": 0.14549775421619415, "learning_rate": 3.3051967863317404e-05, "loss": 0.8072, "step": 8214 }, { "epoch": 1.670054889205123, "grad_norm": 0.15422557294368744, "learning_rate": 3.303162819078613e-05, "loss": 0.9812, "step": 8215 }, { "epoch": 1.6702581825574305, "grad_norm": 0.16637632250785828, "learning_rate": 3.301128851825486e-05, "loss": 0.9998, "step": 8216 }, { "epoch": 1.6704614759097378, "grad_norm": 0.15709872543811798, "learning_rate": 3.2990948845723584e-05, "loss": 0.9814, "step": 8217 }, { "epoch": 1.670664769262045, "grad_norm": 0.15660080313682556, "learning_rate": 3.2970609173192315e-05, "loss": 0.9348, "step": 8218 }, { "epoch": 1.6708680626143524, "grad_norm": 0.14839830994606018, "learning_rate": 3.295026950066104e-05, "loss": 0.9283, "step": 8219 }, { "epoch": 1.67107135596666, "grad_norm": 0.14771409332752228, "learning_rate": 3.292992982812977e-05, "loss": 1.0186, "step": 8220 }, { "epoch": 1.6712746493189674, "grad_norm": 0.16282789409160614, "learning_rate": 3.2909590155598496e-05, "loss": 1.0169, "step": 8221 }, { "epoch": 1.6714779426712747, "grad_norm": 0.15665173530578613, "learning_rate": 3.288925048306723e-05, "loss": 0.9748, "step": 8222 }, { "epoch": 1.671681236023582, "grad_norm": 0.15815019607543945, "learning_rate": 3.286891081053595e-05, "loss": 1.0132, "step": 8223 }, { "epoch": 1.6718845293758893, "grad_norm": 0.156178280711174, "learning_rate": 3.284857113800468e-05, "loss": 1.1454, "step": 8224 }, { "epoch": 1.6720878227281968, "grad_norm": 0.16119812428951263, "learning_rate": 3.282823146547341e-05, "loss": 0.9755, "step": 8225 }, { "epoch": 1.6722911160805043, "grad_norm": 0.15498805046081543, "learning_rate": 3.280789179294214e-05, "loss": 1.0901, "step": 8226 }, { "epoch": 1.6724944094328116, "grad_norm": 0.17021724581718445, "learning_rate": 3.278755212041086e-05, "loss": 1.072, "step": 8227 }, { "epoch": 1.6726977027851189, "grad_norm": 0.14528462290763855, "learning_rate": 3.2767212447879594e-05, "loss": 0.8765, "step": 8228 }, { "epoch": 1.6729009961374262, "grad_norm": 0.1487378478050232, "learning_rate": 3.274687277534832e-05, "loss": 1.0009, "step": 8229 }, { "epoch": 1.6731042894897337, "grad_norm": 0.1605663150548935, "learning_rate": 3.272653310281705e-05, "loss": 1.0015, "step": 8230 }, { "epoch": 1.6733075828420412, "grad_norm": 0.16490675508975983, "learning_rate": 3.2706193430285774e-05, "loss": 1.0313, "step": 8231 }, { "epoch": 1.6735108761943485, "grad_norm": 0.15024538338184357, "learning_rate": 3.2685853757754505e-05, "loss": 0.9091, "step": 8232 }, { "epoch": 1.6737141695466558, "grad_norm": 0.13412806391716003, "learning_rate": 3.266551408522323e-05, "loss": 0.8953, "step": 8233 }, { "epoch": 1.673917462898963, "grad_norm": 0.1661340296268463, "learning_rate": 3.264517441269196e-05, "loss": 1.0639, "step": 8234 }, { "epoch": 1.6741207562512705, "grad_norm": 0.15493962168693542, "learning_rate": 3.2624834740160686e-05, "loss": 1.0315, "step": 8235 }, { "epoch": 1.674324049603578, "grad_norm": 0.14284680783748627, "learning_rate": 3.260449506762942e-05, "loss": 0.8697, "step": 8236 }, { "epoch": 1.6745273429558853, "grad_norm": 0.1528044193983078, "learning_rate": 3.258415539509814e-05, "loss": 1.037, "step": 8237 }, { "epoch": 1.6747306363081926, "grad_norm": 0.17056767642498016, "learning_rate": 3.256381572256687e-05, "loss": 1.1446, "step": 8238 }, { "epoch": 1.6749339296605001, "grad_norm": 0.1521703004837036, "learning_rate": 3.25434760500356e-05, "loss": 0.942, "step": 8239 }, { "epoch": 1.6751372230128074, "grad_norm": 0.17566891014575958, "learning_rate": 3.252313637750433e-05, "loss": 1.0178, "step": 8240 }, { "epoch": 1.675340516365115, "grad_norm": 0.1612611711025238, "learning_rate": 3.250279670497305e-05, "loss": 1.0496, "step": 8241 }, { "epoch": 1.6755438097174222, "grad_norm": 0.16044385731220245, "learning_rate": 3.248245703244178e-05, "loss": 1.0688, "step": 8242 }, { "epoch": 1.6757471030697295, "grad_norm": 0.17242781817913055, "learning_rate": 3.24621173599105e-05, "loss": 0.9866, "step": 8243 }, { "epoch": 1.675950396422037, "grad_norm": 0.19015365839004517, "learning_rate": 3.244177768737923e-05, "loss": 1.2501, "step": 8244 }, { "epoch": 1.6761536897743445, "grad_norm": 0.1537357121706009, "learning_rate": 3.242143801484796e-05, "loss": 1.1531, "step": 8245 }, { "epoch": 1.6763569831266518, "grad_norm": 0.1696067452430725, "learning_rate": 3.240109834231669e-05, "loss": 1.0693, "step": 8246 }, { "epoch": 1.676560276478959, "grad_norm": 0.16172130405902863, "learning_rate": 3.238075866978541e-05, "loss": 1.0088, "step": 8247 }, { "epoch": 1.6767635698312664, "grad_norm": 0.1545289009809494, "learning_rate": 3.2360418997254144e-05, "loss": 0.9467, "step": 8248 }, { "epoch": 1.676966863183574, "grad_norm": 0.15023134648799896, "learning_rate": 3.234007932472287e-05, "loss": 1.004, "step": 8249 }, { "epoch": 1.6771701565358814, "grad_norm": 0.16233746707439423, "learning_rate": 3.23197396521916e-05, "loss": 1.0102, "step": 8250 }, { "epoch": 1.6773734498881887, "grad_norm": 0.16804139316082, "learning_rate": 3.2299399979660324e-05, "loss": 0.9965, "step": 8251 }, { "epoch": 1.677576743240496, "grad_norm": 0.1575126051902771, "learning_rate": 3.2279060307129056e-05, "loss": 0.8674, "step": 8252 }, { "epoch": 1.6777800365928033, "grad_norm": 0.16049127280712128, "learning_rate": 3.225872063459778e-05, "loss": 1.0958, "step": 8253 }, { "epoch": 1.6779833299451108, "grad_norm": 0.15889425575733185, "learning_rate": 3.223838096206651e-05, "loss": 1.0088, "step": 8254 }, { "epoch": 1.6781866232974183, "grad_norm": 0.1613502949476242, "learning_rate": 3.2218041289535236e-05, "loss": 1.0539, "step": 8255 }, { "epoch": 1.6783899166497256, "grad_norm": 0.15224938094615936, "learning_rate": 3.219770161700397e-05, "loss": 0.9483, "step": 8256 }, { "epoch": 1.6785932100020329, "grad_norm": 0.16063757240772247, "learning_rate": 3.217736194447269e-05, "loss": 0.9488, "step": 8257 }, { "epoch": 1.6787965033543402, "grad_norm": 0.17751511931419373, "learning_rate": 3.215702227194142e-05, "loss": 1.1573, "step": 8258 }, { "epoch": 1.6789997967066477, "grad_norm": 0.15524928271770477, "learning_rate": 3.2136682599410154e-05, "loss": 0.9947, "step": 8259 }, { "epoch": 1.6792030900589552, "grad_norm": 0.1439545899629593, "learning_rate": 3.211634292687888e-05, "loss": 0.8509, "step": 8260 }, { "epoch": 1.6794063834112625, "grad_norm": 0.14437517523765564, "learning_rate": 3.209600325434761e-05, "loss": 1.0137, "step": 8261 }, { "epoch": 1.6796096767635698, "grad_norm": 0.1633983701467514, "learning_rate": 3.2075663581816334e-05, "loss": 0.9938, "step": 8262 }, { "epoch": 1.679812970115877, "grad_norm": 0.1731323003768921, "learning_rate": 3.2055323909285065e-05, "loss": 1.069, "step": 8263 }, { "epoch": 1.6800162634681846, "grad_norm": 0.14269614219665527, "learning_rate": 3.203498423675379e-05, "loss": 0.8067, "step": 8264 }, { "epoch": 1.680219556820492, "grad_norm": 0.17545704543590546, "learning_rate": 3.201464456422252e-05, "loss": 1.0096, "step": 8265 }, { "epoch": 1.6804228501727994, "grad_norm": 0.16723406314849854, "learning_rate": 3.1994304891691245e-05, "loss": 1.1946, "step": 8266 }, { "epoch": 1.6806261435251066, "grad_norm": 0.1484065055847168, "learning_rate": 3.197396521915998e-05, "loss": 0.8384, "step": 8267 }, { "epoch": 1.6808294368774142, "grad_norm": 0.1434076875448227, "learning_rate": 3.19536255466287e-05, "loss": 0.9704, "step": 8268 }, { "epoch": 1.6810327302297214, "grad_norm": 0.15108151733875275, "learning_rate": 3.193328587409743e-05, "loss": 1.017, "step": 8269 }, { "epoch": 1.681236023582029, "grad_norm": 0.16687294840812683, "learning_rate": 3.191294620156616e-05, "loss": 1.0668, "step": 8270 }, { "epoch": 1.6814393169343362, "grad_norm": 0.17139659821987152, "learning_rate": 3.189260652903489e-05, "loss": 1.175, "step": 8271 }, { "epoch": 1.6816426102866435, "grad_norm": 0.164855495095253, "learning_rate": 3.187226685650361e-05, "loss": 1.0423, "step": 8272 }, { "epoch": 1.681845903638951, "grad_norm": 0.15840567648410797, "learning_rate": 3.1851927183972344e-05, "loss": 0.9744, "step": 8273 }, { "epoch": 1.6820491969912585, "grad_norm": 0.14886628091335297, "learning_rate": 3.183158751144107e-05, "loss": 0.9777, "step": 8274 }, { "epoch": 1.6822524903435658, "grad_norm": 0.15535156428813934, "learning_rate": 3.18112478389098e-05, "loss": 0.9806, "step": 8275 }, { "epoch": 1.6824557836958731, "grad_norm": 0.14940613508224487, "learning_rate": 3.1790908166378524e-05, "loss": 0.9969, "step": 8276 }, { "epoch": 1.6826590770481804, "grad_norm": 0.15375861525535583, "learning_rate": 3.1770568493847255e-05, "loss": 0.9269, "step": 8277 }, { "epoch": 1.682862370400488, "grad_norm": 0.1783488690853119, "learning_rate": 3.175022882131598e-05, "loss": 0.9563, "step": 8278 }, { "epoch": 1.6830656637527954, "grad_norm": 0.14440646767616272, "learning_rate": 3.172988914878471e-05, "loss": 0.862, "step": 8279 }, { "epoch": 1.6832689571051027, "grad_norm": 0.18183578550815582, "learning_rate": 3.1709549476253435e-05, "loss": 1.2907, "step": 8280 }, { "epoch": 1.68347225045741, "grad_norm": 0.16171756386756897, "learning_rate": 3.1689209803722167e-05, "loss": 1.0679, "step": 8281 }, { "epoch": 1.6836755438097173, "grad_norm": 0.14062201976776123, "learning_rate": 3.166887013119089e-05, "loss": 1.0112, "step": 8282 }, { "epoch": 1.6838788371620248, "grad_norm": 0.1657455861568451, "learning_rate": 3.164853045865962e-05, "loss": 1.0113, "step": 8283 }, { "epoch": 1.6840821305143323, "grad_norm": 0.16717737913131714, "learning_rate": 3.162819078612834e-05, "loss": 1.1085, "step": 8284 }, { "epoch": 1.6842854238666396, "grad_norm": 0.15416519343852997, "learning_rate": 3.160785111359707e-05, "loss": 1.0768, "step": 8285 }, { "epoch": 1.6844887172189469, "grad_norm": 0.1505075991153717, "learning_rate": 3.1587511441065796e-05, "loss": 0.9548, "step": 8286 }, { "epoch": 1.6846920105712542, "grad_norm": 0.1609627902507782, "learning_rate": 3.156717176853453e-05, "loss": 0.9486, "step": 8287 }, { "epoch": 1.6848953039235617, "grad_norm": 0.15743689239025116, "learning_rate": 3.154683209600325e-05, "loss": 1.0202, "step": 8288 }, { "epoch": 1.6850985972758692, "grad_norm": 0.1553112119436264, "learning_rate": 3.152649242347198e-05, "loss": 1.0434, "step": 8289 }, { "epoch": 1.6853018906281765, "grad_norm": 0.17067912220954895, "learning_rate": 3.150615275094071e-05, "loss": 1.0803, "step": 8290 }, { "epoch": 1.6855051839804838, "grad_norm": 0.17109428346157074, "learning_rate": 3.148581307840944e-05, "loss": 1.161, "step": 8291 }, { "epoch": 1.685708477332791, "grad_norm": 0.15580320358276367, "learning_rate": 3.146547340587816e-05, "loss": 0.8901, "step": 8292 }, { "epoch": 1.6859117706850986, "grad_norm": 0.15687544643878937, "learning_rate": 3.1445133733346894e-05, "loss": 0.9404, "step": 8293 }, { "epoch": 1.686115064037406, "grad_norm": 0.16705960035324097, "learning_rate": 3.142479406081562e-05, "loss": 1.0112, "step": 8294 }, { "epoch": 1.6863183573897134, "grad_norm": 0.14069032669067383, "learning_rate": 3.140445438828435e-05, "loss": 0.9, "step": 8295 }, { "epoch": 1.6865216507420207, "grad_norm": 0.15529312193393707, "learning_rate": 3.1384114715753074e-05, "loss": 0.8966, "step": 8296 }, { "epoch": 1.6867249440943282, "grad_norm": 0.1676514446735382, "learning_rate": 3.1363775043221805e-05, "loss": 0.9733, "step": 8297 }, { "epoch": 1.6869282374466354, "grad_norm": 0.1454571783542633, "learning_rate": 3.134343537069053e-05, "loss": 0.8882, "step": 8298 }, { "epoch": 1.687131530798943, "grad_norm": 0.15257292985916138, "learning_rate": 3.132309569815926e-05, "loss": 1.0136, "step": 8299 }, { "epoch": 1.6873348241512502, "grad_norm": 0.1856284886598587, "learning_rate": 3.1302756025627986e-05, "loss": 1.2368, "step": 8300 }, { "epoch": 1.6875381175035575, "grad_norm": 0.14349672198295593, "learning_rate": 3.128241635309672e-05, "loss": 0.9533, "step": 8301 }, { "epoch": 1.687741410855865, "grad_norm": 0.15666766464710236, "learning_rate": 3.126207668056544e-05, "loss": 1.0255, "step": 8302 }, { "epoch": 1.6879447042081726, "grad_norm": 0.15798909962177277, "learning_rate": 3.124173700803417e-05, "loss": 1.0326, "step": 8303 }, { "epoch": 1.6881479975604798, "grad_norm": 0.161436527967453, "learning_rate": 3.12213973355029e-05, "loss": 0.9808, "step": 8304 }, { "epoch": 1.6883512909127871, "grad_norm": 0.16155371069908142, "learning_rate": 3.120105766297163e-05, "loss": 0.9841, "step": 8305 }, { "epoch": 1.6885545842650944, "grad_norm": 0.16159863770008087, "learning_rate": 3.118071799044035e-05, "loss": 1.0308, "step": 8306 }, { "epoch": 1.688757877617402, "grad_norm": 0.17386485636234283, "learning_rate": 3.1160378317909084e-05, "loss": 1.134, "step": 8307 }, { "epoch": 1.6889611709697094, "grad_norm": 0.15053819119930267, "learning_rate": 3.114003864537781e-05, "loss": 0.9602, "step": 8308 }, { "epoch": 1.6891644643220167, "grad_norm": 0.1382780522108078, "learning_rate": 3.111969897284654e-05, "loss": 0.8222, "step": 8309 }, { "epoch": 1.689367757674324, "grad_norm": 0.1593419760465622, "learning_rate": 3.1099359300315264e-05, "loss": 0.9894, "step": 8310 }, { "epoch": 1.6895710510266313, "grad_norm": 0.15639933943748474, "learning_rate": 3.1079019627783995e-05, "loss": 0.9791, "step": 8311 }, { "epoch": 1.6897743443789388, "grad_norm": 0.15747106075286865, "learning_rate": 3.105867995525272e-05, "loss": 1.0317, "step": 8312 }, { "epoch": 1.6899776377312463, "grad_norm": 0.14382624626159668, "learning_rate": 3.103834028272145e-05, "loss": 0.8478, "step": 8313 }, { "epoch": 1.6901809310835536, "grad_norm": 0.16673614084720612, "learning_rate": 3.1018000610190175e-05, "loss": 1.1815, "step": 8314 }, { "epoch": 1.690384224435861, "grad_norm": 0.16441917419433594, "learning_rate": 3.099766093765891e-05, "loss": 1.0396, "step": 8315 }, { "epoch": 1.6905875177881682, "grad_norm": 0.15857255458831787, "learning_rate": 3.097732126512764e-05, "loss": 0.9619, "step": 8316 }, { "epoch": 1.6907908111404757, "grad_norm": 0.18482261896133423, "learning_rate": 3.095698159259636e-05, "loss": 1.0986, "step": 8317 }, { "epoch": 1.6909941044927832, "grad_norm": 0.14051398634910583, "learning_rate": 3.0936641920065094e-05, "loss": 0.8485, "step": 8318 }, { "epoch": 1.6911973978450905, "grad_norm": 0.1471063792705536, "learning_rate": 3.091630224753382e-05, "loss": 0.9261, "step": 8319 }, { "epoch": 1.6914006911973978, "grad_norm": 0.18597447872161865, "learning_rate": 3.089596257500255e-05, "loss": 1.219, "step": 8320 }, { "epoch": 1.691603984549705, "grad_norm": 0.16095446050167084, "learning_rate": 3.0875622902471274e-05, "loss": 0.9458, "step": 8321 }, { "epoch": 1.6918072779020126, "grad_norm": 0.1576671600341797, "learning_rate": 3.0855283229940005e-05, "loss": 0.9753, "step": 8322 }, { "epoch": 1.69201057125432, "grad_norm": 0.15155839920043945, "learning_rate": 3.083494355740873e-05, "loss": 0.9988, "step": 8323 }, { "epoch": 1.6922138646066274, "grad_norm": 0.1764792650938034, "learning_rate": 3.081460388487746e-05, "loss": 1.0623, "step": 8324 }, { "epoch": 1.6924171579589347, "grad_norm": 0.1650528460741043, "learning_rate": 3.079426421234618e-05, "loss": 1.0193, "step": 8325 }, { "epoch": 1.6926204513112422, "grad_norm": 0.15782417356967926, "learning_rate": 3.077392453981491e-05, "loss": 1.0778, "step": 8326 }, { "epoch": 1.6928237446635495, "grad_norm": 0.15757118165493011, "learning_rate": 3.0753584867283634e-05, "loss": 1.1165, "step": 8327 }, { "epoch": 1.693027038015857, "grad_norm": 0.15187780559062958, "learning_rate": 3.0733245194752365e-05, "loss": 0.9711, "step": 8328 }, { "epoch": 1.6932303313681643, "grad_norm": 0.16234423220157623, "learning_rate": 3.071290552222109e-05, "loss": 0.9838, "step": 8329 }, { "epoch": 1.6934336247204715, "grad_norm": 0.15083719789981842, "learning_rate": 3.069256584968982e-05, "loss": 0.8399, "step": 8330 }, { "epoch": 1.693636918072779, "grad_norm": 0.13649430871009827, "learning_rate": 3.0672226177158546e-05, "loss": 0.8054, "step": 8331 }, { "epoch": 1.6938402114250866, "grad_norm": 0.18518953025341034, "learning_rate": 3.065188650462728e-05, "loss": 1.1218, "step": 8332 }, { "epoch": 1.6940435047773938, "grad_norm": 0.15932078659534454, "learning_rate": 3.0631546832096e-05, "loss": 0.9934, "step": 8333 }, { "epoch": 1.6942467981297011, "grad_norm": 0.162969708442688, "learning_rate": 3.061120715956473e-05, "loss": 0.9888, "step": 8334 }, { "epoch": 1.6944500914820084, "grad_norm": 0.16630198061466217, "learning_rate": 3.059086748703346e-05, "loss": 1.0195, "step": 8335 }, { "epoch": 1.694653384834316, "grad_norm": 0.16075250506401062, "learning_rate": 3.057052781450219e-05, "loss": 0.9938, "step": 8336 }, { "epoch": 1.6948566781866234, "grad_norm": 0.1634729504585266, "learning_rate": 3.055018814197091e-05, "loss": 1.0833, "step": 8337 }, { "epoch": 1.6950599715389307, "grad_norm": 0.17103788256645203, "learning_rate": 3.0529848469439644e-05, "loss": 1.149, "step": 8338 }, { "epoch": 1.695263264891238, "grad_norm": 0.17102780938148499, "learning_rate": 3.050950879690837e-05, "loss": 0.9977, "step": 8339 }, { "epoch": 1.6954665582435453, "grad_norm": 0.1636354923248291, "learning_rate": 3.04891691243771e-05, "loss": 1.02, "step": 8340 }, { "epoch": 1.6956698515958528, "grad_norm": 0.15153901278972626, "learning_rate": 3.0468829451845827e-05, "loss": 0.9817, "step": 8341 }, { "epoch": 1.6958731449481603, "grad_norm": 0.15859174728393555, "learning_rate": 3.0448489779314555e-05, "loss": 1.06, "step": 8342 }, { "epoch": 1.6960764383004676, "grad_norm": 0.18039724230766296, "learning_rate": 3.0428150106783283e-05, "loss": 1.1723, "step": 8343 }, { "epoch": 1.696279731652775, "grad_norm": 0.15811775624752045, "learning_rate": 3.040781043425201e-05, "loss": 1.0838, "step": 8344 }, { "epoch": 1.6964830250050822, "grad_norm": 0.15557530522346497, "learning_rate": 3.038747076172074e-05, "loss": 0.957, "step": 8345 }, { "epoch": 1.6966863183573897, "grad_norm": 0.15066535770893097, "learning_rate": 3.0367131089189467e-05, "loss": 0.8602, "step": 8346 }, { "epoch": 1.6968896117096972, "grad_norm": 0.160477414727211, "learning_rate": 3.0346791416658194e-05, "loss": 1.0156, "step": 8347 }, { "epoch": 1.6970929050620045, "grad_norm": 0.17238982021808624, "learning_rate": 3.0326451744126922e-05, "loss": 1.1039, "step": 8348 }, { "epoch": 1.6972961984143118, "grad_norm": 0.14776374399662018, "learning_rate": 3.030611207159565e-05, "loss": 0.9012, "step": 8349 }, { "epoch": 1.697499491766619, "grad_norm": 0.1604367047548294, "learning_rate": 3.0285772399064378e-05, "loss": 1.0431, "step": 8350 }, { "epoch": 1.6977027851189266, "grad_norm": 0.16607533395290375, "learning_rate": 3.0265432726533106e-05, "loss": 1.0148, "step": 8351 }, { "epoch": 1.697906078471234, "grad_norm": 0.14815792441368103, "learning_rate": 3.0245093054001834e-05, "loss": 0.9628, "step": 8352 }, { "epoch": 1.6981093718235414, "grad_norm": 0.15657465159893036, "learning_rate": 3.022475338147056e-05, "loss": 0.9876, "step": 8353 }, { "epoch": 1.6983126651758487, "grad_norm": 0.1720605343580246, "learning_rate": 3.020441370893929e-05, "loss": 1.0846, "step": 8354 }, { "epoch": 1.6985159585281562, "grad_norm": 0.16454030573368073, "learning_rate": 3.0184074036408017e-05, "loss": 1.0615, "step": 8355 }, { "epoch": 1.6987192518804635, "grad_norm": 0.14529123902320862, "learning_rate": 3.0163734363876745e-05, "loss": 0.8645, "step": 8356 }, { "epoch": 1.698922545232771, "grad_norm": 0.15737245976924896, "learning_rate": 3.0143394691345473e-05, "loss": 1.0789, "step": 8357 }, { "epoch": 1.6991258385850783, "grad_norm": 0.1643684357404709, "learning_rate": 3.01230550188142e-05, "loss": 1.0481, "step": 8358 }, { "epoch": 1.6993291319373856, "grad_norm": 0.15403226017951965, "learning_rate": 3.010271534628293e-05, "loss": 0.9181, "step": 8359 }, { "epoch": 1.699532425289693, "grad_norm": 0.16728688776493073, "learning_rate": 3.0082375673751657e-05, "loss": 1.1979, "step": 8360 }, { "epoch": 1.6997357186420006, "grad_norm": 0.17050908505916595, "learning_rate": 3.0062036001220384e-05, "loss": 1.1582, "step": 8361 }, { "epoch": 1.6999390119943079, "grad_norm": 0.16738499701023102, "learning_rate": 3.0041696328689112e-05, "loss": 1.0216, "step": 8362 }, { "epoch": 1.7001423053466151, "grad_norm": 0.12521091103553772, "learning_rate": 3.002135665615784e-05, "loss": 0.7298, "step": 8363 }, { "epoch": 1.7003455986989224, "grad_norm": 0.15040388703346252, "learning_rate": 3.0001016983626568e-05, "loss": 0.9925, "step": 8364 }, { "epoch": 1.70054889205123, "grad_norm": 0.1403646171092987, "learning_rate": 2.9980677311095296e-05, "loss": 0.9197, "step": 8365 }, { "epoch": 1.7007521854035375, "grad_norm": 0.17228379845619202, "learning_rate": 2.9960337638564024e-05, "loss": 1.1095, "step": 8366 }, { "epoch": 1.7009554787558447, "grad_norm": 0.1730489879846573, "learning_rate": 2.9939997966032745e-05, "loss": 1.1283, "step": 8367 }, { "epoch": 1.701158772108152, "grad_norm": 0.16933295130729675, "learning_rate": 2.9919658293501473e-05, "loss": 0.9463, "step": 8368 }, { "epoch": 1.7013620654604593, "grad_norm": 0.14076469838619232, "learning_rate": 2.98993186209702e-05, "loss": 0.88, "step": 8369 }, { "epoch": 1.7015653588127668, "grad_norm": 0.15840107202529907, "learning_rate": 2.9878978948438928e-05, "loss": 1.0479, "step": 8370 }, { "epoch": 1.7017686521650743, "grad_norm": 0.1655021756887436, "learning_rate": 2.9858639275907656e-05, "loss": 1.0685, "step": 8371 }, { "epoch": 1.7019719455173816, "grad_norm": 0.1701265573501587, "learning_rate": 2.9838299603376384e-05, "loss": 1.0574, "step": 8372 }, { "epoch": 1.702175238869689, "grad_norm": 0.15984782576560974, "learning_rate": 2.9817959930845112e-05, "loss": 1.0111, "step": 8373 }, { "epoch": 1.7023785322219962, "grad_norm": 0.16918201744556427, "learning_rate": 2.979762025831384e-05, "loss": 1.1721, "step": 8374 }, { "epoch": 1.7025818255743037, "grad_norm": 0.16595715284347534, "learning_rate": 2.9777280585782567e-05, "loss": 1.1378, "step": 8375 }, { "epoch": 1.7027851189266112, "grad_norm": 0.16282083094120026, "learning_rate": 2.9756940913251295e-05, "loss": 1.0117, "step": 8376 }, { "epoch": 1.7029884122789185, "grad_norm": 0.14637799561023712, "learning_rate": 2.9736601240720023e-05, "loss": 0.9034, "step": 8377 }, { "epoch": 1.7031917056312258, "grad_norm": 0.16655267775058746, "learning_rate": 2.971626156818875e-05, "loss": 1.0502, "step": 8378 }, { "epoch": 1.703394998983533, "grad_norm": 0.15839321911334991, "learning_rate": 2.969592189565748e-05, "loss": 0.9705, "step": 8379 }, { "epoch": 1.7035982923358406, "grad_norm": 0.1681317389011383, "learning_rate": 2.9675582223126207e-05, "loss": 1.0834, "step": 8380 }, { "epoch": 1.703801585688148, "grad_norm": 0.16335687041282654, "learning_rate": 2.9655242550594935e-05, "loss": 1.0838, "step": 8381 }, { "epoch": 1.7040048790404554, "grad_norm": 0.15100814402103424, "learning_rate": 2.9634902878063662e-05, "loss": 0.9322, "step": 8382 }, { "epoch": 1.7042081723927627, "grad_norm": 0.1667470633983612, "learning_rate": 2.961456320553239e-05, "loss": 1.0055, "step": 8383 }, { "epoch": 1.7044114657450702, "grad_norm": 0.18454653024673462, "learning_rate": 2.9594223533001118e-05, "loss": 1.2315, "step": 8384 }, { "epoch": 1.7046147590973775, "grad_norm": 0.15428806841373444, "learning_rate": 2.9573883860469846e-05, "loss": 0.9565, "step": 8385 }, { "epoch": 1.704818052449685, "grad_norm": 0.1717846691608429, "learning_rate": 2.9553544187938574e-05, "loss": 1.1632, "step": 8386 }, { "epoch": 1.7050213458019923, "grad_norm": 0.17111586034297943, "learning_rate": 2.95332045154073e-05, "loss": 1.0027, "step": 8387 }, { "epoch": 1.7052246391542996, "grad_norm": 0.16132821142673492, "learning_rate": 2.9512864842876033e-05, "loss": 1.0654, "step": 8388 }, { "epoch": 1.705427932506607, "grad_norm": 0.16766460239887238, "learning_rate": 2.949252517034476e-05, "loss": 1.1326, "step": 8389 }, { "epoch": 1.7056312258589146, "grad_norm": 0.1530713587999344, "learning_rate": 2.947218549781349e-05, "loss": 0.9176, "step": 8390 }, { "epoch": 1.7058345192112219, "grad_norm": 0.1681913584470749, "learning_rate": 2.9451845825282216e-05, "loss": 1.1193, "step": 8391 }, { "epoch": 1.7060378125635292, "grad_norm": 0.16072049736976624, "learning_rate": 2.9431506152750944e-05, "loss": 0.9146, "step": 8392 }, { "epoch": 1.7062411059158364, "grad_norm": 0.1597222536802292, "learning_rate": 2.9411166480219672e-05, "loss": 1.0692, "step": 8393 }, { "epoch": 1.706444399268144, "grad_norm": 0.15088988840579987, "learning_rate": 2.93908268076884e-05, "loss": 0.8583, "step": 8394 }, { "epoch": 1.7066476926204515, "grad_norm": 0.16635987162590027, "learning_rate": 2.9370487135157128e-05, "loss": 0.9959, "step": 8395 }, { "epoch": 1.7068509859727587, "grad_norm": 0.15918420255184174, "learning_rate": 2.9350147462625856e-05, "loss": 1.0151, "step": 8396 }, { "epoch": 1.707054279325066, "grad_norm": 0.15033075213432312, "learning_rate": 2.9329807790094584e-05, "loss": 1.0713, "step": 8397 }, { "epoch": 1.7072575726773733, "grad_norm": 0.15783220529556274, "learning_rate": 2.930946811756331e-05, "loss": 0.9359, "step": 8398 }, { "epoch": 1.7074608660296808, "grad_norm": 0.1573420912027359, "learning_rate": 2.928912844503204e-05, "loss": 0.9835, "step": 8399 }, { "epoch": 1.7076641593819883, "grad_norm": 0.14677788317203522, "learning_rate": 2.9268788772500767e-05, "loss": 0.8749, "step": 8400 }, { "epoch": 1.7078674527342956, "grad_norm": 0.14487434923648834, "learning_rate": 2.9248449099969495e-05, "loss": 0.843, "step": 8401 }, { "epoch": 1.708070746086603, "grad_norm": 0.14130887389183044, "learning_rate": 2.9228109427438223e-05, "loss": 0.9009, "step": 8402 }, { "epoch": 1.7082740394389102, "grad_norm": 0.16128888726234436, "learning_rate": 2.920776975490695e-05, "loss": 1.0344, "step": 8403 }, { "epoch": 1.7084773327912177, "grad_norm": 0.15809085965156555, "learning_rate": 2.918743008237568e-05, "loss": 1.0498, "step": 8404 }, { "epoch": 1.7086806261435252, "grad_norm": 0.1583137959241867, "learning_rate": 2.9167090409844406e-05, "loss": 1.0105, "step": 8405 }, { "epoch": 1.7088839194958325, "grad_norm": 0.15183697640895844, "learning_rate": 2.9146750737313134e-05, "loss": 1.0299, "step": 8406 }, { "epoch": 1.7090872128481398, "grad_norm": 0.16883176565170288, "learning_rate": 2.9126411064781862e-05, "loss": 1.0209, "step": 8407 }, { "epoch": 1.709290506200447, "grad_norm": 0.1596614122390747, "learning_rate": 2.9106071392250583e-05, "loss": 0.9837, "step": 8408 }, { "epoch": 1.7094937995527546, "grad_norm": 0.16877512633800507, "learning_rate": 2.908573171971931e-05, "loss": 1.0663, "step": 8409 }, { "epoch": 1.709697092905062, "grad_norm": 0.16950570046901703, "learning_rate": 2.906539204718804e-05, "loss": 0.9625, "step": 8410 }, { "epoch": 1.7099003862573694, "grad_norm": 0.15808042883872986, "learning_rate": 2.9045052374656767e-05, "loss": 0.9905, "step": 8411 }, { "epoch": 1.7101036796096767, "grad_norm": 0.1412404328584671, "learning_rate": 2.9024712702125495e-05, "loss": 0.9483, "step": 8412 }, { "epoch": 1.7103069729619842, "grad_norm": 0.13882005214691162, "learning_rate": 2.9004373029594222e-05, "loss": 0.9071, "step": 8413 }, { "epoch": 1.7105102663142915, "grad_norm": 0.1533912867307663, "learning_rate": 2.898403335706295e-05, "loss": 0.9896, "step": 8414 }, { "epoch": 1.710713559666599, "grad_norm": 0.1600223332643509, "learning_rate": 2.8963693684531678e-05, "loss": 1.0544, "step": 8415 }, { "epoch": 1.7109168530189063, "grad_norm": 0.15461799502372742, "learning_rate": 2.8943354012000406e-05, "loss": 1.0122, "step": 8416 }, { "epoch": 1.7111201463712136, "grad_norm": 0.16230104863643646, "learning_rate": 2.8923014339469134e-05, "loss": 1.0511, "step": 8417 }, { "epoch": 1.711323439723521, "grad_norm": 0.1758674830198288, "learning_rate": 2.890267466693786e-05, "loss": 1.1104, "step": 8418 }, { "epoch": 1.7115267330758284, "grad_norm": 0.16923977434635162, "learning_rate": 2.888233499440659e-05, "loss": 1.086, "step": 8419 }, { "epoch": 1.7117300264281359, "grad_norm": 0.16900435090065002, "learning_rate": 2.8861995321875317e-05, "loss": 1.1129, "step": 8420 }, { "epoch": 1.7119333197804432, "grad_norm": 0.13067689538002014, "learning_rate": 2.8841655649344045e-05, "loss": 0.7697, "step": 8421 }, { "epoch": 1.7121366131327505, "grad_norm": 0.14663127064704895, "learning_rate": 2.8821315976812773e-05, "loss": 0.8994, "step": 8422 }, { "epoch": 1.712339906485058, "grad_norm": 0.13897158205509186, "learning_rate": 2.88009763042815e-05, "loss": 0.8763, "step": 8423 }, { "epoch": 1.7125431998373655, "grad_norm": 0.14679236710071564, "learning_rate": 2.878063663175023e-05, "loss": 0.9154, "step": 8424 }, { "epoch": 1.7127464931896728, "grad_norm": 0.17221155762672424, "learning_rate": 2.8760296959218957e-05, "loss": 1.1052, "step": 8425 }, { "epoch": 1.71294978654198, "grad_norm": 0.17302305996418, "learning_rate": 2.8739957286687684e-05, "loss": 1.0898, "step": 8426 }, { "epoch": 1.7131530798942873, "grad_norm": 0.1686973124742508, "learning_rate": 2.8719617614156412e-05, "loss": 1.1318, "step": 8427 }, { "epoch": 1.7133563732465948, "grad_norm": 0.14503951370716095, "learning_rate": 2.869927794162514e-05, "loss": 1.0085, "step": 8428 }, { "epoch": 1.7135596665989024, "grad_norm": 0.17462506890296936, "learning_rate": 2.8678938269093868e-05, "loss": 1.0794, "step": 8429 }, { "epoch": 1.7137629599512096, "grad_norm": 0.16551320254802704, "learning_rate": 2.8658598596562596e-05, "loss": 1.0279, "step": 8430 }, { "epoch": 1.713966253303517, "grad_norm": 0.16597230732440948, "learning_rate": 2.8638258924031324e-05, "loss": 0.9885, "step": 8431 }, { "epoch": 1.7141695466558242, "grad_norm": 0.15454904735088348, "learning_rate": 2.861791925150005e-05, "loss": 1.0524, "step": 8432 }, { "epoch": 1.7143728400081317, "grad_norm": 0.14820949733257294, "learning_rate": 2.859757957896878e-05, "loss": 0.9286, "step": 8433 }, { "epoch": 1.7145761333604392, "grad_norm": 0.17061541974544525, "learning_rate": 2.8577239906437507e-05, "loss": 1.0819, "step": 8434 }, { "epoch": 1.7147794267127465, "grad_norm": 0.1703917384147644, "learning_rate": 2.8556900233906235e-05, "loss": 0.955, "step": 8435 }, { "epoch": 1.7149827200650538, "grad_norm": 0.15599867701530457, "learning_rate": 2.8536560561374963e-05, "loss": 0.9612, "step": 8436 }, { "epoch": 1.715186013417361, "grad_norm": 0.16769281029701233, "learning_rate": 2.851622088884369e-05, "loss": 1.0019, "step": 8437 }, { "epoch": 1.7153893067696686, "grad_norm": 0.16862094402313232, "learning_rate": 2.849588121631242e-05, "loss": 1.128, "step": 8438 }, { "epoch": 1.7155926001219761, "grad_norm": 0.1613055318593979, "learning_rate": 2.8475541543781146e-05, "loss": 1.0835, "step": 8439 }, { "epoch": 1.7157958934742834, "grad_norm": 0.14935019612312317, "learning_rate": 2.8455201871249874e-05, "loss": 0.9514, "step": 8440 }, { "epoch": 1.7159991868265907, "grad_norm": 0.1651276797056198, "learning_rate": 2.8434862198718602e-05, "loss": 1.1831, "step": 8441 }, { "epoch": 1.7162024801788982, "grad_norm": 0.1765395849943161, "learning_rate": 2.841452252618733e-05, "loss": 1.1981, "step": 8442 }, { "epoch": 1.7164057735312055, "grad_norm": 0.15795816481113434, "learning_rate": 2.8394182853656058e-05, "loss": 0.9309, "step": 8443 }, { "epoch": 1.716609066883513, "grad_norm": 0.17122101783752441, "learning_rate": 2.8373843181124786e-05, "loss": 1.1546, "step": 8444 }, { "epoch": 1.7168123602358203, "grad_norm": 0.15440845489501953, "learning_rate": 2.8353503508593514e-05, "loss": 0.9551, "step": 8445 }, { "epoch": 1.7170156535881276, "grad_norm": 0.15325379371643066, "learning_rate": 2.8333163836062245e-05, "loss": 1.0167, "step": 8446 }, { "epoch": 1.717218946940435, "grad_norm": 0.14501266181468964, "learning_rate": 2.8312824163530973e-05, "loss": 0.9593, "step": 8447 }, { "epoch": 1.7174222402927424, "grad_norm": 0.15866196155548096, "learning_rate": 2.82924844909997e-05, "loss": 1.0065, "step": 8448 }, { "epoch": 1.7176255336450499, "grad_norm": 0.17114543914794922, "learning_rate": 2.827214481846843e-05, "loss": 1.0405, "step": 8449 }, { "epoch": 1.7178288269973572, "grad_norm": 0.17069396376609802, "learning_rate": 2.825180514593715e-05, "loss": 1.122, "step": 8450 }, { "epoch": 1.7180321203496645, "grad_norm": 0.18055550754070282, "learning_rate": 2.8231465473405877e-05, "loss": 1.1829, "step": 8451 }, { "epoch": 1.718235413701972, "grad_norm": 0.18622195720672607, "learning_rate": 2.8211125800874605e-05, "loss": 1.1597, "step": 8452 }, { "epoch": 1.7184387070542795, "grad_norm": 0.16644831001758575, "learning_rate": 2.8190786128343333e-05, "loss": 1.0496, "step": 8453 }, { "epoch": 1.7186420004065868, "grad_norm": 0.1377044916152954, "learning_rate": 2.817044645581206e-05, "loss": 0.8372, "step": 8454 }, { "epoch": 1.718845293758894, "grad_norm": 0.14078965783119202, "learning_rate": 2.815010678328079e-05, "loss": 0.8854, "step": 8455 }, { "epoch": 1.7190485871112013, "grad_norm": 0.14400875568389893, "learning_rate": 2.8129767110749517e-05, "loss": 0.896, "step": 8456 }, { "epoch": 1.7192518804635089, "grad_norm": 0.16292329132556915, "learning_rate": 2.8109427438218244e-05, "loss": 1.0493, "step": 8457 }, { "epoch": 1.7194551738158164, "grad_norm": 0.1717640608549118, "learning_rate": 2.8089087765686972e-05, "loss": 1.2165, "step": 8458 }, { "epoch": 1.7196584671681237, "grad_norm": 0.14946547150611877, "learning_rate": 2.80687480931557e-05, "loss": 1.0082, "step": 8459 }, { "epoch": 1.719861760520431, "grad_norm": 0.157221719622612, "learning_rate": 2.8048408420624428e-05, "loss": 1.048, "step": 8460 }, { "epoch": 1.7200650538727382, "grad_norm": 0.18664824962615967, "learning_rate": 2.8028068748093156e-05, "loss": 1.3382, "step": 8461 }, { "epoch": 1.7202683472250457, "grad_norm": 0.1760917603969574, "learning_rate": 2.8007729075561884e-05, "loss": 1.078, "step": 8462 }, { "epoch": 1.7204716405773532, "grad_norm": 0.15950821340084076, "learning_rate": 2.798738940303061e-05, "loss": 0.9671, "step": 8463 }, { "epoch": 1.7206749339296605, "grad_norm": 0.1471697837114334, "learning_rate": 2.796704973049934e-05, "loss": 0.9057, "step": 8464 }, { "epoch": 1.7208782272819678, "grad_norm": 0.15753808617591858, "learning_rate": 2.7946710057968067e-05, "loss": 0.9723, "step": 8465 }, { "epoch": 1.721081520634275, "grad_norm": 0.17277003824710846, "learning_rate": 2.7926370385436795e-05, "loss": 1.2068, "step": 8466 }, { "epoch": 1.7212848139865826, "grad_norm": 0.15204370021820068, "learning_rate": 2.7906030712905523e-05, "loss": 0.9869, "step": 8467 }, { "epoch": 1.7214881073388901, "grad_norm": 0.15919062495231628, "learning_rate": 2.788569104037425e-05, "loss": 0.9743, "step": 8468 }, { "epoch": 1.7216914006911974, "grad_norm": 0.1599516123533249, "learning_rate": 2.786535136784298e-05, "loss": 0.9231, "step": 8469 }, { "epoch": 1.7218946940435047, "grad_norm": 0.16378311812877655, "learning_rate": 2.7845011695311706e-05, "loss": 1.0418, "step": 8470 }, { "epoch": 1.722097987395812, "grad_norm": 0.16055697202682495, "learning_rate": 2.7824672022780434e-05, "loss": 1.0666, "step": 8471 }, { "epoch": 1.7223012807481195, "grad_norm": 0.15769603848457336, "learning_rate": 2.7804332350249162e-05, "loss": 0.8926, "step": 8472 }, { "epoch": 1.722504574100427, "grad_norm": 0.16030184924602509, "learning_rate": 2.778399267771789e-05, "loss": 1.1119, "step": 8473 }, { "epoch": 1.7227078674527343, "grad_norm": 0.16417063772678375, "learning_rate": 2.7763653005186618e-05, "loss": 1.1115, "step": 8474 }, { "epoch": 1.7229111608050416, "grad_norm": 0.17272762954235077, "learning_rate": 2.7743313332655346e-05, "loss": 1.0994, "step": 8475 }, { "epoch": 1.723114454157349, "grad_norm": 0.14636796712875366, "learning_rate": 2.7722973660124073e-05, "loss": 0.9825, "step": 8476 }, { "epoch": 1.7233177475096564, "grad_norm": 0.15340691804885864, "learning_rate": 2.77026339875928e-05, "loss": 1.0674, "step": 8477 }, { "epoch": 1.723521040861964, "grad_norm": 0.1439710259437561, "learning_rate": 2.768229431506153e-05, "loss": 0.9338, "step": 8478 }, { "epoch": 1.7237243342142712, "grad_norm": 0.1715303510427475, "learning_rate": 2.7661954642530257e-05, "loss": 1.1869, "step": 8479 }, { "epoch": 1.7239276275665785, "grad_norm": 0.16392652690410614, "learning_rate": 2.7641614969998985e-05, "loss": 1.1187, "step": 8480 }, { "epoch": 1.724130920918886, "grad_norm": 0.16006645560264587, "learning_rate": 2.7621275297467713e-05, "loss": 0.9226, "step": 8481 }, { "epoch": 1.7243342142711935, "grad_norm": 0.1707492172718048, "learning_rate": 2.760093562493644e-05, "loss": 0.8886, "step": 8482 }, { "epoch": 1.7245375076235008, "grad_norm": 0.15266121923923492, "learning_rate": 2.758059595240517e-05, "loss": 0.9215, "step": 8483 }, { "epoch": 1.724740800975808, "grad_norm": 0.1601337194442749, "learning_rate": 2.7560256279873896e-05, "loss": 0.9596, "step": 8484 }, { "epoch": 1.7249440943281154, "grad_norm": 0.17204086482524872, "learning_rate": 2.7539916607342624e-05, "loss": 1.1118, "step": 8485 }, { "epoch": 1.7251473876804229, "grad_norm": 0.1657676100730896, "learning_rate": 2.7519576934811352e-05, "loss": 1.0268, "step": 8486 }, { "epoch": 1.7253506810327304, "grad_norm": 0.14587603509426117, "learning_rate": 2.749923726228008e-05, "loss": 0.9695, "step": 8487 }, { "epoch": 1.7255539743850377, "grad_norm": 0.18296758830547333, "learning_rate": 2.7478897589748808e-05, "loss": 1.0893, "step": 8488 }, { "epoch": 1.725757267737345, "grad_norm": 0.15682336688041687, "learning_rate": 2.7458557917217536e-05, "loss": 1.0495, "step": 8489 }, { "epoch": 1.7259605610896522, "grad_norm": 0.15566644072532654, "learning_rate": 2.7438218244686263e-05, "loss": 1.0622, "step": 8490 }, { "epoch": 1.7261638544419597, "grad_norm": 0.14512281119823456, "learning_rate": 2.7417878572154988e-05, "loss": 0.9449, "step": 8491 }, { "epoch": 1.7263671477942673, "grad_norm": 0.18086154758930206, "learning_rate": 2.7397538899623716e-05, "loss": 1.2128, "step": 8492 }, { "epoch": 1.7265704411465745, "grad_norm": 0.15448005497455597, "learning_rate": 2.7377199227092444e-05, "loss": 0.974, "step": 8493 }, { "epoch": 1.7267737344988818, "grad_norm": 0.17739900946617126, "learning_rate": 2.735685955456117e-05, "loss": 1.2166, "step": 8494 }, { "epoch": 1.7269770278511891, "grad_norm": 0.14011867344379425, "learning_rate": 2.73365198820299e-05, "loss": 0.9215, "step": 8495 }, { "epoch": 1.7271803212034966, "grad_norm": 0.16140539944171906, "learning_rate": 2.7316180209498627e-05, "loss": 0.9666, "step": 8496 }, { "epoch": 1.7273836145558041, "grad_norm": 0.17576666176319122, "learning_rate": 2.7295840536967355e-05, "loss": 1.0835, "step": 8497 }, { "epoch": 1.7275869079081114, "grad_norm": 0.17040586471557617, "learning_rate": 2.7275500864436083e-05, "loss": 1.1259, "step": 8498 }, { "epoch": 1.7277902012604187, "grad_norm": 0.1456398367881775, "learning_rate": 2.725516119190481e-05, "loss": 0.9026, "step": 8499 }, { "epoch": 1.727993494612726, "grad_norm": 0.1592492163181305, "learning_rate": 2.723482151937354e-05, "loss": 1.0813, "step": 8500 }, { "epoch": 1.7281967879650335, "grad_norm": 0.162064328789711, "learning_rate": 2.7214481846842266e-05, "loss": 1.0652, "step": 8501 }, { "epoch": 1.728400081317341, "grad_norm": 0.16277560591697693, "learning_rate": 2.7194142174310994e-05, "loss": 1.013, "step": 8502 }, { "epoch": 1.7286033746696483, "grad_norm": 0.1707112044095993, "learning_rate": 2.7173802501779722e-05, "loss": 1.137, "step": 8503 }, { "epoch": 1.7288066680219556, "grad_norm": 0.14752034842967987, "learning_rate": 2.715346282924845e-05, "loss": 0.9267, "step": 8504 }, { "epoch": 1.729009961374263, "grad_norm": 0.17012381553649902, "learning_rate": 2.7133123156717178e-05, "loss": 0.9322, "step": 8505 }, { "epoch": 1.7292132547265704, "grad_norm": 0.14695444703102112, "learning_rate": 2.7112783484185906e-05, "loss": 1.0449, "step": 8506 }, { "epoch": 1.729416548078878, "grad_norm": 0.14833886921405792, "learning_rate": 2.7092443811654633e-05, "loss": 0.8641, "step": 8507 }, { "epoch": 1.7296198414311852, "grad_norm": 0.1851876676082611, "learning_rate": 2.707210413912336e-05, "loss": 1.1952, "step": 8508 }, { "epoch": 1.7298231347834925, "grad_norm": 0.14561158418655396, "learning_rate": 2.705176446659209e-05, "loss": 1.0551, "step": 8509 }, { "epoch": 1.7300264281358, "grad_norm": 0.14987611770629883, "learning_rate": 2.7031424794060817e-05, "loss": 0.9107, "step": 8510 }, { "epoch": 1.7302297214881075, "grad_norm": 0.1488702893257141, "learning_rate": 2.7011085121529545e-05, "loss": 0.9323, "step": 8511 }, { "epoch": 1.7304330148404148, "grad_norm": 0.16169512271881104, "learning_rate": 2.6990745448998273e-05, "loss": 1.0332, "step": 8512 }, { "epoch": 1.730636308192722, "grad_norm": 0.17924533784389496, "learning_rate": 2.6970405776467e-05, "loss": 1.3094, "step": 8513 }, { "epoch": 1.7308396015450294, "grad_norm": 0.17188727855682373, "learning_rate": 2.695006610393573e-05, "loss": 1.1575, "step": 8514 }, { "epoch": 1.7310428948973369, "grad_norm": 0.1498914211988449, "learning_rate": 2.6929726431404456e-05, "loss": 1.1123, "step": 8515 }, { "epoch": 1.7312461882496444, "grad_norm": 0.16860176622867584, "learning_rate": 2.6909386758873184e-05, "loss": 1.156, "step": 8516 }, { "epoch": 1.7314494816019517, "grad_norm": 0.1492467224597931, "learning_rate": 2.6889047086341912e-05, "loss": 0.912, "step": 8517 }, { "epoch": 1.731652774954259, "grad_norm": 0.1606607884168625, "learning_rate": 2.686870741381064e-05, "loss": 0.8947, "step": 8518 }, { "epoch": 1.7318560683065662, "grad_norm": 0.15366114675998688, "learning_rate": 2.6848367741279368e-05, "loss": 1.0691, "step": 8519 }, { "epoch": 1.7320593616588738, "grad_norm": 0.1537121832370758, "learning_rate": 2.6828028068748095e-05, "loss": 1.0498, "step": 8520 }, { "epoch": 1.7322626550111813, "grad_norm": 0.1556290239095688, "learning_rate": 2.6807688396216823e-05, "loss": 0.9811, "step": 8521 }, { "epoch": 1.7324659483634886, "grad_norm": 0.16528859734535217, "learning_rate": 2.678734872368555e-05, "loss": 1.1268, "step": 8522 }, { "epoch": 1.7326692417157958, "grad_norm": 0.16671013832092285, "learning_rate": 2.676700905115428e-05, "loss": 1.1383, "step": 8523 }, { "epoch": 1.7328725350681031, "grad_norm": 0.15109676122665405, "learning_rate": 2.6746669378623007e-05, "loss": 1.0987, "step": 8524 }, { "epoch": 1.7330758284204106, "grad_norm": 0.18379352986812592, "learning_rate": 2.6726329706091735e-05, "loss": 1.0118, "step": 8525 }, { "epoch": 1.7332791217727181, "grad_norm": 0.14987234771251678, "learning_rate": 2.6705990033560463e-05, "loss": 0.9417, "step": 8526 }, { "epoch": 1.7334824151250254, "grad_norm": 0.1707787662744522, "learning_rate": 2.668565036102919e-05, "loss": 1.1333, "step": 8527 }, { "epoch": 1.7336857084773327, "grad_norm": 0.17603743076324463, "learning_rate": 2.6665310688497918e-05, "loss": 1.0761, "step": 8528 }, { "epoch": 1.73388900182964, "grad_norm": 0.17123474180698395, "learning_rate": 2.6644971015966646e-05, "loss": 1.0749, "step": 8529 }, { "epoch": 1.7340922951819475, "grad_norm": 0.15946339070796967, "learning_rate": 2.6624631343435374e-05, "loss": 0.9557, "step": 8530 }, { "epoch": 1.734295588534255, "grad_norm": 0.15838012099266052, "learning_rate": 2.6604291670904102e-05, "loss": 0.9923, "step": 8531 }, { "epoch": 1.7344988818865623, "grad_norm": 0.15502654016017914, "learning_rate": 2.658395199837283e-05, "loss": 1.1525, "step": 8532 }, { "epoch": 1.7347021752388696, "grad_norm": 0.15256285667419434, "learning_rate": 2.6563612325841554e-05, "loss": 0.9234, "step": 8533 }, { "epoch": 1.7349054685911771, "grad_norm": 0.16125310957431793, "learning_rate": 2.6543272653310282e-05, "loss": 1.0497, "step": 8534 }, { "epoch": 1.7351087619434844, "grad_norm": 0.15819425880908966, "learning_rate": 2.652293298077901e-05, "loss": 1.0688, "step": 8535 }, { "epoch": 1.735312055295792, "grad_norm": 0.1654258370399475, "learning_rate": 2.6502593308247738e-05, "loss": 1.071, "step": 8536 }, { "epoch": 1.7355153486480992, "grad_norm": 0.18211229145526886, "learning_rate": 2.6482253635716466e-05, "loss": 1.2299, "step": 8537 }, { "epoch": 1.7357186420004065, "grad_norm": 0.16108962893486023, "learning_rate": 2.6461913963185193e-05, "loss": 0.9642, "step": 8538 }, { "epoch": 1.735921935352714, "grad_norm": 0.15601445734500885, "learning_rate": 2.644157429065392e-05, "loss": 0.9411, "step": 8539 }, { "epoch": 1.7361252287050215, "grad_norm": 0.13617374002933502, "learning_rate": 2.642123461812265e-05, "loss": 0.881, "step": 8540 }, { "epoch": 1.7363285220573288, "grad_norm": 0.14470192790031433, "learning_rate": 2.6400894945591377e-05, "loss": 0.9139, "step": 8541 }, { "epoch": 1.736531815409636, "grad_norm": 0.16617143154144287, "learning_rate": 2.6380555273060105e-05, "loss": 1.06, "step": 8542 }, { "epoch": 1.7367351087619434, "grad_norm": 0.1740252822637558, "learning_rate": 2.6360215600528833e-05, "loss": 1.0253, "step": 8543 }, { "epoch": 1.7369384021142509, "grad_norm": 0.16015122830867767, "learning_rate": 2.633987592799756e-05, "loss": 1.108, "step": 8544 }, { "epoch": 1.7371416954665584, "grad_norm": 0.16304798424243927, "learning_rate": 2.631953625546629e-05, "loss": 1.0366, "step": 8545 }, { "epoch": 1.7373449888188657, "grad_norm": 0.1486780196428299, "learning_rate": 2.6299196582935016e-05, "loss": 0.9679, "step": 8546 }, { "epoch": 1.737548282171173, "grad_norm": 0.14865387976169586, "learning_rate": 2.6278856910403744e-05, "loss": 0.9587, "step": 8547 }, { "epoch": 1.7377515755234803, "grad_norm": 0.1523367017507553, "learning_rate": 2.6258517237872472e-05, "loss": 0.9913, "step": 8548 }, { "epoch": 1.7379548688757878, "grad_norm": 0.1653241217136383, "learning_rate": 2.62381775653412e-05, "loss": 0.8659, "step": 8549 }, { "epoch": 1.7381581622280953, "grad_norm": 0.15530776977539062, "learning_rate": 2.6217837892809928e-05, "loss": 0.9503, "step": 8550 }, { "epoch": 1.7383614555804026, "grad_norm": 0.15581166744232178, "learning_rate": 2.6197498220278655e-05, "loss": 0.864, "step": 8551 }, { "epoch": 1.7385647489327098, "grad_norm": 0.18677006661891937, "learning_rate": 2.6177158547747383e-05, "loss": 1.1236, "step": 8552 }, { "epoch": 1.7387680422850171, "grad_norm": 0.16768181324005127, "learning_rate": 2.615681887521611e-05, "loss": 1.0863, "step": 8553 }, { "epoch": 1.7389713356373246, "grad_norm": 0.16448278725147247, "learning_rate": 2.613647920268484e-05, "loss": 1.0627, "step": 8554 }, { "epoch": 1.7391746289896322, "grad_norm": 0.14863719046115875, "learning_rate": 2.6116139530153567e-05, "loss": 0.953, "step": 8555 }, { "epoch": 1.7393779223419394, "grad_norm": 0.17392180860042572, "learning_rate": 2.6095799857622295e-05, "loss": 1.0856, "step": 8556 }, { "epoch": 1.7395812156942467, "grad_norm": 0.15495418012142181, "learning_rate": 2.6075460185091023e-05, "loss": 0.8906, "step": 8557 }, { "epoch": 1.739784509046554, "grad_norm": 0.1724131852388382, "learning_rate": 2.605512051255975e-05, "loss": 1.0534, "step": 8558 }, { "epoch": 1.7399878023988615, "grad_norm": 0.15719473361968994, "learning_rate": 2.6034780840028478e-05, "loss": 1.049, "step": 8559 }, { "epoch": 1.740191095751169, "grad_norm": 0.17020252346992493, "learning_rate": 2.6014441167497206e-05, "loss": 0.9779, "step": 8560 }, { "epoch": 1.7403943891034763, "grad_norm": 0.1537570357322693, "learning_rate": 2.5994101494965934e-05, "loss": 1.0864, "step": 8561 }, { "epoch": 1.7405976824557836, "grad_norm": 0.17583875358104706, "learning_rate": 2.5973761822434662e-05, "loss": 1.133, "step": 8562 }, { "epoch": 1.7408009758080911, "grad_norm": 0.15067782998085022, "learning_rate": 2.595342214990339e-05, "loss": 0.928, "step": 8563 }, { "epoch": 1.7410042691603984, "grad_norm": 0.15106721222400665, "learning_rate": 2.5933082477372117e-05, "loss": 0.92, "step": 8564 }, { "epoch": 1.741207562512706, "grad_norm": 0.16105949878692627, "learning_rate": 2.5912742804840845e-05, "loss": 1.0759, "step": 8565 }, { "epoch": 1.7414108558650132, "grad_norm": 0.1475791186094284, "learning_rate": 2.5892403132309573e-05, "loss": 1.0209, "step": 8566 }, { "epoch": 1.7416141492173205, "grad_norm": 0.1664276123046875, "learning_rate": 2.58720634597783e-05, "loss": 1.0454, "step": 8567 }, { "epoch": 1.741817442569628, "grad_norm": 0.1712283194065094, "learning_rate": 2.585172378724703e-05, "loss": 0.9977, "step": 8568 }, { "epoch": 1.7420207359219355, "grad_norm": 0.15563756227493286, "learning_rate": 2.5831384114715757e-05, "loss": 0.9738, "step": 8569 }, { "epoch": 1.7422240292742428, "grad_norm": 0.1421973556280136, "learning_rate": 2.5811044442184485e-05, "loss": 0.851, "step": 8570 }, { "epoch": 1.74242732262655, "grad_norm": 0.167726531624794, "learning_rate": 2.5790704769653212e-05, "loss": 1.0939, "step": 8571 }, { "epoch": 1.7426306159788574, "grad_norm": 0.1623532921075821, "learning_rate": 2.577036509712194e-05, "loss": 1.1149, "step": 8572 }, { "epoch": 1.7428339093311649, "grad_norm": 0.15647442638874054, "learning_rate": 2.5750025424590668e-05, "loss": 0.9609, "step": 8573 }, { "epoch": 1.7430372026834724, "grad_norm": 0.1691005676984787, "learning_rate": 2.572968575205939e-05, "loss": 1.0528, "step": 8574 }, { "epoch": 1.7432404960357797, "grad_norm": 0.17108562588691711, "learning_rate": 2.5709346079528117e-05, "loss": 1.1082, "step": 8575 }, { "epoch": 1.743443789388087, "grad_norm": 0.1632906198501587, "learning_rate": 2.5689006406996845e-05, "loss": 0.9907, "step": 8576 }, { "epoch": 1.7436470827403943, "grad_norm": 0.16216085851192474, "learning_rate": 2.5668666734465573e-05, "loss": 1.0345, "step": 8577 }, { "epoch": 1.7438503760927018, "grad_norm": 0.16023129224777222, "learning_rate": 2.56483270619343e-05, "loss": 1.0036, "step": 8578 }, { "epoch": 1.7440536694450093, "grad_norm": 0.1606782227754593, "learning_rate": 2.562798738940303e-05, "loss": 1.0602, "step": 8579 }, { "epoch": 1.7442569627973166, "grad_norm": 0.15448054671287537, "learning_rate": 2.5607647716871756e-05, "loss": 0.8422, "step": 8580 }, { "epoch": 1.7444602561496239, "grad_norm": 0.15102481842041016, "learning_rate": 2.5587308044340484e-05, "loss": 0.9657, "step": 8581 }, { "epoch": 1.7446635495019311, "grad_norm": 0.15836581587791443, "learning_rate": 2.5566968371809212e-05, "loss": 0.9521, "step": 8582 }, { "epoch": 1.7448668428542387, "grad_norm": 0.17319001257419586, "learning_rate": 2.554662869927794e-05, "loss": 1.0355, "step": 8583 }, { "epoch": 1.7450701362065462, "grad_norm": 0.1609099954366684, "learning_rate": 2.5526289026746668e-05, "loss": 0.9202, "step": 8584 }, { "epoch": 1.7452734295588535, "grad_norm": 0.1531849205493927, "learning_rate": 2.5505949354215396e-05, "loss": 0.963, "step": 8585 }, { "epoch": 1.7454767229111607, "grad_norm": 0.15625271201133728, "learning_rate": 2.5485609681684123e-05, "loss": 0.8771, "step": 8586 }, { "epoch": 1.745680016263468, "grad_norm": 0.15236467123031616, "learning_rate": 2.546527000915285e-05, "loss": 0.9514, "step": 8587 }, { "epoch": 1.7458833096157755, "grad_norm": 0.16796226799488068, "learning_rate": 2.5444930336621582e-05, "loss": 1.0722, "step": 8588 }, { "epoch": 1.746086602968083, "grad_norm": 0.14493130147457123, "learning_rate": 2.542459066409031e-05, "loss": 0.8856, "step": 8589 }, { "epoch": 1.7462898963203903, "grad_norm": 0.15408338606357574, "learning_rate": 2.5404250991559038e-05, "loss": 1.0223, "step": 8590 }, { "epoch": 1.7464931896726976, "grad_norm": 0.15143847465515137, "learning_rate": 2.5383911319027766e-05, "loss": 0.9365, "step": 8591 }, { "epoch": 1.7466964830250051, "grad_norm": 0.19309405982494354, "learning_rate": 2.5363571646496494e-05, "loss": 1.2244, "step": 8592 }, { "epoch": 1.7468997763773124, "grad_norm": 0.15929043292999268, "learning_rate": 2.5343231973965222e-05, "loss": 1.0163, "step": 8593 }, { "epoch": 1.74710306972962, "grad_norm": 0.18514969944953918, "learning_rate": 2.532289230143395e-05, "loss": 1.0079, "step": 8594 }, { "epoch": 1.7473063630819272, "grad_norm": 0.17101940512657166, "learning_rate": 2.5302552628902677e-05, "loss": 0.9669, "step": 8595 }, { "epoch": 1.7475096564342345, "grad_norm": 0.14626432955265045, "learning_rate": 2.5282212956371405e-05, "loss": 0.9414, "step": 8596 }, { "epoch": 1.747712949786542, "grad_norm": 0.16107916831970215, "learning_rate": 2.5261873283840133e-05, "loss": 1.0055, "step": 8597 }, { "epoch": 1.7479162431388495, "grad_norm": 0.16907745599746704, "learning_rate": 2.524153361130886e-05, "loss": 1.1728, "step": 8598 }, { "epoch": 1.7481195364911568, "grad_norm": 0.16808782517910004, "learning_rate": 2.522119393877759e-05, "loss": 1.2253, "step": 8599 }, { "epoch": 1.748322829843464, "grad_norm": 0.1396816521883011, "learning_rate": 2.5200854266246317e-05, "loss": 0.8793, "step": 8600 }, { "epoch": 1.7485261231957714, "grad_norm": 0.15383778512477875, "learning_rate": 2.5180514593715044e-05, "loss": 1.1169, "step": 8601 }, { "epoch": 1.748729416548079, "grad_norm": 0.15620054304599762, "learning_rate": 2.5160174921183772e-05, "loss": 1.0014, "step": 8602 }, { "epoch": 1.7489327099003864, "grad_norm": 0.16913941502571106, "learning_rate": 2.51398352486525e-05, "loss": 1.0208, "step": 8603 }, { "epoch": 1.7491360032526937, "grad_norm": 0.149295374751091, "learning_rate": 2.5119495576121228e-05, "loss": 1.0289, "step": 8604 }, { "epoch": 1.749339296605001, "grad_norm": 0.16395288705825806, "learning_rate": 2.5099155903589956e-05, "loss": 1.1434, "step": 8605 }, { "epoch": 1.7495425899573083, "grad_norm": 0.15223750472068787, "learning_rate": 2.5078816231058684e-05, "loss": 0.8848, "step": 8606 }, { "epoch": 1.7497458833096158, "grad_norm": 0.1507001370191574, "learning_rate": 2.505847655852741e-05, "loss": 0.8563, "step": 8607 }, { "epoch": 1.7499491766619233, "grad_norm": 0.17924107611179352, "learning_rate": 2.503813688599614e-05, "loss": 1.1117, "step": 8608 }, { "epoch": 1.7501524700142306, "grad_norm": 0.171994149684906, "learning_rate": 2.5017797213464867e-05, "loss": 1.0739, "step": 8609 }, { "epoch": 1.7503557633665379, "grad_norm": 0.17324091494083405, "learning_rate": 2.4997457540933592e-05, "loss": 1.1091, "step": 8610 }, { "epoch": 1.7505590567188452, "grad_norm": 0.15873806178569794, "learning_rate": 2.497711786840232e-05, "loss": 1.05, "step": 8611 }, { "epoch": 1.7507623500711527, "grad_norm": 0.16404686868190765, "learning_rate": 2.4956778195871047e-05, "loss": 0.9862, "step": 8612 }, { "epoch": 1.7509656434234602, "grad_norm": 0.1653476506471634, "learning_rate": 2.4936438523339775e-05, "loss": 1.041, "step": 8613 }, { "epoch": 1.7511689367757675, "grad_norm": 0.15454378724098206, "learning_rate": 2.4916098850808503e-05, "loss": 0.9947, "step": 8614 }, { "epoch": 1.7513722301280747, "grad_norm": 0.16011664271354675, "learning_rate": 2.489575917827723e-05, "loss": 1.0455, "step": 8615 }, { "epoch": 1.751575523480382, "grad_norm": 0.1689242273569107, "learning_rate": 2.487541950574596e-05, "loss": 1.0899, "step": 8616 }, { "epoch": 1.7517788168326895, "grad_norm": 0.16530191898345947, "learning_rate": 2.4855079833214687e-05, "loss": 1.0008, "step": 8617 }, { "epoch": 1.751982110184997, "grad_norm": 0.15537337958812714, "learning_rate": 2.4834740160683415e-05, "loss": 1.0769, "step": 8618 }, { "epoch": 1.7521854035373043, "grad_norm": 0.1617366075515747, "learning_rate": 2.4814400488152142e-05, "loss": 0.982, "step": 8619 }, { "epoch": 1.7523886968896116, "grad_norm": 0.17090179026126862, "learning_rate": 2.479406081562087e-05, "loss": 1.0244, "step": 8620 }, { "epoch": 1.7525919902419191, "grad_norm": 0.18017461895942688, "learning_rate": 2.4773721143089598e-05, "loss": 1.1363, "step": 8621 }, { "epoch": 1.7527952835942264, "grad_norm": 0.1750902533531189, "learning_rate": 2.4753381470558326e-05, "loss": 1.1712, "step": 8622 }, { "epoch": 1.752998576946534, "grad_norm": 0.1540890485048294, "learning_rate": 2.4733041798027054e-05, "loss": 1.0446, "step": 8623 }, { "epoch": 1.7532018702988412, "grad_norm": 0.14529183506965637, "learning_rate": 2.471270212549578e-05, "loss": 1.0394, "step": 8624 }, { "epoch": 1.7534051636511485, "grad_norm": 0.16094622015953064, "learning_rate": 2.469236245296451e-05, "loss": 0.9898, "step": 8625 }, { "epoch": 1.753608457003456, "grad_norm": 0.1543346494436264, "learning_rate": 2.4672022780433234e-05, "loss": 0.9957, "step": 8626 }, { "epoch": 1.7538117503557635, "grad_norm": 0.15345224738121033, "learning_rate": 2.4651683107901962e-05, "loss": 0.9586, "step": 8627 }, { "epoch": 1.7540150437080708, "grad_norm": 0.16198302805423737, "learning_rate": 2.463134343537069e-05, "loss": 0.9189, "step": 8628 }, { "epoch": 1.754218337060378, "grad_norm": 0.1474822610616684, "learning_rate": 2.4611003762839418e-05, "loss": 0.931, "step": 8629 }, { "epoch": 1.7544216304126854, "grad_norm": 0.16654402017593384, "learning_rate": 2.4590664090308145e-05, "loss": 1.073, "step": 8630 }, { "epoch": 1.754624923764993, "grad_norm": 0.1400275081396103, "learning_rate": 2.4570324417776873e-05, "loss": 0.907, "step": 8631 }, { "epoch": 1.7548282171173004, "grad_norm": 0.1563587635755539, "learning_rate": 2.45499847452456e-05, "loss": 0.9726, "step": 8632 }, { "epoch": 1.7550315104696077, "grad_norm": 0.14966051280498505, "learning_rate": 2.452964507271433e-05, "loss": 0.9912, "step": 8633 }, { "epoch": 1.755234803821915, "grad_norm": 0.17576399445533752, "learning_rate": 2.4509305400183057e-05, "loss": 0.9889, "step": 8634 }, { "epoch": 1.7554380971742223, "grad_norm": 0.14570151269435883, "learning_rate": 2.4488965727651785e-05, "loss": 0.845, "step": 8635 }, { "epoch": 1.7556413905265298, "grad_norm": 0.14043374359607697, "learning_rate": 2.4468626055120512e-05, "loss": 0.7473, "step": 8636 }, { "epoch": 1.7558446838788373, "grad_norm": 0.1575162708759308, "learning_rate": 2.444828638258924e-05, "loss": 1.0496, "step": 8637 }, { "epoch": 1.7560479772311446, "grad_norm": 0.17352034151554108, "learning_rate": 2.4427946710057968e-05, "loss": 1.141, "step": 8638 }, { "epoch": 1.7562512705834519, "grad_norm": 0.16709619760513306, "learning_rate": 2.4407607037526696e-05, "loss": 1.1015, "step": 8639 }, { "epoch": 1.7564545639357592, "grad_norm": 0.1486104428768158, "learning_rate": 2.4387267364995424e-05, "loss": 0.9989, "step": 8640 }, { "epoch": 1.7566578572880667, "grad_norm": 0.1603458970785141, "learning_rate": 2.4366927692464152e-05, "loss": 0.9816, "step": 8641 }, { "epoch": 1.7568611506403742, "grad_norm": 0.14481213688850403, "learning_rate": 2.434658801993288e-05, "loss": 0.9044, "step": 8642 }, { "epoch": 1.7570644439926815, "grad_norm": 0.1569254845380783, "learning_rate": 2.4326248347401607e-05, "loss": 0.9009, "step": 8643 }, { "epoch": 1.7572677373449888, "grad_norm": 0.16491267085075378, "learning_rate": 2.4305908674870335e-05, "loss": 1.0607, "step": 8644 }, { "epoch": 1.757471030697296, "grad_norm": 0.15816418826580048, "learning_rate": 2.4285569002339063e-05, "loss": 0.966, "step": 8645 }, { "epoch": 1.7576743240496036, "grad_norm": 0.17248882353305817, "learning_rate": 2.426522932980779e-05, "loss": 1.1783, "step": 8646 }, { "epoch": 1.757877617401911, "grad_norm": 0.15414273738861084, "learning_rate": 2.424488965727652e-05, "loss": 1.0752, "step": 8647 }, { "epoch": 1.7580809107542184, "grad_norm": 0.16159240901470184, "learning_rate": 2.4224549984745247e-05, "loss": 1.0431, "step": 8648 }, { "epoch": 1.7582842041065256, "grad_norm": 0.1643780767917633, "learning_rate": 2.4204210312213974e-05, "loss": 1.1693, "step": 8649 }, { "epoch": 1.7584874974588331, "grad_norm": 0.16782324016094208, "learning_rate": 2.4183870639682702e-05, "loss": 1.091, "step": 8650 }, { "epoch": 1.7586907908111404, "grad_norm": 0.15618129074573517, "learning_rate": 2.416353096715143e-05, "loss": 1.0182, "step": 8651 }, { "epoch": 1.758894084163448, "grad_norm": 0.17457202076911926, "learning_rate": 2.4143191294620158e-05, "loss": 1.1616, "step": 8652 }, { "epoch": 1.7590973775157552, "grad_norm": 0.16035369038581848, "learning_rate": 2.4122851622088886e-05, "loss": 0.9861, "step": 8653 }, { "epoch": 1.7593006708680625, "grad_norm": 0.17972034215927124, "learning_rate": 2.4102511949557614e-05, "loss": 1.1431, "step": 8654 }, { "epoch": 1.75950396422037, "grad_norm": 0.161958709359169, "learning_rate": 2.408217227702634e-05, "loss": 1.0852, "step": 8655 }, { "epoch": 1.7597072575726773, "grad_norm": 0.1621416211128235, "learning_rate": 2.406183260449507e-05, "loss": 0.9122, "step": 8656 }, { "epoch": 1.7599105509249848, "grad_norm": 0.17243926227092743, "learning_rate": 2.4041492931963797e-05, "loss": 1.0571, "step": 8657 }, { "epoch": 1.7601138442772921, "grad_norm": 0.15671218931674957, "learning_rate": 2.4021153259432525e-05, "loss": 0.9073, "step": 8658 }, { "epoch": 1.7603171376295994, "grad_norm": 0.1628493219614029, "learning_rate": 2.4000813586901253e-05, "loss": 0.9029, "step": 8659 }, { "epoch": 1.760520430981907, "grad_norm": 0.1600339561700821, "learning_rate": 2.398047391436998e-05, "loss": 1.0188, "step": 8660 }, { "epoch": 1.7607237243342144, "grad_norm": 0.16779568791389465, "learning_rate": 2.396013424183871e-05, "loss": 0.9023, "step": 8661 }, { "epoch": 1.7609270176865217, "grad_norm": 0.16035765409469604, "learning_rate": 2.3939794569307437e-05, "loss": 1.0467, "step": 8662 }, { "epoch": 1.761130311038829, "grad_norm": 0.15300844609737396, "learning_rate": 2.3919454896776164e-05, "loss": 0.8985, "step": 8663 }, { "epoch": 1.7613336043911363, "grad_norm": 0.1530771255493164, "learning_rate": 2.3899115224244892e-05, "loss": 1.0437, "step": 8664 }, { "epoch": 1.7615368977434438, "grad_norm": 0.16901607811450958, "learning_rate": 2.387877555171362e-05, "loss": 1.0262, "step": 8665 }, { "epoch": 1.7617401910957513, "grad_norm": 0.16632038354873657, "learning_rate": 2.3858435879182348e-05, "loss": 1.0134, "step": 8666 }, { "epoch": 1.7619434844480586, "grad_norm": 0.19071345031261444, "learning_rate": 2.3838096206651072e-05, "loss": 1.2342, "step": 8667 }, { "epoch": 1.7621467778003659, "grad_norm": 0.1486322581768036, "learning_rate": 2.38177565341198e-05, "loss": 0.9473, "step": 8668 }, { "epoch": 1.7623500711526732, "grad_norm": 0.15384294092655182, "learning_rate": 2.3797416861588528e-05, "loss": 0.9527, "step": 8669 }, { "epoch": 1.7625533645049807, "grad_norm": 0.16116808354854584, "learning_rate": 2.3777077189057256e-05, "loss": 1.0498, "step": 8670 }, { "epoch": 1.7627566578572882, "grad_norm": 0.15532322227954865, "learning_rate": 2.3756737516525984e-05, "loss": 1.0291, "step": 8671 }, { "epoch": 1.7629599512095955, "grad_norm": 0.15127001702785492, "learning_rate": 2.373639784399471e-05, "loss": 1.0101, "step": 8672 }, { "epoch": 1.7631632445619028, "grad_norm": 0.14776812493801117, "learning_rate": 2.371605817146344e-05, "loss": 0.8829, "step": 8673 }, { "epoch": 1.76336653791421, "grad_norm": 0.16012734174728394, "learning_rate": 2.3695718498932167e-05, "loss": 0.9639, "step": 8674 }, { "epoch": 1.7635698312665176, "grad_norm": 0.15442492067813873, "learning_rate": 2.3675378826400895e-05, "loss": 0.9638, "step": 8675 }, { "epoch": 1.763773124618825, "grad_norm": 0.1749064326286316, "learning_rate": 2.3655039153869623e-05, "loss": 1.1291, "step": 8676 }, { "epoch": 1.7639764179711324, "grad_norm": 0.1717270463705063, "learning_rate": 2.363469948133835e-05, "loss": 0.9974, "step": 8677 }, { "epoch": 1.7641797113234396, "grad_norm": 0.1567365825176239, "learning_rate": 2.361435980880708e-05, "loss": 0.9906, "step": 8678 }, { "epoch": 1.7643830046757472, "grad_norm": 0.1676221489906311, "learning_rate": 2.3594020136275807e-05, "loss": 1.0574, "step": 8679 }, { "epoch": 1.7645862980280544, "grad_norm": 0.15950854122638702, "learning_rate": 2.3573680463744534e-05, "loss": 1.0236, "step": 8680 }, { "epoch": 1.764789591380362, "grad_norm": 0.15914255380630493, "learning_rate": 2.3553340791213262e-05, "loss": 1.0645, "step": 8681 }, { "epoch": 1.7649928847326692, "grad_norm": 0.15521490573883057, "learning_rate": 2.353300111868199e-05, "loss": 1.0252, "step": 8682 }, { "epoch": 1.7651961780849765, "grad_norm": 0.1569053679704666, "learning_rate": 2.3512661446150718e-05, "loss": 1.0458, "step": 8683 }, { "epoch": 1.765399471437284, "grad_norm": 0.1684349924325943, "learning_rate": 2.3492321773619446e-05, "loss": 1.0584, "step": 8684 }, { "epoch": 1.7656027647895913, "grad_norm": 0.15103667974472046, "learning_rate": 2.3471982101088174e-05, "loss": 1.0146, "step": 8685 }, { "epoch": 1.7658060581418988, "grad_norm": 0.14591017365455627, "learning_rate": 2.34516424285569e-05, "loss": 0.8528, "step": 8686 }, { "epoch": 1.7660093514942061, "grad_norm": 0.16020065546035767, "learning_rate": 2.343130275602563e-05, "loss": 1.1309, "step": 8687 }, { "epoch": 1.7662126448465134, "grad_norm": 0.16146892309188843, "learning_rate": 2.3410963083494354e-05, "loss": 1.0537, "step": 8688 }, { "epoch": 1.766415938198821, "grad_norm": 0.15416647493839264, "learning_rate": 2.3390623410963085e-05, "loss": 0.9077, "step": 8689 }, { "epoch": 1.7666192315511284, "grad_norm": 0.14960621297359467, "learning_rate": 2.3370283738431813e-05, "loss": 0.8932, "step": 8690 }, { "epoch": 1.7668225249034357, "grad_norm": 0.14941620826721191, "learning_rate": 2.334994406590054e-05, "loss": 1.0221, "step": 8691 }, { "epoch": 1.767025818255743, "grad_norm": 0.1510924994945526, "learning_rate": 2.332960439336927e-05, "loss": 0.9644, "step": 8692 }, { "epoch": 1.7672291116080503, "grad_norm": 0.17590442299842834, "learning_rate": 2.3309264720837996e-05, "loss": 1.0622, "step": 8693 }, { "epoch": 1.7674324049603578, "grad_norm": 0.15594623982906342, "learning_rate": 2.3288925048306724e-05, "loss": 0.9452, "step": 8694 }, { "epoch": 1.7676356983126653, "grad_norm": 0.16570919752120972, "learning_rate": 2.3268585375775452e-05, "loss": 1.0828, "step": 8695 }, { "epoch": 1.7678389916649726, "grad_norm": 0.14856794476509094, "learning_rate": 2.324824570324418e-05, "loss": 0.9798, "step": 8696 }, { "epoch": 1.76804228501728, "grad_norm": 0.15642361342906952, "learning_rate": 2.3227906030712908e-05, "loss": 1.0174, "step": 8697 }, { "epoch": 1.7682455783695872, "grad_norm": 0.17373007535934448, "learning_rate": 2.3207566358181636e-05, "loss": 1.1138, "step": 8698 }, { "epoch": 1.7684488717218947, "grad_norm": 0.1775466948747635, "learning_rate": 2.3187226685650364e-05, "loss": 1.0271, "step": 8699 }, { "epoch": 1.7686521650742022, "grad_norm": 0.15572163462638855, "learning_rate": 2.316688701311909e-05, "loss": 0.862, "step": 8700 }, { "epoch": 1.7688554584265095, "grad_norm": 0.1515238881111145, "learning_rate": 2.314654734058782e-05, "loss": 0.9004, "step": 8701 }, { "epoch": 1.7690587517788168, "grad_norm": 0.16354455053806305, "learning_rate": 2.3126207668056547e-05, "loss": 1.0885, "step": 8702 }, { "epoch": 1.769262045131124, "grad_norm": 0.14658953249454498, "learning_rate": 2.3105867995525275e-05, "loss": 0.975, "step": 8703 }, { "epoch": 1.7694653384834316, "grad_norm": 0.14852751791477203, "learning_rate": 2.3085528322994003e-05, "loss": 0.9863, "step": 8704 }, { "epoch": 1.769668631835739, "grad_norm": 0.17904391884803772, "learning_rate": 2.306518865046273e-05, "loss": 1.1327, "step": 8705 }, { "epoch": 1.7698719251880464, "grad_norm": 0.17801867425441742, "learning_rate": 2.304484897793146e-05, "loss": 1.1451, "step": 8706 }, { "epoch": 1.7700752185403537, "grad_norm": 0.1477910876274109, "learning_rate": 2.3024509305400186e-05, "loss": 0.8979, "step": 8707 }, { "epoch": 1.770278511892661, "grad_norm": 0.15619969367980957, "learning_rate": 2.3004169632868914e-05, "loss": 0.9437, "step": 8708 }, { "epoch": 1.7704818052449685, "grad_norm": 0.1542186439037323, "learning_rate": 2.298382996033764e-05, "loss": 1.0265, "step": 8709 }, { "epoch": 1.770685098597276, "grad_norm": 0.1608661264181137, "learning_rate": 2.2963490287806367e-05, "loss": 1.0949, "step": 8710 }, { "epoch": 1.7708883919495833, "grad_norm": 0.16307374835014343, "learning_rate": 2.2943150615275094e-05, "loss": 1.119, "step": 8711 }, { "epoch": 1.7710916853018905, "grad_norm": 0.16916891932487488, "learning_rate": 2.2922810942743822e-05, "loss": 1.0064, "step": 8712 }, { "epoch": 1.771294978654198, "grad_norm": 0.16762399673461914, "learning_rate": 2.290247127021255e-05, "loss": 1.0715, "step": 8713 }, { "epoch": 1.7714982720065053, "grad_norm": 0.14874054491519928, "learning_rate": 2.2882131597681278e-05, "loss": 0.8658, "step": 8714 }, { "epoch": 1.7717015653588128, "grad_norm": 0.1698228269815445, "learning_rate": 2.2861791925150006e-05, "loss": 1.0503, "step": 8715 }, { "epoch": 1.7719048587111201, "grad_norm": 0.1641293466091156, "learning_rate": 2.2841452252618734e-05, "loss": 1.0572, "step": 8716 }, { "epoch": 1.7721081520634274, "grad_norm": 0.1568494737148285, "learning_rate": 2.282111258008746e-05, "loss": 0.984, "step": 8717 }, { "epoch": 1.772311445415735, "grad_norm": 0.16213612258434296, "learning_rate": 2.280077290755619e-05, "loss": 0.973, "step": 8718 }, { "epoch": 1.7725147387680424, "grad_norm": 0.1667012721300125, "learning_rate": 2.2780433235024917e-05, "loss": 1.0665, "step": 8719 }, { "epoch": 1.7727180321203497, "grad_norm": 0.15457123517990112, "learning_rate": 2.2760093562493645e-05, "loss": 1.06, "step": 8720 }, { "epoch": 1.772921325472657, "grad_norm": 0.1483316570520401, "learning_rate": 2.2739753889962373e-05, "loss": 0.8819, "step": 8721 }, { "epoch": 1.7731246188249643, "grad_norm": 0.1739690601825714, "learning_rate": 2.27194142174311e-05, "loss": 1.1123, "step": 8722 }, { "epoch": 1.7733279121772718, "grad_norm": 0.18747751414775848, "learning_rate": 2.269907454489983e-05, "loss": 1.121, "step": 8723 }, { "epoch": 1.7735312055295793, "grad_norm": 0.16540110111236572, "learning_rate": 2.2678734872368556e-05, "loss": 1.0137, "step": 8724 }, { "epoch": 1.7737344988818866, "grad_norm": 0.185720756649971, "learning_rate": 2.2658395199837284e-05, "loss": 1.2018, "step": 8725 }, { "epoch": 1.773937792234194, "grad_norm": 0.1612144559621811, "learning_rate": 2.2638055527306012e-05, "loss": 1.0388, "step": 8726 }, { "epoch": 1.7741410855865012, "grad_norm": 0.1632845252752304, "learning_rate": 2.261771585477474e-05, "loss": 0.9703, "step": 8727 }, { "epoch": 1.7743443789388087, "grad_norm": 0.1624760627746582, "learning_rate": 2.2597376182243468e-05, "loss": 1.035, "step": 8728 }, { "epoch": 1.7745476722911162, "grad_norm": 0.1559683382511139, "learning_rate": 2.2577036509712196e-05, "loss": 1.0191, "step": 8729 }, { "epoch": 1.7747509656434235, "grad_norm": 0.1438092738389969, "learning_rate": 2.255669683718092e-05, "loss": 0.9954, "step": 8730 }, { "epoch": 1.7749542589957308, "grad_norm": 0.15409444272518158, "learning_rate": 2.2536357164649648e-05, "loss": 1.0011, "step": 8731 }, { "epoch": 1.775157552348038, "grad_norm": 0.17864732444286346, "learning_rate": 2.2516017492118376e-05, "loss": 1.221, "step": 8732 }, { "epoch": 1.7753608457003456, "grad_norm": 0.1632302850484848, "learning_rate": 2.2495677819587104e-05, "loss": 1.1048, "step": 8733 }, { "epoch": 1.775564139052653, "grad_norm": 0.158182755112648, "learning_rate": 2.247533814705583e-05, "loss": 1.0169, "step": 8734 }, { "epoch": 1.7757674324049604, "grad_norm": 0.1428188532590866, "learning_rate": 2.245499847452456e-05, "loss": 0.9004, "step": 8735 }, { "epoch": 1.7759707257572677, "grad_norm": 0.14167264103889465, "learning_rate": 2.2434658801993287e-05, "loss": 0.8933, "step": 8736 }, { "epoch": 1.776174019109575, "grad_norm": 0.1453506201505661, "learning_rate": 2.2414319129462015e-05, "loss": 0.957, "step": 8737 }, { "epoch": 1.7763773124618825, "grad_norm": 0.1613999754190445, "learning_rate": 2.2393979456930743e-05, "loss": 1.0461, "step": 8738 }, { "epoch": 1.77658060581419, "grad_norm": 0.143203467130661, "learning_rate": 2.237363978439947e-05, "loss": 0.8781, "step": 8739 }, { "epoch": 1.7767838991664973, "grad_norm": 0.1658238172531128, "learning_rate": 2.23533001118682e-05, "loss": 1.1794, "step": 8740 }, { "epoch": 1.7769871925188045, "grad_norm": 0.1413705199956894, "learning_rate": 2.2332960439336926e-05, "loss": 0.9271, "step": 8741 }, { "epoch": 1.777190485871112, "grad_norm": 0.15212002396583557, "learning_rate": 2.2312620766805654e-05, "loss": 1.0865, "step": 8742 }, { "epoch": 1.7773937792234193, "grad_norm": 0.16583633422851562, "learning_rate": 2.2292281094274382e-05, "loss": 1.0596, "step": 8743 }, { "epoch": 1.7775970725757269, "grad_norm": 0.158931702375412, "learning_rate": 2.227194142174311e-05, "loss": 1.0673, "step": 8744 }, { "epoch": 1.7778003659280341, "grad_norm": 0.16555020213127136, "learning_rate": 2.2251601749211838e-05, "loss": 1.2127, "step": 8745 }, { "epoch": 1.7780036592803414, "grad_norm": 0.16637022793293, "learning_rate": 2.2231262076680566e-05, "loss": 1.01, "step": 8746 }, { "epoch": 1.778206952632649, "grad_norm": 0.16700969636440277, "learning_rate": 2.2210922404149297e-05, "loss": 1.0795, "step": 8747 }, { "epoch": 1.7784102459849565, "grad_norm": 0.14032140374183655, "learning_rate": 2.2190582731618025e-05, "loss": 0.8598, "step": 8748 }, { "epoch": 1.7786135393372637, "grad_norm": 0.16577371954917908, "learning_rate": 2.2170243059086753e-05, "loss": 1.0524, "step": 8749 }, { "epoch": 1.778816832689571, "grad_norm": 0.15001162886619568, "learning_rate": 2.2149903386555477e-05, "loss": 0.8276, "step": 8750 }, { "epoch": 1.7790201260418783, "grad_norm": 0.17136628925800323, "learning_rate": 2.2129563714024205e-05, "loss": 0.9461, "step": 8751 }, { "epoch": 1.7792234193941858, "grad_norm": 0.1448829025030136, "learning_rate": 2.2109224041492933e-05, "loss": 0.8843, "step": 8752 }, { "epoch": 1.7794267127464933, "grad_norm": 0.1616746038198471, "learning_rate": 2.208888436896166e-05, "loss": 0.9882, "step": 8753 }, { "epoch": 1.7796300060988006, "grad_norm": 0.1401311755180359, "learning_rate": 2.206854469643039e-05, "loss": 0.8597, "step": 8754 }, { "epoch": 1.779833299451108, "grad_norm": 0.14304830133914948, "learning_rate": 2.2048205023899116e-05, "loss": 0.9064, "step": 8755 }, { "epoch": 1.7800365928034152, "grad_norm": 0.1624108999967575, "learning_rate": 2.2027865351367844e-05, "loss": 0.933, "step": 8756 }, { "epoch": 1.7802398861557227, "grad_norm": 0.14867079257965088, "learning_rate": 2.2007525678836572e-05, "loss": 0.9177, "step": 8757 }, { "epoch": 1.7804431795080302, "grad_norm": 0.16010092198848724, "learning_rate": 2.19871860063053e-05, "loss": 0.9704, "step": 8758 }, { "epoch": 1.7806464728603375, "grad_norm": 0.15511338412761688, "learning_rate": 2.1966846333774028e-05, "loss": 0.9847, "step": 8759 }, { "epoch": 1.7808497662126448, "grad_norm": 0.1639104187488556, "learning_rate": 2.1946506661242756e-05, "loss": 1.1286, "step": 8760 }, { "epoch": 1.781053059564952, "grad_norm": 0.18449360132217407, "learning_rate": 2.1926166988711483e-05, "loss": 1.1968, "step": 8761 }, { "epoch": 1.7812563529172596, "grad_norm": 0.14939777553081512, "learning_rate": 2.190582731618021e-05, "loss": 0.8643, "step": 8762 }, { "epoch": 1.781459646269567, "grad_norm": 0.1642538458108902, "learning_rate": 2.188548764364894e-05, "loss": 1.1455, "step": 8763 }, { "epoch": 1.7816629396218744, "grad_norm": 0.1654261350631714, "learning_rate": 2.1865147971117667e-05, "loss": 0.9213, "step": 8764 }, { "epoch": 1.7818662329741817, "grad_norm": 0.1614561378955841, "learning_rate": 2.1844808298586395e-05, "loss": 0.9744, "step": 8765 }, { "epoch": 1.782069526326489, "grad_norm": 0.16945448517799377, "learning_rate": 2.1824468626055123e-05, "loss": 0.9879, "step": 8766 }, { "epoch": 1.7822728196787965, "grad_norm": 0.16549819707870483, "learning_rate": 2.180412895352385e-05, "loss": 1.1835, "step": 8767 }, { "epoch": 1.782476113031104, "grad_norm": 0.15457910299301147, "learning_rate": 2.178378928099258e-05, "loss": 0.9769, "step": 8768 }, { "epoch": 1.7826794063834113, "grad_norm": 0.14759616553783417, "learning_rate": 2.1763449608461306e-05, "loss": 0.9037, "step": 8769 }, { "epoch": 1.7828826997357186, "grad_norm": 0.1654442995786667, "learning_rate": 2.1743109935930034e-05, "loss": 1.1287, "step": 8770 }, { "epoch": 1.783085993088026, "grad_norm": 0.15763290226459503, "learning_rate": 2.172277026339876e-05, "loss": 1.0302, "step": 8771 }, { "epoch": 1.7832892864403334, "grad_norm": 0.1774844229221344, "learning_rate": 2.1702430590867486e-05, "loss": 0.9519, "step": 8772 }, { "epoch": 1.7834925797926409, "grad_norm": 0.16249552369117737, "learning_rate": 2.1682090918336214e-05, "loss": 1.0732, "step": 8773 }, { "epoch": 1.7836958731449482, "grad_norm": 0.19557397067546844, "learning_rate": 2.1661751245804942e-05, "loss": 1.1001, "step": 8774 }, { "epoch": 1.7838991664972554, "grad_norm": 0.17284958064556122, "learning_rate": 2.164141157327367e-05, "loss": 1.0322, "step": 8775 }, { "epoch": 1.784102459849563, "grad_norm": 0.1687261462211609, "learning_rate": 2.1621071900742398e-05, "loss": 1.0279, "step": 8776 }, { "epoch": 1.7843057532018705, "grad_norm": 0.1613539606332779, "learning_rate": 2.1600732228211126e-05, "loss": 1.0223, "step": 8777 }, { "epoch": 1.7845090465541777, "grad_norm": 0.1718090921640396, "learning_rate": 2.1580392555679854e-05, "loss": 1.0618, "step": 8778 }, { "epoch": 1.784712339906485, "grad_norm": 0.17552468180656433, "learning_rate": 2.156005288314858e-05, "loss": 1.246, "step": 8779 }, { "epoch": 1.7849156332587923, "grad_norm": 0.15041019022464752, "learning_rate": 2.153971321061731e-05, "loss": 1.0258, "step": 8780 }, { "epoch": 1.7851189266110998, "grad_norm": 0.1673612892627716, "learning_rate": 2.1519373538086037e-05, "loss": 0.9425, "step": 8781 }, { "epoch": 1.7853222199634073, "grad_norm": 0.16285213828086853, "learning_rate": 2.1499033865554765e-05, "loss": 1.0857, "step": 8782 }, { "epoch": 1.7855255133157146, "grad_norm": 0.15616565942764282, "learning_rate": 2.1478694193023493e-05, "loss": 0.968, "step": 8783 }, { "epoch": 1.785728806668022, "grad_norm": 0.15286174416542053, "learning_rate": 2.145835452049222e-05, "loss": 0.9668, "step": 8784 }, { "epoch": 1.7859321000203292, "grad_norm": 0.1499488651752472, "learning_rate": 2.143801484796095e-05, "loss": 1.0297, "step": 8785 }, { "epoch": 1.7861353933726367, "grad_norm": 0.15404723584651947, "learning_rate": 2.1417675175429676e-05, "loss": 1.0174, "step": 8786 }, { "epoch": 1.7863386867249442, "grad_norm": 0.15587931871414185, "learning_rate": 2.1397335502898404e-05, "loss": 1.0931, "step": 8787 }, { "epoch": 1.7865419800772515, "grad_norm": 0.17823952436447144, "learning_rate": 2.1376995830367132e-05, "loss": 1.1556, "step": 8788 }, { "epoch": 1.7867452734295588, "grad_norm": 0.14788757264614105, "learning_rate": 2.135665615783586e-05, "loss": 0.9369, "step": 8789 }, { "epoch": 1.786948566781866, "grad_norm": 0.16817356646060944, "learning_rate": 2.1336316485304588e-05, "loss": 1.0665, "step": 8790 }, { "epoch": 1.7871518601341736, "grad_norm": 0.16087807714939117, "learning_rate": 2.1315976812773316e-05, "loss": 0.8828, "step": 8791 }, { "epoch": 1.787355153486481, "grad_norm": 0.15371643006801605, "learning_rate": 2.1295637140242043e-05, "loss": 1.0907, "step": 8792 }, { "epoch": 1.7875584468387884, "grad_norm": 0.15221457183361053, "learning_rate": 2.127529746771077e-05, "loss": 1.0855, "step": 8793 }, { "epoch": 1.7877617401910957, "grad_norm": 0.1570027768611908, "learning_rate": 2.12549577951795e-05, "loss": 0.9394, "step": 8794 }, { "epoch": 1.787965033543403, "grad_norm": 0.14887520670890808, "learning_rate": 2.1234618122648227e-05, "loss": 0.9996, "step": 8795 }, { "epoch": 1.7881683268957105, "grad_norm": 0.14103272557258606, "learning_rate": 2.1214278450116955e-05, "loss": 0.9285, "step": 8796 }, { "epoch": 1.788371620248018, "grad_norm": 0.16285701096057892, "learning_rate": 2.1193938777585683e-05, "loss": 1.055, "step": 8797 }, { "epoch": 1.7885749136003253, "grad_norm": 0.17137975990772247, "learning_rate": 2.117359910505441e-05, "loss": 1.1223, "step": 8798 }, { "epoch": 1.7887782069526326, "grad_norm": 0.1539456695318222, "learning_rate": 2.115325943252314e-05, "loss": 1.0376, "step": 8799 }, { "epoch": 1.78898150030494, "grad_norm": 0.1618257611989975, "learning_rate": 2.1132919759991866e-05, "loss": 0.959, "step": 8800 }, { "epoch": 1.7891847936572474, "grad_norm": 0.15301983058452606, "learning_rate": 2.1112580087460594e-05, "loss": 0.9978, "step": 8801 }, { "epoch": 1.7893880870095549, "grad_norm": 0.16895170509815216, "learning_rate": 2.1092240414929322e-05, "loss": 1.1444, "step": 8802 }, { "epoch": 1.7895913803618622, "grad_norm": 0.14772938191890717, "learning_rate": 2.107190074239805e-05, "loss": 0.9383, "step": 8803 }, { "epoch": 1.7897946737141694, "grad_norm": 0.16067442297935486, "learning_rate": 2.1051561069866778e-05, "loss": 1.1154, "step": 8804 }, { "epoch": 1.789997967066477, "grad_norm": 0.15227656066417694, "learning_rate": 2.1031221397335505e-05, "loss": 0.9473, "step": 8805 }, { "epoch": 1.7902012604187845, "grad_norm": 0.15492786467075348, "learning_rate": 2.1010881724804233e-05, "loss": 0.9902, "step": 8806 }, { "epoch": 1.7904045537710918, "grad_norm": 0.14794185757637024, "learning_rate": 2.099054205227296e-05, "loss": 0.9328, "step": 8807 }, { "epoch": 1.790607847123399, "grad_norm": 0.1578792929649353, "learning_rate": 2.097020237974169e-05, "loss": 0.9566, "step": 8808 }, { "epoch": 1.7908111404757063, "grad_norm": 0.15910150110721588, "learning_rate": 2.0949862707210417e-05, "loss": 0.9592, "step": 8809 }, { "epoch": 1.7910144338280138, "grad_norm": 0.16889356076717377, "learning_rate": 2.0929523034679145e-05, "loss": 1.1506, "step": 8810 }, { "epoch": 1.7912177271803214, "grad_norm": 0.16163448989391327, "learning_rate": 2.0909183362147873e-05, "loss": 0.9897, "step": 8811 }, { "epoch": 1.7914210205326286, "grad_norm": 0.1594909131526947, "learning_rate": 2.08888436896166e-05, "loss": 1.1839, "step": 8812 }, { "epoch": 1.791624313884936, "grad_norm": 0.15714511275291443, "learning_rate": 2.0868504017085325e-05, "loss": 1.0549, "step": 8813 }, { "epoch": 1.7918276072372432, "grad_norm": 0.16642676293849945, "learning_rate": 2.0848164344554053e-05, "loss": 0.9891, "step": 8814 }, { "epoch": 1.7920309005895507, "grad_norm": 0.16248299181461334, "learning_rate": 2.082782467202278e-05, "loss": 1.0677, "step": 8815 }, { "epoch": 1.7922341939418582, "grad_norm": 0.13689176738262177, "learning_rate": 2.080748499949151e-05, "loss": 0.8174, "step": 8816 }, { "epoch": 1.7924374872941655, "grad_norm": 0.1745375394821167, "learning_rate": 2.0787145326960236e-05, "loss": 1.0926, "step": 8817 }, { "epoch": 1.7926407806464728, "grad_norm": 0.15878495573997498, "learning_rate": 2.0766805654428964e-05, "loss": 1.0955, "step": 8818 }, { "epoch": 1.79284407399878, "grad_norm": 0.16434980928897858, "learning_rate": 2.0746465981897692e-05, "loss": 1.1102, "step": 8819 }, { "epoch": 1.7930473673510876, "grad_norm": 0.1528177410364151, "learning_rate": 2.072612630936642e-05, "loss": 0.9802, "step": 8820 }, { "epoch": 1.7932506607033951, "grad_norm": 0.13742928206920624, "learning_rate": 2.0705786636835148e-05, "loss": 0.8088, "step": 8821 }, { "epoch": 1.7934539540557024, "grad_norm": 0.1730477511882782, "learning_rate": 2.0685446964303875e-05, "loss": 1.0604, "step": 8822 }, { "epoch": 1.7936572474080097, "grad_norm": 0.15058083832263947, "learning_rate": 2.0665107291772603e-05, "loss": 1.0337, "step": 8823 }, { "epoch": 1.793860540760317, "grad_norm": 0.156558096408844, "learning_rate": 2.064476761924133e-05, "loss": 0.886, "step": 8824 }, { "epoch": 1.7940638341126245, "grad_norm": 0.16593307256698608, "learning_rate": 2.062442794671006e-05, "loss": 1.081, "step": 8825 }, { "epoch": 1.794267127464932, "grad_norm": 0.1541585624217987, "learning_rate": 2.0604088274178787e-05, "loss": 0.8783, "step": 8826 }, { "epoch": 1.7944704208172393, "grad_norm": 0.1573958843946457, "learning_rate": 2.0583748601647515e-05, "loss": 1.0143, "step": 8827 }, { "epoch": 1.7946737141695466, "grad_norm": 0.16920708119869232, "learning_rate": 2.0563408929116243e-05, "loss": 1.1278, "step": 8828 }, { "epoch": 1.794877007521854, "grad_norm": 0.17025700211524963, "learning_rate": 2.054306925658497e-05, "loss": 1.1583, "step": 8829 }, { "epoch": 1.7950803008741614, "grad_norm": 0.15445561707019806, "learning_rate": 2.0522729584053698e-05, "loss": 0.9656, "step": 8830 }, { "epoch": 1.7952835942264689, "grad_norm": 0.1617354154586792, "learning_rate": 2.0502389911522426e-05, "loss": 1.0914, "step": 8831 }, { "epoch": 1.7954868875787762, "grad_norm": 0.16941453516483307, "learning_rate": 2.0482050238991154e-05, "loss": 1.0411, "step": 8832 }, { "epoch": 1.7956901809310835, "grad_norm": 0.1707964837551117, "learning_rate": 2.046171056645988e-05, "loss": 1.1742, "step": 8833 }, { "epoch": 1.795893474283391, "grad_norm": 0.15087740123271942, "learning_rate": 2.0441370893928606e-05, "loss": 0.9197, "step": 8834 }, { "epoch": 1.7960967676356985, "grad_norm": 0.15882714092731476, "learning_rate": 2.0421031221397334e-05, "loss": 1.003, "step": 8835 }, { "epoch": 1.7963000609880058, "grad_norm": 0.1605544090270996, "learning_rate": 2.0400691548866062e-05, "loss": 1.0576, "step": 8836 }, { "epoch": 1.796503354340313, "grad_norm": 0.17075075209140778, "learning_rate": 2.038035187633479e-05, "loss": 1.1054, "step": 8837 }, { "epoch": 1.7967066476926203, "grad_norm": 0.17343316972255707, "learning_rate": 2.0360012203803518e-05, "loss": 1.0677, "step": 8838 }, { "epoch": 1.7969099410449278, "grad_norm": 0.15545468032360077, "learning_rate": 2.0339672531272246e-05, "loss": 0.9689, "step": 8839 }, { "epoch": 1.7971132343972354, "grad_norm": 0.16377407312393188, "learning_rate": 2.0319332858740973e-05, "loss": 1.0864, "step": 8840 }, { "epoch": 1.7973165277495426, "grad_norm": 0.16366270184516907, "learning_rate": 2.02989931862097e-05, "loss": 1.0547, "step": 8841 }, { "epoch": 1.79751982110185, "grad_norm": 0.1693117767572403, "learning_rate": 2.027865351367843e-05, "loss": 0.9835, "step": 8842 }, { "epoch": 1.7977231144541572, "grad_norm": 0.15756477415561676, "learning_rate": 2.0258313841147157e-05, "loss": 1.0104, "step": 8843 }, { "epoch": 1.7979264078064647, "grad_norm": 0.14544452726840973, "learning_rate": 2.0237974168615885e-05, "loss": 0.9727, "step": 8844 }, { "epoch": 1.7981297011587722, "grad_norm": 0.16249243915081024, "learning_rate": 2.0217634496084613e-05, "loss": 0.9866, "step": 8845 }, { "epoch": 1.7983329945110795, "grad_norm": 0.14533188939094543, "learning_rate": 2.019729482355334e-05, "loss": 0.8947, "step": 8846 }, { "epoch": 1.7985362878633868, "grad_norm": 0.16754688322544098, "learning_rate": 2.017695515102207e-05, "loss": 1.1544, "step": 8847 }, { "epoch": 1.798739581215694, "grad_norm": 0.16353453695774078, "learning_rate": 2.01566154784908e-05, "loss": 1.0029, "step": 8848 }, { "epoch": 1.7989428745680016, "grad_norm": 0.15536530315876007, "learning_rate": 2.0136275805959527e-05, "loss": 0.9951, "step": 8849 }, { "epoch": 1.7991461679203091, "grad_norm": 0.15725290775299072, "learning_rate": 2.0115936133428255e-05, "loss": 0.9265, "step": 8850 }, { "epoch": 1.7993494612726164, "grad_norm": 0.16195252537727356, "learning_rate": 2.0095596460896983e-05, "loss": 1.0811, "step": 8851 }, { "epoch": 1.7995527546249237, "grad_norm": 0.15696530044078827, "learning_rate": 2.007525678836571e-05, "loss": 1.0009, "step": 8852 }, { "epoch": 1.799756047977231, "grad_norm": 0.15649127960205078, "learning_rate": 2.005491711583444e-05, "loss": 0.9286, "step": 8853 }, { "epoch": 1.7999593413295385, "grad_norm": 0.1716242879629135, "learning_rate": 2.0034577443303163e-05, "loss": 1.1251, "step": 8854 }, { "epoch": 1.800162634681846, "grad_norm": 0.17289458215236664, "learning_rate": 2.001423777077189e-05, "loss": 1.0008, "step": 8855 }, { "epoch": 1.8003659280341533, "grad_norm": 0.17268453538417816, "learning_rate": 1.999389809824062e-05, "loss": 1.0243, "step": 8856 }, { "epoch": 1.8005692213864606, "grad_norm": 0.14764517545700073, "learning_rate": 1.9973558425709347e-05, "loss": 0.8962, "step": 8857 }, { "epoch": 1.800772514738768, "grad_norm": 0.15908636152744293, "learning_rate": 1.9953218753178075e-05, "loss": 0.9935, "step": 8858 }, { "epoch": 1.8009758080910754, "grad_norm": 0.16569429636001587, "learning_rate": 1.9932879080646803e-05, "loss": 1.0439, "step": 8859 }, { "epoch": 1.801179101443383, "grad_norm": 0.15687763690948486, "learning_rate": 1.991253940811553e-05, "loss": 0.9501, "step": 8860 }, { "epoch": 1.8013823947956902, "grad_norm": 0.17330986261367798, "learning_rate": 1.9892199735584258e-05, "loss": 0.9937, "step": 8861 }, { "epoch": 1.8015856881479975, "grad_norm": 0.14336565136909485, "learning_rate": 1.9871860063052986e-05, "loss": 0.8868, "step": 8862 }, { "epoch": 1.801788981500305, "grad_norm": 0.16820016503334045, "learning_rate": 1.9851520390521714e-05, "loss": 1.0598, "step": 8863 }, { "epoch": 1.8019922748526125, "grad_norm": 0.14179497957229614, "learning_rate": 1.9831180717990442e-05, "loss": 0.8291, "step": 8864 }, { "epoch": 1.8021955682049198, "grad_norm": 0.15502776205539703, "learning_rate": 1.981084104545917e-05, "loss": 0.929, "step": 8865 }, { "epoch": 1.802398861557227, "grad_norm": 0.15490376949310303, "learning_rate": 1.9790501372927897e-05, "loss": 0.9308, "step": 8866 }, { "epoch": 1.8026021549095343, "grad_norm": 0.15557946264743805, "learning_rate": 1.9770161700396625e-05, "loss": 1.1182, "step": 8867 }, { "epoch": 1.8028054482618419, "grad_norm": 0.18834523856639862, "learning_rate": 1.9749822027865353e-05, "loss": 1.0342, "step": 8868 }, { "epoch": 1.8030087416141494, "grad_norm": 0.156112402677536, "learning_rate": 1.972948235533408e-05, "loss": 1.0748, "step": 8869 }, { "epoch": 1.8032120349664567, "grad_norm": 0.17047272622585297, "learning_rate": 1.970914268280281e-05, "loss": 1.1164, "step": 8870 }, { "epoch": 1.803415328318764, "grad_norm": 0.1672130972146988, "learning_rate": 1.9688803010271537e-05, "loss": 1.2204, "step": 8871 }, { "epoch": 1.8036186216710712, "grad_norm": 0.150632843375206, "learning_rate": 1.9668463337740265e-05, "loss": 0.937, "step": 8872 }, { "epoch": 1.8038219150233787, "grad_norm": 0.15371845662593842, "learning_rate": 1.9648123665208992e-05, "loss": 0.9159, "step": 8873 }, { "epoch": 1.8040252083756863, "grad_norm": 0.17060934007167816, "learning_rate": 1.962778399267772e-05, "loss": 1.1745, "step": 8874 }, { "epoch": 1.8042285017279935, "grad_norm": 0.14167384803295135, "learning_rate": 1.9607444320146445e-05, "loss": 0.9273, "step": 8875 }, { "epoch": 1.8044317950803008, "grad_norm": 0.13884446024894714, "learning_rate": 1.9587104647615173e-05, "loss": 0.9846, "step": 8876 }, { "epoch": 1.8046350884326081, "grad_norm": 0.1472717523574829, "learning_rate": 1.95667649750839e-05, "loss": 0.9832, "step": 8877 }, { "epoch": 1.8048383817849156, "grad_norm": 0.16537097096443176, "learning_rate": 1.9546425302552628e-05, "loss": 1.0313, "step": 8878 }, { "epoch": 1.8050416751372231, "grad_norm": 0.1480054408311844, "learning_rate": 1.9526085630021356e-05, "loss": 0.8571, "step": 8879 }, { "epoch": 1.8052449684895304, "grad_norm": 0.1856297105550766, "learning_rate": 1.9505745957490084e-05, "loss": 1.1592, "step": 8880 }, { "epoch": 1.8054482618418377, "grad_norm": 0.16036227345466614, "learning_rate": 1.9485406284958812e-05, "loss": 0.8969, "step": 8881 }, { "epoch": 1.805651555194145, "grad_norm": 0.16403596103191376, "learning_rate": 1.946506661242754e-05, "loss": 0.9978, "step": 8882 }, { "epoch": 1.8058548485464525, "grad_norm": 0.17799678444862366, "learning_rate": 1.9444726939896268e-05, "loss": 1.0462, "step": 8883 }, { "epoch": 1.80605814189876, "grad_norm": 0.1569959819316864, "learning_rate": 1.9424387267364995e-05, "loss": 1.0188, "step": 8884 }, { "epoch": 1.8062614352510673, "grad_norm": 0.16799910366535187, "learning_rate": 1.9404047594833723e-05, "loss": 1.1152, "step": 8885 }, { "epoch": 1.8064647286033746, "grad_norm": 0.15117068588733673, "learning_rate": 1.938370792230245e-05, "loss": 0.8605, "step": 8886 }, { "epoch": 1.806668021955682, "grad_norm": 0.16264118254184723, "learning_rate": 1.936336824977118e-05, "loss": 1.0035, "step": 8887 }, { "epoch": 1.8068713153079894, "grad_norm": 0.15436705946922302, "learning_rate": 1.9343028577239907e-05, "loss": 0.9892, "step": 8888 }, { "epoch": 1.807074608660297, "grad_norm": 0.15030941367149353, "learning_rate": 1.9322688904708635e-05, "loss": 0.9024, "step": 8889 }, { "epoch": 1.8072779020126042, "grad_norm": 0.16907203197479248, "learning_rate": 1.9302349232177362e-05, "loss": 1.0877, "step": 8890 }, { "epoch": 1.8074811953649115, "grad_norm": 0.16450457274913788, "learning_rate": 1.928200955964609e-05, "loss": 1.1109, "step": 8891 }, { "epoch": 1.807684488717219, "grad_norm": 0.1898254007101059, "learning_rate": 1.9261669887114818e-05, "loss": 1.3059, "step": 8892 }, { "epoch": 1.8078877820695263, "grad_norm": 0.1547909677028656, "learning_rate": 1.9241330214583546e-05, "loss": 0.9279, "step": 8893 }, { "epoch": 1.8080910754218338, "grad_norm": 0.1567441076040268, "learning_rate": 1.9220990542052274e-05, "loss": 1.0329, "step": 8894 }, { "epoch": 1.808294368774141, "grad_norm": 0.15896877646446228, "learning_rate": 1.9200650869521002e-05, "loss": 0.9831, "step": 8895 }, { "epoch": 1.8084976621264484, "grad_norm": 0.1571255475282669, "learning_rate": 1.918031119698973e-05, "loss": 1.057, "step": 8896 }, { "epoch": 1.8087009554787559, "grad_norm": 0.1545611023902893, "learning_rate": 1.9159971524458457e-05, "loss": 0.9824, "step": 8897 }, { "epoch": 1.8089042488310634, "grad_norm": 0.16258205473423004, "learning_rate": 1.9139631851927185e-05, "loss": 1.0774, "step": 8898 }, { "epoch": 1.8091075421833707, "grad_norm": 0.16286814212799072, "learning_rate": 1.9119292179395913e-05, "loss": 0.9817, "step": 8899 }, { "epoch": 1.809310835535678, "grad_norm": 0.15677201747894287, "learning_rate": 1.909895250686464e-05, "loss": 0.8562, "step": 8900 }, { "epoch": 1.8095141288879852, "grad_norm": 0.14640593528747559, "learning_rate": 1.907861283433337e-05, "loss": 0.951, "step": 8901 }, { "epoch": 1.8097174222402928, "grad_norm": 0.1507757604122162, "learning_rate": 1.9058273161802097e-05, "loss": 0.9395, "step": 8902 }, { "epoch": 1.8099207155926003, "grad_norm": 0.1503894180059433, "learning_rate": 1.9037933489270825e-05, "loss": 0.974, "step": 8903 }, { "epoch": 1.8101240089449075, "grad_norm": 0.16743044555187225, "learning_rate": 1.9017593816739552e-05, "loss": 0.9921, "step": 8904 }, { "epoch": 1.8103273022972148, "grad_norm": 0.17370130121707916, "learning_rate": 1.899725414420828e-05, "loss": 1.1982, "step": 8905 }, { "epoch": 1.8105305956495221, "grad_norm": 0.16445770859718323, "learning_rate": 1.8976914471677008e-05, "loss": 1.1307, "step": 8906 }, { "epoch": 1.8107338890018296, "grad_norm": 0.15819743275642395, "learning_rate": 1.8956574799145736e-05, "loss": 0.9811, "step": 8907 }, { "epoch": 1.8109371823541371, "grad_norm": 0.14742504060268402, "learning_rate": 1.8936235126614464e-05, "loss": 1.0165, "step": 8908 }, { "epoch": 1.8111404757064444, "grad_norm": 0.18553227186203003, "learning_rate": 1.891589545408319e-05, "loss": 1.068, "step": 8909 }, { "epoch": 1.8113437690587517, "grad_norm": 0.16492341458797455, "learning_rate": 1.889555578155192e-05, "loss": 1.0159, "step": 8910 }, { "epoch": 1.811547062411059, "grad_norm": 0.1511560082435608, "learning_rate": 1.8875216109020647e-05, "loss": 0.8532, "step": 8911 }, { "epoch": 1.8117503557633665, "grad_norm": 0.156644806265831, "learning_rate": 1.8854876436489375e-05, "loss": 1.0219, "step": 8912 }, { "epoch": 1.811953649115674, "grad_norm": 0.14762692153453827, "learning_rate": 1.8834536763958103e-05, "loss": 0.9744, "step": 8913 }, { "epoch": 1.8121569424679813, "grad_norm": 0.15013450384140015, "learning_rate": 1.881419709142683e-05, "loss": 0.9889, "step": 8914 }, { "epoch": 1.8123602358202886, "grad_norm": 0.1485605090856552, "learning_rate": 1.879385741889556e-05, "loss": 1.0807, "step": 8915 }, { "epoch": 1.812563529172596, "grad_norm": 0.1600959300994873, "learning_rate": 1.8773517746364283e-05, "loss": 1.0257, "step": 8916 }, { "epoch": 1.8127668225249034, "grad_norm": 0.17865175008773804, "learning_rate": 1.875317807383301e-05, "loss": 1.2575, "step": 8917 }, { "epoch": 1.812970115877211, "grad_norm": 0.16837240755558014, "learning_rate": 1.873283840130174e-05, "loss": 1.1453, "step": 8918 }, { "epoch": 1.8131734092295182, "grad_norm": 0.1615448296070099, "learning_rate": 1.8712498728770467e-05, "loss": 1.0434, "step": 8919 }, { "epoch": 1.8133767025818255, "grad_norm": 0.17639584839344025, "learning_rate": 1.8692159056239195e-05, "loss": 1.1924, "step": 8920 }, { "epoch": 1.813579995934133, "grad_norm": 0.16046710312366486, "learning_rate": 1.8671819383707922e-05, "loss": 1.0064, "step": 8921 }, { "epoch": 1.8137832892864403, "grad_norm": 0.16105994582176208, "learning_rate": 1.865147971117665e-05, "loss": 0.9938, "step": 8922 }, { "epoch": 1.8139865826387478, "grad_norm": 0.15286701917648315, "learning_rate": 1.8631140038645378e-05, "loss": 0.914, "step": 8923 }, { "epoch": 1.814189875991055, "grad_norm": 0.16728432476520538, "learning_rate": 1.8610800366114106e-05, "loss": 1.1705, "step": 8924 }, { "epoch": 1.8143931693433624, "grad_norm": 0.1565847396850586, "learning_rate": 1.8590460693582834e-05, "loss": 0.956, "step": 8925 }, { "epoch": 1.8145964626956699, "grad_norm": 0.15740768611431122, "learning_rate": 1.857012102105156e-05, "loss": 1.1312, "step": 8926 }, { "epoch": 1.8147997560479774, "grad_norm": 0.1441873013973236, "learning_rate": 1.854978134852029e-05, "loss": 0.9478, "step": 8927 }, { "epoch": 1.8150030494002847, "grad_norm": 0.18066157400608063, "learning_rate": 1.8529441675989017e-05, "loss": 1.262, "step": 8928 }, { "epoch": 1.815206342752592, "grad_norm": 0.14439338445663452, "learning_rate": 1.8509102003457745e-05, "loss": 0.8838, "step": 8929 }, { "epoch": 1.8154096361048992, "grad_norm": 0.15646328032016754, "learning_rate": 1.8488762330926473e-05, "loss": 1.055, "step": 8930 }, { "epoch": 1.8156129294572068, "grad_norm": 0.15137188136577606, "learning_rate": 1.84684226583952e-05, "loss": 0.8911, "step": 8931 }, { "epoch": 1.8158162228095143, "grad_norm": 0.1680094599723816, "learning_rate": 1.844808298586393e-05, "loss": 1.1605, "step": 8932 }, { "epoch": 1.8160195161618216, "grad_norm": 0.12992525100708008, "learning_rate": 1.8427743313332657e-05, "loss": 0.8465, "step": 8933 }, { "epoch": 1.8162228095141288, "grad_norm": 0.16572974622249603, "learning_rate": 1.8407403640801384e-05, "loss": 1.0854, "step": 8934 }, { "epoch": 1.8164261028664361, "grad_norm": 0.18794788420200348, "learning_rate": 1.8387063968270112e-05, "loss": 1.2139, "step": 8935 }, { "epoch": 1.8166293962187436, "grad_norm": 0.15362529456615448, "learning_rate": 1.836672429573884e-05, "loss": 0.8442, "step": 8936 }, { "epoch": 1.8168326895710512, "grad_norm": 0.16475287079811096, "learning_rate": 1.8346384623207565e-05, "loss": 1.0102, "step": 8937 }, { "epoch": 1.8170359829233584, "grad_norm": 0.1559743881225586, "learning_rate": 1.8326044950676292e-05, "loss": 1.0221, "step": 8938 }, { "epoch": 1.8172392762756657, "grad_norm": 0.1590835154056549, "learning_rate": 1.830570527814502e-05, "loss": 1.1225, "step": 8939 }, { "epoch": 1.817442569627973, "grad_norm": 0.16373057663440704, "learning_rate": 1.8285365605613748e-05, "loss": 1.0443, "step": 8940 }, { "epoch": 1.8176458629802805, "grad_norm": 0.17291580140590668, "learning_rate": 1.8265025933082476e-05, "loss": 1.1562, "step": 8941 }, { "epoch": 1.817849156332588, "grad_norm": 0.15381531417369843, "learning_rate": 1.8244686260551204e-05, "loss": 1.127, "step": 8942 }, { "epoch": 1.8180524496848953, "grad_norm": 0.15971028804779053, "learning_rate": 1.8224346588019932e-05, "loss": 0.9848, "step": 8943 }, { "epoch": 1.8182557430372026, "grad_norm": 0.16836349666118622, "learning_rate": 1.820400691548866e-05, "loss": 0.9812, "step": 8944 }, { "epoch": 1.81845903638951, "grad_norm": 0.16654492914676666, "learning_rate": 1.8183667242957387e-05, "loss": 1.0614, "step": 8945 }, { "epoch": 1.8186623297418174, "grad_norm": 0.14952759444713593, "learning_rate": 1.8163327570426115e-05, "loss": 1.0625, "step": 8946 }, { "epoch": 1.818865623094125, "grad_norm": 0.14578181505203247, "learning_rate": 1.8142987897894843e-05, "loss": 0.9772, "step": 8947 }, { "epoch": 1.8190689164464322, "grad_norm": 0.15989157557487488, "learning_rate": 1.8122648225363574e-05, "loss": 0.9292, "step": 8948 }, { "epoch": 1.8192722097987395, "grad_norm": 0.1555004119873047, "learning_rate": 1.8102308552832302e-05, "loss": 1.0091, "step": 8949 }, { "epoch": 1.819475503151047, "grad_norm": 0.15522776544094086, "learning_rate": 1.808196888030103e-05, "loss": 1.1034, "step": 8950 }, { "epoch": 1.8196787965033543, "grad_norm": 0.15725792944431305, "learning_rate": 1.8061629207769758e-05, "loss": 0.9926, "step": 8951 }, { "epoch": 1.8198820898556618, "grad_norm": 0.1630498319864273, "learning_rate": 1.8041289535238486e-05, "loss": 0.9583, "step": 8952 }, { "epoch": 1.820085383207969, "grad_norm": 0.1632407158613205, "learning_rate": 1.8020949862707214e-05, "loss": 0.908, "step": 8953 }, { "epoch": 1.8202886765602764, "grad_norm": 0.15952810645103455, "learning_rate": 1.800061019017594e-05, "loss": 1.0008, "step": 8954 }, { "epoch": 1.8204919699125839, "grad_norm": 0.1553991436958313, "learning_rate": 1.798027051764467e-05, "loss": 1.0055, "step": 8955 }, { "epoch": 1.8206952632648914, "grad_norm": 0.17852774262428284, "learning_rate": 1.7959930845113397e-05, "loss": 1.0887, "step": 8956 }, { "epoch": 1.8208985566171987, "grad_norm": 0.15115611255168915, "learning_rate": 1.7939591172582125e-05, "loss": 0.9853, "step": 8957 }, { "epoch": 1.821101849969506, "grad_norm": 0.1544339656829834, "learning_rate": 1.791925150005085e-05, "loss": 1.0697, "step": 8958 }, { "epoch": 1.8213051433218133, "grad_norm": 0.17755717039108276, "learning_rate": 1.7898911827519577e-05, "loss": 1.1082, "step": 8959 }, { "epoch": 1.8215084366741208, "grad_norm": 0.1783563494682312, "learning_rate": 1.7878572154988305e-05, "loss": 1.0226, "step": 8960 }, { "epoch": 1.8217117300264283, "grad_norm": 0.1660248190164566, "learning_rate": 1.7858232482457033e-05, "loss": 0.9879, "step": 8961 }, { "epoch": 1.8219150233787356, "grad_norm": 0.16820134222507477, "learning_rate": 1.783789280992576e-05, "loss": 0.9398, "step": 8962 }, { "epoch": 1.8221183167310429, "grad_norm": 0.1712360978126526, "learning_rate": 1.781755313739449e-05, "loss": 0.8956, "step": 8963 }, { "epoch": 1.8223216100833501, "grad_norm": 0.15812447667121887, "learning_rate": 1.7797213464863217e-05, "loss": 0.9687, "step": 8964 }, { "epoch": 1.8225249034356577, "grad_norm": 0.16239595413208008, "learning_rate": 1.7776873792331944e-05, "loss": 1.1458, "step": 8965 }, { "epoch": 1.8227281967879652, "grad_norm": 0.14732626080513, "learning_rate": 1.7756534119800672e-05, "loss": 0.8479, "step": 8966 }, { "epoch": 1.8229314901402724, "grad_norm": 0.14974117279052734, "learning_rate": 1.77361944472694e-05, "loss": 0.8848, "step": 8967 }, { "epoch": 1.8231347834925797, "grad_norm": 0.13316753506660461, "learning_rate": 1.7715854774738128e-05, "loss": 0.9169, "step": 8968 }, { "epoch": 1.823338076844887, "grad_norm": 0.17134083807468414, "learning_rate": 1.7695515102206856e-05, "loss": 1.1472, "step": 8969 }, { "epoch": 1.8235413701971945, "grad_norm": 0.16951067745685577, "learning_rate": 1.7675175429675584e-05, "loss": 1.1341, "step": 8970 }, { "epoch": 1.823744663549502, "grad_norm": 0.18364255130290985, "learning_rate": 1.765483575714431e-05, "loss": 1.2695, "step": 8971 }, { "epoch": 1.8239479569018093, "grad_norm": 0.16521167755126953, "learning_rate": 1.763449608461304e-05, "loss": 0.9046, "step": 8972 }, { "epoch": 1.8241512502541166, "grad_norm": 0.16237077116966248, "learning_rate": 1.7614156412081767e-05, "loss": 0.9214, "step": 8973 }, { "epoch": 1.824354543606424, "grad_norm": 0.15726378560066223, "learning_rate": 1.7593816739550495e-05, "loss": 0.9602, "step": 8974 }, { "epoch": 1.8245578369587314, "grad_norm": 0.1749069094657898, "learning_rate": 1.7573477067019223e-05, "loss": 1.0964, "step": 8975 }, { "epoch": 1.824761130311039, "grad_norm": 0.1708153784275055, "learning_rate": 1.755313739448795e-05, "loss": 1.1989, "step": 8976 }, { "epoch": 1.8249644236633462, "grad_norm": 0.16547361016273499, "learning_rate": 1.753279772195668e-05, "loss": 0.9141, "step": 8977 }, { "epoch": 1.8251677170156535, "grad_norm": 0.1566082388162613, "learning_rate": 1.7512458049425406e-05, "loss": 0.9451, "step": 8978 }, { "epoch": 1.825371010367961, "grad_norm": 0.15506227314472198, "learning_rate": 1.749211837689413e-05, "loss": 1.0756, "step": 8979 }, { "epoch": 1.8255743037202683, "grad_norm": 0.17131134867668152, "learning_rate": 1.747177870436286e-05, "loss": 1.0886, "step": 8980 }, { "epoch": 1.8257775970725758, "grad_norm": 0.17198830842971802, "learning_rate": 1.7451439031831587e-05, "loss": 1.2699, "step": 8981 }, { "epoch": 1.825980890424883, "grad_norm": 0.16008181869983673, "learning_rate": 1.7431099359300314e-05, "loss": 0.9172, "step": 8982 }, { "epoch": 1.8261841837771904, "grad_norm": 0.1331656277179718, "learning_rate": 1.7410759686769042e-05, "loss": 0.8745, "step": 8983 }, { "epoch": 1.826387477129498, "grad_norm": 0.17219661176204681, "learning_rate": 1.739042001423777e-05, "loss": 1.1453, "step": 8984 }, { "epoch": 1.8265907704818054, "grad_norm": 0.18389520049095154, "learning_rate": 1.7370080341706498e-05, "loss": 1.0553, "step": 8985 }, { "epoch": 1.8267940638341127, "grad_norm": 0.1577422022819519, "learning_rate": 1.7349740669175226e-05, "loss": 1.0115, "step": 8986 }, { "epoch": 1.82699735718642, "grad_norm": 0.17195899784564972, "learning_rate": 1.7329400996643954e-05, "loss": 1.1144, "step": 8987 }, { "epoch": 1.8272006505387273, "grad_norm": 0.14162033796310425, "learning_rate": 1.730906132411268e-05, "loss": 0.8574, "step": 8988 }, { "epoch": 1.8274039438910348, "grad_norm": 0.17170441150665283, "learning_rate": 1.728872165158141e-05, "loss": 1.0028, "step": 8989 }, { "epoch": 1.8276072372433423, "grad_norm": 0.1752428412437439, "learning_rate": 1.7268381979050137e-05, "loss": 1.1323, "step": 8990 }, { "epoch": 1.8278105305956496, "grad_norm": 0.17820240557193756, "learning_rate": 1.7248042306518865e-05, "loss": 1.0194, "step": 8991 }, { "epoch": 1.8280138239479569, "grad_norm": 0.16974826157093048, "learning_rate": 1.7227702633987593e-05, "loss": 1.1138, "step": 8992 }, { "epoch": 1.8282171173002641, "grad_norm": 0.16789694130420685, "learning_rate": 1.720736296145632e-05, "loss": 1.1089, "step": 8993 }, { "epoch": 1.8284204106525717, "grad_norm": 0.17885202169418335, "learning_rate": 1.718702328892505e-05, "loss": 1.063, "step": 8994 }, { "epoch": 1.8286237040048792, "grad_norm": 0.1743476241827011, "learning_rate": 1.7166683616393776e-05, "loss": 1.0927, "step": 8995 }, { "epoch": 1.8288269973571865, "grad_norm": 0.16437438130378723, "learning_rate": 1.7146343943862504e-05, "loss": 1.0493, "step": 8996 }, { "epoch": 1.8290302907094937, "grad_norm": 0.18604162335395813, "learning_rate": 1.7126004271331232e-05, "loss": 1.0376, "step": 8997 }, { "epoch": 1.829233584061801, "grad_norm": 0.16956648230552673, "learning_rate": 1.710566459879996e-05, "loss": 1.082, "step": 8998 }, { "epoch": 1.8294368774141085, "grad_norm": 0.19355973601341248, "learning_rate": 1.7085324926268688e-05, "loss": 1.3312, "step": 8999 }, { "epoch": 1.829640170766416, "grad_norm": 0.15543140470981598, "learning_rate": 1.7064985253737416e-05, "loss": 1.0279, "step": 9000 }, { "epoch": 1.8298434641187233, "grad_norm": 0.1573321372270584, "learning_rate": 1.7044645581206144e-05, "loss": 1.0082, "step": 9001 }, { "epoch": 1.8300467574710306, "grad_norm": 0.16695843636989594, "learning_rate": 1.702430590867487e-05, "loss": 0.9913, "step": 9002 }, { "epoch": 1.830250050823338, "grad_norm": 0.1667664647102356, "learning_rate": 1.70039662361436e-05, "loss": 1.1442, "step": 9003 }, { "epoch": 1.8304533441756454, "grad_norm": 0.1707717329263687, "learning_rate": 1.6983626563612327e-05, "loss": 1.0349, "step": 9004 }, { "epoch": 1.830656637527953, "grad_norm": 0.15421181917190552, "learning_rate": 1.6963286891081055e-05, "loss": 1.0083, "step": 9005 }, { "epoch": 1.8308599308802602, "grad_norm": 0.15851132571697235, "learning_rate": 1.6942947218549783e-05, "loss": 0.9097, "step": 9006 }, { "epoch": 1.8310632242325675, "grad_norm": 0.1705033779144287, "learning_rate": 1.692260754601851e-05, "loss": 1.1789, "step": 9007 }, { "epoch": 1.831266517584875, "grad_norm": 0.17649275064468384, "learning_rate": 1.690226787348724e-05, "loss": 1.0955, "step": 9008 }, { "epoch": 1.8314698109371823, "grad_norm": 0.1793653517961502, "learning_rate": 1.6881928200955966e-05, "loss": 1.1375, "step": 9009 }, { "epoch": 1.8316731042894898, "grad_norm": 0.1481037735939026, "learning_rate": 1.6861588528424694e-05, "loss": 0.8162, "step": 9010 }, { "epoch": 1.831876397641797, "grad_norm": 0.1615297645330429, "learning_rate": 1.6841248855893422e-05, "loss": 0.9544, "step": 9011 }, { "epoch": 1.8320796909941044, "grad_norm": 0.17553842067718506, "learning_rate": 1.682090918336215e-05, "loss": 1.0999, "step": 9012 }, { "epoch": 1.832282984346412, "grad_norm": 0.17471405863761902, "learning_rate": 1.6800569510830878e-05, "loss": 1.1607, "step": 9013 }, { "epoch": 1.8324862776987194, "grad_norm": 0.15131913125514984, "learning_rate": 1.6780229838299606e-05, "loss": 0.9296, "step": 9014 }, { "epoch": 1.8326895710510267, "grad_norm": 0.152798131108284, "learning_rate": 1.6759890165768333e-05, "loss": 0.9088, "step": 9015 }, { "epoch": 1.832892864403334, "grad_norm": 0.1609133929014206, "learning_rate": 1.673955049323706e-05, "loss": 1.0569, "step": 9016 }, { "epoch": 1.8330961577556413, "grad_norm": 0.1573030948638916, "learning_rate": 1.671921082070579e-05, "loss": 0.9416, "step": 9017 }, { "epoch": 1.8332994511079488, "grad_norm": 0.15287207067012787, "learning_rate": 1.6698871148174517e-05, "loss": 0.9777, "step": 9018 }, { "epoch": 1.8335027444602563, "grad_norm": 0.17062637209892273, "learning_rate": 1.6678531475643245e-05, "loss": 1.016, "step": 9019 }, { "epoch": 1.8337060378125636, "grad_norm": 0.16493557393550873, "learning_rate": 1.665819180311197e-05, "loss": 0.9817, "step": 9020 }, { "epoch": 1.8339093311648709, "grad_norm": 0.14450013637542725, "learning_rate": 1.6637852130580697e-05, "loss": 0.863, "step": 9021 }, { "epoch": 1.8341126245171782, "grad_norm": 0.15231171250343323, "learning_rate": 1.6617512458049425e-05, "loss": 0.913, "step": 9022 }, { "epoch": 1.8343159178694857, "grad_norm": 0.16415618360042572, "learning_rate": 1.6597172785518153e-05, "loss": 0.9384, "step": 9023 }, { "epoch": 1.8345192112217932, "grad_norm": 0.1714603155851364, "learning_rate": 1.657683311298688e-05, "loss": 1.1727, "step": 9024 }, { "epoch": 1.8347225045741005, "grad_norm": 0.16260595619678497, "learning_rate": 1.655649344045561e-05, "loss": 1.0471, "step": 9025 }, { "epoch": 1.8349257979264078, "grad_norm": 0.14569585025310516, "learning_rate": 1.6536153767924336e-05, "loss": 0.8852, "step": 9026 }, { "epoch": 1.835129091278715, "grad_norm": 0.19790101051330566, "learning_rate": 1.6515814095393064e-05, "loss": 1.2657, "step": 9027 }, { "epoch": 1.8353323846310226, "grad_norm": 0.1702183037996292, "learning_rate": 1.6495474422861792e-05, "loss": 1.0662, "step": 9028 }, { "epoch": 1.83553567798333, "grad_norm": 0.14195507764816284, "learning_rate": 1.647513475033052e-05, "loss": 0.8035, "step": 9029 }, { "epoch": 1.8357389713356373, "grad_norm": 0.14451409876346588, "learning_rate": 1.6454795077799248e-05, "loss": 0.9198, "step": 9030 }, { "epoch": 1.8359422646879446, "grad_norm": 0.15659938752651215, "learning_rate": 1.6434455405267976e-05, "loss": 0.9483, "step": 9031 }, { "epoch": 1.836145558040252, "grad_norm": 0.1882718801498413, "learning_rate": 1.6414115732736704e-05, "loss": 1.1386, "step": 9032 }, { "epoch": 1.8363488513925594, "grad_norm": 0.14704629778862, "learning_rate": 1.639377606020543e-05, "loss": 1.0199, "step": 9033 }, { "epoch": 1.836552144744867, "grad_norm": 0.1829056292772293, "learning_rate": 1.637343638767416e-05, "loss": 1.2842, "step": 9034 }, { "epoch": 1.8367554380971742, "grad_norm": 0.15590842068195343, "learning_rate": 1.6353096715142887e-05, "loss": 0.8853, "step": 9035 }, { "epoch": 1.8369587314494815, "grad_norm": 0.16517125070095062, "learning_rate": 1.6332757042611615e-05, "loss": 1.0698, "step": 9036 }, { "epoch": 1.837162024801789, "grad_norm": 0.16537168622016907, "learning_rate": 1.6312417370080343e-05, "loss": 1.0284, "step": 9037 }, { "epoch": 1.8373653181540963, "grad_norm": 0.16812415421009064, "learning_rate": 1.629207769754907e-05, "loss": 1.0749, "step": 9038 }, { "epoch": 1.8375686115064038, "grad_norm": 0.15755677223205566, "learning_rate": 1.62717380250178e-05, "loss": 0.9793, "step": 9039 }, { "epoch": 1.8377719048587111, "grad_norm": 0.17731977999210358, "learning_rate": 1.6251398352486526e-05, "loss": 1.2182, "step": 9040 }, { "epoch": 1.8379751982110184, "grad_norm": 0.15155115723609924, "learning_rate": 1.623105867995525e-05, "loss": 0.924, "step": 9041 }, { "epoch": 1.838178491563326, "grad_norm": 0.1586698591709137, "learning_rate": 1.621071900742398e-05, "loss": 1.0022, "step": 9042 }, { "epoch": 1.8383817849156334, "grad_norm": 0.1634819507598877, "learning_rate": 1.6190379334892706e-05, "loss": 0.9965, "step": 9043 }, { "epoch": 1.8385850782679407, "grad_norm": 0.16715705394744873, "learning_rate": 1.6170039662361434e-05, "loss": 0.9681, "step": 9044 }, { "epoch": 1.838788371620248, "grad_norm": 0.15874379873275757, "learning_rate": 1.6149699989830162e-05, "loss": 1.0051, "step": 9045 }, { "epoch": 1.8389916649725553, "grad_norm": 0.14951036870479584, "learning_rate": 1.612936031729889e-05, "loss": 0.9692, "step": 9046 }, { "epoch": 1.8391949583248628, "grad_norm": 0.15619195997714996, "learning_rate": 1.6109020644767618e-05, "loss": 0.9156, "step": 9047 }, { "epoch": 1.8393982516771703, "grad_norm": 0.15344227850437164, "learning_rate": 1.6088680972236346e-05, "loss": 0.8595, "step": 9048 }, { "epoch": 1.8396015450294776, "grad_norm": 0.18604734539985657, "learning_rate": 1.6068341299705077e-05, "loss": 1.2517, "step": 9049 }, { "epoch": 1.8398048383817849, "grad_norm": 0.15447942912578583, "learning_rate": 1.6048001627173805e-05, "loss": 0.9829, "step": 9050 }, { "epoch": 1.8400081317340922, "grad_norm": 0.16891126334667206, "learning_rate": 1.6027661954642533e-05, "loss": 1.1645, "step": 9051 }, { "epoch": 1.8402114250863997, "grad_norm": 0.14703497290611267, "learning_rate": 1.600732228211126e-05, "loss": 0.8263, "step": 9052 }, { "epoch": 1.8404147184387072, "grad_norm": 0.17574016749858856, "learning_rate": 1.598698260957999e-05, "loss": 1.1363, "step": 9053 }, { "epoch": 1.8406180117910145, "grad_norm": 0.15228979289531708, "learning_rate": 1.5966642937048716e-05, "loss": 0.9455, "step": 9054 }, { "epoch": 1.8408213051433218, "grad_norm": 0.17259180545806885, "learning_rate": 1.5946303264517444e-05, "loss": 1.067, "step": 9055 }, { "epoch": 1.841024598495629, "grad_norm": 0.1767015904188156, "learning_rate": 1.5925963591986172e-05, "loss": 1.0191, "step": 9056 }, { "epoch": 1.8412278918479366, "grad_norm": 0.14119932055473328, "learning_rate": 1.59056239194549e-05, "loss": 0.8344, "step": 9057 }, { "epoch": 1.841431185200244, "grad_norm": 0.16362996399402618, "learning_rate": 1.5885284246923628e-05, "loss": 0.9373, "step": 9058 }, { "epoch": 1.8416344785525514, "grad_norm": 0.1613035500049591, "learning_rate": 1.5864944574392355e-05, "loss": 0.9336, "step": 9059 }, { "epoch": 1.8418377719048586, "grad_norm": 0.14492535591125488, "learning_rate": 1.5844604901861083e-05, "loss": 0.9902, "step": 9060 }, { "epoch": 1.842041065257166, "grad_norm": 0.16309863328933716, "learning_rate": 1.582426522932981e-05, "loss": 0.9691, "step": 9061 }, { "epoch": 1.8422443586094734, "grad_norm": 0.15237070620059967, "learning_rate": 1.5803925556798536e-05, "loss": 0.8869, "step": 9062 }, { "epoch": 1.842447651961781, "grad_norm": 0.15500704944133759, "learning_rate": 1.5783585884267263e-05, "loss": 0.9056, "step": 9063 }, { "epoch": 1.8426509453140882, "grad_norm": 0.17146271467208862, "learning_rate": 1.576324621173599e-05, "loss": 1.1087, "step": 9064 }, { "epoch": 1.8428542386663955, "grad_norm": 0.15846385061740875, "learning_rate": 1.574290653920472e-05, "loss": 1.0098, "step": 9065 }, { "epoch": 1.843057532018703, "grad_norm": 0.16141603887081146, "learning_rate": 1.5722566866673447e-05, "loss": 1.026, "step": 9066 }, { "epoch": 1.8432608253710103, "grad_norm": 0.18051780760288239, "learning_rate": 1.5702227194142175e-05, "loss": 1.1456, "step": 9067 }, { "epoch": 1.8434641187233178, "grad_norm": 0.16684190928936005, "learning_rate": 1.5681887521610903e-05, "loss": 1.1075, "step": 9068 }, { "epoch": 1.8436674120756251, "grad_norm": 0.1568390429019928, "learning_rate": 1.566154784907963e-05, "loss": 0.8981, "step": 9069 }, { "epoch": 1.8438707054279324, "grad_norm": 0.1594381332397461, "learning_rate": 1.564120817654836e-05, "loss": 0.9193, "step": 9070 }, { "epoch": 1.84407399878024, "grad_norm": 0.15885666012763977, "learning_rate": 1.5620868504017086e-05, "loss": 0.9432, "step": 9071 }, { "epoch": 1.8442772921325474, "grad_norm": 0.16251152753829956, "learning_rate": 1.5600528831485814e-05, "loss": 0.9929, "step": 9072 }, { "epoch": 1.8444805854848547, "grad_norm": 0.17088516056537628, "learning_rate": 1.5580189158954542e-05, "loss": 1.0962, "step": 9073 }, { "epoch": 1.844683878837162, "grad_norm": 0.15242163836956024, "learning_rate": 1.555984948642327e-05, "loss": 1.0467, "step": 9074 }, { "epoch": 1.8448871721894693, "grad_norm": 0.15932436287403107, "learning_rate": 1.5539509813891998e-05, "loss": 0.9006, "step": 9075 }, { "epoch": 1.8450904655417768, "grad_norm": 0.14553354680538177, "learning_rate": 1.5519170141360725e-05, "loss": 0.9161, "step": 9076 }, { "epoch": 1.8452937588940843, "grad_norm": 0.16639220714569092, "learning_rate": 1.5498830468829453e-05, "loss": 1.1276, "step": 9077 }, { "epoch": 1.8454970522463916, "grad_norm": 0.16353894770145416, "learning_rate": 1.547849079629818e-05, "loss": 1.0393, "step": 9078 }, { "epoch": 1.8457003455986989, "grad_norm": 0.1587207317352295, "learning_rate": 1.545815112376691e-05, "loss": 1.1288, "step": 9079 }, { "epoch": 1.8459036389510062, "grad_norm": 0.1558389514684677, "learning_rate": 1.5437811451235637e-05, "loss": 0.9778, "step": 9080 }, { "epoch": 1.8461069323033137, "grad_norm": 0.12846806645393372, "learning_rate": 1.5417471778704365e-05, "loss": 0.7703, "step": 9081 }, { "epoch": 1.8463102256556212, "grad_norm": 0.16977696120738983, "learning_rate": 1.539713210617309e-05, "loss": 1.0718, "step": 9082 }, { "epoch": 1.8465135190079285, "grad_norm": 0.1602630317211151, "learning_rate": 1.5376792433641817e-05, "loss": 0.9598, "step": 9083 }, { "epoch": 1.8467168123602358, "grad_norm": 0.15434099733829498, "learning_rate": 1.5356452761110545e-05, "loss": 0.9433, "step": 9084 }, { "epoch": 1.846920105712543, "grad_norm": 0.16391155123710632, "learning_rate": 1.5336113088579273e-05, "loss": 0.9734, "step": 9085 }, { "epoch": 1.8471233990648506, "grad_norm": 0.16079957783222198, "learning_rate": 1.5315773416048e-05, "loss": 1.0896, "step": 9086 }, { "epoch": 1.847326692417158, "grad_norm": 0.18687868118286133, "learning_rate": 1.529543374351673e-05, "loss": 1.2211, "step": 9087 }, { "epoch": 1.8475299857694654, "grad_norm": 0.15750667452812195, "learning_rate": 1.5275094070985456e-05, "loss": 1.1841, "step": 9088 }, { "epoch": 1.8477332791217727, "grad_norm": 0.16305695474147797, "learning_rate": 1.5254754398454186e-05, "loss": 1.0134, "step": 9089 }, { "epoch": 1.84793657247408, "grad_norm": 0.17851541936397552, "learning_rate": 1.5234414725922914e-05, "loss": 1.1576, "step": 9090 }, { "epoch": 1.8481398658263875, "grad_norm": 0.161623015999794, "learning_rate": 1.5214075053391642e-05, "loss": 0.9268, "step": 9091 }, { "epoch": 1.848343159178695, "grad_norm": 0.16736359894275665, "learning_rate": 1.519373538086037e-05, "loss": 1.0038, "step": 9092 }, { "epoch": 1.8485464525310022, "grad_norm": 0.17506049573421478, "learning_rate": 1.5173395708329097e-05, "loss": 1.0943, "step": 9093 }, { "epoch": 1.8487497458833095, "grad_norm": 0.15897499024868011, "learning_rate": 1.5153056035797825e-05, "loss": 1.0185, "step": 9094 }, { "epoch": 1.848953039235617, "grad_norm": 0.16290009021759033, "learning_rate": 1.5132716363266553e-05, "loss": 1.0883, "step": 9095 }, { "epoch": 1.8491563325879243, "grad_norm": 0.14727109670639038, "learning_rate": 1.511237669073528e-05, "loss": 0.9444, "step": 9096 }, { "epoch": 1.8493596259402318, "grad_norm": 0.1619667261838913, "learning_rate": 1.5092037018204009e-05, "loss": 0.9525, "step": 9097 }, { "epoch": 1.8495629192925391, "grad_norm": 0.14331746101379395, "learning_rate": 1.5071697345672736e-05, "loss": 0.8943, "step": 9098 }, { "epoch": 1.8497662126448464, "grad_norm": 0.1706109642982483, "learning_rate": 1.5051357673141464e-05, "loss": 0.987, "step": 9099 }, { "epoch": 1.849969505997154, "grad_norm": 0.17344290018081665, "learning_rate": 1.5031018000610192e-05, "loss": 1.0951, "step": 9100 }, { "epoch": 1.8501727993494614, "grad_norm": 0.1367466300725937, "learning_rate": 1.501067832807892e-05, "loss": 0.8686, "step": 9101 }, { "epoch": 1.8503760927017687, "grad_norm": 0.1574309915304184, "learning_rate": 1.4990338655547648e-05, "loss": 0.9125, "step": 9102 }, { "epoch": 1.850579386054076, "grad_norm": 0.1583828628063202, "learning_rate": 1.4969998983016372e-05, "loss": 1.0388, "step": 9103 }, { "epoch": 1.8507826794063833, "grad_norm": 0.166561096906662, "learning_rate": 1.49496593104851e-05, "loss": 1.2264, "step": 9104 }, { "epoch": 1.8509859727586908, "grad_norm": 0.15735095739364624, "learning_rate": 1.4929319637953828e-05, "loss": 0.9944, "step": 9105 }, { "epoch": 1.8511892661109983, "grad_norm": 0.14743497967720032, "learning_rate": 1.4908979965422556e-05, "loss": 0.8475, "step": 9106 }, { "epoch": 1.8513925594633056, "grad_norm": 0.1490921974182129, "learning_rate": 1.4888640292891284e-05, "loss": 0.9916, "step": 9107 }, { "epoch": 1.851595852815613, "grad_norm": 0.16641655564308167, "learning_rate": 1.4868300620360012e-05, "loss": 1.0756, "step": 9108 }, { "epoch": 1.8517991461679202, "grad_norm": 0.15328289568424225, "learning_rate": 1.484796094782874e-05, "loss": 1.0759, "step": 9109 }, { "epoch": 1.8520024395202277, "grad_norm": 0.15595008432865143, "learning_rate": 1.4827621275297467e-05, "loss": 1.0303, "step": 9110 }, { "epoch": 1.8522057328725352, "grad_norm": 0.16380850970745087, "learning_rate": 1.4807281602766195e-05, "loss": 1.0169, "step": 9111 }, { "epoch": 1.8524090262248425, "grad_norm": 0.1718147099018097, "learning_rate": 1.4786941930234923e-05, "loss": 1.1535, "step": 9112 }, { "epoch": 1.8526123195771498, "grad_norm": 0.15970173478126526, "learning_rate": 1.476660225770365e-05, "loss": 0.9563, "step": 9113 }, { "epoch": 1.852815612929457, "grad_norm": 0.1604384332895279, "learning_rate": 1.474626258517238e-05, "loss": 1.0238, "step": 9114 }, { "epoch": 1.8530189062817646, "grad_norm": 0.16613902151584625, "learning_rate": 1.4725922912641108e-05, "loss": 0.9878, "step": 9115 }, { "epoch": 1.853222199634072, "grad_norm": 0.14699769020080566, "learning_rate": 1.4705583240109836e-05, "loss": 1.0044, "step": 9116 }, { "epoch": 1.8534254929863794, "grad_norm": 0.16522325575351715, "learning_rate": 1.4685243567578564e-05, "loss": 1.0771, "step": 9117 }, { "epoch": 1.8536287863386867, "grad_norm": 0.16642172634601593, "learning_rate": 1.4664903895047292e-05, "loss": 1.0138, "step": 9118 }, { "epoch": 1.853832079690994, "grad_norm": 0.1692981719970703, "learning_rate": 1.464456422251602e-05, "loss": 1.0189, "step": 9119 }, { "epoch": 1.8540353730433015, "grad_norm": 0.16554026305675507, "learning_rate": 1.4624224549984747e-05, "loss": 1.0158, "step": 9120 }, { "epoch": 1.854238666395609, "grad_norm": 0.14761210978031158, "learning_rate": 1.4603884877453475e-05, "loss": 0.9394, "step": 9121 }, { "epoch": 1.8544419597479163, "grad_norm": 0.16871321201324463, "learning_rate": 1.4583545204922203e-05, "loss": 1.0278, "step": 9122 }, { "epoch": 1.8546452531002235, "grad_norm": 0.15795601904392242, "learning_rate": 1.4563205532390931e-05, "loss": 0.9431, "step": 9123 }, { "epoch": 1.854848546452531, "grad_norm": 0.1593995988368988, "learning_rate": 1.4542865859859655e-05, "loss": 0.9781, "step": 9124 }, { "epoch": 1.8550518398048383, "grad_norm": 0.15012140572071075, "learning_rate": 1.4522526187328383e-05, "loss": 1.0435, "step": 9125 }, { "epoch": 1.8552551331571459, "grad_norm": 0.16522644460201263, "learning_rate": 1.4502186514797111e-05, "loss": 1.1851, "step": 9126 }, { "epoch": 1.8554584265094531, "grad_norm": 0.1511608511209488, "learning_rate": 1.4481846842265839e-05, "loss": 0.9358, "step": 9127 }, { "epoch": 1.8556617198617604, "grad_norm": 0.17427775263786316, "learning_rate": 1.4461507169734567e-05, "loss": 1.039, "step": 9128 }, { "epoch": 1.855865013214068, "grad_norm": 0.16680866479873657, "learning_rate": 1.4441167497203295e-05, "loss": 0.9389, "step": 9129 }, { "epoch": 1.8560683065663752, "grad_norm": 0.16897761821746826, "learning_rate": 1.4420827824672023e-05, "loss": 1.12, "step": 9130 }, { "epoch": 1.8562715999186827, "grad_norm": 0.16391155123710632, "learning_rate": 1.440048815214075e-05, "loss": 0.9833, "step": 9131 }, { "epoch": 1.85647489327099, "grad_norm": 0.16374000906944275, "learning_rate": 1.4380148479609478e-05, "loss": 1.0831, "step": 9132 }, { "epoch": 1.8566781866232973, "grad_norm": 0.1376214325428009, "learning_rate": 1.4359808807078206e-05, "loss": 0.9043, "step": 9133 }, { "epoch": 1.8568814799756048, "grad_norm": 0.15006083250045776, "learning_rate": 1.4339469134546934e-05, "loss": 0.918, "step": 9134 }, { "epoch": 1.8570847733279123, "grad_norm": 0.15325471758842468, "learning_rate": 1.4319129462015662e-05, "loss": 0.9288, "step": 9135 }, { "epoch": 1.8572880666802196, "grad_norm": 0.15259116888046265, "learning_rate": 1.429878978948439e-05, "loss": 0.9854, "step": 9136 }, { "epoch": 1.857491360032527, "grad_norm": 0.15689361095428467, "learning_rate": 1.4278450116953118e-05, "loss": 0.8798, "step": 9137 }, { "epoch": 1.8576946533848342, "grad_norm": 0.154251366853714, "learning_rate": 1.4258110444421845e-05, "loss": 1.0113, "step": 9138 }, { "epoch": 1.8578979467371417, "grad_norm": 0.15593677759170532, "learning_rate": 1.4237770771890573e-05, "loss": 1.1342, "step": 9139 }, { "epoch": 1.8581012400894492, "grad_norm": 0.1714189201593399, "learning_rate": 1.4217431099359301e-05, "loss": 1.1601, "step": 9140 }, { "epoch": 1.8583045334417565, "grad_norm": 0.13511736690998077, "learning_rate": 1.4197091426828029e-05, "loss": 0.7738, "step": 9141 }, { "epoch": 1.8585078267940638, "grad_norm": 0.14855125546455383, "learning_rate": 1.4176751754296757e-05, "loss": 0.9311, "step": 9142 }, { "epoch": 1.858711120146371, "grad_norm": 0.1357201784849167, "learning_rate": 1.4156412081765486e-05, "loss": 0.8214, "step": 9143 }, { "epoch": 1.8589144134986786, "grad_norm": 0.1451376974582672, "learning_rate": 1.4136072409234214e-05, "loss": 0.9392, "step": 9144 }, { "epoch": 1.859117706850986, "grad_norm": 0.14371171593666077, "learning_rate": 1.4115732736702939e-05, "loss": 0.8943, "step": 9145 }, { "epoch": 1.8593210002032934, "grad_norm": 0.15513314306735992, "learning_rate": 1.4095393064171666e-05, "loss": 0.9792, "step": 9146 }, { "epoch": 1.8595242935556007, "grad_norm": 0.15205495059490204, "learning_rate": 1.4075053391640394e-05, "loss": 0.9238, "step": 9147 }, { "epoch": 1.859727586907908, "grad_norm": 0.168076753616333, "learning_rate": 1.4054713719109122e-05, "loss": 1.0027, "step": 9148 }, { "epoch": 1.8599308802602155, "grad_norm": 0.15293119847774506, "learning_rate": 1.403437404657785e-05, "loss": 1.0271, "step": 9149 }, { "epoch": 1.860134173612523, "grad_norm": 0.15787629783153534, "learning_rate": 1.4014034374046578e-05, "loss": 1.0152, "step": 9150 }, { "epoch": 1.8603374669648303, "grad_norm": 0.16142600774765015, "learning_rate": 1.3993694701515306e-05, "loss": 1.0209, "step": 9151 }, { "epoch": 1.8605407603171376, "grad_norm": 0.15740378201007843, "learning_rate": 1.3973355028984034e-05, "loss": 0.984, "step": 9152 }, { "epoch": 1.860744053669445, "grad_norm": 0.1744755357503891, "learning_rate": 1.3953015356452761e-05, "loss": 1.1172, "step": 9153 }, { "epoch": 1.8609473470217524, "grad_norm": 0.14726267755031586, "learning_rate": 1.393267568392149e-05, "loss": 0.8438, "step": 9154 }, { "epoch": 1.8611506403740599, "grad_norm": 0.18079231679439545, "learning_rate": 1.3912336011390217e-05, "loss": 1.1986, "step": 9155 }, { "epoch": 1.8613539337263671, "grad_norm": 0.1750095933675766, "learning_rate": 1.3891996338858945e-05, "loss": 1.0298, "step": 9156 }, { "epoch": 1.8615572270786744, "grad_norm": 0.16656936705112457, "learning_rate": 1.3871656666327673e-05, "loss": 1.1229, "step": 9157 }, { "epoch": 1.861760520430982, "grad_norm": 0.14559406042099, "learning_rate": 1.38513169937964e-05, "loss": 0.9498, "step": 9158 }, { "epoch": 1.8619638137832892, "grad_norm": 0.14473088085651398, "learning_rate": 1.3830977321265129e-05, "loss": 0.9334, "step": 9159 }, { "epoch": 1.8621671071355967, "grad_norm": 0.17904867231845856, "learning_rate": 1.3810637648733856e-05, "loss": 1.0901, "step": 9160 }, { "epoch": 1.862370400487904, "grad_norm": 0.13409379124641418, "learning_rate": 1.3790297976202584e-05, "loss": 0.8938, "step": 9161 }, { "epoch": 1.8625736938402113, "grad_norm": 0.16518044471740723, "learning_rate": 1.3769958303671312e-05, "loss": 1.0613, "step": 9162 }, { "epoch": 1.8627769871925188, "grad_norm": 0.15572203695774078, "learning_rate": 1.374961863114004e-05, "loss": 0.928, "step": 9163 }, { "epoch": 1.8629802805448263, "grad_norm": 0.16791309416294098, "learning_rate": 1.3729278958608768e-05, "loss": 1.0645, "step": 9164 }, { "epoch": 1.8631835738971336, "grad_norm": 0.16569112241268158, "learning_rate": 1.3708939286077494e-05, "loss": 1.0784, "step": 9165 }, { "epoch": 1.863386867249441, "grad_norm": 0.17138367891311646, "learning_rate": 1.3688599613546222e-05, "loss": 0.9752, "step": 9166 }, { "epoch": 1.8635901606017482, "grad_norm": 0.15366528928279877, "learning_rate": 1.366825994101495e-05, "loss": 0.9418, "step": 9167 }, { "epoch": 1.8637934539540557, "grad_norm": 0.1519828736782074, "learning_rate": 1.3647920268483677e-05, "loss": 1.0562, "step": 9168 }, { "epoch": 1.8639967473063632, "grad_norm": 0.1644189953804016, "learning_rate": 1.3627580595952405e-05, "loss": 0.9926, "step": 9169 }, { "epoch": 1.8642000406586705, "grad_norm": 0.15858952701091766, "learning_rate": 1.3607240923421133e-05, "loss": 1.0813, "step": 9170 }, { "epoch": 1.8644033340109778, "grad_norm": 0.15331768989562988, "learning_rate": 1.3586901250889861e-05, "loss": 0.9703, "step": 9171 }, { "epoch": 1.864606627363285, "grad_norm": 0.14787660539150238, "learning_rate": 1.3566561578358589e-05, "loss": 0.9383, "step": 9172 }, { "epoch": 1.8648099207155926, "grad_norm": 0.18496711552143097, "learning_rate": 1.3546221905827317e-05, "loss": 1.2155, "step": 9173 }, { "epoch": 1.8650132140679, "grad_norm": 0.15096575021743774, "learning_rate": 1.3525882233296045e-05, "loss": 0.905, "step": 9174 }, { "epoch": 1.8652165074202074, "grad_norm": 0.1480555534362793, "learning_rate": 1.3505542560764772e-05, "loss": 0.944, "step": 9175 }, { "epoch": 1.8654198007725147, "grad_norm": 0.1751280426979065, "learning_rate": 1.34852028882335e-05, "loss": 1.0942, "step": 9176 }, { "epoch": 1.865623094124822, "grad_norm": 0.1660584658384323, "learning_rate": 1.3464863215702228e-05, "loss": 0.9944, "step": 9177 }, { "epoch": 1.8658263874771295, "grad_norm": 0.171767920255661, "learning_rate": 1.3444523543170956e-05, "loss": 1.1125, "step": 9178 }, { "epoch": 1.866029680829437, "grad_norm": 0.13880722224712372, "learning_rate": 1.3424183870639684e-05, "loss": 0.8382, "step": 9179 }, { "epoch": 1.8662329741817443, "grad_norm": 0.16650839149951935, "learning_rate": 1.3403844198108412e-05, "loss": 1.1286, "step": 9180 }, { "epoch": 1.8664362675340516, "grad_norm": 0.1714068204164505, "learning_rate": 1.338350452557714e-05, "loss": 1.1127, "step": 9181 }, { "epoch": 1.8666395608863589, "grad_norm": 0.16576112806797028, "learning_rate": 1.3363164853045867e-05, "loss": 1.1478, "step": 9182 }, { "epoch": 1.8668428542386664, "grad_norm": 0.14922307431697845, "learning_rate": 1.3342825180514595e-05, "loss": 0.8867, "step": 9183 }, { "epoch": 1.8670461475909739, "grad_norm": 0.16460509598255157, "learning_rate": 1.3322485507983323e-05, "loss": 0.999, "step": 9184 }, { "epoch": 1.8672494409432812, "grad_norm": 0.1735246777534485, "learning_rate": 1.3302145835452051e-05, "loss": 1.0805, "step": 9185 }, { "epoch": 1.8674527342955884, "grad_norm": 0.15362222492694855, "learning_rate": 1.3281806162920777e-05, "loss": 1.0524, "step": 9186 }, { "epoch": 1.867656027647896, "grad_norm": 0.1630677580833435, "learning_rate": 1.3261466490389505e-05, "loss": 0.9003, "step": 9187 }, { "epoch": 1.8678593210002032, "grad_norm": 0.16949492692947388, "learning_rate": 1.3241126817858233e-05, "loss": 1.1147, "step": 9188 }, { "epoch": 1.8680626143525108, "grad_norm": 0.16529028117656708, "learning_rate": 1.322078714532696e-05, "loss": 1.1372, "step": 9189 }, { "epoch": 1.868265907704818, "grad_norm": 0.1558435559272766, "learning_rate": 1.3200447472795688e-05, "loss": 1.0566, "step": 9190 }, { "epoch": 1.8684692010571253, "grad_norm": 0.17211611568927765, "learning_rate": 1.3180107800264416e-05, "loss": 1.0106, "step": 9191 }, { "epoch": 1.8686724944094328, "grad_norm": 0.15985505282878876, "learning_rate": 1.3159768127733144e-05, "loss": 0.9746, "step": 9192 }, { "epoch": 1.8688757877617403, "grad_norm": 0.1616838425397873, "learning_rate": 1.3139428455201872e-05, "loss": 1.0438, "step": 9193 }, { "epoch": 1.8690790811140476, "grad_norm": 0.15357354283332825, "learning_rate": 1.31190887826706e-05, "loss": 1.0446, "step": 9194 }, { "epoch": 1.869282374466355, "grad_norm": 0.16767995059490204, "learning_rate": 1.3098749110139328e-05, "loss": 1.1207, "step": 9195 }, { "epoch": 1.8694856678186622, "grad_norm": 0.18717004358768463, "learning_rate": 1.3078409437608056e-05, "loss": 1.169, "step": 9196 }, { "epoch": 1.8696889611709697, "grad_norm": 0.15601149201393127, "learning_rate": 1.3058069765076783e-05, "loss": 0.9573, "step": 9197 }, { "epoch": 1.8698922545232772, "grad_norm": 0.15987952053546906, "learning_rate": 1.3037730092545511e-05, "loss": 0.9703, "step": 9198 }, { "epoch": 1.8700955478755845, "grad_norm": 0.17710572481155396, "learning_rate": 1.3017390420014239e-05, "loss": 1.1623, "step": 9199 }, { "epoch": 1.8702988412278918, "grad_norm": 0.15760180354118347, "learning_rate": 1.2997050747482967e-05, "loss": 0.8797, "step": 9200 }, { "epoch": 1.870502134580199, "grad_norm": 0.15317152440547943, "learning_rate": 1.2976711074951695e-05, "loss": 1.081, "step": 9201 }, { "epoch": 1.8707054279325066, "grad_norm": 0.14920032024383545, "learning_rate": 1.2956371402420423e-05, "loss": 0.9608, "step": 9202 }, { "epoch": 1.8709087212848141, "grad_norm": 0.15852342545986176, "learning_rate": 1.293603172988915e-05, "loss": 0.9864, "step": 9203 }, { "epoch": 1.8711120146371214, "grad_norm": 0.14235036075115204, "learning_rate": 1.2915692057357878e-05, "loss": 0.9449, "step": 9204 }, { "epoch": 1.8713153079894287, "grad_norm": 0.17750944197177887, "learning_rate": 1.2895352384826606e-05, "loss": 1.1654, "step": 9205 }, { "epoch": 1.871518601341736, "grad_norm": 0.14732716977596283, "learning_rate": 1.2875012712295334e-05, "loss": 0.8402, "step": 9206 }, { "epoch": 1.8717218946940435, "grad_norm": 0.17688249051570892, "learning_rate": 1.2854673039764059e-05, "loss": 1.1016, "step": 9207 }, { "epoch": 1.871925188046351, "grad_norm": 0.16625085473060608, "learning_rate": 1.2834333367232786e-05, "loss": 1.1078, "step": 9208 }, { "epoch": 1.8721284813986583, "grad_norm": 0.16455720365047455, "learning_rate": 1.2813993694701514e-05, "loss": 0.9093, "step": 9209 }, { "epoch": 1.8723317747509656, "grad_norm": 0.14930884540081024, "learning_rate": 1.2793654022170242e-05, "loss": 0.8861, "step": 9210 }, { "epoch": 1.8725350681032729, "grad_norm": 0.14978399872779846, "learning_rate": 1.277331434963897e-05, "loss": 0.9348, "step": 9211 }, { "epoch": 1.8727383614555804, "grad_norm": 0.15730483829975128, "learning_rate": 1.2752974677107698e-05, "loss": 0.933, "step": 9212 }, { "epoch": 1.8729416548078879, "grad_norm": 0.1400170475244522, "learning_rate": 1.2732635004576426e-05, "loss": 0.8393, "step": 9213 }, { "epoch": 1.8731449481601952, "grad_norm": 0.1616322547197342, "learning_rate": 1.2712295332045155e-05, "loss": 1.0119, "step": 9214 }, { "epoch": 1.8733482415125025, "grad_norm": 0.17237956821918488, "learning_rate": 1.2691955659513883e-05, "loss": 1.1359, "step": 9215 }, { "epoch": 1.87355153486481, "grad_norm": 0.17212575674057007, "learning_rate": 1.2671615986982611e-05, "loss": 1.0404, "step": 9216 }, { "epoch": 1.8737548282171173, "grad_norm": 0.14744362235069275, "learning_rate": 1.2651276314451339e-05, "loss": 0.959, "step": 9217 }, { "epoch": 1.8739581215694248, "grad_norm": 0.15743842720985413, "learning_rate": 1.2630936641920067e-05, "loss": 0.9477, "step": 9218 }, { "epoch": 1.874161414921732, "grad_norm": 0.17763325572013855, "learning_rate": 1.2610596969388794e-05, "loss": 1.2157, "step": 9219 }, { "epoch": 1.8743647082740393, "grad_norm": 0.16381874680519104, "learning_rate": 1.2590257296857522e-05, "loss": 1.0655, "step": 9220 }, { "epoch": 1.8745680016263468, "grad_norm": 0.15161968767642975, "learning_rate": 1.256991762432625e-05, "loss": 0.8991, "step": 9221 }, { "epoch": 1.8747712949786544, "grad_norm": 0.14358890056610107, "learning_rate": 1.2549577951794978e-05, "loss": 0.819, "step": 9222 }, { "epoch": 1.8749745883309616, "grad_norm": 0.18513309955596924, "learning_rate": 1.2529238279263706e-05, "loss": 1.2519, "step": 9223 }, { "epoch": 1.875177881683269, "grad_norm": 0.1643258035182953, "learning_rate": 1.2508898606732434e-05, "loss": 0.9436, "step": 9224 }, { "epoch": 1.8753811750355762, "grad_norm": 0.16449370980262756, "learning_rate": 1.248855893420116e-05, "loss": 1.0323, "step": 9225 }, { "epoch": 1.8755844683878837, "grad_norm": 0.15628814697265625, "learning_rate": 1.2468219261669888e-05, "loss": 0.8658, "step": 9226 }, { "epoch": 1.8757877617401912, "grad_norm": 0.16220559179782867, "learning_rate": 1.2447879589138616e-05, "loss": 0.9974, "step": 9227 }, { "epoch": 1.8759910550924985, "grad_norm": 0.18356288969516754, "learning_rate": 1.2427539916607343e-05, "loss": 1.2149, "step": 9228 }, { "epoch": 1.8761943484448058, "grad_norm": 0.15554633736610413, "learning_rate": 1.2407200244076071e-05, "loss": 1.0418, "step": 9229 }, { "epoch": 1.876397641797113, "grad_norm": 0.1531071811914444, "learning_rate": 1.2386860571544799e-05, "loss": 0.9284, "step": 9230 }, { "epoch": 1.8766009351494206, "grad_norm": 0.15543857216835022, "learning_rate": 1.2366520899013527e-05, "loss": 0.9827, "step": 9231 }, { "epoch": 1.8768042285017281, "grad_norm": 0.16149646043777466, "learning_rate": 1.2346181226482255e-05, "loss": 0.9901, "step": 9232 }, { "epoch": 1.8770075218540354, "grad_norm": 0.15839633345603943, "learning_rate": 1.2325841553950981e-05, "loss": 0.9233, "step": 9233 }, { "epoch": 1.8772108152063427, "grad_norm": 0.18716824054718018, "learning_rate": 1.2305501881419709e-05, "loss": 1.1512, "step": 9234 }, { "epoch": 1.87741410855865, "grad_norm": 0.1665702909231186, "learning_rate": 1.2285162208888437e-05, "loss": 1.0675, "step": 9235 }, { "epoch": 1.8776174019109575, "grad_norm": 0.17894239723682404, "learning_rate": 1.2264822536357164e-05, "loss": 1.3013, "step": 9236 }, { "epoch": 1.877820695263265, "grad_norm": 0.15544363856315613, "learning_rate": 1.2244482863825892e-05, "loss": 0.9811, "step": 9237 }, { "epoch": 1.8780239886155723, "grad_norm": 0.15379776060581207, "learning_rate": 1.222414319129462e-05, "loss": 0.9862, "step": 9238 }, { "epoch": 1.8782272819678796, "grad_norm": 0.15556766092777252, "learning_rate": 1.2203803518763348e-05, "loss": 0.9691, "step": 9239 }, { "epoch": 1.8784305753201869, "grad_norm": 0.15434099733829498, "learning_rate": 1.2183463846232076e-05, "loss": 0.9718, "step": 9240 }, { "epoch": 1.8786338686724944, "grad_norm": 0.14996455609798431, "learning_rate": 1.2163124173700804e-05, "loss": 0.8904, "step": 9241 }, { "epoch": 1.8788371620248019, "grad_norm": 0.1673956960439682, "learning_rate": 1.2142784501169532e-05, "loss": 1.2021, "step": 9242 }, { "epoch": 1.8790404553771092, "grad_norm": 0.1677650660276413, "learning_rate": 1.212244482863826e-05, "loss": 1.1322, "step": 9243 }, { "epoch": 1.8792437487294165, "grad_norm": 0.15573391318321228, "learning_rate": 1.2102105156106987e-05, "loss": 0.9769, "step": 9244 }, { "epoch": 1.879447042081724, "grad_norm": 0.17531399428844452, "learning_rate": 1.2081765483575715e-05, "loss": 1.1125, "step": 9245 }, { "epoch": 1.8796503354340313, "grad_norm": 0.15053331851959229, "learning_rate": 1.2061425811044443e-05, "loss": 0.9271, "step": 9246 }, { "epoch": 1.8798536287863388, "grad_norm": 0.14952681958675385, "learning_rate": 1.204108613851317e-05, "loss": 1.0134, "step": 9247 }, { "epoch": 1.880056922138646, "grad_norm": 0.1583644151687622, "learning_rate": 1.2020746465981899e-05, "loss": 0.9322, "step": 9248 }, { "epoch": 1.8802602154909533, "grad_norm": 0.15615181624889374, "learning_rate": 1.2000406793450626e-05, "loss": 0.9744, "step": 9249 }, { "epoch": 1.8804635088432609, "grad_norm": 0.16670139133930206, "learning_rate": 1.1980067120919354e-05, "loss": 1.0769, "step": 9250 }, { "epoch": 1.8806668021955684, "grad_norm": 0.17233620584011078, "learning_rate": 1.1959727448388082e-05, "loss": 1.025, "step": 9251 }, { "epoch": 1.8808700955478757, "grad_norm": 0.1902504563331604, "learning_rate": 1.193938777585681e-05, "loss": 1.1576, "step": 9252 }, { "epoch": 1.881073388900183, "grad_norm": 0.17896296083927155, "learning_rate": 1.1919048103325536e-05, "loss": 1.0016, "step": 9253 }, { "epoch": 1.8812766822524902, "grad_norm": 0.15610924363136292, "learning_rate": 1.1898708430794264e-05, "loss": 0.9115, "step": 9254 }, { "epoch": 1.8814799756047977, "grad_norm": 0.15474332869052887, "learning_rate": 1.1878368758262992e-05, "loss": 0.8435, "step": 9255 }, { "epoch": 1.8816832689571052, "grad_norm": 0.14739611744880676, "learning_rate": 1.185802908573172e-05, "loss": 0.8896, "step": 9256 }, { "epoch": 1.8818865623094125, "grad_norm": 0.16680103540420532, "learning_rate": 1.1837689413200448e-05, "loss": 1.0401, "step": 9257 }, { "epoch": 1.8820898556617198, "grad_norm": 0.1427062302827835, "learning_rate": 1.1817349740669175e-05, "loss": 0.8666, "step": 9258 }, { "epoch": 1.8822931490140271, "grad_norm": 0.16407820582389832, "learning_rate": 1.1797010068137903e-05, "loss": 0.961, "step": 9259 }, { "epoch": 1.8824964423663346, "grad_norm": 0.14309735596179962, "learning_rate": 1.1776670395606631e-05, "loss": 0.8876, "step": 9260 }, { "epoch": 1.8826997357186421, "grad_norm": 0.15680107474327087, "learning_rate": 1.1756330723075359e-05, "loss": 0.9492, "step": 9261 }, { "epoch": 1.8829030290709494, "grad_norm": 0.16957566142082214, "learning_rate": 1.1735991050544087e-05, "loss": 1.0354, "step": 9262 }, { "epoch": 1.8831063224232567, "grad_norm": 0.16775429248809814, "learning_rate": 1.1715651378012815e-05, "loss": 1.0352, "step": 9263 }, { "epoch": 1.883309615775564, "grad_norm": 0.16033852100372314, "learning_rate": 1.1695311705481543e-05, "loss": 1.1168, "step": 9264 }, { "epoch": 1.8835129091278715, "grad_norm": 0.1635086089372635, "learning_rate": 1.167497203295027e-05, "loss": 0.9929, "step": 9265 }, { "epoch": 1.883716202480179, "grad_norm": 0.15139703452587128, "learning_rate": 1.1654632360418998e-05, "loss": 0.9686, "step": 9266 }, { "epoch": 1.8839194958324863, "grad_norm": 0.17426981031894684, "learning_rate": 1.1634292687887726e-05, "loss": 1.0655, "step": 9267 }, { "epoch": 1.8841227891847936, "grad_norm": 0.17637090384960175, "learning_rate": 1.1613953015356454e-05, "loss": 1.1177, "step": 9268 }, { "epoch": 1.8843260825371009, "grad_norm": 0.14939779043197632, "learning_rate": 1.1593613342825182e-05, "loss": 0.9628, "step": 9269 }, { "epoch": 1.8845293758894084, "grad_norm": 0.1456364542245865, "learning_rate": 1.157327367029391e-05, "loss": 0.7861, "step": 9270 }, { "epoch": 1.884732669241716, "grad_norm": 0.17216473817825317, "learning_rate": 1.1552933997762637e-05, "loss": 1.1147, "step": 9271 }, { "epoch": 1.8849359625940232, "grad_norm": 0.14829221367835999, "learning_rate": 1.1532594325231365e-05, "loss": 0.9284, "step": 9272 }, { "epoch": 1.8851392559463305, "grad_norm": 0.144239604473114, "learning_rate": 1.1512254652700093e-05, "loss": 0.9452, "step": 9273 }, { "epoch": 1.885342549298638, "grad_norm": 0.15933583676815033, "learning_rate": 1.149191498016882e-05, "loss": 1.037, "step": 9274 }, { "epoch": 1.8855458426509453, "grad_norm": 0.16064906120300293, "learning_rate": 1.1471575307637547e-05, "loss": 1.0158, "step": 9275 }, { "epoch": 1.8857491360032528, "grad_norm": 0.14926393330097198, "learning_rate": 1.1451235635106275e-05, "loss": 0.9217, "step": 9276 }, { "epoch": 1.88595242935556, "grad_norm": 0.16550451517105103, "learning_rate": 1.1430895962575003e-05, "loss": 1.0436, "step": 9277 }, { "epoch": 1.8861557227078674, "grad_norm": 0.16239236295223236, "learning_rate": 1.141055629004373e-05, "loss": 1.0714, "step": 9278 }, { "epoch": 1.8863590160601749, "grad_norm": 0.17093604803085327, "learning_rate": 1.1390216617512459e-05, "loss": 0.9801, "step": 9279 }, { "epoch": 1.8865623094124824, "grad_norm": 0.15669691562652588, "learning_rate": 1.1369876944981186e-05, "loss": 1.0196, "step": 9280 }, { "epoch": 1.8867656027647897, "grad_norm": 0.17138594388961792, "learning_rate": 1.1349537272449914e-05, "loss": 1.1031, "step": 9281 }, { "epoch": 1.886968896117097, "grad_norm": 0.16398198902606964, "learning_rate": 1.1329197599918642e-05, "loss": 0.951, "step": 9282 }, { "epoch": 1.8871721894694042, "grad_norm": 0.15002363920211792, "learning_rate": 1.130885792738737e-05, "loss": 0.914, "step": 9283 }, { "epoch": 1.8873754828217117, "grad_norm": 0.16396719217300415, "learning_rate": 1.1288518254856098e-05, "loss": 0.9809, "step": 9284 }, { "epoch": 1.8875787761740193, "grad_norm": 0.1871686577796936, "learning_rate": 1.1268178582324824e-05, "loss": 1.0812, "step": 9285 }, { "epoch": 1.8877820695263265, "grad_norm": 0.14784665405750275, "learning_rate": 1.1247838909793552e-05, "loss": 1.0217, "step": 9286 }, { "epoch": 1.8879853628786338, "grad_norm": 0.15826550126075745, "learning_rate": 1.122749923726228e-05, "loss": 0.9266, "step": 9287 }, { "epoch": 1.8881886562309411, "grad_norm": 0.14023324847221375, "learning_rate": 1.1207159564731008e-05, "loss": 0.8599, "step": 9288 }, { "epoch": 1.8883919495832486, "grad_norm": 0.13892972469329834, "learning_rate": 1.1186819892199735e-05, "loss": 0.8357, "step": 9289 }, { "epoch": 1.8885952429355561, "grad_norm": 0.15983638167381287, "learning_rate": 1.1166480219668463e-05, "loss": 0.9329, "step": 9290 }, { "epoch": 1.8887985362878634, "grad_norm": 0.16801095008850098, "learning_rate": 1.1146140547137191e-05, "loss": 1.0796, "step": 9291 }, { "epoch": 1.8890018296401707, "grad_norm": 0.15868781507015228, "learning_rate": 1.1125800874605919e-05, "loss": 1.0425, "step": 9292 }, { "epoch": 1.889205122992478, "grad_norm": 0.1561150997877121, "learning_rate": 1.1105461202074648e-05, "loss": 0.9358, "step": 9293 }, { "epoch": 1.8894084163447855, "grad_norm": 0.16269925236701965, "learning_rate": 1.1085121529543376e-05, "loss": 1.074, "step": 9294 }, { "epoch": 1.889611709697093, "grad_norm": 0.16091467440128326, "learning_rate": 1.1064781857012102e-05, "loss": 1.0077, "step": 9295 }, { "epoch": 1.8898150030494003, "grad_norm": 0.17417684197425842, "learning_rate": 1.104444218448083e-05, "loss": 0.928, "step": 9296 }, { "epoch": 1.8900182964017076, "grad_norm": 0.1437702625989914, "learning_rate": 1.1024102511949558e-05, "loss": 0.8622, "step": 9297 }, { "epoch": 1.8902215897540149, "grad_norm": 0.14271040260791779, "learning_rate": 1.1003762839418286e-05, "loss": 0.9174, "step": 9298 }, { "epoch": 1.8904248831063224, "grad_norm": 0.17115336656570435, "learning_rate": 1.0983423166887014e-05, "loss": 1.0273, "step": 9299 }, { "epoch": 1.89062817645863, "grad_norm": 0.15985141694545746, "learning_rate": 1.0963083494355742e-05, "loss": 1.0495, "step": 9300 }, { "epoch": 1.8908314698109372, "grad_norm": 0.16537079215049744, "learning_rate": 1.094274382182447e-05, "loss": 1.0986, "step": 9301 }, { "epoch": 1.8910347631632445, "grad_norm": 0.15517854690551758, "learning_rate": 1.0922404149293197e-05, "loss": 0.8952, "step": 9302 }, { "epoch": 1.891238056515552, "grad_norm": 0.20096749067306519, "learning_rate": 1.0902064476761925e-05, "loss": 1.212, "step": 9303 }, { "epoch": 1.8914413498678593, "grad_norm": 0.1688317060470581, "learning_rate": 1.0881724804230653e-05, "loss": 1.0571, "step": 9304 }, { "epoch": 1.8916446432201668, "grad_norm": 0.15583118796348572, "learning_rate": 1.086138513169938e-05, "loss": 0.9346, "step": 9305 }, { "epoch": 1.891847936572474, "grad_norm": 0.17731614410877228, "learning_rate": 1.0841045459168107e-05, "loss": 1.0109, "step": 9306 }, { "epoch": 1.8920512299247814, "grad_norm": 0.16740213334560394, "learning_rate": 1.0820705786636835e-05, "loss": 0.9507, "step": 9307 }, { "epoch": 1.8922545232770889, "grad_norm": 0.15723784267902374, "learning_rate": 1.0800366114105563e-05, "loss": 0.9974, "step": 9308 }, { "epoch": 1.8924578166293964, "grad_norm": 0.1717313826084137, "learning_rate": 1.078002644157429e-05, "loss": 1.1462, "step": 9309 }, { "epoch": 1.8926611099817037, "grad_norm": 0.16550962626934052, "learning_rate": 1.0759686769043019e-05, "loss": 0.9711, "step": 9310 }, { "epoch": 1.892864403334011, "grad_norm": 0.15991832315921783, "learning_rate": 1.0739347096511746e-05, "loss": 0.9214, "step": 9311 }, { "epoch": 1.8930676966863182, "grad_norm": 0.16738741099834442, "learning_rate": 1.0719007423980474e-05, "loss": 0.9806, "step": 9312 }, { "epoch": 1.8932709900386258, "grad_norm": 0.1509847342967987, "learning_rate": 1.0698667751449202e-05, "loss": 0.8114, "step": 9313 }, { "epoch": 1.8934742833909333, "grad_norm": 0.16016948223114014, "learning_rate": 1.067832807891793e-05, "loss": 1.0437, "step": 9314 }, { "epoch": 1.8936775767432406, "grad_norm": 0.16906878352165222, "learning_rate": 1.0657988406386658e-05, "loss": 1.1174, "step": 9315 }, { "epoch": 1.8938808700955478, "grad_norm": 0.14439010620117188, "learning_rate": 1.0637648733855386e-05, "loss": 0.9612, "step": 9316 }, { "epoch": 1.8940841634478551, "grad_norm": 0.14976871013641357, "learning_rate": 1.0617309061324113e-05, "loss": 0.9424, "step": 9317 }, { "epoch": 1.8942874568001626, "grad_norm": 0.14871583878993988, "learning_rate": 1.0596969388792841e-05, "loss": 1.0182, "step": 9318 }, { "epoch": 1.8944907501524701, "grad_norm": 0.15636609494686127, "learning_rate": 1.057662971626157e-05, "loss": 1.0271, "step": 9319 }, { "epoch": 1.8946940435047774, "grad_norm": 0.15208925306797028, "learning_rate": 1.0556290043730297e-05, "loss": 0.9909, "step": 9320 }, { "epoch": 1.8948973368570847, "grad_norm": 0.16356107592582703, "learning_rate": 1.0535950371199025e-05, "loss": 1.0277, "step": 9321 }, { "epoch": 1.895100630209392, "grad_norm": 0.1584353744983673, "learning_rate": 1.0515610698667753e-05, "loss": 1.061, "step": 9322 }, { "epoch": 1.8953039235616995, "grad_norm": 0.17385679483413696, "learning_rate": 1.049527102613648e-05, "loss": 1.0001, "step": 9323 }, { "epoch": 1.895507216914007, "grad_norm": 0.14875008165836334, "learning_rate": 1.0474931353605208e-05, "loss": 0.8962, "step": 9324 }, { "epoch": 1.8957105102663143, "grad_norm": 0.1576494425535202, "learning_rate": 1.0454591681073936e-05, "loss": 0.9676, "step": 9325 }, { "epoch": 1.8959138036186216, "grad_norm": 0.17531967163085938, "learning_rate": 1.0434252008542662e-05, "loss": 1.0986, "step": 9326 }, { "epoch": 1.896117096970929, "grad_norm": 0.17487825453281403, "learning_rate": 1.041391233601139e-05, "loss": 1.004, "step": 9327 }, { "epoch": 1.8963203903232364, "grad_norm": 0.1605103313922882, "learning_rate": 1.0393572663480118e-05, "loss": 1.0181, "step": 9328 }, { "epoch": 1.896523683675544, "grad_norm": 0.1699770838022232, "learning_rate": 1.0373232990948846e-05, "loss": 1.0835, "step": 9329 }, { "epoch": 1.8967269770278512, "grad_norm": 0.15781551599502563, "learning_rate": 1.0352893318417574e-05, "loss": 0.9709, "step": 9330 }, { "epoch": 1.8969302703801585, "grad_norm": 0.15262846648693085, "learning_rate": 1.0332553645886302e-05, "loss": 1.0025, "step": 9331 }, { "epoch": 1.897133563732466, "grad_norm": 0.16675053536891937, "learning_rate": 1.031221397335503e-05, "loss": 1.058, "step": 9332 }, { "epoch": 1.8973368570847733, "grad_norm": 0.15862716734409332, "learning_rate": 1.0291874300823757e-05, "loss": 1.0868, "step": 9333 }, { "epoch": 1.8975401504370808, "grad_norm": 0.15670160949230194, "learning_rate": 1.0271534628292485e-05, "loss": 0.9464, "step": 9334 }, { "epoch": 1.897743443789388, "grad_norm": 0.16652365028858185, "learning_rate": 1.0251194955761213e-05, "loss": 1.0129, "step": 9335 }, { "epoch": 1.8979467371416954, "grad_norm": 0.13543561100959778, "learning_rate": 1.023085528322994e-05, "loss": 0.865, "step": 9336 }, { "epoch": 1.8981500304940029, "grad_norm": 0.17539720237255096, "learning_rate": 1.0210515610698667e-05, "loss": 1.1737, "step": 9337 }, { "epoch": 1.8983533238463104, "grad_norm": 0.16995757818222046, "learning_rate": 1.0190175938167395e-05, "loss": 1.0107, "step": 9338 }, { "epoch": 1.8985566171986177, "grad_norm": 0.16076253354549408, "learning_rate": 1.0169836265636123e-05, "loss": 0.9791, "step": 9339 }, { "epoch": 1.898759910550925, "grad_norm": 0.15653160214424133, "learning_rate": 1.014949659310485e-05, "loss": 1.0362, "step": 9340 }, { "epoch": 1.8989632039032323, "grad_norm": 0.17451439797878265, "learning_rate": 1.0129156920573578e-05, "loss": 1.1921, "step": 9341 }, { "epoch": 1.8991664972555398, "grad_norm": 0.15730206668376923, "learning_rate": 1.0108817248042306e-05, "loss": 1.0043, "step": 9342 }, { "epoch": 1.8993697906078473, "grad_norm": 0.1547120213508606, "learning_rate": 1.0088477575511034e-05, "loss": 0.9677, "step": 9343 }, { "epoch": 1.8995730839601546, "grad_norm": 0.14792628586292267, "learning_rate": 1.0068137902979764e-05, "loss": 0.9495, "step": 9344 }, { "epoch": 1.8997763773124619, "grad_norm": 0.16715767979621887, "learning_rate": 1.0047798230448492e-05, "loss": 1.0871, "step": 9345 }, { "epoch": 1.8999796706647691, "grad_norm": 0.16810470819473267, "learning_rate": 1.002745855791722e-05, "loss": 0.9926, "step": 9346 }, { "epoch": 1.9001829640170766, "grad_norm": 0.16060957312583923, "learning_rate": 1.0007118885385946e-05, "loss": 0.9888, "step": 9347 }, { "epoch": 1.9003862573693842, "grad_norm": 0.16978204250335693, "learning_rate": 9.986779212854673e-06, "loss": 1.1782, "step": 9348 }, { "epoch": 1.9005895507216914, "grad_norm": 0.1680404543876648, "learning_rate": 9.966439540323401e-06, "loss": 1.0525, "step": 9349 }, { "epoch": 1.9007928440739987, "grad_norm": 0.1583416610956192, "learning_rate": 9.946099867792129e-06, "loss": 0.9938, "step": 9350 }, { "epoch": 1.900996137426306, "grad_norm": 0.16274034976959229, "learning_rate": 9.925760195260857e-06, "loss": 1.0139, "step": 9351 }, { "epoch": 1.9011994307786135, "grad_norm": 0.1580599546432495, "learning_rate": 9.905420522729585e-06, "loss": 0.9478, "step": 9352 }, { "epoch": 1.901402724130921, "grad_norm": 0.167547807097435, "learning_rate": 9.885080850198313e-06, "loss": 0.9575, "step": 9353 }, { "epoch": 1.9016060174832283, "grad_norm": 0.17265664041042328, "learning_rate": 9.86474117766704e-06, "loss": 1.2307, "step": 9354 }, { "epoch": 1.9018093108355356, "grad_norm": 0.15563230216503143, "learning_rate": 9.844401505135768e-06, "loss": 0.8814, "step": 9355 }, { "epoch": 1.902012604187843, "grad_norm": 0.17064541578292847, "learning_rate": 9.824061832604496e-06, "loss": 1.1182, "step": 9356 }, { "epoch": 1.9022158975401504, "grad_norm": 0.14311741292476654, "learning_rate": 9.803722160073222e-06, "loss": 0.8657, "step": 9357 }, { "epoch": 1.902419190892458, "grad_norm": 0.17543523013591766, "learning_rate": 9.78338248754195e-06, "loss": 1.0129, "step": 9358 }, { "epoch": 1.9026224842447652, "grad_norm": 0.1519622951745987, "learning_rate": 9.763042815010678e-06, "loss": 0.8612, "step": 9359 }, { "epoch": 1.9028257775970725, "grad_norm": 0.1456150859594345, "learning_rate": 9.742703142479406e-06, "loss": 0.9171, "step": 9360 }, { "epoch": 1.90302907094938, "grad_norm": 0.1435515433549881, "learning_rate": 9.722363469948134e-06, "loss": 0.9186, "step": 9361 }, { "epoch": 1.9032323643016873, "grad_norm": 0.14047978818416595, "learning_rate": 9.702023797416862e-06, "loss": 1.0005, "step": 9362 }, { "epoch": 1.9034356576539948, "grad_norm": 0.13906230032444, "learning_rate": 9.68168412488559e-06, "loss": 0.9253, "step": 9363 }, { "epoch": 1.903638951006302, "grad_norm": 0.1536070853471756, "learning_rate": 9.661344452354317e-06, "loss": 0.9884, "step": 9364 }, { "epoch": 1.9038422443586094, "grad_norm": 0.16738420724868774, "learning_rate": 9.641004779823045e-06, "loss": 1.0671, "step": 9365 }, { "epoch": 1.904045537710917, "grad_norm": 0.14910069108009338, "learning_rate": 9.620665107291773e-06, "loss": 1.0673, "step": 9366 }, { "epoch": 1.9042488310632242, "grad_norm": 0.17034853994846344, "learning_rate": 9.600325434760501e-06, "loss": 0.9899, "step": 9367 }, { "epoch": 1.9044521244155317, "grad_norm": 0.14666365087032318, "learning_rate": 9.579985762229229e-06, "loss": 0.9937, "step": 9368 }, { "epoch": 1.904655417767839, "grad_norm": 0.18429192900657654, "learning_rate": 9.559646089697957e-06, "loss": 0.9585, "step": 9369 }, { "epoch": 1.9048587111201463, "grad_norm": 0.1640387326478958, "learning_rate": 9.539306417166684e-06, "loss": 0.8624, "step": 9370 }, { "epoch": 1.9050620044724538, "grad_norm": 0.17350825667381287, "learning_rate": 9.518966744635412e-06, "loss": 1.1242, "step": 9371 }, { "epoch": 1.9052652978247613, "grad_norm": 0.1487387865781784, "learning_rate": 9.49862707210414e-06, "loss": 0.8233, "step": 9372 }, { "epoch": 1.9054685911770686, "grad_norm": 0.16694356501102448, "learning_rate": 9.478287399572868e-06, "loss": 0.9576, "step": 9373 }, { "epoch": 1.9056718845293759, "grad_norm": 0.16773739457130432, "learning_rate": 9.457947727041596e-06, "loss": 1.1013, "step": 9374 }, { "epoch": 1.9058751778816831, "grad_norm": 0.15956096351146698, "learning_rate": 9.437608054510324e-06, "loss": 0.9642, "step": 9375 }, { "epoch": 1.9060784712339907, "grad_norm": 0.1490715593099594, "learning_rate": 9.417268381979051e-06, "loss": 0.8804, "step": 9376 }, { "epoch": 1.9062817645862982, "grad_norm": 0.18091818690299988, "learning_rate": 9.39692870944778e-06, "loss": 1.2239, "step": 9377 }, { "epoch": 1.9064850579386055, "grad_norm": 0.15678752958774567, "learning_rate": 9.376589036916506e-06, "loss": 0.9122, "step": 9378 }, { "epoch": 1.9066883512909127, "grad_norm": 0.18459545075893402, "learning_rate": 9.356249364385233e-06, "loss": 1.252, "step": 9379 }, { "epoch": 1.90689164464322, "grad_norm": 0.1801295429468155, "learning_rate": 9.335909691853961e-06, "loss": 1.2248, "step": 9380 }, { "epoch": 1.9070949379955275, "grad_norm": 0.16494908928871155, "learning_rate": 9.315570019322689e-06, "loss": 1.0764, "step": 9381 }, { "epoch": 1.907298231347835, "grad_norm": 0.1461213231086731, "learning_rate": 9.295230346791417e-06, "loss": 0.9669, "step": 9382 }, { "epoch": 1.9075015247001423, "grad_norm": 0.1623806357383728, "learning_rate": 9.274890674260145e-06, "loss": 1.044, "step": 9383 }, { "epoch": 1.9077048180524496, "grad_norm": 0.1630796492099762, "learning_rate": 9.254551001728873e-06, "loss": 1.1293, "step": 9384 }, { "epoch": 1.907908111404757, "grad_norm": 0.15207818150520325, "learning_rate": 9.2342113291976e-06, "loss": 1.0695, "step": 9385 }, { "epoch": 1.9081114047570644, "grad_norm": 0.18577249348163605, "learning_rate": 9.213871656666328e-06, "loss": 1.286, "step": 9386 }, { "epoch": 1.908314698109372, "grad_norm": 0.1684713214635849, "learning_rate": 9.193531984135056e-06, "loss": 1.0222, "step": 9387 }, { "epoch": 1.9085179914616792, "grad_norm": 0.1702156364917755, "learning_rate": 9.173192311603782e-06, "loss": 1.0436, "step": 9388 }, { "epoch": 1.9087212848139865, "grad_norm": 0.16724956035614014, "learning_rate": 9.15285263907251e-06, "loss": 1.0927, "step": 9389 }, { "epoch": 1.908924578166294, "grad_norm": 0.15122951567173004, "learning_rate": 9.132512966541238e-06, "loss": 0.9047, "step": 9390 }, { "epoch": 1.9091278715186013, "grad_norm": 0.159726083278656, "learning_rate": 9.112173294009966e-06, "loss": 0.9561, "step": 9391 }, { "epoch": 1.9093311648709088, "grad_norm": 0.16831693053245544, "learning_rate": 9.091833621478694e-06, "loss": 1.0801, "step": 9392 }, { "epoch": 1.909534458223216, "grad_norm": 0.1692102700471878, "learning_rate": 9.071493948947422e-06, "loss": 1.1376, "step": 9393 }, { "epoch": 1.9097377515755234, "grad_norm": 0.17488181591033936, "learning_rate": 9.051154276416151e-06, "loss": 1.1418, "step": 9394 }, { "epoch": 1.909941044927831, "grad_norm": 0.16990408301353455, "learning_rate": 9.030814603884879e-06, "loss": 1.1285, "step": 9395 }, { "epoch": 1.9101443382801382, "grad_norm": 0.16019228100776672, "learning_rate": 9.010474931353607e-06, "loss": 1.0915, "step": 9396 }, { "epoch": 1.9103476316324457, "grad_norm": 0.18147097527980804, "learning_rate": 8.990135258822335e-06, "loss": 1.2002, "step": 9397 }, { "epoch": 1.910550924984753, "grad_norm": 0.16949224472045898, "learning_rate": 8.969795586291062e-06, "loss": 1.0624, "step": 9398 }, { "epoch": 1.9107542183370603, "grad_norm": 0.17006736993789673, "learning_rate": 8.949455913759789e-06, "loss": 1.2127, "step": 9399 }, { "epoch": 1.9109575116893678, "grad_norm": 0.16276562213897705, "learning_rate": 8.929116241228516e-06, "loss": 0.9774, "step": 9400 } ], "logging_steps": 1, "max_steps": 9838, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.263015631335834e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }