EtashGuha commited on
Commit
e1d4d7b
·
verified ·
1 Parent(s): d75950f

Training in progress, epoch 3

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:538ec9d81340d05ce6a66864f86a2b816c37a9b9796db5b60de2a05a80b4dc5e
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4427d05c713a5b37fece43f7dc431d274ab764284676323f967f161ec7c5bc8d
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37abc76ab95c9f3e382f567fa172acbd1552bc6d68f0a8b780af2bdd3fca4faf
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bed264047172500bf6b09cc2398a24a8e7772915e3ba24ffdbfe05e3ac1bef91
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e588837b2711cfc4dd1f8aaacb2f1cd533d0583d14965056312426d60c9b13d2
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7648d2578a03a8f889f3826ab8a0a9817141082a1c99b7060c4610cc937df1f9
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:248c79fa2095aa1c42bb06de5789e2f4668be77ec48eabcede9775a5d3b787e9
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3e4459a0151d2734d0a59ef0c353b56ca118d4159fead159d77c97c21ca9c1f
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -165,3 +165,84 @@
165
  {"current_steps": 165, "total_steps": 567, "loss": 0.0203, "lr": 1.7867449380334834e-05, "epoch": 2.037037037037037, "percentage": 29.1, "elapsed_time": "1:16:52", "remaining_time": "3:07:17"}
166
  {"current_steps": 166, "total_steps": 567, "loss": 0.0186, "lr": 1.782927610492103e-05, "epoch": 2.049382716049383, "percentage": 29.28, "elapsed_time": "1:17:18", "remaining_time": "3:06:45"}
167
  {"current_steps": 167, "total_steps": 567, "loss": 0.02, "lr": 1.7790805745256703e-05, "epoch": 2.0617283950617282, "percentage": 29.45, "elapsed_time": "1:17:47", "remaining_time": "3:06:19"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  {"current_steps": 165, "total_steps": 567, "loss": 0.0203, "lr": 1.7867449380334834e-05, "epoch": 2.037037037037037, "percentage": 29.1, "elapsed_time": "1:16:52", "remaining_time": "3:07:17"}
166
  {"current_steps": 166, "total_steps": 567, "loss": 0.0186, "lr": 1.782927610492103e-05, "epoch": 2.049382716049383, "percentage": 29.28, "elapsed_time": "1:17:18", "remaining_time": "3:06:45"}
167
  {"current_steps": 167, "total_steps": 567, "loss": 0.02, "lr": 1.7790805745256703e-05, "epoch": 2.0617283950617282, "percentage": 29.45, "elapsed_time": "1:17:47", "remaining_time": "3:06:19"}
168
+ {"current_steps": 168, "total_steps": 567, "loss": 0.0206, "lr": 1.77520397611113e-05, "epoch": 2.074074074074074, "percentage": 29.63, "elapsed_time": "1:18:18", "remaining_time": "3:05:59"}
169
+ {"current_steps": 169, "total_steps": 567, "loss": 0.021, "lr": 1.771297962347181e-05, "epoch": 2.0864197530864197, "percentage": 29.81, "elapsed_time": "1:18:48", "remaining_time": "3:05:36"}
170
+ {"current_steps": 170, "total_steps": 567, "loss": 0.0223, "lr": 1.767362681448697e-05, "epoch": 2.0987654320987654, "percentage": 29.98, "elapsed_time": "1:19:16", "remaining_time": "3:05:08"}
171
+ {"current_steps": 171, "total_steps": 567, "loss": 0.015, "lr": 1.763398282741103e-05, "epoch": 2.111111111111111, "percentage": 30.16, "elapsed_time": "1:19:39", "remaining_time": "3:04:29"}
172
+ {"current_steps": 172, "total_steps": 567, "loss": 0.0152, "lr": 1.7594049166547073e-05, "epoch": 2.123456790123457, "percentage": 30.34, "elapsed_time": "1:20:05", "remaining_time": "3:03:56"}
173
+ {"current_steps": 173, "total_steps": 567, "loss": 0.0218, "lr": 1.7553827347189937e-05, "epoch": 2.1358024691358026, "percentage": 30.51, "elapsed_time": "1:20:24", "remaining_time": "3:03:08"}
174
+ {"current_steps": 174, "total_steps": 567, "loss": 0.0186, "lr": 1.7513318895568734e-05, "epoch": 2.148148148148148, "percentage": 30.69, "elapsed_time": "1:20:50", "remaining_time": "3:02:36"}
175
+ {"current_steps": 175, "total_steps": 567, "loss": 0.0244, "lr": 1.747252534878891e-05, "epoch": 2.1604938271604937, "percentage": 30.86, "elapsed_time": "1:21:19", "remaining_time": "3:02:09"}
176
+ {"current_steps": 176, "total_steps": 567, "loss": 0.0247, "lr": 1.7431448254773943e-05, "epoch": 2.1728395061728394, "percentage": 31.04, "elapsed_time": "1:21:44", "remaining_time": "3:01:34"}
177
+ {"current_steps": 177, "total_steps": 567, "loss": 0.0204, "lr": 1.7390089172206594e-05, "epoch": 2.185185185185185, "percentage": 31.22, "elapsed_time": "1:22:08", "remaining_time": "3:00:59"}
178
+ {"current_steps": 178, "total_steps": 567, "loss": 0.0247, "lr": 1.7348449670469758e-05, "epoch": 2.197530864197531, "percentage": 31.39, "elapsed_time": "1:22:37", "remaining_time": "3:00:34"}
179
+ {"current_steps": 179, "total_steps": 567, "loss": 0.0212, "lr": 1.7306531329586933e-05, "epoch": 2.2098765432098766, "percentage": 31.57, "elapsed_time": "1:23:02", "remaining_time": "3:00:01"}
180
+ {"current_steps": 180, "total_steps": 567, "loss": 0.0141, "lr": 1.7264335740162244e-05, "epoch": 2.2222222222222223, "percentage": 31.75, "elapsed_time": "1:23:29", "remaining_time": "2:59:29"}
181
+ {"current_steps": 181, "total_steps": 567, "loss": 0.0176, "lr": 1.7221864503320093e-05, "epoch": 2.234567901234568, "percentage": 31.92, "elapsed_time": "1:23:59", "remaining_time": "2:59:07"}
182
+ {"current_steps": 182, "total_steps": 567, "loss": 0.0195, "lr": 1.717911923064442e-05, "epoch": 2.246913580246914, "percentage": 32.1, "elapsed_time": "1:24:24", "remaining_time": "2:58:32"}
183
+ {"current_steps": 183, "total_steps": 567, "loss": 0.0216, "lr": 1.7136101544117526e-05, "epoch": 2.259259259259259, "percentage": 32.28, "elapsed_time": "1:24:53", "remaining_time": "2:58:07"}
184
+ {"current_steps": 184, "total_steps": 567, "loss": 0.0275, "lr": 1.7092813076058536e-05, "epoch": 2.271604938271605, "percentage": 32.45, "elapsed_time": "1:25:19", "remaining_time": "2:57:36"}
185
+ {"current_steps": 185, "total_steps": 567, "loss": 0.0344, "lr": 1.7049255469061476e-05, "epoch": 2.2839506172839505, "percentage": 32.63, "elapsed_time": "1:25:46", "remaining_time": "2:57:06"}
186
+ {"current_steps": 186, "total_steps": 567, "loss": 0.0168, "lr": 1.700543037593291e-05, "epoch": 2.2962962962962963, "percentage": 32.8, "elapsed_time": "1:26:15", "remaining_time": "2:56:41"}
187
+ {"current_steps": 187, "total_steps": 567, "loss": 0.0223, "lr": 1.696133945962927e-05, "epoch": 2.308641975308642, "percentage": 32.98, "elapsed_time": "1:26:43", "remaining_time": "2:56:13"}
188
+ {"current_steps": 188, "total_steps": 567, "loss": 0.0149, "lr": 1.6916984393193704e-05, "epoch": 2.3209876543209877, "percentage": 33.16, "elapsed_time": "1:27:07", "remaining_time": "2:55:39"}
189
+ {"current_steps": 189, "total_steps": 567, "loss": 0.0278, "lr": 1.687236685969263e-05, "epoch": 2.3333333333333335, "percentage": 33.33, "elapsed_time": "1:27:39", "remaining_time": "2:55:19"}
190
+ {"current_steps": 190, "total_steps": 567, "loss": 0.0209, "lr": 1.6827488552151855e-05, "epoch": 2.3456790123456788, "percentage": 33.51, "elapsed_time": "1:28:06", "remaining_time": "2:54:49"}
191
+ {"current_steps": 191, "total_steps": 567, "loss": 0.021, "lr": 1.678235117349234e-05, "epoch": 2.3580246913580245, "percentage": 33.69, "elapsed_time": "1:28:36", "remaining_time": "2:54:25"}
192
+ {"current_steps": 192, "total_steps": 567, "loss": 0.0156, "lr": 1.6736956436465573e-05, "epoch": 2.3703703703703702, "percentage": 33.86, "elapsed_time": "1:29:02", "remaining_time": "2:53:53"}
193
+ {"current_steps": 193, "total_steps": 567, "loss": 0.0119, "lr": 1.6691306063588583e-05, "epoch": 2.382716049382716, "percentage": 34.04, "elapsed_time": "1:29:27", "remaining_time": "2:53:20"}
194
+ {"current_steps": 194, "total_steps": 567, "loss": 0.0197, "lr": 1.664540178707858e-05, "epoch": 2.3950617283950617, "percentage": 34.22, "elapsed_time": "1:29:51", "remaining_time": "2:52:46"}
195
+ {"current_steps": 195, "total_steps": 567, "loss": 0.0161, "lr": 1.659924534878723e-05, "epoch": 2.4074074074074074, "percentage": 34.39, "elapsed_time": "1:30:20", "remaining_time": "2:52:20"}
196
+ {"current_steps": 196, "total_steps": 567, "loss": 0.0082, "lr": 1.655283850013454e-05, "epoch": 2.419753086419753, "percentage": 34.57, "elapsed_time": "1:30:47", "remaining_time": "2:51:51"}
197
+ {"current_steps": 197, "total_steps": 567, "loss": 0.0154, "lr": 1.650618300204242e-05, "epoch": 2.432098765432099, "percentage": 34.74, "elapsed_time": "1:31:15", "remaining_time": "2:51:23"}
198
+ {"current_steps": 198, "total_steps": 567, "loss": 0.017, "lr": 1.6459280624867876e-05, "epoch": 2.4444444444444446, "percentage": 34.92, "elapsed_time": "1:31:42", "remaining_time": "2:50:54"}
199
+ {"current_steps": 199, "total_steps": 567, "loss": 0.0253, "lr": 1.6412133148335786e-05, "epoch": 2.45679012345679, "percentage": 35.1, "elapsed_time": "1:32:10", "remaining_time": "2:50:26"}
200
+ {"current_steps": 200, "total_steps": 567, "loss": 0.0248, "lr": 1.6364742361471416e-05, "epoch": 2.4691358024691357, "percentage": 35.27, "elapsed_time": "1:32:39", "remaining_time": "2:50:02"}
201
+ {"current_steps": 201, "total_steps": 567, "loss": 0.0219, "lr": 1.631711006253251e-05, "epoch": 2.4814814814814814, "percentage": 35.45, "elapsed_time": "1:33:04", "remaining_time": "2:49:28"}
202
+ {"current_steps": 202, "total_steps": 567, "loss": 0.0185, "lr": 1.626923805894107e-05, "epoch": 2.493827160493827, "percentage": 35.63, "elapsed_time": "1:33:28", "remaining_time": "2:48:54"}
203
+ {"current_steps": 203, "total_steps": 567, "loss": 0.0198, "lr": 1.6221128167214742e-05, "epoch": 2.506172839506173, "percentage": 35.8, "elapsed_time": "1:33:53", "remaining_time": "2:48:21"}
204
+ {"current_steps": 204, "total_steps": 567, "loss": 0.0131, "lr": 1.617278221289793e-05, "epoch": 2.5185185185185186, "percentage": 35.98, "elapsed_time": "1:34:19", "remaining_time": "2:47:51"}
205
+ {"current_steps": 205, "total_steps": 567, "loss": 0.0216, "lr": 1.61242020304925e-05, "epoch": 2.5308641975308643, "percentage": 36.16, "elapsed_time": "1:34:43", "remaining_time": "2:47:15"}
206
+ {"current_steps": 206, "total_steps": 567, "loss": 0.015, "lr": 1.607538946338817e-05, "epoch": 2.5432098765432096, "percentage": 36.33, "elapsed_time": "1:35:10", "remaining_time": "2:46:47"}
207
+ {"current_steps": 207, "total_steps": 567, "loss": 0.0284, "lr": 1.6026346363792565e-05, "epoch": 2.5555555555555554, "percentage": 36.51, "elapsed_time": "1:35:35", "remaining_time": "2:46:14"}
208
+ {"current_steps": 208, "total_steps": 567, "loss": 0.0166, "lr": 1.5977074592660936e-05, "epoch": 2.567901234567901, "percentage": 36.68, "elapsed_time": "1:36:00", "remaining_time": "2:45:43"}
209
+ {"current_steps": 209, "total_steps": 567, "loss": 0.0096, "lr": 1.592757601962555e-05, "epoch": 2.580246913580247, "percentage": 36.86, "elapsed_time": "1:36:28", "remaining_time": "2:45:15"}
210
+ {"current_steps": 210, "total_steps": 567, "loss": 0.0191, "lr": 1.5877852522924733e-05, "epoch": 2.5925925925925926, "percentage": 37.04, "elapsed_time": "1:36:58", "remaining_time": "2:44:51"}
211
+ {"current_steps": 211, "total_steps": 567, "loss": 0.0265, "lr": 1.582790598933161e-05, "epoch": 2.6049382716049383, "percentage": 37.21, "elapsed_time": "1:37:26", "remaining_time": "2:44:23"}
212
+ {"current_steps": 212, "total_steps": 567, "loss": 0.0174, "lr": 1.5777738314082514e-05, "epoch": 2.617283950617284, "percentage": 37.39, "elapsed_time": "1:37:53", "remaining_time": "2:43:55"}
213
+ {"current_steps": 213, "total_steps": 567, "loss": 0.0159, "lr": 1.5727351400805054e-05, "epoch": 2.6296296296296298, "percentage": 37.57, "elapsed_time": "1:38:23", "remaining_time": "2:43:31"}
214
+ {"current_steps": 214, "total_steps": 567, "loss": 0.0236, "lr": 1.5676747161445903e-05, "epoch": 2.6419753086419755, "percentage": 37.74, "elapsed_time": "1:38:51", "remaining_time": "2:43:04"}
215
+ {"current_steps": 215, "total_steps": 567, "loss": 0.0178, "lr": 1.5625927516198235e-05, "epoch": 2.6543209876543212, "percentage": 37.92, "elapsed_time": "1:39:19", "remaining_time": "2:42:37"}
216
+ {"current_steps": 216, "total_steps": 567, "loss": 0.0111, "lr": 1.5574894393428856e-05, "epoch": 2.6666666666666665, "percentage": 38.1, "elapsed_time": "1:39:45", "remaining_time": "2:42:06"}
217
+ {"current_steps": 217, "total_steps": 567, "loss": 0.026, "lr": 1.552364972960506e-05, "epoch": 2.6790123456790123, "percentage": 38.27, "elapsed_time": "1:40:11", "remaining_time": "2:41:35"}
218
+ {"current_steps": 218, "total_steps": 567, "loss": 0.0309, "lr": 1.5472195469221115e-05, "epoch": 2.691358024691358, "percentage": 38.45, "elapsed_time": "1:40:34", "remaining_time": "2:41:00"}
219
+ {"current_steps": 219, "total_steps": 567, "loss": 0.0183, "lr": 1.5420533564724495e-05, "epoch": 2.7037037037037037, "percentage": 38.62, "elapsed_time": "1:41:01", "remaining_time": "2:40:31"}
220
+ {"current_steps": 220, "total_steps": 567, "loss": 0.0209, "lr": 1.5368665976441802e-05, "epoch": 2.7160493827160495, "percentage": 38.8, "elapsed_time": "1:41:32", "remaining_time": "2:40:08"}
221
+ {"current_steps": 221, "total_steps": 567, "loss": 0.0177, "lr": 1.5316594672504362e-05, "epoch": 2.728395061728395, "percentage": 38.98, "elapsed_time": "1:42:01", "remaining_time": "2:39:43"}
222
+ {"current_steps": 222, "total_steps": 567, "loss": 0.0115, "lr": 1.526432162877356e-05, "epoch": 2.7407407407407405, "percentage": 39.15, "elapsed_time": "1:42:33", "remaining_time": "2:39:22"}
223
+ {"current_steps": 223, "total_steps": 567, "loss": 0.034, "lr": 1.5211848828765852e-05, "epoch": 2.753086419753086, "percentage": 39.33, "elapsed_time": "1:42:54", "remaining_time": "2:38:44"}
224
+ {"current_steps": 224, "total_steps": 567, "loss": 0.0101, "lr": 1.5159178263577512e-05, "epoch": 2.765432098765432, "percentage": 39.51, "elapsed_time": "1:43:23", "remaining_time": "2:38:19"}
225
+ {"current_steps": 225, "total_steps": 567, "loss": 0.0251, "lr": 1.510631193180907e-05, "epoch": 2.7777777777777777, "percentage": 39.68, "elapsed_time": "1:43:49", "remaining_time": "2:37:48"}
226
+ {"current_steps": 226, "total_steps": 567, "loss": 0.0108, "lr": 1.5053251839489482e-05, "epoch": 2.7901234567901234, "percentage": 39.86, "elapsed_time": "1:44:19", "remaining_time": "2:37:24"}
227
+ {"current_steps": 227, "total_steps": 567, "loss": 0.0209, "lr": 1.5000000000000002e-05, "epoch": 2.802469135802469, "percentage": 40.04, "elapsed_time": "1:44:44", "remaining_time": "2:36:53"}
228
+ {"current_steps": 228, "total_steps": 567, "loss": 0.0264, "lr": 1.4946558433997792e-05, "epoch": 2.814814814814815, "percentage": 40.21, "elapsed_time": "1:45:10", "remaining_time": "2:36:23"}
229
+ {"current_steps": 229, "total_steps": 567, "loss": 0.0064, "lr": 1.4892929169339237e-05, "epoch": 2.8271604938271606, "percentage": 40.39, "elapsed_time": "1:45:32", "remaining_time": "2:35:46"}
230
+ {"current_steps": 230, "total_steps": 567, "loss": 0.0205, "lr": 1.4839114241003017e-05, "epoch": 2.8395061728395063, "percentage": 40.56, "elapsed_time": "1:45:57", "remaining_time": "2:35:14"}
231
+ {"current_steps": 231, "total_steps": 567, "loss": 0.0133, "lr": 1.4785115691012866e-05, "epoch": 2.851851851851852, "percentage": 40.74, "elapsed_time": "1:46:22", "remaining_time": "2:34:43"}
232
+ {"current_steps": 232, "total_steps": 567, "loss": 0.0258, "lr": 1.4730935568360103e-05, "epoch": 2.8641975308641974, "percentage": 40.92, "elapsed_time": "1:46:48", "remaining_time": "2:34:13"}
233
+ {"current_steps": 233, "total_steps": 567, "loss": 0.0122, "lr": 1.4676575928925869e-05, "epoch": 2.876543209876543, "percentage": 41.09, "elapsed_time": "1:47:13", "remaining_time": "2:33:42"}
234
+ {"current_steps": 234, "total_steps": 567, "loss": 0.0189, "lr": 1.4622038835403135e-05, "epoch": 2.888888888888889, "percentage": 41.27, "elapsed_time": "1:47:37", "remaining_time": "2:33:08"}
235
+ {"current_steps": 235, "total_steps": 567, "loss": 0.025, "lr": 1.4567326357218408e-05, "epoch": 2.9012345679012346, "percentage": 41.45, "elapsed_time": "1:48:07", "remaining_time": "2:32:45"}
236
+ {"current_steps": 236, "total_steps": 567, "loss": 0.0365, "lr": 1.451244057045323e-05, "epoch": 2.9135802469135803, "percentage": 41.62, "elapsed_time": "1:48:36", "remaining_time": "2:32:20"}
237
+ {"current_steps": 237, "total_steps": 567, "loss": 0.0188, "lr": 1.4457383557765385e-05, "epoch": 2.925925925925926, "percentage": 41.8, "elapsed_time": "1:49:01", "remaining_time": "2:31:48"}
238
+ {"current_steps": 238, "total_steps": 567, "loss": 0.0099, "lr": 1.4402157408309876e-05, "epoch": 2.9382716049382713, "percentage": 41.98, "elapsed_time": "1:49:29", "remaining_time": "2:31:21"}
239
+ {"current_steps": 239, "total_steps": 567, "loss": 0.0162, "lr": 1.4346764217659652e-05, "epoch": 2.950617283950617, "percentage": 42.15, "elapsed_time": "1:49:52", "remaining_time": "2:30:47"}
240
+ {"current_steps": 240, "total_steps": 567, "loss": 0.0237, "lr": 1.429120608772609e-05, "epoch": 2.962962962962963, "percentage": 42.33, "elapsed_time": "1:50:14", "remaining_time": "2:30:11"}
241
+ {"current_steps": 241, "total_steps": 567, "loss": 0.0157, "lr": 1.4235485126679244e-05, "epoch": 2.9753086419753085, "percentage": 42.5, "elapsed_time": "1:50:38", "remaining_time": "2:29:40"}
242
+ {"current_steps": 242, "total_steps": 567, "loss": 0.0298, "lr": 1.4179603448867836e-05, "epoch": 2.9876543209876543, "percentage": 42.68, "elapsed_time": "1:51:04", "remaining_time": "2:29:10"}
243
+ {"current_steps": 243, "total_steps": 567, "loss": 0.0152, "lr": 1.4123563174739036e-05, "epoch": 3.0, "percentage": 42.86, "elapsed_time": "1:51:31", "remaining_time": "2:28:42"}
244
+ {"current_steps": 244, "total_steps": 567, "loss": 0.0059, "lr": 1.4067366430758004e-05, "epoch": 3.0123456790123457, "percentage": 43.03, "elapsed_time": "1:53:52", "remaining_time": "2:30:45"}
245
+ {"current_steps": 245, "total_steps": 567, "loss": 0.0088, "lr": 1.4011015349327188e-05, "epoch": 3.0246913580246915, "percentage": 43.21, "elapsed_time": "1:54:24", "remaining_time": "2:30:21"}
246
+ {"current_steps": 246, "total_steps": 567, "loss": 0.0081, "lr": 1.3954512068705425e-05, "epoch": 3.037037037037037, "percentage": 43.39, "elapsed_time": "1:54:54", "remaining_time": "2:29:56"}
247
+ {"current_steps": 247, "total_steps": 567, "loss": 0.0071, "lr": 1.3897858732926794e-05, "epoch": 3.049382716049383, "percentage": 43.56, "elapsed_time": "1:55:22", "remaining_time": "2:29:28"}
248
+ {"current_steps": 248, "total_steps": 567, "loss": 0.0096, "lr": 1.3841057491719261e-05, "epoch": 3.0617283950617282, "percentage": 43.74, "elapsed_time": "1:55:44", "remaining_time": "2:28:53"}