EtashGuha commited on
Commit
dcbdc61
·
verified ·
1 Parent(s): b7ea43b

Training in progress, epoch 5

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0179d1c4f7f861b2464bcdd681b18f701b7c47ec9e6afa8eecfec12525a18c09
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30545f50674caa083b2149ab25a4084051608722c4bd7e02fe8e1dcaee544cb5
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:481d642d6fa9e361d3e2601dcd7d26f30dbaffee7ae4e1b71d118e3713fa4f13
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80d35ddefecd1455c641c86e938c7576a7e7129cbeefa76a0b65d82415c22d9d
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:575173684f7499edbffbf98d806032d47f8eb1b12a2264e69062729669c4c561
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e12fcea60006b74344880e5debea69de9b5c52661144c9669ba69595cb208d3e
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2b68bbf1939a4024d3e530a174e0b3f3451e4c8ceed84078c4973d2361e06d2
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:058b16763dcd4a4726c423a75784a7732c0bcf0da3f1d925f6b72c135fc4b03c
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -327,3 +327,84 @@
327
  {"current_steps": 327, "total_steps": 567, "loss": 0.0071, "lr": 9.07731640536698e-06, "epoch": 4.037037037037037, "percentage": 57.67, "elapsed_time": "2:32:17", "remaining_time": "1:51:46"}
328
  {"current_steps": 328, "total_steps": 567, "loss": 0.0034, "lr": 9.015997217203574e-06, "epoch": 4.049382716049383, "percentage": 57.85, "elapsed_time": "2:32:48", "remaining_time": "1:51:20"}
329
  {"current_steps": 329, "total_steps": 567, "loss": 0.0063, "lr": 8.954715367323468e-06, "epoch": 4.061728395061729, "percentage": 58.02, "elapsed_time": "2:33:16", "remaining_time": "1:50:53"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  {"current_steps": 327, "total_steps": 567, "loss": 0.0071, "lr": 9.07731640536698e-06, "epoch": 4.037037037037037, "percentage": 57.67, "elapsed_time": "2:32:17", "remaining_time": "1:51:46"}
328
  {"current_steps": 328, "total_steps": 567, "loss": 0.0034, "lr": 9.015997217203574e-06, "epoch": 4.049382716049383, "percentage": 57.85, "elapsed_time": "2:32:48", "remaining_time": "1:51:20"}
329
  {"current_steps": 329, "total_steps": 567, "loss": 0.0063, "lr": 8.954715367323468e-06, "epoch": 4.061728395061729, "percentage": 58.02, "elapsed_time": "2:33:16", "remaining_time": "1:50:53"}
330
+ {"current_steps": 330, "total_steps": 567, "loss": 0.0047, "lr": 8.893473181084993e-06, "epoch": 4.074074074074074, "percentage": 58.2, "elapsed_time": "2:33:46", "remaining_time": "1:50:26"}
331
+ {"current_steps": 331, "total_steps": 567, "loss": 0.0086, "lr": 8.83227298234144e-06, "epoch": 4.08641975308642, "percentage": 58.38, "elapsed_time": "2:34:17", "remaining_time": "1:50:00"}
332
+ {"current_steps": 332, "total_steps": 567, "loss": 0.0051, "lr": 8.771117093352861e-06, "epoch": 4.098765432098766, "percentage": 58.55, "elapsed_time": "2:34:44", "remaining_time": "1:49:31"}
333
+ {"current_steps": 333, "total_steps": 567, "loss": 0.0121, "lr": 8.71000783469797e-06, "epoch": 4.111111111111111, "percentage": 58.73, "elapsed_time": "2:35:10", "remaining_time": "1:49:02"}
334
+ {"current_steps": 334, "total_steps": 567, "loss": 0.005, "lr": 8.648947525186073e-06, "epoch": 4.1234567901234565, "percentage": 58.91, "elapsed_time": "2:35:35", "remaining_time": "1:48:32"}
335
+ {"current_steps": 335, "total_steps": 567, "loss": 0.0058, "lr": 8.58793848176909e-06, "epoch": 4.135802469135802, "percentage": 59.08, "elapsed_time": "2:36:02", "remaining_time": "1:48:03"}
336
+ {"current_steps": 336, "total_steps": 567, "loss": 0.0041, "lr": 8.526983019453624e-06, "epoch": 4.148148148148148, "percentage": 59.26, "elapsed_time": "2:36:29", "remaining_time": "1:47:35"}
337
+ {"current_steps": 337, "total_steps": 567, "loss": 0.0055, "lr": 8.466083451213145e-06, "epoch": 4.160493827160494, "percentage": 59.44, "elapsed_time": "2:36:55", "remaining_time": "1:47:06"}
338
+ {"current_steps": 338, "total_steps": 567, "loss": 0.0031, "lr": 8.405242087900192e-06, "epoch": 4.172839506172839, "percentage": 59.61, "elapsed_time": "2:37:21", "remaining_time": "1:46:37"}
339
+ {"current_steps": 339, "total_steps": 567, "loss": 0.0105, "lr": 8.3444612381587e-06, "epoch": 4.185185185185185, "percentage": 59.79, "elapsed_time": "2:37:48", "remaining_time": "1:46:08"}
340
+ {"current_steps": 340, "total_steps": 567, "loss": 0.0055, "lr": 8.283743208336403e-06, "epoch": 4.197530864197531, "percentage": 59.96, "elapsed_time": "2:38:18", "remaining_time": "1:45:41"}
341
+ {"current_steps": 341, "total_steps": 567, "loss": 0.006, "lr": 8.223090302397313e-06, "epoch": 4.209876543209877, "percentage": 60.14, "elapsed_time": "2:38:48", "remaining_time": "1:45:14"}
342
+ {"current_steps": 342, "total_steps": 567, "loss": 0.0047, "lr": 8.162504821834296e-06, "epoch": 4.222222222222222, "percentage": 60.32, "elapsed_time": "2:39:15", "remaining_time": "1:44:46"}
343
+ {"current_steps": 343, "total_steps": 567, "loss": 0.0068, "lr": 8.101989065581742e-06, "epoch": 4.234567901234568, "percentage": 60.49, "elapsed_time": "2:39:45", "remaining_time": "1:44:19"}
344
+ {"current_steps": 344, "total_steps": 567, "loss": 0.0123, "lr": 8.041545329928332e-06, "epoch": 4.246913580246914, "percentage": 60.67, "elapsed_time": "2:40:06", "remaining_time": "1:43:47"}
345
+ {"current_steps": 345, "total_steps": 567, "loss": 0.0097, "lr": 7.9811759084299e-06, "epoch": 4.2592592592592595, "percentage": 60.85, "elapsed_time": "2:40:34", "remaining_time": "1:43:19"}
346
+ {"current_steps": 346, "total_steps": 567, "loss": 0.0087, "lr": 7.92088309182241e-06, "epoch": 4.271604938271605, "percentage": 61.02, "elapsed_time": "2:41:00", "remaining_time": "1:42:50"}
347
+ {"current_steps": 347, "total_steps": 567, "loss": 0.0032, "lr": 7.860669167935028e-06, "epoch": 4.283950617283951, "percentage": 61.2, "elapsed_time": "2:41:23", "remaining_time": "1:42:19"}
348
+ {"current_steps": 348, "total_steps": 567, "loss": 0.008, "lr": 7.800536421603317e-06, "epoch": 4.296296296296296, "percentage": 61.38, "elapsed_time": "2:41:54", "remaining_time": "1:41:53"}
349
+ {"current_steps": 349, "total_steps": 567, "loss": 0.0059, "lr": 7.740487134582527e-06, "epoch": 4.308641975308642, "percentage": 61.55, "elapsed_time": "2:42:22", "remaining_time": "1:41:25"}
350
+ {"current_steps": 350, "total_steps": 567, "loss": 0.0062, "lr": 7.680523585461021e-06, "epoch": 4.320987654320987, "percentage": 61.73, "elapsed_time": "2:42:45", "remaining_time": "1:40:54"}
351
+ {"current_steps": 351, "total_steps": 567, "loss": 0.0033, "lr": 7.620648049573815e-06, "epoch": 4.333333333333333, "percentage": 61.9, "elapsed_time": "2:43:10", "remaining_time": "1:40:24"}
352
+ {"current_steps": 352, "total_steps": 567, "loss": 0.0066, "lr": 7.560862798916229e-06, "epoch": 4.345679012345679, "percentage": 62.08, "elapsed_time": "2:43:31", "remaining_time": "1:39:52"}
353
+ {"current_steps": 353, "total_steps": 567, "loss": 0.0033, "lr": 7.501170102057691e-06, "epoch": 4.3580246913580245, "percentage": 62.26, "elapsed_time": "2:43:57", "remaining_time": "1:39:23"}
354
+ {"current_steps": 354, "total_steps": 567, "loss": 0.0021, "lr": 7.441572224055644e-06, "epoch": 4.37037037037037, "percentage": 62.43, "elapsed_time": "2:44:22", "remaining_time": "1:38:54"}
355
+ {"current_steps": 355, "total_steps": 567, "loss": 0.0064, "lr": 7.382071426369597e-06, "epoch": 4.382716049382716, "percentage": 62.61, "elapsed_time": "2:44:51", "remaining_time": "1:38:27"}
356
+ {"current_steps": 356, "total_steps": 567, "loss": 0.0095, "lr": 7.322669966775321e-06, "epoch": 4.395061728395062, "percentage": 62.79, "elapsed_time": "2:45:23", "remaining_time": "1:38:01"}
357
+ {"current_steps": 357, "total_steps": 567, "loss": 0.0027, "lr": 7.263370099279173e-06, "epoch": 4.407407407407407, "percentage": 62.96, "elapsed_time": "2:45:45", "remaining_time": "1:37:30"}
358
+ {"current_steps": 358, "total_steps": 567, "loss": 0.0066, "lr": 7.204174074032562e-06, "epoch": 4.419753086419753, "percentage": 63.14, "elapsed_time": "2:46:12", "remaining_time": "1:37:01"}
359
+ {"current_steps": 359, "total_steps": 567, "loss": 0.0056, "lr": 7.1450841372465806e-06, "epoch": 4.432098765432099, "percentage": 63.32, "elapsed_time": "2:46:39", "remaining_time": "1:36:33"}
360
+ {"current_steps": 360, "total_steps": 567, "loss": 0.0031, "lr": 7.086102531106755e-06, "epoch": 4.444444444444445, "percentage": 63.49, "elapsed_time": "2:47:07", "remaining_time": "1:36:05"}
361
+ {"current_steps": 361, "total_steps": 567, "loss": 0.0058, "lr": 7.027231493687974e-06, "epoch": 4.45679012345679, "percentage": 63.67, "elapsed_time": "2:47:34", "remaining_time": "1:35:37"}
362
+ {"current_steps": 362, "total_steps": 567, "loss": 0.0055, "lr": 6.968473258869566e-06, "epoch": 4.469135802469136, "percentage": 63.84, "elapsed_time": "2:48:03", "remaining_time": "1:35:10"}
363
+ {"current_steps": 363, "total_steps": 567, "loss": 0.0061, "lr": 6.909830056250527e-06, "epoch": 4.481481481481482, "percentage": 64.02, "elapsed_time": "2:48:29", "remaining_time": "1:34:41"}
364
+ {"current_steps": 364, "total_steps": 567, "loss": 0.004, "lr": 6.851304111064923e-06, "epoch": 4.493827160493828, "percentage": 64.2, "elapsed_time": "2:48:59", "remaining_time": "1:34:14"}
365
+ {"current_steps": 365, "total_steps": 567, "loss": 0.0064, "lr": 6.7928976440974504e-06, "epoch": 4.506172839506172, "percentage": 64.37, "elapsed_time": "2:49:28", "remaining_time": "1:33:47"}
366
+ {"current_steps": 366, "total_steps": 567, "loss": 0.0047, "lr": 6.734612871599169e-06, "epoch": 4.518518518518518, "percentage": 64.55, "elapsed_time": "2:49:56", "remaining_time": "1:33:19"}
367
+ {"current_steps": 367, "total_steps": 567, "loss": 0.0071, "lr": 6.6764520052034054e-06, "epoch": 4.530864197530864, "percentage": 64.73, "elapsed_time": "2:50:22", "remaining_time": "1:32:51"}
368
+ {"current_steps": 368, "total_steps": 567, "loss": 0.0052, "lr": 6.618417251841829e-06, "epoch": 4.54320987654321, "percentage": 64.9, "elapsed_time": "2:50:52", "remaining_time": "1:32:24"}
369
+ {"current_steps": 369, "total_steps": 567, "loss": 0.01, "lr": 6.560510813660719e-06, "epoch": 4.555555555555555, "percentage": 65.08, "elapsed_time": "2:51:18", "remaining_time": "1:31:55"}
370
+ {"current_steps": 370, "total_steps": 567, "loss": 0.0091, "lr": 6.502734887937389e-06, "epoch": 4.567901234567901, "percentage": 65.26, "elapsed_time": "2:51:42", "remaining_time": "1:31:25"}
371
+ {"current_steps": 371, "total_steps": 567, "loss": 0.0098, "lr": 6.44509166699682e-06, "epoch": 4.580246913580247, "percentage": 65.43, "elapsed_time": "2:52:08", "remaining_time": "1:30:56"}
372
+ {"current_steps": 372, "total_steps": 567, "loss": 0.0035, "lr": 6.387583338128471e-06, "epoch": 4.592592592592593, "percentage": 65.61, "elapsed_time": "2:52:38", "remaining_time": "1:30:29"}
373
+ {"current_steps": 373, "total_steps": 567, "loss": 0.0064, "lr": 6.33021208350328e-06, "epoch": 4.604938271604938, "percentage": 65.78, "elapsed_time": "2:53:06", "remaining_time": "1:30:02"}
374
+ {"current_steps": 374, "total_steps": 567, "loss": 0.0054, "lr": 6.27298008009086e-06, "epoch": 4.617283950617284, "percentage": 65.96, "elapsed_time": "2:53:34", "remaining_time": "1:29:34"}
375
+ {"current_steps": 375, "total_steps": 567, "loss": 0.0074, "lr": 6.215889499576898e-06, "epoch": 4.62962962962963, "percentage": 66.14, "elapsed_time": "2:53:58", "remaining_time": "1:29:04"}
376
+ {"current_steps": 376, "total_steps": 567, "loss": 0.0069, "lr": 6.158942508280743e-06, "epoch": 4.6419753086419755, "percentage": 66.31, "elapsed_time": "2:54:19", "remaining_time": "1:28:33"}
377
+ {"current_steps": 377, "total_steps": 567, "loss": 0.0077, "lr": 6.102141267073207e-06, "epoch": 4.654320987654321, "percentage": 66.49, "elapsed_time": "2:54:48", "remaining_time": "1:28:05"}
378
+ {"current_steps": 378, "total_steps": 567, "loss": 0.0078, "lr": 6.0454879312945755e-06, "epoch": 4.666666666666667, "percentage": 66.67, "elapsed_time": "2:55:14", "remaining_time": "1:27:37"}
379
+ {"current_steps": 379, "total_steps": 567, "loss": 0.0034, "lr": 5.988984650672813e-06, "epoch": 4.679012345679013, "percentage": 66.84, "elapsed_time": "2:55:39", "remaining_time": "1:27:07"}
380
+ {"current_steps": 380, "total_steps": 567, "loss": 0.0098, "lr": 5.932633569242e-06, "epoch": 4.6913580246913575, "percentage": 67.02, "elapsed_time": "2:56:07", "remaining_time": "1:26:40"}
381
+ {"current_steps": 381, "total_steps": 567, "loss": 0.0041, "lr": 5.876436825260967e-06, "epoch": 4.703703703703704, "percentage": 67.2, "elapsed_time": "2:56:29", "remaining_time": "1:26:09"}
382
+ {"current_steps": 382, "total_steps": 567, "loss": 0.0058, "lr": 5.82039655113217e-06, "epoch": 4.716049382716049, "percentage": 67.37, "elapsed_time": "2:56:54", "remaining_time": "1:25:40"}
383
+ {"current_steps": 383, "total_steps": 567, "loss": 0.0069, "lr": 5.764514873320761e-06, "epoch": 4.728395061728395, "percentage": 67.55, "elapsed_time": "2:57:19", "remaining_time": "1:25:11"}
384
+ {"current_steps": 384, "total_steps": 567, "loss": 0.003, "lr": 5.708793912273911e-06, "epoch": 4.7407407407407405, "percentage": 67.72, "elapsed_time": "2:57:40", "remaining_time": "1:24:40"}
385
+ {"current_steps": 385, "total_steps": 567, "loss": 0.0048, "lr": 5.653235782340351e-06, "epoch": 4.753086419753086, "percentage": 67.9, "elapsed_time": "2:58:05", "remaining_time": "1:24:11"}
386
+ {"current_steps": 386, "total_steps": 567, "loss": 0.0029, "lr": 5.597842591690128e-06, "epoch": 4.765432098765432, "percentage": 68.08, "elapsed_time": "2:58:31", "remaining_time": "1:23:42"}
387
+ {"current_steps": 387, "total_steps": 567, "loss": 0.0052, "lr": 5.542616442234618e-06, "epoch": 4.777777777777778, "percentage": 68.25, "elapsed_time": "2:58:59", "remaining_time": "1:23:15"}
388
+ {"current_steps": 388, "total_steps": 567, "loss": 0.003, "lr": 5.487559429546772e-06, "epoch": 4.790123456790123, "percentage": 68.43, "elapsed_time": "2:59:22", "remaining_time": "1:22:45"}
389
+ {"current_steps": 389, "total_steps": 567, "loss": 0.0056, "lr": 5.432673642781595e-06, "epoch": 4.802469135802469, "percentage": 68.61, "elapsed_time": "2:59:44", "remaining_time": "1:22:14"}
390
+ {"current_steps": 390, "total_steps": 567, "loss": 0.0035, "lr": 5.3779611645968696e-06, "epoch": 4.814814814814815, "percentage": 68.78, "elapsed_time": "3:00:08", "remaining_time": "1:21:45"}
391
+ {"current_steps": 391, "total_steps": 567, "loss": 0.003, "lr": 5.3234240710741335e-06, "epoch": 4.827160493827161, "percentage": 68.96, "elapsed_time": "3:00:34", "remaining_time": "1:21:17"}
392
+ {"current_steps": 392, "total_steps": 567, "loss": 0.0058, "lr": 5.269064431639901e-06, "epoch": 4.839506172839506, "percentage": 69.14, "elapsed_time": "3:00:55", "remaining_time": "1:20:45"}
393
+ {"current_steps": 393, "total_steps": 567, "loss": 0.0046, "lr": 5.214884308987136e-06, "epoch": 4.851851851851852, "percentage": 69.31, "elapsed_time": "3:01:17", "remaining_time": "1:20:15"}
394
+ {"current_steps": 394, "total_steps": 567, "loss": 0.0033, "lr": 5.160885758996985e-06, "epoch": 4.864197530864198, "percentage": 69.49, "elapsed_time": "3:01:40", "remaining_time": "1:19:46"}
395
+ {"current_steps": 395, "total_steps": 567, "loss": 0.0033, "lr": 5.107070830660765e-06, "epoch": 4.8765432098765435, "percentage": 69.66, "elapsed_time": "3:02:08", "remaining_time": "1:19:18"}
396
+ {"current_steps": 396, "total_steps": 567, "loss": 0.0027, "lr": 5.053441566002214e-06, "epoch": 4.888888888888889, "percentage": 69.84, "elapsed_time": "3:02:37", "remaining_time": "1:18:51"}
397
+ {"current_steps": 397, "total_steps": 567, "loss": 0.0081, "lr": 5.000000000000003e-06, "epoch": 4.901234567901234, "percentage": 70.02, "elapsed_time": "3:03:02", "remaining_time": "1:18:22"}
398
+ {"current_steps": 398, "total_steps": 567, "loss": 0.0026, "lr": 4.946748160510522e-06, "epoch": 4.91358024691358, "percentage": 70.19, "elapsed_time": "3:03:26", "remaining_time": "1:17:53"}
399
+ {"current_steps": 399, "total_steps": 567, "loss": 0.011, "lr": 4.893688068190933e-06, "epoch": 4.925925925925926, "percentage": 70.37, "elapsed_time": "3:03:55", "remaining_time": "1:17:26"}
400
+ {"current_steps": 400, "total_steps": 567, "loss": 0.0135, "lr": 4.8408217364224886e-06, "epoch": 4.938271604938271, "percentage": 70.55, "elapsed_time": "3:04:18", "remaining_time": "1:16:57"}
401
+ {"current_steps": 401, "total_steps": 567, "loss": 0.0027, "lr": 4.788151171234149e-06, "epoch": 4.950617283950617, "percentage": 70.72, "elapsed_time": "3:04:39", "remaining_time": "1:16:26"}
402
+ {"current_steps": 402, "total_steps": 567, "loss": 0.0076, "lr": 4.7356783712264405e-06, "epoch": 4.962962962962963, "percentage": 70.9, "elapsed_time": "3:05:04", "remaining_time": "1:15:57"}
403
+ {"current_steps": 403, "total_steps": 567, "loss": 0.0039, "lr": 4.683405327495638e-06, "epoch": 4.9753086419753085, "percentage": 71.08, "elapsed_time": "3:05:30", "remaining_time": "1:15:29"}
404
+ {"current_steps": 404, "total_steps": 567, "loss": 0.0036, "lr": 4.631334023558199e-06, "epoch": 4.987654320987654, "percentage": 71.25, "elapsed_time": "3:06:02", "remaining_time": "1:15:03"}
405
+ {"current_steps": 405, "total_steps": 567, "loss": 0.0044, "lr": 4.579466435275506e-06, "epoch": 5.0, "percentage": 71.43, "elapsed_time": "3:06:23", "remaining_time": "1:14:33"}
406
+ {"current_steps": 406, "total_steps": 567, "loss": 0.0029, "lr": 4.5278045307788885e-06, "epoch": 5.012345679012346, "percentage": 71.6, "elapsed_time": "3:08:11", "remaining_time": "1:14:37"}
407
+ {"current_steps": 407, "total_steps": 567, "loss": 0.0017, "lr": 4.476350270394942e-06, "epoch": 5.0246913580246915, "percentage": 71.78, "elapsed_time": "3:08:37", "remaining_time": "1:14:09"}
408
+ {"current_steps": 408, "total_steps": 567, "loss": 0.0035, "lr": 4.425105606571145e-06, "epoch": 5.037037037037037, "percentage": 71.96, "elapsed_time": "3:09:01", "remaining_time": "1:13:39"}
409
+ {"current_steps": 409, "total_steps": 567, "loss": 0.0026, "lr": 4.374072483801769e-06, "epoch": 5.049382716049383, "percentage": 72.13, "elapsed_time": "3:09:25", "remaining_time": "1:13:10"}
410
+ {"current_steps": 410, "total_steps": 567, "loss": 0.0024, "lr": 4.323252838554099e-06, "epoch": 5.061728395061729, "percentage": 72.31, "elapsed_time": "3:09:51", "remaining_time": "1:12:42"}