reinhardh commited on
Commit
ace2ec1
·
verified ·
1 Parent(s): c2be965

Training in progress, epoch 4

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57c6db2e4d3f93feabaecdbe252dbca3af07059826f5bf7fd9ba59a9773f3d09
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44a1527bd89d31eda9fbe81f29617398f16d8f2684f9e7d0d2887eecf479c909
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c69a974cc3b59da07d31c2b0dc84f966a630be4c171bc847e667579e6e37f9d8
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:feae9d4a6a7fd57161b15d76daae07db8f0f76195ed4f1f74718c2808c85d1db
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21d442c57a3d163b039b523d8a11b2de1dfa1151cbf300886fa539dcb6a1a037
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f010a023daf504a021a074c743d422fb0a471fd344db4db50844ae1be1cd1ed
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:885e4737c38803b5b67760716a94d3d943a4008760748cf2c3783caa59c9ff01
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24adfb591fc10c10b4504a20c24e52a16e6625e5f6623ced380bcd4dbd098826
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -368,3 +368,70 @@
368
  {"current_steps": 322, "total_steps": 390, "loss": 0.1668, "lr": 3.5913314383339937e-06, "epoch": 4.1216, "percentage": 82.56, "elapsed_time": "3:27:39", "remaining_time": "0:43:51"}
369
  {"current_steps": 323, "total_steps": 390, "loss": 0.1656, "lr": 3.4896435706316e-06, "epoch": 4.1344, "percentage": 82.82, "elapsed_time": "3:28:19", "remaining_time": "0:43:12"}
370
  {"current_steps": 324, "total_steps": 390, "loss": 0.1606, "lr": 3.3892783350397675e-06, "epoch": 4.1472, "percentage": 83.08, "elapsed_time": "3:28:51", "remaining_time": "0:42:32"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368
  {"current_steps": 322, "total_steps": 390, "loss": 0.1668, "lr": 3.5913314383339937e-06, "epoch": 4.1216, "percentage": 82.56, "elapsed_time": "3:27:39", "remaining_time": "0:43:51"}
369
  {"current_steps": 323, "total_steps": 390, "loss": 0.1656, "lr": 3.4896435706316e-06, "epoch": 4.1344, "percentage": 82.82, "elapsed_time": "3:28:19", "remaining_time": "0:43:12"}
370
  {"current_steps": 324, "total_steps": 390, "loss": 0.1606, "lr": 3.3892783350397675e-06, "epoch": 4.1472, "percentage": 83.08, "elapsed_time": "3:28:51", "remaining_time": "0:42:32"}
371
+ {"current_steps": 325, "total_steps": 390, "loss": 0.1398, "lr": 3.290243771741275e-06, "epoch": 4.16, "percentage": 83.33, "elapsed_time": "3:29:20", "remaining_time": "0:41:52"}
372
+ {"current_steps": 326, "total_steps": 390, "loss": 0.1671, "lr": 3.1925478143197418e-06, "epoch": 4.1728, "percentage": 83.59, "elapsed_time": "3:30:02", "remaining_time": "0:41:14"}
373
+ {"current_steps": 327, "total_steps": 390, "loss": 0.152, "lr": 3.0961982891241083e-06, "epoch": 4.1856, "percentage": 83.85, "elapsed_time": "3:30:32", "remaining_time": "0:40:33"}
374
+ {"current_steps": 328, "total_steps": 390, "loss": 0.1762, "lr": 3.001202914641628e-06, "epoch": 4.1984, "percentage": 84.1, "elapsed_time": "3:31:13", "remaining_time": "0:39:55"}
375
+ {"current_steps": 329, "total_steps": 390, "loss": 0.1658, "lr": 2.907569300879596e-06, "epoch": 4.2112, "percentage": 84.36, "elapsed_time": "3:31:51", "remaining_time": "0:39:16"}
376
+ {"current_steps": 330, "total_steps": 390, "loss": 0.2066, "lr": 2.815304948755664e-06, "epoch": 4.224, "percentage": 84.62, "elapsed_time": "3:32:33", "remaining_time": "0:38:38"}
377
+ {"current_steps": 331, "total_steps": 390, "loss": 0.1863, "lr": 2.7244172494969978e-06, "epoch": 4.2368, "percentage": 84.87, "elapsed_time": "3:33:11", "remaining_time": "0:37:59"}
378
+ {"current_steps": 332, "total_steps": 390, "loss": 0.1946, "lr": 2.6349134840481294e-06, "epoch": 4.2496, "percentage": 85.13, "elapsed_time": "3:33:50", "remaining_time": "0:37:21"}
379
+ {"current_steps": 333, "total_steps": 390, "loss": 0.1642, "lr": 2.546800822487714e-06, "epoch": 4.2624, "percentage": 85.38, "elapsed_time": "3:34:22", "remaining_time": "0:36:41"}
380
+ {"current_steps": 334, "total_steps": 390, "loss": 0.1643, "lr": 2.4600863234541338e-06, "epoch": 4.2752, "percentage": 85.64, "elapsed_time": "3:35:02", "remaining_time": "0:36:03"}
381
+ {"current_steps": 335, "total_steps": 390, "loss": 0.1716, "lr": 2.374776933580025e-06, "epoch": 4.288, "percentage": 85.9, "elapsed_time": "3:35:42", "remaining_time": "0:35:24"}
382
+ {"current_steps": 336, "total_steps": 390, "loss": 0.1557, "lr": 2.2908794869358044e-06, "epoch": 4.3008, "percentage": 86.15, "elapsed_time": "3:36:15", "remaining_time": "0:34:45"}
383
+ {"current_steps": 337, "total_steps": 390, "loss": 0.1676, "lr": 2.2084007044821764e-06, "epoch": 4.3136, "percentage": 86.41, "elapsed_time": "3:36:54", "remaining_time": "0:34:06"}
384
+ {"current_steps": 338, "total_steps": 390, "loss": 0.1567, "lr": 2.127347193531757e-06, "epoch": 4.3264, "percentage": 86.67, "elapsed_time": "3:37:30", "remaining_time": "0:33:27"}
385
+ {"current_steps": 339, "total_steps": 390, "loss": 0.1602, "lr": 2.0477254472197237e-06, "epoch": 4.3392, "percentage": 86.92, "elapsed_time": "3:38:05", "remaining_time": "0:32:48"}
386
+ {"current_steps": 340, "total_steps": 390, "loss": 0.1774, "lr": 1.96954184398368e-06, "epoch": 4.352, "percentage": 87.18, "elapsed_time": "3:38:46", "remaining_time": "0:32:10"}
387
+ {"current_steps": 341, "total_steps": 390, "loss": 0.1687, "lr": 1.8928026470526917e-06, "epoch": 4.3648, "percentage": 87.44, "elapsed_time": "3:39:27", "remaining_time": "0:31:32"}
388
+ {"current_steps": 342, "total_steps": 390, "loss": 0.1755, "lr": 1.817514003945524e-06, "epoch": 4.3776, "percentage": 87.69, "elapsed_time": "3:40:06", "remaining_time": "0:30:53"}
389
+ {"current_steps": 343, "total_steps": 390, "loss": 0.1644, "lr": 1.743681945978184e-06, "epoch": 4.3904, "percentage": 87.95, "elapsed_time": "3:40:46", "remaining_time": "0:30:15"}
390
+ {"current_steps": 344, "total_steps": 390, "loss": 0.1501, "lr": 1.6713123877807413e-06, "epoch": 4.4032, "percentage": 88.21, "elapsed_time": "3:41:20", "remaining_time": "0:29:35"}
391
+ {"current_steps": 345, "total_steps": 390, "loss": 0.1518, "lr": 1.6004111268235156e-06, "epoch": 4.416, "percentage": 88.46, "elapsed_time": "3:42:02", "remaining_time": "0:28:57"}
392
+ {"current_steps": 346, "total_steps": 390, "loss": 0.1771, "lr": 1.5309838429526714e-06, "epoch": 4.4288, "percentage": 88.72, "elapsed_time": "3:42:40", "remaining_time": "0:28:19"}
393
+ {"current_steps": 347, "total_steps": 390, "loss": 0.188, "lr": 1.4630360979351644e-06, "epoch": 4.4416, "percentage": 88.97, "elapsed_time": "3:43:20", "remaining_time": "0:27:40"}
394
+ {"current_steps": 348, "total_steps": 390, "loss": 0.1844, "lr": 1.396573335013236e-06, "epoch": 4.4544, "percentage": 89.23, "elapsed_time": "3:44:01", "remaining_time": "0:27:02"}
395
+ {"current_steps": 349, "total_steps": 390, "loss": 0.1598, "lr": 1.3316008784683265e-06, "epoch": 4.4672, "percentage": 89.49, "elapsed_time": "3:44:39", "remaining_time": "0:26:23"}
396
+ {"current_steps": 350, "total_steps": 390, "loss": 0.1651, "lr": 1.2681239331945695e-06, "epoch": 4.48, "percentage": 89.74, "elapsed_time": "3:45:08", "remaining_time": "0:25:43"}
397
+ {"current_steps": 351, "total_steps": 390, "loss": 0.1928, "lr": 1.2061475842818337e-06, "epoch": 4.4928, "percentage": 90.0, "elapsed_time": "3:45:46", "remaining_time": "0:25:05"}
398
+ {"current_steps": 352, "total_steps": 390, "loss": 0.1747, "lr": 1.1456767966083393e-06, "epoch": 4.5056, "percentage": 90.26, "elapsed_time": "3:46:25", "remaining_time": "0:24:26"}
399
+ {"current_steps": 353, "total_steps": 390, "loss": 0.1808, "lr": 1.086716414442952e-06, "epoch": 4.5184, "percentage": 90.51, "elapsed_time": "3:47:03", "remaining_time": "0:23:47"}
400
+ {"current_steps": 354, "total_steps": 390, "loss": 0.1493, "lr": 1.0292711610570904e-06, "epoch": 4.5312, "percentage": 90.77, "elapsed_time": "3:47:36", "remaining_time": "0:23:08"}
401
+ {"current_steps": 355, "total_steps": 390, "loss": 0.2067, "lr": 9.733456383463658e-07, "epoch": 4.5440000000000005, "percentage": 91.03, "elapsed_time": "3:48:17", "remaining_time": "0:22:30"}
402
+ {"current_steps": 356, "total_steps": 390, "loss": 0.1735, "lr": 9.189443264619102e-07, "epoch": 4.5568, "percentage": 91.28, "elapsed_time": "3:48:56", "remaining_time": "0:21:51"}
403
+ {"current_steps": 357, "total_steps": 390, "loss": 0.1688, "lr": 8.660715834514977e-07, "epoch": 4.5696, "percentage": 91.54, "elapsed_time": "3:49:33", "remaining_time": "0:21:13"}
404
+ {"current_steps": 358, "total_steps": 390, "loss": 0.1686, "lr": 8.147316449103959e-07, "epoch": 4.5824, "percentage": 91.79, "elapsed_time": "3:50:14", "remaining_time": "0:20:34"}
405
+ {"current_steps": 359, "total_steps": 390, "loss": 0.1469, "lr": 7.649286236420806e-07, "epoch": 4.5952, "percentage": 92.05, "elapsed_time": "3:50:51", "remaining_time": "0:19:56"}
406
+ {"current_steps": 360, "total_steps": 390, "loss": 0.1963, "lr": 7.166665093287539e-07, "epoch": 4.608, "percentage": 92.31, "elapsed_time": "3:51:33", "remaining_time": "0:19:17"}
407
+ {"current_steps": 361, "total_steps": 390, "loss": 0.1594, "lr": 6.69949168211721e-07, "epoch": 4.6208, "percentage": 92.56, "elapsed_time": "3:52:16", "remaining_time": "0:18:39"}
408
+ {"current_steps": 362, "total_steps": 390, "loss": 0.1794, "lr": 6.247803427816945e-07, "epoch": 4.6336, "percentage": 92.82, "elapsed_time": "3:52:56", "remaining_time": "0:18:01"}
409
+ {"current_steps": 363, "total_steps": 390, "loss": 0.1894, "lr": 5.811636514789598e-07, "epoch": 4.6464, "percentage": 93.08, "elapsed_time": "3:53:37", "remaining_time": "0:17:22"}
410
+ {"current_steps": 364, "total_steps": 390, "loss": 0.1822, "lr": 5.391025884035239e-07, "epoch": 4.6592, "percentage": 93.33, "elapsed_time": "3:54:19", "remaining_time": "0:16:44"}
411
+ {"current_steps": 365, "total_steps": 390, "loss": 0.1279, "lr": 4.986005230351954e-07, "epoch": 4.672, "percentage": 93.59, "elapsed_time": "3:54:53", "remaining_time": "0:16:05"}
412
+ {"current_steps": 366, "total_steps": 390, "loss": 0.1599, "lr": 4.5966069996365993e-07, "epoch": 4.6848, "percentage": 93.85, "elapsed_time": "3:55:30", "remaining_time": "0:15:26"}
413
+ {"current_steps": 367, "total_steps": 390, "loss": 0.187, "lr": 4.22286238628562e-07, "epoch": 4.6975999999999996, "percentage": 94.1, "elapsed_time": "3:56:14", "remaining_time": "0:14:48"}
414
+ {"current_steps": 368, "total_steps": 390, "loss": 0.1574, "lr": 3.8648013306960664e-07, "epoch": 4.7104, "percentage": 94.36, "elapsed_time": "3:56:52", "remaining_time": "0:14:09"}
415
+ {"current_steps": 369, "total_steps": 390, "loss": 0.1437, "lr": 3.522452516867048e-07, "epoch": 4.7232, "percentage": 94.62, "elapsed_time": "3:57:25", "remaining_time": "0:13:30"}
416
+ {"current_steps": 370, "total_steps": 390, "loss": 0.16, "lr": 3.1958433701019697e-07, "epoch": 4.736, "percentage": 94.87, "elapsed_time": "3:57:56", "remaining_time": "0:12:51"}
417
+ {"current_steps": 371, "total_steps": 390, "loss": 0.167, "lr": 2.8850000548115155e-07, "epoch": 4.7488, "percentage": 95.13, "elapsed_time": "3:58:36", "remaining_time": "0:12:13"}
418
+ {"current_steps": 372, "total_steps": 390, "loss": 0.1577, "lr": 2.5899474724174313e-07, "epoch": 4.7616, "percentage": 95.38, "elapsed_time": "3:59:10", "remaining_time": "0:11:34"}
419
+ {"current_steps": 373, "total_steps": 390, "loss": 0.1373, "lr": 2.3107092593579905e-07, "epoch": 4.7744, "percentage": 95.64, "elapsed_time": "3:59:52", "remaining_time": "0:10:55"}
420
+ {"current_steps": 374, "total_steps": 390, "loss": 0.1575, "lr": 2.0473077851942858e-07, "epoch": 4.7872, "percentage": 95.9, "elapsed_time": "4:00:25", "remaining_time": "0:10:17"}
421
+ {"current_steps": 375, "total_steps": 390, "loss": 0.1704, "lr": 1.799764150818306e-07, "epoch": 4.8, "percentage": 96.15, "elapsed_time": "4:01:03", "remaining_time": "0:09:38"}
422
+ {"current_steps": 376, "total_steps": 390, "loss": 0.1757, "lr": 1.5680981867625566e-07, "epoch": 4.8128, "percentage": 96.41, "elapsed_time": "4:01:36", "remaining_time": "0:08:59"}
423
+ {"current_steps": 377, "total_steps": 390, "loss": 0.1555, "lr": 1.3523284516113955e-07, "epoch": 4.8256, "percentage": 96.67, "elapsed_time": "4:02:13", "remaining_time": "0:08:21"}
424
+ {"current_steps": 378, "total_steps": 390, "loss": 0.1552, "lr": 1.1524722305144231e-07, "epoch": 4.8384, "percentage": 96.92, "elapsed_time": "4:02:51", "remaining_time": "0:07:42"}
425
+ {"current_steps": 379, "total_steps": 390, "loss": 0.2017, "lr": 9.685455338016347e-08, "epoch": 4.8512, "percentage": 97.18, "elapsed_time": "4:03:28", "remaining_time": "0:07:03"}
426
+ {"current_steps": 380, "total_steps": 390, "loss": 0.1623, "lr": 8.005630957010014e-08, "epoch": 4.864, "percentage": 97.44, "elapsed_time": "4:04:08", "remaining_time": "0:06:25"}
427
+ {"current_steps": 381, "total_steps": 390, "loss": 0.1677, "lr": 6.485383731580142e-08, "epoch": 4.8768, "percentage": 97.69, "elapsed_time": "4:04:48", "remaining_time": "0:05:46"}
428
+ {"current_steps": 382, "total_steps": 390, "loss": 0.1597, "lr": 5.1248354475768034e-08, "epoch": 4.8896, "percentage": 97.95, "elapsed_time": "4:05:23", "remaining_time": "0:05:08"}
429
+ {"current_steps": 383, "total_steps": 390, "loss": 0.1652, "lr": 3.924095097489922e-08, "epoch": 4.9024, "percentage": 98.21, "elapsed_time": "4:06:02", "remaining_time": "0:04:29"}
430
+ {"current_steps": 384, "total_steps": 390, "loss": 0.1653, "lr": 2.8832588717164766e-08, "epoch": 4.9152000000000005, "percentage": 98.46, "elapsed_time": "4:06:45", "remaining_time": "0:03:51"}
431
+ {"current_steps": 385, "total_steps": 390, "loss": 0.1445, "lr": 2.0024101508555604e-08, "epoch": 4.928, "percentage": 98.72, "elapsed_time": "4:07:18", "remaining_time": "0:03:12"}
432
+ {"current_steps": 386, "total_steps": 390, "loss": 0.1607, "lr": 1.281619499029274e-08, "epoch": 4.9408, "percentage": 98.97, "elapsed_time": "4:07:51", "remaining_time": "0:02:34"}
433
+ {"current_steps": 387, "total_steps": 390, "loss": 0.173, "lr": 7.209446582292501e-09, "epoch": 4.9536, "percentage": 99.23, "elapsed_time": "4:08:30", "remaining_time": "0:01:55"}
434
+ {"current_steps": 388, "total_steps": 390, "loss": 0.1632, "lr": 3.2043054369057523e-09, "epoch": 4.9664, "percentage": 99.49, "elapsed_time": "4:09:13", "remaining_time": "0:01:17"}
435
+ {"current_steps": 389, "total_steps": 390, "loss": 0.1772, "lr": 8.010924029533406e-10, "epoch": 4.9792, "percentage": 99.74, "elapsed_time": "4:09:48", "remaining_time": "0:00:38"}
436
+ {"current_steps": 390, "total_steps": 390, "loss": 0.1557, "lr": 0.0, "epoch": 4.992, "percentage": 100.0, "elapsed_time": "4:10:23", "remaining_time": "0:00:00"}
437
+ {"current_steps": 390, "total_steps": 390, "epoch": 4.992, "percentage": 100.0, "elapsed_time": "4:11:40", "remaining_time": "0:00:00"}