reinhardh commited on
Commit
c2be965
·
verified ·
1 Parent(s): 4a9f196

Training in progress, epoch 3

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a76c4c8759bd4cccf916c822e56947b3465676458f7abff5c29d68824de2c556
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57c6db2e4d3f93feabaecdbe252dbca3af07059826f5bf7fd9ba59a9773f3d09
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e98ee46bbade61772b2fae9baf4be1d029a3e607fe3d17cd3ea439a8a828ea24
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c69a974cc3b59da07d31c2b0dc84f966a630be4c171bc847e667579e6e37f9d8
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e1e3a272c5128d8b7cc3be310bd35e0c3adbbce72c93c45732b75312c8f8730
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21d442c57a3d163b039b523d8a11b2de1dfa1151cbf300886fa539dcb6a1a037
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:934ce1d50d9cc06492cf74f7e10c7072e645905401819481289221ee35e80c12
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:885e4737c38803b5b67760716a94d3d943a4008760748cf2c3783caa59c9ff01
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -290,3 +290,81 @@
290
  {"current_steps": 244, "total_steps": 390, "loss": 0.2035, "lr": 1.478040494913926e-05, "epoch": 3.1232, "percentage": 62.56, "elapsed_time": "2:37:12", "remaining_time": "1:34:03"}
291
  {"current_steps": 245, "total_steps": 390, "loss": 0.2217, "lr": 1.460781185335713e-05, "epoch": 3.136, "percentage": 62.82, "elapsed_time": "2:37:55", "remaining_time": "1:33:27"}
292
  {"current_steps": 246, "total_steps": 390, "loss": 0.2268, "lr": 1.443565072167095e-05, "epoch": 3.1488, "percentage": 63.08, "elapsed_time": "2:38:34", "remaining_time": "1:32:49"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
  {"current_steps": 244, "total_steps": 390, "loss": 0.2035, "lr": 1.478040494913926e-05, "epoch": 3.1232, "percentage": 62.56, "elapsed_time": "2:37:12", "remaining_time": "1:34:03"}
291
  {"current_steps": 245, "total_steps": 390, "loss": 0.2217, "lr": 1.460781185335713e-05, "epoch": 3.136, "percentage": 62.82, "elapsed_time": "2:37:55", "remaining_time": "1:33:27"}
292
  {"current_steps": 246, "total_steps": 390, "loss": 0.2268, "lr": 1.443565072167095e-05, "epoch": 3.1488, "percentage": 63.08, "elapsed_time": "2:38:34", "remaining_time": "1:32:49"}
293
+ {"current_steps": 247, "total_steps": 390, "loss": 0.2111, "lr": 1.4263935345778202e-05, "epoch": 3.1616, "percentage": 63.33, "elapsed_time": "2:39:09", "remaining_time": "1:32:08"}
294
+ {"current_steps": 248, "total_steps": 390, "loss": 0.2119, "lr": 1.409267948166718e-05, "epoch": 3.1744, "percentage": 63.59, "elapsed_time": "2:39:44", "remaining_time": "1:31:28"}
295
+ {"current_steps": 249, "total_steps": 390, "loss": 0.1852, "lr": 1.3921896848515064e-05, "epoch": 3.1872, "percentage": 63.85, "elapsed_time": "2:40:17", "remaining_time": "1:30:46"}
296
+ {"current_steps": 250, "total_steps": 390, "loss": 0.2326, "lr": 1.3751601127588849e-05, "epoch": 3.2, "percentage": 64.1, "elapsed_time": "2:40:57", "remaining_time": "1:30:08"}
297
+ {"current_steps": 251, "total_steps": 390, "loss": 0.205, "lr": 1.3581805961149371e-05, "epoch": 3.2128, "percentage": 64.36, "elapsed_time": "2:41:30", "remaining_time": "1:29:26"}
298
+ {"current_steps": 252, "total_steps": 390, "loss": 0.2211, "lr": 1.341252495135841e-05, "epoch": 3.2256, "percentage": 64.62, "elapsed_time": "2:42:08", "remaining_time": "1:28:47"}
299
+ {"current_steps": 253, "total_steps": 390, "loss": 0.2199, "lr": 1.324377165918906e-05, "epoch": 3.2384, "percentage": 64.87, "elapsed_time": "2:42:47", "remaining_time": "1:28:09"}
300
+ {"current_steps": 254, "total_steps": 390, "loss": 0.2305, "lr": 1.3075559603339354e-05, "epoch": 3.2512, "percentage": 65.13, "elapsed_time": "2:43:23", "remaining_time": "1:27:29"}
301
+ {"current_steps": 255, "total_steps": 390, "loss": 0.2089, "lr": 1.2907902259149287e-05, "epoch": 3.2640000000000002, "percentage": 65.38, "elapsed_time": "2:44:01", "remaining_time": "1:26:50"}
302
+ {"current_steps": 256, "total_steps": 390, "loss": 0.2128, "lr": 1.274081305752135e-05, "epoch": 3.2768, "percentage": 65.64, "elapsed_time": "2:44:38", "remaining_time": "1:26:10"}
303
+ {"current_steps": 257, "total_steps": 390, "loss": 0.2242, "lr": 1.2574305383844528e-05, "epoch": 3.2896, "percentage": 65.9, "elapsed_time": "2:45:23", "remaining_time": "1:25:35"}
304
+ {"current_steps": 258, "total_steps": 390, "loss": 0.2259, "lr": 1.2408392576922075e-05, "epoch": 3.3024, "percentage": 66.15, "elapsed_time": "2:46:02", "remaining_time": "1:24:57"}
305
+ {"current_steps": 259, "total_steps": 390, "loss": 0.1991, "lr": 1.2243087927902905e-05, "epoch": 3.3152, "percentage": 66.41, "elapsed_time": "2:46:40", "remaining_time": "1:24:18"}
306
+ {"current_steps": 260, "total_steps": 390, "loss": 0.1939, "lr": 1.2078404679216864e-05, "epoch": 3.328, "percentage": 66.67, "elapsed_time": "2:47:18", "remaining_time": "1:23:39"}
307
+ {"current_steps": 261, "total_steps": 390, "loss": 0.2391, "lr": 1.1914356023513904e-05, "epoch": 3.3407999999999998, "percentage": 66.92, "elapsed_time": "2:47:58", "remaining_time": "1:23:01"}
308
+ {"current_steps": 262, "total_steps": 390, "loss": 0.2257, "lr": 1.1750955102607193e-05, "epoch": 3.3536, "percentage": 67.18, "elapsed_time": "2:48:36", "remaining_time": "1:22:22"}
309
+ {"current_steps": 263, "total_steps": 390, "loss": 0.2173, "lr": 1.1588215006420374e-05, "epoch": 3.3664, "percentage": 67.44, "elapsed_time": "2:49:15", "remaining_time": "1:21:43"}
310
+ {"current_steps": 264, "total_steps": 390, "loss": 0.2103, "lr": 1.1426148771938915e-05, "epoch": 3.3792, "percentage": 67.69, "elapsed_time": "2:49:45", "remaining_time": "1:21:01"}
311
+ {"current_steps": 265, "total_steps": 390, "loss": 0.1808, "lr": 1.1264769382165748e-05, "epoch": 3.392, "percentage": 67.95, "elapsed_time": "2:50:21", "remaining_time": "1:20:21"}
312
+ {"current_steps": 266, "total_steps": 390, "loss": 0.1844, "lr": 1.110408976508118e-05, "epoch": 3.4048, "percentage": 68.21, "elapsed_time": "2:50:57", "remaining_time": "1:19:41"}
313
+ {"current_steps": 267, "total_steps": 390, "loss": 0.2119, "lr": 1.094412279260726e-05, "epoch": 3.4176, "percentage": 68.46, "elapsed_time": "2:51:36", "remaining_time": "1:19:03"}
314
+ {"current_steps": 268, "total_steps": 390, "loss": 0.1934, "lr": 1.0784881279576635e-05, "epoch": 3.4304, "percentage": 68.72, "elapsed_time": "2:52:09", "remaining_time": "1:18:22"}
315
+ {"current_steps": 269, "total_steps": 390, "loss": 0.2255, "lr": 1.0626377982705929e-05, "epoch": 3.4432, "percentage": 68.97, "elapsed_time": "2:52:43", "remaining_time": "1:17:41"}
316
+ {"current_steps": 270, "total_steps": 390, "loss": 0.2028, "lr": 1.0468625599573842e-05, "epoch": 3.456, "percentage": 69.23, "elapsed_time": "2:53:18", "remaining_time": "1:17:01"}
317
+ {"current_steps": 271, "total_steps": 390, "loss": 0.2291, "lr": 1.0311636767603952e-05, "epoch": 3.4688, "percentage": 69.49, "elapsed_time": "2:53:53", "remaining_time": "1:16:21"}
318
+ {"current_steps": 272, "total_steps": 390, "loss": 0.2161, "lr": 1.0155424063052306e-05, "epoch": 3.4816, "percentage": 69.74, "elapsed_time": "2:54:26", "remaining_time": "1:15:40"}
319
+ {"current_steps": 273, "total_steps": 390, "loss": 0.2312, "lr": 1.0000000000000006e-05, "epoch": 3.4944, "percentage": 70.0, "elapsed_time": "2:55:10", "remaining_time": "1:15:04"}
320
+ {"current_steps": 274, "total_steps": 390, "loss": 0.2223, "lr": 9.84537702935065e-06, "epoch": 3.5072, "percentage": 70.26, "elapsed_time": "2:55:54", "remaining_time": "1:14:28"}
321
+ {"current_steps": 275, "total_steps": 390, "loss": 0.2054, "lr": 9.691567537832964e-06, "epoch": 3.52, "percentage": 70.51, "elapsed_time": "2:56:29", "remaining_time": "1:13:48"}
322
+ {"current_steps": 276, "total_steps": 390, "loss": 0.2138, "lr": 9.538583847008452e-06, "epoch": 3.5328, "percentage": 70.77, "elapsed_time": "2:57:10", "remaining_time": "1:13:10"}
323
+ {"current_steps": 277, "total_steps": 390, "loss": 0.2116, "lr": 9.386438212284372e-06, "epoch": 3.5456, "percentage": 71.03, "elapsed_time": "2:57:49", "remaining_time": "1:12:32"}
324
+ {"current_steps": 278, "total_steps": 390, "loss": 0.2004, "lr": 9.235142821931928e-06, "epoch": 3.5584, "percentage": 71.28, "elapsed_time": "2:58:25", "remaining_time": "1:11:53"}
325
+ {"current_steps": 279, "total_steps": 390, "loss": 0.1952, "lr": 9.084709796109907e-06, "epoch": 3.5712, "percentage": 71.54, "elapsed_time": "2:59:06", "remaining_time": "1:11:15"}
326
+ {"current_steps": 280, "total_steps": 390, "loss": 0.2116, "lr": 8.93515118589373e-06, "epoch": 3.584, "percentage": 71.79, "elapsed_time": "2:59:45", "remaining_time": "1:10:37"}
327
+ {"current_steps": 281, "total_steps": 390, "loss": 0.2143, "lr": 8.786478972310023e-06, "epoch": 3.5968, "percentage": 72.05, "elapsed_time": "3:00:26", "remaining_time": "1:09:59"}
328
+ {"current_steps": 282, "total_steps": 390, "loss": 0.2331, "lr": 8.638705065376887e-06, "epoch": 3.6096, "percentage": 72.31, "elapsed_time": "3:01:06", "remaining_time": "1:09:21"}
329
+ {"current_steps": 283, "total_steps": 390, "loss": 0.2469, "lr": 8.491841303149728e-06, "epoch": 3.6224, "percentage": 72.56, "elapsed_time": "3:01:42", "remaining_time": "1:08:42"}
330
+ {"current_steps": 284, "total_steps": 390, "loss": 0.2131, "lr": 8.345899450772975e-06, "epoch": 3.6352, "percentage": 72.82, "elapsed_time": "3:02:14", "remaining_time": "1:08:01"}
331
+ {"current_steps": 285, "total_steps": 390, "loss": 0.1957, "lr": 8.200891199537549e-06, "epoch": 3.648, "percentage": 73.08, "elapsed_time": "3:02:51", "remaining_time": "1:07:22"}
332
+ {"current_steps": 286, "total_steps": 390, "loss": 0.2215, "lr": 8.056828165944282e-06, "epoch": 3.6608, "percentage": 73.33, "elapsed_time": "3:03:29", "remaining_time": "1:06:43"}
333
+ {"current_steps": 287, "total_steps": 390, "loss": 0.2043, "lr": 7.913721890773354e-06, "epoch": 3.6736, "percentage": 73.59, "elapsed_time": "3:04:09", "remaining_time": "1:06:05"}
334
+ {"current_steps": 288, "total_steps": 390, "loss": 0.2093, "lr": 7.771583838159756e-06, "epoch": 3.6864, "percentage": 73.85, "elapsed_time": "3:04:51", "remaining_time": "1:05:28"}
335
+ {"current_steps": 289, "total_steps": 390, "loss": 0.2152, "lr": 7.630425394674903e-06, "epoch": 3.6992000000000003, "percentage": 74.1, "elapsed_time": "3:05:30", "remaining_time": "1:04:50"}
336
+ {"current_steps": 290, "total_steps": 390, "loss": 0.1985, "lr": 7.49025786841445e-06, "epoch": 3.7119999999999997, "percentage": 74.36, "elapsed_time": "3:06:04", "remaining_time": "1:04:09"}
337
+ {"current_steps": 291, "total_steps": 390, "loss": 0.1964, "lr": 7.3510924880924575e-06, "epoch": 3.7248, "percentage": 74.62, "elapsed_time": "3:06:39", "remaining_time": "1:03:30"}
338
+ {"current_steps": 292, "total_steps": 390, "loss": 0.1944, "lr": 7.212940402141808e-06, "epoch": 3.7376, "percentage": 74.87, "elapsed_time": "3:07:19", "remaining_time": "1:02:52"}
339
+ {"current_steps": 293, "total_steps": 390, "loss": 0.2178, "lr": 7.075812677821145e-06, "epoch": 3.7504, "percentage": 75.13, "elapsed_time": "3:07:58", "remaining_time": "1:02:13"}
340
+ {"current_steps": 294, "total_steps": 390, "loss": 0.2077, "lr": 6.939720300328303e-06, "epoch": 3.7632, "percentage": 75.38, "elapsed_time": "3:08:38", "remaining_time": "1:01:35"}
341
+ {"current_steps": 295, "total_steps": 390, "loss": 0.2291, "lr": 6.8046741719202385e-06, "epoch": 3.776, "percentage": 75.64, "elapsed_time": "3:09:17", "remaining_time": "1:00:57"}
342
+ {"current_steps": 296, "total_steps": 390, "loss": 0.2251, "lr": 6.67068511103971e-06, "epoch": 3.7888, "percentage": 75.9, "elapsed_time": "3:09:53", "remaining_time": "1:00:18"}
343
+ {"current_steps": 297, "total_steps": 390, "loss": 0.2154, "lr": 6.537763851448593e-06, "epoch": 3.8016, "percentage": 76.15, "elapsed_time": "3:10:31", "remaining_time": "0:59:39"}
344
+ {"current_steps": 298, "total_steps": 390, "loss": 0.2292, "lr": 6.4059210413680175e-06, "epoch": 3.8144, "percentage": 76.41, "elapsed_time": "3:11:09", "remaining_time": "0:59:00"}
345
+ {"current_steps": 299, "total_steps": 390, "loss": 0.2043, "lr": 6.275167242625331e-06, "epoch": 3.8272, "percentage": 76.67, "elapsed_time": "3:11:48", "remaining_time": "0:58:22"}
346
+ {"current_steps": 300, "total_steps": 390, "loss": 0.2285, "lr": 6.145512929808013e-06, "epoch": 3.84, "percentage": 76.92, "elapsed_time": "3:12:22", "remaining_time": "0:57:42"}
347
+ {"current_steps": 301, "total_steps": 390, "loss": 0.1953, "lr": 6.016968489424572e-06, "epoch": 3.8528000000000002, "percentage": 77.18, "elapsed_time": "3:13:01", "remaining_time": "0:57:04"}
348
+ {"current_steps": 302, "total_steps": 390, "loss": 0.2294, "lr": 5.889544219072465e-06, "epoch": 3.8656, "percentage": 77.44, "elapsed_time": "3:13:42", "remaining_time": "0:56:26"}
349
+ {"current_steps": 303, "total_steps": 390, "loss": 0.2097, "lr": 5.7632503266131925e-06, "epoch": 3.8784, "percentage": 77.69, "elapsed_time": "3:14:22", "remaining_time": "0:55:48"}
350
+ {"current_steps": 304, "total_steps": 390, "loss": 0.2025, "lr": 5.638096929354522e-06, "epoch": 3.8912, "percentage": 77.95, "elapsed_time": "3:15:03", "remaining_time": "0:55:10"}
351
+ {"current_steps": 305, "total_steps": 390, "loss": 0.2186, "lr": 5.514094053240035e-06, "epoch": 3.904, "percentage": 78.21, "elapsed_time": "3:15:56", "remaining_time": "0:54:36"}
352
+ {"current_steps": 306, "total_steps": 390, "loss": 0.2288, "lr": 5.39125163204594e-06, "epoch": 3.9168, "percentage": 78.46, "elapsed_time": "3:16:29", "remaining_time": "0:53:56"}
353
+ {"current_steps": 307, "total_steps": 390, "loss": 0.219, "lr": 5.269579506585259e-06, "epoch": 3.9295999999999998, "percentage": 78.72, "elapsed_time": "3:17:03", "remaining_time": "0:53:16"}
354
+ {"current_steps": 308, "total_steps": 390, "loss": 0.2032, "lr": 5.149087423919541e-06, "epoch": 3.9424, "percentage": 78.97, "elapsed_time": "3:17:40", "remaining_time": "0:52:37"}
355
+ {"current_steps": 309, "total_steps": 390, "loss": 0.2271, "lr": 5.029785036577976e-06, "epoch": 3.9552, "percentage": 79.23, "elapsed_time": "3:18:17", "remaining_time": "0:51:58"}
356
+ {"current_steps": 310, "total_steps": 390, "loss": 0.2028, "lr": 4.911681901784198e-06, "epoch": 3.968, "percentage": 79.49, "elapsed_time": "3:18:50", "remaining_time": "0:51:18"}
357
+ {"current_steps": 311, "total_steps": 390, "loss": 0.1941, "lr": 4.794787480690597e-06, "epoch": 3.9808, "percentage": 79.74, "elapsed_time": "3:19:30", "remaining_time": "0:50:40"}
358
+ {"current_steps": 312, "total_steps": 390, "loss": 0.2105, "lr": 4.679111137620442e-06, "epoch": 3.9936, "percentage": 80.0, "elapsed_time": "3:20:12", "remaining_time": "0:50:03"}
359
+ {"current_steps": 313, "total_steps": 390, "loss": 0.2099, "lr": 4.5646621393177e-06, "epoch": 4.0064, "percentage": 80.26, "elapsed_time": "3:21:54", "remaining_time": "0:49:40"}
360
+ {"current_steps": 314, "total_steps": 390, "loss": 0.1713, "lr": 4.451449654204685e-06, "epoch": 4.0192, "percentage": 80.51, "elapsed_time": "3:22:37", "remaining_time": "0:49:02"}
361
+ {"current_steps": 315, "total_steps": 390, "loss": 0.1916, "lr": 4.339482751647557e-06, "epoch": 4.032, "percentage": 80.77, "elapsed_time": "3:23:16", "remaining_time": "0:48:23"}
362
+ {"current_steps": 316, "total_steps": 390, "loss": 0.1799, "lr": 4.228770401229824e-06, "epoch": 4.0448, "percentage": 81.03, "elapsed_time": "3:23:55", "remaining_time": "0:47:45"}
363
+ {"current_steps": 317, "total_steps": 390, "loss": 0.1663, "lr": 4.119321472033779e-06, "epoch": 4.0576, "percentage": 81.28, "elapsed_time": "3:24:26", "remaining_time": "0:47:04"}
364
+ {"current_steps": 318, "total_steps": 390, "loss": 0.1775, "lr": 4.011144731929981e-06, "epoch": 4.0704, "percentage": 81.54, "elapsed_time": "3:25:04", "remaining_time": "0:46:25"}
365
+ {"current_steps": 319, "total_steps": 390, "loss": 0.1747, "lr": 3.904248846874894e-06, "epoch": 4.0832, "percentage": 81.79, "elapsed_time": "3:25:45", "remaining_time": "0:45:47"}
366
+ {"current_steps": 320, "total_steps": 390, "loss": 0.1889, "lr": 3.7986423802166705e-06, "epoch": 4.096, "percentage": 82.05, "elapsed_time": "3:26:25", "remaining_time": "0:45:09"}
367
+ {"current_steps": 321, "total_steps": 390, "loss": 0.1616, "lr": 3.694333792009115e-06, "epoch": 4.1088, "percentage": 82.31, "elapsed_time": "3:27:02", "remaining_time": "0:44:30"}
368
+ {"current_steps": 322, "total_steps": 390, "loss": 0.1668, "lr": 3.5913314383339937e-06, "epoch": 4.1216, "percentage": 82.56, "elapsed_time": "3:27:39", "remaining_time": "0:43:51"}
369
+ {"current_steps": 323, "total_steps": 390, "loss": 0.1656, "lr": 3.4896435706316e-06, "epoch": 4.1344, "percentage": 82.82, "elapsed_time": "3:28:19", "remaining_time": "0:43:12"}
370
+ {"current_steps": 324, "total_steps": 390, "loss": 0.1606, "lr": 3.3892783350397675e-06, "epoch": 4.1472, "percentage": 83.08, "elapsed_time": "3:28:51", "remaining_time": "0:42:32"}