rakhman-llm committed (verified)
Commit eb25e73 · 1 Parent(s): 4319ca7

Training in progress, step 3000, checkpoint

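For orientation, the trainer_state.json diff below records "eval_steps": 500, "logging_steps": 50, "train_batch_size": 8, checkpoints every 500 steps under ./fine-tuned, and a best checkpoint tracked by eval_loss. A minimal TrainingArguments sketch that would produce this cadence; the actual arguments are not part of this commit, so every value not visible in the diff is an assumption:

```python
# Hypothetical reconstruction of the run configuration; only values marked
# "from the diff" are actually recorded in this commit, the rest are assumptions.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./fine-tuned",          # from the diff: "./fine-tuned/checkpoint-3000"
    per_device_train_batch_size=8,      # from the diff: "train_batch_size": 8
    evaluation_strategy="steps",        # assumption: step-based eval matching eval_steps
    eval_steps=500,                     # from the diff: "eval_steps": 500
    save_steps=500,                     # assumption: checkpoints land at 2500, 3000, ...
    logging_steps=50,                   # from the diff: "logging_steps": 50
    load_best_model_at_end=True,        # assumption: best_metric/best_model_checkpoint tracked
    metric_for_best_model="eval_loss",  # assumption: best_metric equals the latest eval_loss
    greater_is_better=False,
)
```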
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d253ddcfd4df2acb84a6afc37ea897625fd4823975484b2722f1d732a807a42
+oid sha256:c8e4a377d9d9b4ee3182c91d09c6aa160ff1ade127f1e240b0ace4f4312419ec
 size 891558696
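The binary checkpoint files in this commit are stored through Git LFS, so the diff only touches the three-line pointer (version, oid sha256:..., size); the ~892 MB safetensors blob itself lives in LFS storage and is addressed by its SHA-256. A small sketch for checking a downloaded object against its pointer, assuming both are available locally under hypothetical paths:

```python
# Verify a downloaded LFS object against its pointer file (sketch; paths are assumptions).
import hashlib
import os

def verify_lfs_object(pointer_path: str, object_path: str) -> bool:
    """Compare the sha256 oid and size in a Git LFS pointer with a local file."""
    fields = {}
    with open(pointer_path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    sha = hashlib.sha256()
    with open(object_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)

    return sha.hexdigest() == expected_oid and os.path.getsize(object_path) == expected_size

# Example (hypothetical paths):
# verify_lfs_object("model.safetensors.pointer", "last-checkpoint/model.safetensors")
```

The same check applies to the optimizer.pt, rng_state.pth, and scheduler.pt pointers below.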
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:80072666b3be47f53f317ccfe6322071cfa6706038da8de25f13ce6816f3172e
+oid sha256:37c76f78e4bea0bb233f5490f2342fc733388c7761a0fa0c0e5fdf8f1a5336d2
 size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f2667454d6cffb59aea7917c33b6757bc9e126470ce377cf6d3d3b895b28bce
+oid sha256:f180d57072cebd56f1e36f710e6b62868e2b14fe85aee7effc0a0d28a6763011
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f9e7d6d8721d0efdf284e8ea86037aff29ca12a45581cd9384185af5e287f2cf
+oid sha256:19fa64c0f058dbaac84e2a6129da56913abd2f29f4a3f61f13f6abfb2cd3ff5f
 size 1064
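Taken together, the four files above plus trainer_state.json form a standard transformers Trainer checkpoint. A hedged sketch of inspecting the pieces directly, assuming a local copy of last-checkpoint/; in practice the Trainer reloads all of this itself when resuming:

```python
# Inspect the checkpoint components by hand (sketch; the directory path is an assumption).
# Normal resumption is simply trainer.train(resume_from_checkpoint="last-checkpoint").
import torch
from safetensors.torch import load_file

ckpt_dir = "last-checkpoint"

# Model weights: the ~892 MB safetensors blob whose LFS pointer changed above.
state_dict = load_file(f"{ckpt_dir}/model.safetensors")
print(f"{len(state_dict)} weight tensors")

# Optimizer and LR-scheduler state are plain pickled torch objects,
# hence weights_only=False on recent torch versions.
optimizer_state = torch.load(f"{ckpt_dir}/optimizer.pt", map_location="cpu", weights_only=False)
scheduler_state = torch.load(f"{ckpt_dir}/scheduler.pt", map_location="cpu", weights_only=False)

# RNG state (~14 KB) lets dataloader shuffling and dropout resume deterministically.
rng_state = torch.load(f"{ckpt_dir}/rng_state.pth", map_location="cpu", weights_only=False)
```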
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.08615937829017639,
-  "best_model_checkpoint": "./fine-tuned/checkpoint-2500",
-  "epoch": 0.4,
+  "best_metric": 0.08551913499832153,
+  "best_model_checkpoint": "./fine-tuned/checkpoint-3000",
+  "epoch": 0.48,
   "eval_steps": 500,
-  "global_step": 2500,
+  "global_step": 3000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -81,9 +81,9 @@
     {
       "epoch": 0.08,
       "eval_loss": 0.09235642850399017,
-      "eval_runtime": 116.7651,
-      "eval_samples_per_second": 17.128,
-      "eval_steps_per_second": 2.141,
+      "eval_runtime": 109.274,
+      "eval_samples_per_second": 18.303,
+      "eval_steps_per_second": 2.288,
       "step": 500
     },
     {
@@ -159,9 +159,9 @@
     {
       "epoch": 0.16,
       "eval_loss": 0.08949962258338928,
-      "eval_runtime": 116.7407,
-      "eval_samples_per_second": 17.132,
-      "eval_steps_per_second": 2.141,
+      "eval_runtime": 109.2536,
+      "eval_samples_per_second": 18.306,
+      "eval_steps_per_second": 2.288,
       "step": 1000
     },
     {
@@ -237,9 +237,9 @@
     {
       "epoch": 0.24,
       "eval_loss": 0.08808805048465729,
-      "eval_runtime": 116.8722,
-      "eval_samples_per_second": 17.113,
-      "eval_steps_per_second": 2.139,
+      "eval_runtime": 109.2355,
+      "eval_samples_per_second": 18.309,
+      "eval_steps_per_second": 2.289,
       "step": 1500
     },
     {
@@ -315,9 +315,9 @@
     {
       "epoch": 0.32,
       "eval_loss": 0.08704760670661926,
-      "eval_runtime": 116.8362,
-      "eval_samples_per_second": 17.118,
-      "eval_steps_per_second": 2.14,
+      "eval_runtime": 109.4348,
+      "eval_samples_per_second": 18.276,
+      "eval_steps_per_second": 2.284,
       "step": 2000
     },
     {
@@ -393,10 +393,88 @@
     {
       "epoch": 0.4,
       "eval_loss": 0.08615937829017639,
-      "eval_runtime": 116.9591,
-      "eval_samples_per_second": 17.1,
-      "eval_steps_per_second": 2.137,
+      "eval_runtime": 109.2621,
+      "eval_samples_per_second": 18.305,
+      "eval_steps_per_second": 2.288,
       "step": 2500
+    },
+    {
+      "epoch": 0.408,
+      "grad_norm": 7447.5625,
+      "learning_rate": 2.592e-05,
+      "loss": 0.0672,
+      "step": 2550
+    },
+    {
+      "epoch": 0.416,
+      "grad_norm": 7057.10009765625,
+      "learning_rate": 2.584e-05,
+      "loss": 0.0683,
+      "step": 2600
+    },
+    {
+      "epoch": 0.424,
+      "grad_norm": 8279.7392578125,
+      "learning_rate": 2.576e-05,
+      "loss": 0.0631,
+      "step": 2650
+    },
+    {
+      "epoch": 0.432,
+      "grad_norm": 7663.275390625,
+      "learning_rate": 2.568e-05,
+      "loss": 0.0698,
+      "step": 2700
+    },
+    {
+      "epoch": 0.44,
+      "grad_norm": 7116.74609375,
+      "learning_rate": 2.5600000000000002e-05,
+      "loss": 0.0703,
+      "step": 2750
+    },
+    {
+      "epoch": 0.448,
+      "grad_norm": 8839.5986328125,
+      "learning_rate": 2.552e-05,
+      "loss": 0.0654,
+      "step": 2800
+    },
+    {
+      "epoch": 0.456,
+      "grad_norm": 7157.17333984375,
+      "learning_rate": 2.544e-05,
+      "loss": 0.0628,
+      "step": 2850
+    },
+    {
+      "epoch": 0.464,
+      "grad_norm": 7690.267578125,
+      "learning_rate": 2.536e-05,
+      "loss": 0.0694,
+      "step": 2900
+    },
+    {
+      "epoch": 0.472,
+      "grad_norm": 5030.39501953125,
+      "learning_rate": 2.5280000000000002e-05,
+      "loss": 0.0654,
+      "step": 2950
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 7269.51171875,
+      "learning_rate": 2.52e-05,
+      "loss": 0.0732,
+      "step": 3000
+    },
+    {
+      "epoch": 0.48,
+      "eval_loss": 0.08551913499832153,
+      "eval_runtime": 109.2626,
+      "eval_samples_per_second": 18.305,
+      "eval_steps_per_second": 2.288,
+      "step": 3000
     }
   ],
   "logging_steps": 50,
@@ -416,7 +494,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.21791578112e+16,
+  "total_flos": 1.461498937344e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null