Upload checkpoint 2550

Browse files

Files changed (6) hide show

model-00001-of-00002.safetensors +1 -1
model-00002-of-00002.safetensors +1 -1
optimizer.pt +1 -1
scheduler.pt +1 -1
trainer_state.json +122 -3
training_args.bin +1 -1

model-00001-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8d72758923b9f0bd82e5bbc48d12b2be490921493204a39cce9b2d23b381d0f
 size 4957560304

 version https://git-lfs.github.com/spec/v1
+oid sha256:91686ade4add65e1e3f776f0e2e8e27824e1b8b54fd0f3ca8624f5754fd40f74
 size 4957560304

model-00002-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ef49e65a4841d418eae8cb8e2787b90949f7dc5d0f7f5bff27d3dbf435b150a6
 size 3989163248

 version https://git-lfs.github.com/spec/v1
+oid sha256:c6e350cf5a05c074ce5165ffc5f179d510ac285f53b5429387bf0bd885bbc1d8
 size 3989163248

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c3687ead06fe1d4a2c758ad24015232d57c60288f61505a60550a7000b58c00
 size 17893874312

 version https://git-lfs.github.com/spec/v1
+oid sha256:ed58f770dc223880998359d98df9e164ff7ba4fd4edc2c51456852c720e4ff61
 size 17893874312

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c414f1eefcbe42a55aad752b49082d98662bb35b573d8b5f6de0588323354371
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:37c5646b0cfbdfe9b4cb4a990bd6626407946c25f0686631f52edd5843d42333
 size 1064

trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.741399762752076,
   "eval_steps": 500,
-  "global_step": 2500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5838,6 +5838,125 @@
       "learning_rate": 3.1469034906659946e-05,
       "loss": 0.6053,
       "step": 2499
     }
   ],
   "logging_steps": 3,
@@ -5857,7 +5976,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.636627712615383e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.7562277580071174,
   "eval_steps": 500,
+  "global_step": 2550,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.1469034906659946e-05,
       "loss": 0.6053,
       "step": 2499
+    },
+    {
+      "epoch": 0.7419928825622776,
+      "grad_norm": 0.267578125,
+      "learning_rate": 3.126515182118793e-05,
+      "loss": 0.5994,
+      "step": 2502
+    },
+    {
+      "epoch": 0.7428825622775801,
+      "grad_norm": 0.263671875,
+      "learning_rate": 3.106180889709567e-05,
+      "loss": 0.5969,
+      "step": 2505
+    },
+    {
+      "epoch": 0.7437722419928826,
+      "grad_norm": 0.259765625,
+      "learning_rate": 3.0859007732378896e-05,
+      "loss": 0.5936,
+      "step": 2508
+    },
+    {
+      "epoch": 0.744661921708185,
+      "grad_norm": 0.251953125,
+      "learning_rate": 3.065674992077584e-05,
+      "loss": 0.5717,
+      "step": 2511
+    },
+    {
+      "epoch": 0.7455516014234875,
+      "grad_norm": 0.2578125,
+      "learning_rate": 3.0455037051754777e-05,
+      "loss": 0.6061,
+      "step": 2514
+    },
+    {
+      "epoch": 0.74644128113879,
+      "grad_norm": 0.251953125,
+      "learning_rate": 3.0253870710501475e-05,
+      "loss": 0.5914,
+      "step": 2517
+    },
+    {
+      "epoch": 0.7473309608540926,
+      "grad_norm": 0.251953125,
+      "learning_rate": 3.005325247790668e-05,
+      "loss": 0.6067,
+      "step": 2520
+    },
+    {
+      "epoch": 0.748220640569395,
+      "grad_norm": 0.271484375,
+      "learning_rate": 2.9853183930553853e-05,
+      "loss": 0.5909,
+      "step": 2523
+    },
+    {
+      "epoch": 0.7491103202846975,
+      "grad_norm": 0.25,
+      "learning_rate": 2.965366664070661e-05,
+      "loss": 0.5847,
+      "step": 2526
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.26953125,
+      "learning_rate": 2.9454702176296423e-05,
+      "loss": 0.5907,
+      "step": 2529
+    },
+    {
+      "epoch": 0.7508896797153025,
+      "grad_norm": 0.26953125,
+      "learning_rate": 2.925629210091043e-05,
+      "loss": 0.606,
+      "step": 2532
+    },
+    {
+      "epoch": 0.751779359430605,
+      "grad_norm": 0.267578125,
+      "learning_rate": 2.9058437973778896e-05,
+      "loss": 0.6055,
+      "step": 2535
+    },
+    {
+      "epoch": 0.7526690391459074,
+      "grad_norm": 0.2578125,
+      "learning_rate": 2.886114134976322e-05,
+      "loss": 0.5993,
+      "step": 2538
+    },
+    {
+      "epoch": 0.75355871886121,
+      "grad_norm": 0.271484375,
+      "learning_rate": 2.866440377934352e-05,
+      "loss": 0.6098,
+      "step": 2541
+    },
+    {
+      "epoch": 0.7544483985765125,
+      "grad_norm": 0.26171875,
+      "learning_rate": 2.8468226808606522e-05,
+      "loss": 0.584,
+      "step": 2544
+    },
+    {
+      "epoch": 0.755338078291815,
+      "grad_norm": 0.255859375,
+      "learning_rate": 2.827261197923341e-05,
+      "loss": 0.5949,
+      "step": 2547
+    },
+    {
+      "epoch": 0.7562277580071174,
+      "grad_norm": 0.271484375,
+      "learning_rate": 2.8077560828487748e-05,
+      "loss": 0.5698,
+      "step": 2550
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 1.6693602668676907e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aefe89d5e368b5fae2668039e4dd5be80e2e69b77087b485c9b5f3dfa5cd7716
 size 5368

 version https://git-lfs.github.com/spec/v1
+oid sha256:57ae6aec2771e886a275e400f485be0955f7d493f16a01d43cda316730a80162
 size 5368