Upload checkpoint 2600

Browse files

Files changed (5) hide show

model-00001-of-00002.safetensors +1 -1
model-00002-of-00002.safetensors +1 -1
optimizer.pt +1 -1
scheduler.pt +1 -1
trainer_state.json +115 -3

model-00001-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91686ade4add65e1e3f776f0e2e8e27824e1b8b54fd0f3ca8624f5754fd40f74
 size 4957560304

 version https://git-lfs.github.com/spec/v1
+oid sha256:952c309379f76905daa77066d4496fa74792b15ba875c0be3c12f9c41a78acce
 size 4957560304

model-00002-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c6e350cf5a05c074ce5165ffc5f179d510ac285f53b5429387bf0bd885bbc1d8
 size 3989163248

 version https://git-lfs.github.com/spec/v1
+oid sha256:dcf07a7448c9b7b6ec4edb89149df3e835b966c4cf920e192e8d9b3be9e15e7c
 size 3989163248

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ed58f770dc223880998359d98df9e164ff7ba4fd4edc2c51456852c720e4ff61
 size 17893874312

 version https://git-lfs.github.com/spec/v1
+oid sha256:fcb29717c5eb981aab5be12ea7b027e90292c5f54fab761c249cd500c4eba893
 size 17893874312

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37c5646b0cfbdfe9b4cb4a990bd6626407946c25f0686631f52edd5843d42333
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:1bfe8385483ee0511ac93fc8694ea8ab941b50846cfb63dfdcbfc3051cd0d56d
 size 1064

trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7562277580071174,
   "eval_steps": 500,
-  "global_step": 2550,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5957,6 +5957,118 @@
       "learning_rate": 2.8077560828487748e-05,
       "loss": 0.5698,
       "step": 2550
     }
   ],
   "logging_steps": 3,
@@ -5976,7 +6088,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.6693602668676907e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.7710557532621589,
   "eval_steps": 500,
+  "global_step": 2600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2.8077560828487748e-05,
       "loss": 0.5698,
       "step": 2550
+    },
+    {
+      "epoch": 0.7571174377224199,
+      "grad_norm": 0.26171875,
+      "learning_rate": 2.7883074889203363e-05,
+      "loss": 0.612,
+      "step": 2553
+    },
+    {
+      "epoch": 0.7580071174377224,
+      "grad_norm": 0.26953125,
+      "learning_rate": 2.7689155689772217e-05,
+      "loss": 0.5951,
+      "step": 2556
+    },
+    {
+      "epoch": 0.7588967971530249,
+      "grad_norm": 0.259765625,
+      "learning_rate": 2.7495804754132602e-05,
+      "loss": 0.5841,
+      "step": 2559
+    },
+    {
+      "epoch": 0.7597864768683275,
+      "grad_norm": 0.26953125,
+      "learning_rate": 2.7303023601756928e-05,
+      "loss": 0.5978,
+      "step": 2562
+    },
+    {
+      "epoch": 0.7606761565836299,
+      "grad_norm": 0.251953125,
+      "learning_rate": 2.711081374763993e-05,
+      "loss": 0.5994,
+      "step": 2565
+    },
+    {
+      "epoch": 0.7615658362989324,
+      "grad_norm": 0.263671875,
+      "learning_rate": 2.6919176702286698e-05,
+      "loss": 0.6014,
+      "step": 2568
+    },
+    {
+      "epoch": 0.7624555160142349,
+      "grad_norm": 0.271484375,
+      "learning_rate": 2.6728113971700908e-05,
+      "loss": 0.5958,
+      "step": 2571
+    },
+    {
+      "epoch": 0.7633451957295374,
+      "grad_norm": 0.28125,
+      "learning_rate": 2.653762705737287e-05,
+      "loss": 0.6242,
+      "step": 2574
+    },
+    {
+      "epoch": 0.7642348754448398,
+      "grad_norm": 0.26171875,
+      "learning_rate": 2.634771745626772e-05,
+      "loss": 0.616,
+      "step": 2577
+    },
+    {
+      "epoch": 0.7651245551601423,
+      "grad_norm": 0.25390625,
+      "learning_rate": 2.6158386660813806e-05,
+      "loss": 0.5959,
+      "step": 2580
+    },
+    {
+      "epoch": 0.7660142348754448,
+      "grad_norm": 0.26953125,
+      "learning_rate": 2.5969636158890775e-05,
+      "loss": 0.5971,
+      "step": 2583
+    },
+    {
+      "epoch": 0.7669039145907474,
+      "grad_norm": 0.26171875,
+      "learning_rate": 2.5781467433817973e-05,
+      "loss": 0.593,
+      "step": 2586
+    },
+    {
+      "epoch": 0.7677935943060499,
+      "grad_norm": 0.2578125,
+      "learning_rate": 2.5593881964342857e-05,
+      "loss": 0.5841,
+      "step": 2589
+    },
+    {
+      "epoch": 0.7686832740213523,
+      "grad_norm": 0.251953125,
+      "learning_rate": 2.5406881224629174e-05,
+      "loss": 0.6111,
+      "step": 2592
+    },
+    {
+      "epoch": 0.7695729537366548,
+      "grad_norm": 0.263671875,
+      "learning_rate": 2.5220466684245646e-05,
+      "loss": 0.5758,
+      "step": 2595
+    },
+    {
+      "epoch": 0.7704626334519573,
+      "grad_norm": 0.263671875,
+      "learning_rate": 2.5034639808154114e-05,
+      "loss": 0.6276,
+      "step": 2598
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 1.7020928211199984e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null