Training in progress, step 1000, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +283 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:700837625208b7724bad50aacfe3ec0899ed845676d6567b103150efb717c031
 size 268858112

 version https://git-lfs.github.com/spec/v1
+oid sha256:9cb5c823ef74dfadc52b27febc38a2ac3a875bb51704bb51249b3924e6ee6f2b
 size 268858112

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:94d2f236b773db6cf924287d656356a621b077f1b9db874b98dd46fbac4e2d3c
 size 137668197

 version https://git-lfs.github.com/spec/v1
+oid sha256:35bbf0d8ee6289b2e585cda8bdd7541a68981e8fbe801a50960d7cef7275b8b9
 size 137668197

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:33dca605be33ed4ace196e2854e478111341ce60f041bccf2a2bda0cd9a6448a
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:bfad65ecdebf4d2a4ef23aa53c40f8dd23f710c224d047e20d32dc51c4015de2
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7677543186180422,
   "eval_steps": 500,
-  "global_step": 800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1128,6 +1128,286 @@
       "learning_rate": 0.0001,
       "loss": 0.2739,
       "step": 800
     }
   ],
   "logging_steps": 5,
@@ -1147,7 +1427,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.655694975843308e+17,
   "train_batch_size": 24,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9596928982725528,
   "eval_steps": 500,
+  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0001,
       "loss": 0.2739,
       "step": 800
+    },
+    {
+      "epoch": 0.772552783109405,
+      "grad_norm": 0.14281855523586273,
+      "learning_rate": 0.0001,
+      "loss": 0.2711,
+      "step": 805
+    },
+    {
+      "epoch": 0.7773512476007678,
+      "grad_norm": 0.13634523749351501,
+      "learning_rate": 0.0001,
+      "loss": 0.267,
+      "step": 810
+    },
+    {
+      "epoch": 0.7821497120921305,
+      "grad_norm": 0.14958547055721283,
+      "learning_rate": 0.0001,
+      "loss": 0.2809,
+      "step": 815
+    },
+    {
+      "epoch": 0.7869481765834933,
+      "grad_norm": 0.16246724128723145,
+      "learning_rate": 0.0001,
+      "loss": 0.2632,
+      "step": 820
+    },
+    {
+      "epoch": 0.791746641074856,
+      "grad_norm": 0.15865352749824524,
+      "learning_rate": 0.0001,
+      "loss": 0.2669,
+      "step": 825
+    },
+    {
+      "epoch": 0.7965451055662188,
+      "grad_norm": 0.16149087250232697,
+      "learning_rate": 0.0001,
+      "loss": 0.2708,
+      "step": 830
+    },
+    {
+      "epoch": 0.8013435700575816,
+      "grad_norm": 0.15393655002117157,
+      "learning_rate": 0.0001,
+      "loss": 0.2736,
+      "step": 835
+    },
+    {
+      "epoch": 0.8061420345489443,
+      "grad_norm": 0.16198311746120453,
+      "learning_rate": 0.0001,
+      "loss": 0.2627,
+      "step": 840
+    },
+    {
+      "epoch": 0.8109404990403071,
+      "grad_norm": 0.18130268156528473,
+      "learning_rate": 0.0001,
+      "loss": 0.2663,
+      "step": 845
+    },
+    {
+      "epoch": 0.8157389635316699,
+      "grad_norm": 0.15879245102405548,
+      "learning_rate": 0.0001,
+      "loss": 0.2525,
+      "step": 850
+    },
+    {
+      "epoch": 0.8205374280230326,
+      "grad_norm": 0.15435881912708282,
+      "learning_rate": 0.0001,
+      "loss": 0.277,
+      "step": 855
+    },
+    {
+      "epoch": 0.8253358925143954,
+      "grad_norm": 0.14485138654708862,
+      "learning_rate": 0.0001,
+      "loss": 0.2902,
+      "step": 860
+    },
+    {
+      "epoch": 0.8301343570057581,
+      "grad_norm": 0.15166470408439636,
+      "learning_rate": 0.0001,
+      "loss": 0.2703,
+      "step": 865
+    },
+    {
+      "epoch": 0.8349328214971209,
+      "grad_norm": 0.14248280227184296,
+      "learning_rate": 0.0001,
+      "loss": 0.2635,
+      "step": 870
+    },
+    {
+      "epoch": 0.8397312859884837,
+      "grad_norm": 0.14291894435882568,
+      "learning_rate": 0.0001,
+      "loss": 0.2505,
+      "step": 875
+    },
+    {
+      "epoch": 0.8445297504798465,
+      "grad_norm": 0.16025425493717194,
+      "learning_rate": 0.0001,
+      "loss": 0.2591,
+      "step": 880
+    },
+    {
+      "epoch": 0.8493282149712092,
+      "grad_norm": 0.15063312649726868,
+      "learning_rate": 0.0001,
+      "loss": 0.2767,
+      "step": 885
+    },
+    {
+      "epoch": 0.8541266794625719,
+      "grad_norm": 0.14704886078834534,
+      "learning_rate": 0.0001,
+      "loss": 0.2615,
+      "step": 890
+    },
+    {
+      "epoch": 0.8589251439539347,
+      "grad_norm": 0.1524520218372345,
+      "learning_rate": 0.0001,
+      "loss": 0.2771,
+      "step": 895
+    },
+    {
+      "epoch": 0.8637236084452975,
+      "grad_norm": 0.15311211347579956,
+      "learning_rate": 0.0001,
+      "loss": 0.2779,
+      "step": 900
+    },
+    {
+      "epoch": 0.8685220729366603,
+      "grad_norm": 0.19531571865081787,
+      "learning_rate": 0.0001,
+      "loss": 0.256,
+      "step": 905
+    },
+    {
+      "epoch": 0.8733205374280231,
+      "grad_norm": 0.15908968448638916,
+      "learning_rate": 0.0001,
+      "loss": 0.2598,
+      "step": 910
+    },
+    {
+      "epoch": 0.8781190019193857,
+      "grad_norm": 0.14221200346946716,
+      "learning_rate": 0.0001,
+      "loss": 0.2554,
+      "step": 915
+    },
+    {
+      "epoch": 0.8829174664107485,
+      "grad_norm": 0.16132907569408417,
+      "learning_rate": 0.0001,
+      "loss": 0.2885,
+      "step": 920
+    },
+    {
+      "epoch": 0.8877159309021113,
+      "grad_norm": 0.14751212298870087,
+      "learning_rate": 0.0001,
+      "loss": 0.2716,
+      "step": 925
+    },
+    {
+      "epoch": 0.8925143953934741,
+      "grad_norm": 0.146012082695961,
+      "learning_rate": 0.0001,
+      "loss": 0.2558,
+      "step": 930
+    },
+    {
+      "epoch": 0.8973128598848369,
+      "grad_norm": 0.16232919692993164,
+      "learning_rate": 0.0001,
+      "loss": 0.2805,
+      "step": 935
+    },
+    {
+      "epoch": 0.9021113243761996,
+      "grad_norm": 0.16521847248077393,
+      "learning_rate": 0.0001,
+      "loss": 0.2669,
+      "step": 940
+    },
+    {
+      "epoch": 0.9069097888675623,
+      "grad_norm": 0.14723201096057892,
+      "learning_rate": 0.0001,
+      "loss": 0.2642,
+      "step": 945
+    },
+    {
+      "epoch": 0.9117082533589251,
+      "grad_norm": 0.15053531527519226,
+      "learning_rate": 0.0001,
+      "loss": 0.2698,
+      "step": 950
+    },
+    {
+      "epoch": 0.9165067178502879,
+      "grad_norm": 0.14634476602077484,
+      "learning_rate": 0.0001,
+      "loss": 0.2615,
+      "step": 955
+    },
+    {
+      "epoch": 0.9213051823416507,
+      "grad_norm": 0.1575053334236145,
+      "learning_rate": 0.0001,
+      "loss": 0.2789,
+      "step": 960
+    },
+    {
+      "epoch": 0.9261036468330134,
+      "grad_norm": 0.15231551229953766,
+      "learning_rate": 0.0001,
+      "loss": 0.2577,
+      "step": 965
+    },
+    {
+      "epoch": 0.9309021113243762,
+      "grad_norm": 0.15277941524982452,
+      "learning_rate": 0.0001,
+      "loss": 0.2658,
+      "step": 970
+    },
+    {
+      "epoch": 0.935700575815739,
+      "grad_norm": 0.1474364995956421,
+      "learning_rate": 0.0001,
+      "loss": 0.2771,
+      "step": 975
+    },
+    {
+      "epoch": 0.9404990403071017,
+      "grad_norm": 0.14509518444538116,
+      "learning_rate": 0.0001,
+      "loss": 0.2587,
+      "step": 980
+    },
+    {
+      "epoch": 0.9452975047984645,
+      "grad_norm": 0.146579310297966,
+      "learning_rate": 0.0001,
+      "loss": 0.2662,
+      "step": 985
+    },
+    {
+      "epoch": 0.9500959692898272,
+      "grad_norm": 0.1470819115638733,
+      "learning_rate": 0.0001,
+      "loss": 0.2691,
+      "step": 990
+    },
+    {
+      "epoch": 0.95489443378119,
+      "grad_norm": 0.161437526345253,
+      "learning_rate": 0.0001,
+      "loss": 0.2681,
+      "step": 995
+    },
+    {
+      "epoch": 0.9596928982725528,
+      "grad_norm": 0.1448318362236023,
+      "learning_rate": 0.0001,
+      "loss": 0.2554,
+      "step": 1000
     }
   ],
   "logging_steps": 5,
       "attributes": {}
     }
   },
+  "total_flos": 3.323888912649138e+17,
   "train_batch_size": 24,
   "trial_name": null,
   "trial_params": null