qingy2024 committed (verified)
Commit 0cbed2a
1 Parent(s): bf7398e

Upload checkpoint 2600

model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:91686ade4add65e1e3f776f0e2e8e27824e1b8b54fd0f3ca8624f5754fd40f74
+ oid sha256:952c309379f76905daa77066d4496fa74792b15ba875c0be3c12f9c41a78acce
  size 4957560304
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c6e350cf5a05c074ce5165ffc5f179d510ac285f53b5429387bf0bd885bbc1d8
+ oid sha256:dcf07a7448c9b7b6ec4edb89149df3e835b966c4cf920e192e8d9b3be9e15e7c
  size 3989163248
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ed58f770dc223880998359d98df9e164ff7ba4fd4edc2c51456852c720e4ff61
+ oid sha256:fcb29717c5eb981aab5be12ea7b027e90292c5f54fab761c249cd500c4eba893
  size 17893874312
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:37c5646b0cfbdfe9b4cb4a990bd6626407946c25f0686631f52edd5843d42333
+ oid sha256:1bfe8385483ee0511ac93fc8694ea8ab941b50846cfb63dfdcbfc3051cd0d56d
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.7562277580071174,
+ "epoch": 0.7710557532621589,
  "eval_steps": 500,
- "global_step": 2550,
+ "global_step": 2600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -5957,6 +5957,118 @@
  "learning_rate": 2.8077560828487748e-05,
  "loss": 0.5698,
  "step": 2550
+ },
+ {
+ "epoch": 0.7571174377224199,
+ "grad_norm": 0.26171875,
+ "learning_rate": 2.7883074889203363e-05,
+ "loss": 0.612,
+ "step": 2553
+ },
+ {
+ "epoch": 0.7580071174377224,
+ "grad_norm": 0.26953125,
+ "learning_rate": 2.7689155689772217e-05,
+ "loss": 0.5951,
+ "step": 2556
+ },
+ {
+ "epoch": 0.7588967971530249,
+ "grad_norm": 0.259765625,
+ "learning_rate": 2.7495804754132602e-05,
+ "loss": 0.5841,
+ "step": 2559
+ },
+ {
+ "epoch": 0.7597864768683275,
+ "grad_norm": 0.26953125,
+ "learning_rate": 2.7303023601756928e-05,
+ "loss": 0.5978,
+ "step": 2562
+ },
+ {
+ "epoch": 0.7606761565836299,
+ "grad_norm": 0.251953125,
+ "learning_rate": 2.711081374763993e-05,
+ "loss": 0.5994,
+ "step": 2565
+ },
+ {
+ "epoch": 0.7615658362989324,
+ "grad_norm": 0.263671875,
+ "learning_rate": 2.6919176702286698e-05,
+ "loss": 0.6014,
+ "step": 2568
+ },
+ {
+ "epoch": 0.7624555160142349,
+ "grad_norm": 0.271484375,
+ "learning_rate": 2.6728113971700908e-05,
+ "loss": 0.5958,
+ "step": 2571
+ },
+ {
+ "epoch": 0.7633451957295374,
+ "grad_norm": 0.28125,
+ "learning_rate": 2.653762705737287e-05,
+ "loss": 0.6242,
+ "step": 2574
+ },
+ {
+ "epoch": 0.7642348754448398,
+ "grad_norm": 0.26171875,
+ "learning_rate": 2.634771745626772e-05,
+ "loss": 0.616,
+ "step": 2577
+ },
+ {
+ "epoch": 0.7651245551601423,
+ "grad_norm": 0.25390625,
+ "learning_rate": 2.6158386660813806e-05,
+ "loss": 0.5959,
+ "step": 2580
+ },
+ {
+ "epoch": 0.7660142348754448,
+ "grad_norm": 0.26953125,
+ "learning_rate": 2.5969636158890775e-05,
+ "loss": 0.5971,
+ "step": 2583
+ },
+ {
+ "epoch": 0.7669039145907474,
+ "grad_norm": 0.26171875,
+ "learning_rate": 2.5781467433817973e-05,
+ "loss": 0.593,
+ "step": 2586
+ },
+ {
+ "epoch": 0.7677935943060499,
+ "grad_norm": 0.2578125,
+ "learning_rate": 2.5593881964342857e-05,
+ "loss": 0.5841,
+ "step": 2589
+ },
+ {
+ "epoch": 0.7686832740213523,
+ "grad_norm": 0.251953125,
+ "learning_rate": 2.5406881224629174e-05,
+ "loss": 0.6111,
+ "step": 2592
+ },
+ {
+ "epoch": 0.7695729537366548,
+ "grad_norm": 0.263671875,
+ "learning_rate": 2.5220466684245646e-05,
+ "loss": 0.5758,
+ "step": 2595
+ },
+ {
+ "epoch": 0.7704626334519573,
+ "grad_norm": 0.263671875,
+ "learning_rate": 2.5034639808154114e-05,
+ "loss": 0.6276,
+ "step": 2598
  }
  ],
  "logging_steps": 3,
@@ -5976,7 +6088,7 @@
  "attributes": {}
  }
  },
- "total_flos": 1.6693602668676907e+19,
+ "total_flos": 1.7020928211199984e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null