qingy2024 commited on
Commit
bf7398e
·
verified ·
1 Parent(s): a88825b

Upload checkpoint 2550

Browse files
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8d72758923b9f0bd82e5bbc48d12b2be490921493204a39cce9b2d23b381d0f
3
  size 4957560304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91686ade4add65e1e3f776f0e2e8e27824e1b8b54fd0f3ca8624f5754fd40f74
3
  size 4957560304
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef49e65a4841d418eae8cb8e2787b90949f7dc5d0f7f5bff27d3dbf435b150a6
3
  size 3989163248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6e350cf5a05c074ce5165ffc5f179d510ac285f53b5429387bf0bd885bbc1d8
3
  size 3989163248
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c3687ead06fe1d4a2c758ad24015232d57c60288f61505a60550a7000b58c00
3
  size 17893874312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed58f770dc223880998359d98df9e164ff7ba4fd4edc2c51456852c720e4ff61
3
  size 17893874312
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c414f1eefcbe42a55aad752b49082d98662bb35b573d8b5f6de0588323354371
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37c5646b0cfbdfe9b4cb4a990bd6626407946c25f0686631f52edd5843d42333
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.741399762752076,
5
  "eval_steps": 500,
6
- "global_step": 2500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5838,6 +5838,125 @@
5838
  "learning_rate": 3.1469034906659946e-05,
5839
  "loss": 0.6053,
5840
  "step": 2499
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5841
  }
5842
  ],
5843
  "logging_steps": 3,
@@ -5857,7 +5976,7 @@
5857
  "attributes": {}
5858
  }
5859
  },
5860
- "total_flos": 1.636627712615383e+19,
5861
  "train_batch_size": 8,
5862
  "trial_name": null,
5863
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7562277580071174,
5
  "eval_steps": 500,
6
+ "global_step": 2550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5838
  "learning_rate": 3.1469034906659946e-05,
5839
  "loss": 0.6053,
5840
  "step": 2499
5841
+ },
5842
+ {
5843
+ "epoch": 0.7419928825622776,
5844
+ "grad_norm": 0.267578125,
5845
+ "learning_rate": 3.126515182118793e-05,
5846
+ "loss": 0.5994,
5847
+ "step": 2502
5848
+ },
5849
+ {
5850
+ "epoch": 0.7428825622775801,
5851
+ "grad_norm": 0.263671875,
5852
+ "learning_rate": 3.106180889709567e-05,
5853
+ "loss": 0.5969,
5854
+ "step": 2505
5855
+ },
5856
+ {
5857
+ "epoch": 0.7437722419928826,
5858
+ "grad_norm": 0.259765625,
5859
+ "learning_rate": 3.0859007732378896e-05,
5860
+ "loss": 0.5936,
5861
+ "step": 2508
5862
+ },
5863
+ {
5864
+ "epoch": 0.744661921708185,
5865
+ "grad_norm": 0.251953125,
5866
+ "learning_rate": 3.065674992077584e-05,
5867
+ "loss": 0.5717,
5868
+ "step": 2511
5869
+ },
5870
+ {
5871
+ "epoch": 0.7455516014234875,
5872
+ "grad_norm": 0.2578125,
5873
+ "learning_rate": 3.0455037051754777e-05,
5874
+ "loss": 0.6061,
5875
+ "step": 2514
5876
+ },
5877
+ {
5878
+ "epoch": 0.74644128113879,
5879
+ "grad_norm": 0.251953125,
5880
+ "learning_rate": 3.0253870710501475e-05,
5881
+ "loss": 0.5914,
5882
+ "step": 2517
5883
+ },
5884
+ {
5885
+ "epoch": 0.7473309608540926,
5886
+ "grad_norm": 0.251953125,
5887
+ "learning_rate": 3.005325247790668e-05,
5888
+ "loss": 0.6067,
5889
+ "step": 2520
5890
+ },
5891
+ {
5892
+ "epoch": 0.748220640569395,
5893
+ "grad_norm": 0.271484375,
5894
+ "learning_rate": 2.9853183930553853e-05,
5895
+ "loss": 0.5909,
5896
+ "step": 2523
5897
+ },
5898
+ {
5899
+ "epoch": 0.7491103202846975,
5900
+ "grad_norm": 0.25,
5901
+ "learning_rate": 2.965366664070661e-05,
5902
+ "loss": 0.5847,
5903
+ "step": 2526
5904
+ },
5905
+ {
5906
+ "epoch": 0.75,
5907
+ "grad_norm": 0.26953125,
5908
+ "learning_rate": 2.9454702176296423e-05,
5909
+ "loss": 0.5907,
5910
+ "step": 2529
5911
+ },
5912
+ {
5913
+ "epoch": 0.7508896797153025,
5914
+ "grad_norm": 0.26953125,
5915
+ "learning_rate": 2.925629210091043e-05,
5916
+ "loss": 0.606,
5917
+ "step": 2532
5918
+ },
5919
+ {
5920
+ "epoch": 0.751779359430605,
5921
+ "grad_norm": 0.267578125,
5922
+ "learning_rate": 2.9058437973778896e-05,
5923
+ "loss": 0.6055,
5924
+ "step": 2535
5925
+ },
5926
+ {
5927
+ "epoch": 0.7526690391459074,
5928
+ "grad_norm": 0.2578125,
5929
+ "learning_rate": 2.886114134976322e-05,
5930
+ "loss": 0.5993,
5931
+ "step": 2538
5932
+ },
5933
+ {
5934
+ "epoch": 0.75355871886121,
5935
+ "grad_norm": 0.271484375,
5936
+ "learning_rate": 2.866440377934352e-05,
5937
+ "loss": 0.6098,
5938
+ "step": 2541
5939
+ },
5940
+ {
5941
+ "epoch": 0.7544483985765125,
5942
+ "grad_norm": 0.26171875,
5943
+ "learning_rate": 2.8468226808606522e-05,
5944
+ "loss": 0.584,
5945
+ "step": 2544
5946
+ },
5947
+ {
5948
+ "epoch": 0.755338078291815,
5949
+ "grad_norm": 0.255859375,
5950
+ "learning_rate": 2.827261197923341e-05,
5951
+ "loss": 0.5949,
5952
+ "step": 2547
5953
+ },
5954
+ {
5955
+ "epoch": 0.7562277580071174,
5956
+ "grad_norm": 0.271484375,
5957
+ "learning_rate": 2.8077560828487748e-05,
5958
+ "loss": 0.5698,
5959
+ "step": 2550
5960
  }
5961
  ],
5962
  "logging_steps": 3,
 
5976
  "attributes": {}
5977
  }
5978
  },
5979
+ "total_flos": 1.6693602668676907e+19,
5980
  "train_batch_size": 8,
5981
  "trial_name": null,
5982
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aefe89d5e368b5fae2668039e4dd5be80e2e69b77087b485c9b5f3dfa5cd7716
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57ae6aec2771e886a275e400f485be0955f7d493f16a01d43cda316730a80162
3
  size 5368