Upload checkpoint 2550
Browse files- model-00001-of-00002.safetensors +1 -1
- model-00002-of-00002.safetensors +1 -1
- optimizer.pt +1 -1
- scheduler.pt +1 -1
- trainer_state.json +122 -3
- training_args.bin +1 -1
model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4957560304
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91686ade4add65e1e3f776f0e2e8e27824e1b8b54fd0f3ca8624f5754fd40f74
|
3 |
size 4957560304
|
model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3989163248
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c6e350cf5a05c074ce5165ffc5f179d510ac285f53b5429387bf0bd885bbc1d8
|
3 |
size 3989163248
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17893874312
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed58f770dc223880998359d98df9e164ff7ba4fd4edc2c51456852c720e4ff61
|
3 |
size 17893874312
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37c5646b0cfbdfe9b4cb4a990bd6626407946c25f0686631f52edd5843d42333
|
3 |
size 1064
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -5838,6 +5838,125 @@
|
|
5838 |
"learning_rate": 3.1469034906659946e-05,
|
5839 |
"loss": 0.6053,
|
5840 |
"step": 2499
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5841 |
}
|
5842 |
],
|
5843 |
"logging_steps": 3,
|
@@ -5857,7 +5976,7 @@
|
|
5857 |
"attributes": {}
|
5858 |
}
|
5859 |
},
|
5860 |
-
"total_flos": 1.
|
5861 |
"train_batch_size": 8,
|
5862 |
"trial_name": null,
|
5863 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.7562277580071174,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 2550,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
5838 |
"learning_rate": 3.1469034906659946e-05,
|
5839 |
"loss": 0.6053,
|
5840 |
"step": 2499
|
5841 |
+
},
|
5842 |
+
{
|
5843 |
+
"epoch": 0.7419928825622776,
|
5844 |
+
"grad_norm": 0.267578125,
|
5845 |
+
"learning_rate": 3.126515182118793e-05,
|
5846 |
+
"loss": 0.5994,
|
5847 |
+
"step": 2502
|
5848 |
+
},
|
5849 |
+
{
|
5850 |
+
"epoch": 0.7428825622775801,
|
5851 |
+
"grad_norm": 0.263671875,
|
5852 |
+
"learning_rate": 3.106180889709567e-05,
|
5853 |
+
"loss": 0.5969,
|
5854 |
+
"step": 2505
|
5855 |
+
},
|
5856 |
+
{
|
5857 |
+
"epoch": 0.7437722419928826,
|
5858 |
+
"grad_norm": 0.259765625,
|
5859 |
+
"learning_rate": 3.0859007732378896e-05,
|
5860 |
+
"loss": 0.5936,
|
5861 |
+
"step": 2508
|
5862 |
+
},
|
5863 |
+
{
|
5864 |
+
"epoch": 0.744661921708185,
|
5865 |
+
"grad_norm": 0.251953125,
|
5866 |
+
"learning_rate": 3.065674992077584e-05,
|
5867 |
+
"loss": 0.5717,
|
5868 |
+
"step": 2511
|
5869 |
+
},
|
5870 |
+
{
|
5871 |
+
"epoch": 0.7455516014234875,
|
5872 |
+
"grad_norm": 0.2578125,
|
5873 |
+
"learning_rate": 3.0455037051754777e-05,
|
5874 |
+
"loss": 0.6061,
|
5875 |
+
"step": 2514
|
5876 |
+
},
|
5877 |
+
{
|
5878 |
+
"epoch": 0.74644128113879,
|
5879 |
+
"grad_norm": 0.251953125,
|
5880 |
+
"learning_rate": 3.0253870710501475e-05,
|
5881 |
+
"loss": 0.5914,
|
5882 |
+
"step": 2517
|
5883 |
+
},
|
5884 |
+
{
|
5885 |
+
"epoch": 0.7473309608540926,
|
5886 |
+
"grad_norm": 0.251953125,
|
5887 |
+
"learning_rate": 3.005325247790668e-05,
|
5888 |
+
"loss": 0.6067,
|
5889 |
+
"step": 2520
|
5890 |
+
},
|
5891 |
+
{
|
5892 |
+
"epoch": 0.748220640569395,
|
5893 |
+
"grad_norm": 0.271484375,
|
5894 |
+
"learning_rate": 2.9853183930553853e-05,
|
5895 |
+
"loss": 0.5909,
|
5896 |
+
"step": 2523
|
5897 |
+
},
|
5898 |
+
{
|
5899 |
+
"epoch": 0.7491103202846975,
|
5900 |
+
"grad_norm": 0.25,
|
5901 |
+
"learning_rate": 2.965366664070661e-05,
|
5902 |
+
"loss": 0.5847,
|
5903 |
+
"step": 2526
|
5904 |
+
},
|
5905 |
+
{
|
5906 |
+
"epoch": 0.75,
|
5907 |
+
"grad_norm": 0.26953125,
|
5908 |
+
"learning_rate": 2.9454702176296423e-05,
|
5909 |
+
"loss": 0.5907,
|
5910 |
+
"step": 2529
|
5911 |
+
},
|
5912 |
+
{
|
5913 |
+
"epoch": 0.7508896797153025,
|
5914 |
+
"grad_norm": 0.26953125,
|
5915 |
+
"learning_rate": 2.925629210091043e-05,
|
5916 |
+
"loss": 0.606,
|
5917 |
+
"step": 2532
|
5918 |
+
},
|
5919 |
+
{
|
5920 |
+
"epoch": 0.751779359430605,
|
5921 |
+
"grad_norm": 0.267578125,
|
5922 |
+
"learning_rate": 2.9058437973778896e-05,
|
5923 |
+
"loss": 0.6055,
|
5924 |
+
"step": 2535
|
5925 |
+
},
|
5926 |
+
{
|
5927 |
+
"epoch": 0.7526690391459074,
|
5928 |
+
"grad_norm": 0.2578125,
|
5929 |
+
"learning_rate": 2.886114134976322e-05,
|
5930 |
+
"loss": 0.5993,
|
5931 |
+
"step": 2538
|
5932 |
+
},
|
5933 |
+
{
|
5934 |
+
"epoch": 0.75355871886121,
|
5935 |
+
"grad_norm": 0.271484375,
|
5936 |
+
"learning_rate": 2.866440377934352e-05,
|
5937 |
+
"loss": 0.6098,
|
5938 |
+
"step": 2541
|
5939 |
+
},
|
5940 |
+
{
|
5941 |
+
"epoch": 0.7544483985765125,
|
5942 |
+
"grad_norm": 0.26171875,
|
5943 |
+
"learning_rate": 2.8468226808606522e-05,
|
5944 |
+
"loss": 0.584,
|
5945 |
+
"step": 2544
|
5946 |
+
},
|
5947 |
+
{
|
5948 |
+
"epoch": 0.755338078291815,
|
5949 |
+
"grad_norm": 0.255859375,
|
5950 |
+
"learning_rate": 2.827261197923341e-05,
|
5951 |
+
"loss": 0.5949,
|
5952 |
+
"step": 2547
|
5953 |
+
},
|
5954 |
+
{
|
5955 |
+
"epoch": 0.7562277580071174,
|
5956 |
+
"grad_norm": 0.271484375,
|
5957 |
+
"learning_rate": 2.8077560828487748e-05,
|
5958 |
+
"loss": 0.5698,
|
5959 |
+
"step": 2550
|
5960 |
}
|
5961 |
],
|
5962 |
"logging_steps": 3,
|
|
|
5976 |
"attributes": {}
|
5977 |
}
|
5978 |
},
|
5979 |
+
"total_flos": 1.6693602668676907e+19,
|
5980 |
"train_batch_size": 8,
|
5981 |
"trial_name": null,
|
5982 |
"trial_params": null
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5368
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57ae6aec2771e886a275e400f485be0955f7d493f16a01d43cda316730a80162
|
3 |
size 5368
|