rakhman-llm commited on
Commit
1544b73
·
verified ·
1 Parent(s): 8ed73d9

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27aaf51be344fdf558ecda990badb9c0fdb702501a800115667ae1d0d458d889
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b65f0032ace99ac31a5c4eaebab678922f9196855254a04fa04cd5b09838cd25
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00a78e6cd589620568cd11a8124efee25dc321c57212def3a84623d827f77b81
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9542f526fbcdae270a597291ca95babaa9f3b9e2d72ac373956f44fc941c5cd5
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba54176b539b14420803d3eda38f203f6842a1039f04005978b1bad7976f5771
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c88b9beda061d1039d8d779edfe7f5a8ca6add672a561cd18d21ff3443068c38
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97d0c42656677b3669a5e5760b1be71be06a3cd2fb44d0f56b707a5adc9d4f45
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:635e57a0f4914cbbb1c670a34423e0edc1f884281d003185692f1319d135cdd1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.09235642850399017,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-500",
4
- "epoch": 0.08,
5
  "eval_steps": 500,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -81,10 +81,88 @@
81
  {
82
  "epoch": 0.08,
83
  "eval_loss": 0.09235642850399017,
84
- "eval_runtime": 116.7651,
85
- "eval_samples_per_second": 17.128,
86
- "eval_steps_per_second": 2.141,
87
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  }
89
  ],
90
  "logging_steps": 50,
@@ -104,7 +182,7 @@
104
  "attributes": {}
105
  }
106
  },
107
- "total_flos": 2435831562240000.0,
108
  "train_batch_size": 8,
109
  "trial_name": null,
110
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.08949962258338928,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-1000",
4
+ "epoch": 0.16,
5
  "eval_steps": 500,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
81
  {
82
  "epoch": 0.08,
83
  "eval_loss": 0.09235642850399017,
84
+ "eval_runtime": 109.274,
85
+ "eval_samples_per_second": 18.303,
86
+ "eval_steps_per_second": 2.288,
87
  "step": 500
88
+ },
89
+ {
90
+ "epoch": 0.088,
91
+ "grad_norm": 6587.6513671875,
92
+ "learning_rate": 2.9120000000000002e-05,
93
+ "loss": 0.0815,
94
+ "step": 550
95
+ },
96
+ {
97
+ "epoch": 0.096,
98
+ "grad_norm": 6632.0947265625,
99
+ "learning_rate": 2.904e-05,
100
+ "loss": 0.0794,
101
+ "step": 600
102
+ },
103
+ {
104
+ "epoch": 0.104,
105
+ "grad_norm": 9301.228515625,
106
+ "learning_rate": 2.896e-05,
107
+ "loss": 0.076,
108
+ "step": 650
109
+ },
110
+ {
111
+ "epoch": 0.112,
112
+ "grad_norm": 10575.0791015625,
113
+ "learning_rate": 2.888e-05,
114
+ "loss": 0.0791,
115
+ "step": 700
116
+ },
117
+ {
118
+ "epoch": 0.12,
119
+ "grad_norm": 8609.86328125,
120
+ "learning_rate": 2.88e-05,
121
+ "loss": 0.0799,
122
+ "step": 750
123
+ },
124
+ {
125
+ "epoch": 0.128,
126
+ "grad_norm": 11379.4423828125,
127
+ "learning_rate": 2.8720000000000003e-05,
128
+ "loss": 0.0759,
129
+ "step": 800
130
+ },
131
+ {
132
+ "epoch": 0.136,
133
+ "grad_norm": 8489.6904296875,
134
+ "learning_rate": 2.864e-05,
135
+ "loss": 0.0753,
136
+ "step": 850
137
+ },
138
+ {
139
+ "epoch": 0.144,
140
+ "grad_norm": 12353.6279296875,
141
+ "learning_rate": 2.856e-05,
142
+ "loss": 0.075,
143
+ "step": 900
144
+ },
145
+ {
146
+ "epoch": 0.152,
147
+ "grad_norm": 11535.3994140625,
148
+ "learning_rate": 2.8480000000000002e-05,
149
+ "loss": 0.0757,
150
+ "step": 950
151
+ },
152
+ {
153
+ "epoch": 0.16,
154
+ "grad_norm": 8291.939453125,
155
+ "learning_rate": 2.84e-05,
156
+ "loss": 0.0753,
157
+ "step": 1000
158
+ },
159
+ {
160
+ "epoch": 0.16,
161
+ "eval_loss": 0.08949962258338928,
162
+ "eval_runtime": 109.2536,
163
+ "eval_samples_per_second": 18.306,
164
+ "eval_steps_per_second": 2.288,
165
+ "step": 1000
166
  }
167
  ],
168
  "logging_steps": 50,
 
182
  "attributes": {}
183
  }
184
  },
185
+ "total_flos": 4871663124480000.0,
186
  "train_batch_size": 8,
187
  "trial_name": null,
188
  "trial_params": null