alexander-sh commited on
Commit
f3047c0
·
verified ·
1 Parent(s): 4e8df88

Upload 14 files

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "microsoft/deberta-v3-base",
3
  "architectures": [
4
  "DebertaV2ForSequenceClassification"
5
  ],
@@ -42,5 +42,5 @@
42
  "torch_dtype": "float32",
43
  "transformers_version": "4.46.3",
44
  "type_vocab_size": 0,
45
- "vocab_size": 128100
46
  }
 
1
  {
2
+ "_name_or_path": "microsoft/mdeberta-v3-base",
3
  "architectures": [
4
  "DebertaV2ForSequenceClassification"
5
  ],
 
42
  "torch_dtype": "float32",
43
  "transformers_version": "4.46.3",
44
  "type_vocab_size": 0,
45
+ "vocab_size": 251000
46
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bdccb9a1326282ce172abc05046d8868d5a53154be6b0905220f3aa58f59e7f1
3
- size 737722356
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2c014245ee7a2d39769e223503c03bc2cca57313e7aa8543efcaa4b802a3f61
3
+ size 1115271284
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d635017647497837523dbad9bcb82e9b738f9c5e7a0ffa17b00200367a854171
3
- size 1475564538
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d9cabf3a82eb8e3c7d9c74c3f1cd32f6b4fe3199c1016050950c7fd5d3cac9a
3
+ size 2230662138
rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fba9836d3d5d131b179fe7b16872f0458f9ebf0e4b1ed6cead0b06f1011984f3
3
+ size 15984
rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2a0c7025936f28ccadaeabad382f47beb7500bee229d6cadbca163725ffe1d4
3
+ size 15984
rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1871450a718945b3a8f0c9b05f32928beaa2dff494f7f8ae0d7acbbe8ab99575
3
+ size 15984
rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a278fb64096b400bcb71e42326226b98095346679b386a45020136626e71c84
3
+ size 15984
rng_state_4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85f8fd5c6381ed996cfc981c90e784927522bf727a148c2dc7644a129148cc28
3
+ size 15984
rng_state_5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0280f37d29580c31bc7d1d6228c906b4878d32927af739cb82e4228ba73fd30
3
+ size 15984
rng_state_6.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36d8cbfbca6b874820791af59a1b4320c909ebd361dd3955e8c6b7343461bb43
3
+ size 15984
rng_state_7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cafaea57a4cda8ba3e730c6f5572f696d7c678648476ef37977a5cc4650e5e25
3
+ size 15984
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e1d4b62c8569136ac533bea6bd4720b618f75d960825875ab003fa764178d75
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e6703b36d6b79a3ae4291228a28a3b0975b7538704440c681042f431f22e174
3
  size 1064
trainer_state.json CHANGED
@@ -1,168 +1,326 @@
1
  {
2
- "best_metric": 0.7336740904072855,
3
- "best_model_checkpoint": "/shevtsov/sent_results/checkpoint-55176",
4
- "epoch": 8.0,
5
  "eval_steps": 500,
6
- "global_step": 55176,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 10.982656478881836,
 
 
 
 
 
 
 
 
 
 
 
 
14
  "learning_rate": 5e-06,
15
- "loss": 0.278,
16
- "step": 6897
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.6962722394803728,
21
- "eval_f1": 0.6954934097978532,
22
- "eval_loss": 0.27303698658943176,
23
- "eval_precision": 0.7032410224368593,
24
- "eval_recall": 0.6962722394803728,
25
- "eval_runtime": 235.2201,
26
- "eval_samples_per_second": 90.324,
27
- "eval_steps_per_second": 11.292,
28
- "step": 6897
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 3.829893112182617,
 
 
 
 
 
 
 
 
 
 
 
 
33
  "learning_rate": 4.849231551964771e-06,
34
- "loss": 0.1974,
35
- "step": 13794
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.7188176597947848,
40
- "eval_f1": 0.7159777515871979,
41
- "eval_loss": 0.2562166452407837,
42
- "eval_precision": 0.7156618605155758,
43
- "eval_recall": 0.7188176597947848,
44
- "eval_runtime": 235.187,
45
- "eval_samples_per_second": 90.337,
46
- "eval_steps_per_second": 11.293,
47
- "step": 13794
 
 
 
 
 
 
 
 
 
 
 
 
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 7.553038597106934,
52
  "learning_rate": 4.415111107797445e-06,
53
- "loss": 0.172,
54
- "step": 20691
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_accuracy": 0.7296902946436976,
59
- "eval_f1": 0.7289604788993475,
60
- "eval_loss": 0.27564719319343567,
61
- "eval_precision": 0.7285244419417612,
62
- "eval_recall": 0.7296902946436976,
63
- "eval_runtime": 235.2108,
64
- "eval_samples_per_second": 90.327,
65
- "eval_steps_per_second": 11.292,
66
- "step": 20691
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 5.199881076812744,
 
 
 
 
 
 
 
 
 
 
 
 
71
  "learning_rate": 3.7500000000000005e-06,
72
- "loss": 0.154,
73
- "step": 27588
74
  },
75
  {
76
  "epoch": 4.0,
77
- "eval_accuracy": 0.7309611220935706,
78
- "eval_f1": 0.7313527175830589,
79
- "eval_loss": 0.3117690682411194,
80
- "eval_precision": 0.7331236097784076,
81
- "eval_recall": 0.7309611220935706,
82
- "eval_runtime": 235.1385,
83
- "eval_samples_per_second": 90.355,
84
- "eval_steps_per_second": 11.295,
85
- "step": 27588
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 4.5583109855651855,
 
 
 
 
 
 
 
 
 
 
 
 
90
  "learning_rate": 2.9341204441673267e-06,
91
- "loss": 0.1405,
92
- "step": 34485
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.7292196178104113,
97
- "eval_f1": 0.7306905410800046,
98
- "eval_loss": 0.3434309661388397,
99
- "eval_precision": 0.7340143377006256,
100
- "eval_recall": 0.7292196178104113,
101
- "eval_runtime": 235.1135,
102
- "eval_samples_per_second": 90.365,
103
- "eval_steps_per_second": 11.297,
104
- "step": 34485
 
 
 
 
 
 
 
 
 
 
 
 
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 5.269964218139648,
109
  "learning_rate": 2.0658795558326745e-06,
110
- "loss": 0.1292,
111
- "step": 41382
112
  },
113
  {
114
  "epoch": 6.0,
115
- "eval_accuracy": 0.728372399510496,
116
- "eval_f1": 0.7270264813978528,
117
- "eval_loss": 0.37885233759880066,
118
- "eval_precision": 0.7276053091596871,
119
- "eval_recall": 0.728372399510496,
120
- "eval_runtime": 235.0304,
121
- "eval_samples_per_second": 90.397,
122
- "eval_steps_per_second": 11.301,
123
- "step": 41382
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 5.499964714050293,
 
 
 
 
 
 
 
 
 
 
 
 
128
  "learning_rate": 1.2500000000000007e-06,
129
- "loss": 0.1205,
130
- "step": 48279
131
  },
132
  {
133
  "epoch": 7.0,
134
- "eval_accuracy": 0.7335969123599736,
135
- "eval_f1": 0.7320317760805066,
136
- "eval_loss": 0.4042453467845917,
137
- "eval_precision": 0.7316491391334223,
138
- "eval_recall": 0.7335969123599736,
139
- "eval_runtime": 235.1289,
140
- "eval_samples_per_second": 90.359,
141
- "eval_steps_per_second": 11.296,
142
- "step": 48279
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 7.1066179275512695,
 
 
 
 
 
 
 
 
 
 
 
 
147
  "learning_rate": 5.848888922025553e-07,
148
- "loss": 0.1145,
149
- "step": 55176
150
  },
151
  {
152
  "epoch": 8.0,
153
- "eval_accuracy": 0.7338793184599454,
154
- "eval_f1": 0.7336740904072855,
155
- "eval_loss": 0.4205409288406372,
156
- "eval_precision": 0.7335551105156711,
157
- "eval_recall": 0.7338793184599454,
158
- "eval_runtime": 235.1161,
159
- "eval_samples_per_second": 90.364,
160
- "eval_steps_per_second": 11.297,
161
- "step": 55176
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  }
163
  ],
164
  "logging_steps": 500,
165
- "max_steps": 68970,
166
  "num_input_tokens_seen": 0,
167
  "num_train_epochs": 10,
168
  "save_steps": 500,
@@ -173,13 +331,13 @@
173
  "should_evaluate": false,
174
  "should_log": false,
175
  "should_save": true,
176
- "should_training_stop": false
177
  },
178
  "attributes": {}
179
  }
180
  },
181
- "total_flos": 6.968525969553408e+17,
182
- "train_batch_size": 24,
183
  "trial_name": null,
184
  "trial_params": null
185
  }
 
1
  {
2
+ "best_metric": 0.7399751659163123,
3
+ "best_model_checkpoint": "/opt/dlami/nvme/shevtsov/sent_checkpoints/checkpoint-102510",
4
+ "epoch": 10.0,
5
  "eval_steps": 500,
6
+ "global_step": 102510,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "step": 10251,
14
+ "train_accuracy": 0.9232076630758701,
15
+ "train_f1": 0.9284970394554347,
16
+ "train_loss": 0.08324012160301208,
17
+ "train_precision": 0.9395995876182259,
18
+ "train_recall": 0.9232076630758701,
19
+ "train_runtime": 5077.4566,
20
+ "train_samples_per_second": 775.226,
21
+ "train_steps_per_second": 2.019
22
+ },
23
+ {
24
+ "epoch": 1.0,
25
+ "grad_norm": 2.202436923980713,
26
  "learning_rate": 5e-06,
27
+ "loss": 0.1512,
28
+ "step": 10251
29
  },
30
  {
31
  "epoch": 1.0,
32
+ "eval_accuracy": 0.5906430303537085,
33
+ "eval_f1": 0.5953154182610875,
34
+ "eval_loss": 1.1822575330734253,
35
+ "eval_precision": 0.7605817989645098,
36
+ "eval_recall": 0.5906430303537085,
37
+ "eval_runtime": 31.2064,
38
+ "eval_samples_per_second": 766.446,
39
+ "eval_steps_per_second": 2.019,
40
+ "step": 10251
41
  },
42
  {
43
  "epoch": 2.0,
44
+ "step": 20502,
45
+ "train_accuracy": 0.9251504125579815,
46
+ "train_f1": 0.9318479879378413,
47
+ "train_loss": 0.06029369682073593,
48
+ "train_precision": 0.9457024638382566,
49
+ "train_recall": 0.9251504125579815,
50
+ "train_runtime": 5078.2486,
51
+ "train_samples_per_second": 775.105,
52
+ "train_steps_per_second": 2.019
53
+ },
54
+ {
55
+ "epoch": 2.0,
56
+ "grad_norm": 2.3391969203948975,
57
  "learning_rate": 4.849231551964771e-06,
58
+ "loss": 0.0727,
59
+ "step": 20502
60
  },
61
  {
62
  "epoch": 2.0,
63
+ "eval_accuracy": 0.5809850321933272,
64
+ "eval_f1": 0.5827904211311477,
65
+ "eval_loss": 0.8601031303405762,
66
+ "eval_precision": 0.7707426917608643,
67
+ "eval_recall": 0.5809850321933272,
68
+ "eval_runtime": 31.1947,
69
+ "eval_samples_per_second": 766.734,
70
+ "eval_steps_per_second": 2.02,
71
+ "step": 20502
72
+ },
73
+ {
74
+ "epoch": 3.0,
75
+ "step": 30753,
76
+ "train_accuracy": 0.9311338878819889,
77
+ "train_f1": 0.9367318532276315,
78
+ "train_loss": 0.05340421944856644,
79
+ "train_precision": 0.948615352312463,
80
+ "train_recall": 0.9311338878819889,
81
+ "train_runtime": 5078.9472,
82
+ "train_samples_per_second": 774.998,
83
+ "train_steps_per_second": 2.018
84
  },
85
  {
86
  "epoch": 3.0,
87
+ "grad_norm": 3.368744134902954,
88
  "learning_rate": 4.415111107797445e-06,
89
+ "loss": 0.064,
90
+ "step": 30753
91
  },
92
  {
93
  "epoch": 3.0,
94
+ "eval_accuracy": 0.6280207375198595,
95
+ "eval_f1": 0.6341902420521265,
96
+ "eval_loss": 1.0681450366973877,
97
+ "eval_precision": 0.768741241964947,
98
+ "eval_recall": 0.6280207375198595,
99
+ "eval_runtime": 31.2028,
100
+ "eval_samples_per_second": 766.535,
101
+ "eval_steps_per_second": 2.019,
102
+ "step": 30753
103
  },
104
  {
105
  "epoch": 4.0,
106
+ "step": 41004,
107
+ "train_accuracy": 0.941354218588914,
108
+ "train_f1": 0.9444982035100681,
109
+ "train_loss": 0.0493415892124176,
110
+ "train_precision": 0.9518003283876015,
111
+ "train_recall": 0.941354218588914,
112
+ "train_runtime": 5080.4688,
113
+ "train_samples_per_second": 774.766,
114
+ "train_steps_per_second": 2.018
115
+ },
116
+ {
117
+ "epoch": 4.0,
118
+ "grad_norm": 7.862049579620361,
119
  "learning_rate": 3.7500000000000005e-06,
120
+ "loss": 0.0585,
121
+ "step": 41004
122
  },
123
  {
124
  "epoch": 4.0,
125
+ "eval_accuracy": 0.6900660590350364,
126
+ "eval_f1": 0.6986283009597273,
127
+ "eval_loss": 1.4099539518356323,
128
+ "eval_precision": 0.7671384440115526,
129
+ "eval_recall": 0.6900660590350364,
130
+ "eval_runtime": 31.2008,
131
+ "eval_samples_per_second": 766.584,
132
+ "eval_steps_per_second": 2.019,
133
+ "step": 41004
134
  },
135
  {
136
  "epoch": 5.0,
137
+ "step": 51255,
138
+ "train_accuracy": 0.9479113474150279,
139
+ "train_f1": 0.9498162368391557,
140
+ "train_loss": 0.04905932769179344,
141
+ "train_precision": 0.9544620214289802,
142
+ "train_recall": 0.9479113474150279,
143
+ "train_runtime": 5088.1804,
144
+ "train_samples_per_second": 773.592,
145
+ "train_steps_per_second": 2.015
146
+ },
147
+ {
148
+ "epoch": 5.0,
149
+ "grad_norm": 1.6737421751022339,
150
  "learning_rate": 2.9341204441673267e-06,
151
+ "loss": 0.0543,
152
+ "step": 51255
153
  },
154
  {
155
  "epoch": 5.0,
156
+ "eval_accuracy": 0.7279454803913371,
157
+ "eval_f1": 0.7344324841621389,
158
+ "eval_loss": 2.154353618621826,
159
+ "eval_precision": 0.7651689935562233,
160
+ "eval_recall": 0.7279454803913371,
161
+ "eval_runtime": 31.2388,
162
+ "eval_samples_per_second": 765.65,
163
+ "eval_steps_per_second": 2.017,
164
+ "step": 51255
165
+ },
166
+ {
167
+ "epoch": 6.0,
168
+ "step": 61506,
169
+ "train_accuracy": 0.944803507162031,
170
+ "train_f1": 0.9475462857180277,
171
+ "train_loss": 0.0426737517118454,
172
+ "train_precision": 0.9540200071722176,
173
+ "train_recall": 0.944803507162031,
174
+ "train_runtime": 5078.4022,
175
+ "train_samples_per_second": 775.081,
176
+ "train_steps_per_second": 2.019
177
  },
178
  {
179
  "epoch": 6.0,
180
+ "grad_norm": 2.879870653152466,
181
  "learning_rate": 2.0658795558326745e-06,
182
+ "loss": 0.051,
183
+ "step": 61506
184
  },
185
  {
186
  "epoch": 6.0,
187
+ "eval_accuracy": 0.7079187223011958,
188
+ "eval_f1": 0.7155283465045912,
189
+ "eval_loss": 2.2507946491241455,
190
+ "eval_precision": 0.7570941064333973,
191
+ "eval_recall": 0.7079187223011958,
192
+ "eval_runtime": 31.2026,
193
+ "eval_samples_per_second": 766.538,
194
+ "eval_steps_per_second": 2.019,
195
+ "step": 61506
196
  },
197
  {
198
  "epoch": 7.0,
199
+ "step": 71757,
200
+ "train_accuracy": 0.9474675154096338,
201
+ "train_f1": 0.9497025418465336,
202
+ "train_loss": 0.0410270020365715,
203
+ "train_precision": 0.9551267616744933,
204
+ "train_recall": 0.9474675154096338,
205
+ "train_runtime": 5080.7556,
206
+ "train_samples_per_second": 774.722,
207
+ "train_steps_per_second": 2.018
208
+ },
209
+ {
210
+ "epoch": 7.0,
211
+ "grad_norm": 1.4426418542861938,
212
  "learning_rate": 1.2500000000000007e-06,
213
+ "loss": 0.0486,
214
+ "step": 71757
215
  },
216
  {
217
  "epoch": 7.0,
218
+ "eval_accuracy": 0.7198344343172506,
219
+ "eval_f1": 0.726834352850576,
220
+ "eval_loss": 2.45487380027771,
221
+ "eval_precision": 0.7590566392960729,
222
+ "eval_recall": 0.7198344343172506,
223
+ "eval_runtime": 31.2049,
224
+ "eval_samples_per_second": 766.482,
225
+ "eval_steps_per_second": 2.019,
226
+ "step": 71757
227
  },
228
  {
229
  "epoch": 8.0,
230
+ "step": 82008,
231
+ "train_accuracy": 0.9504046315025708,
232
+ "train_f1": 0.9520763661297328,
233
+ "train_loss": 0.0410199835896492,
234
+ "train_precision": 0.9562815819971588,
235
+ "train_recall": 0.9504046315025708,
236
+ "train_runtime": 5079.0768,
237
+ "train_samples_per_second": 774.978,
238
+ "train_steps_per_second": 2.018
239
+ },
240
+ {
241
+ "epoch": 8.0,
242
+ "grad_norm": 1.1270148754119873,
243
  "learning_rate": 5.848888922025553e-07,
244
+ "loss": 0.0468,
245
+ "step": 82008
246
  },
247
  {
248
  "epoch": 8.0,
249
+ "eval_accuracy": 0.7316665273016139,
250
+ "eval_f1": 0.737640105520052,
251
+ "eval_loss": 2.602576494216919,
252
+ "eval_precision": 0.7612609852108917,
253
+ "eval_recall": 0.7316665273016139,
254
+ "eval_runtime": 31.1916,
255
+ "eval_samples_per_second": 766.809,
256
+ "eval_steps_per_second": 2.02,
257
+ "step": 82008
258
+ },
259
+ {
260
+ "epoch": 9.0,
261
+ "step": 92259,
262
+ "train_accuracy": 0.9505296259769004,
263
+ "train_f1": 0.9522083394688067,
264
+ "train_loss": 0.03998752683401108,
265
+ "train_precision": 0.9564247374560226,
266
+ "train_recall": 0.9505296259769004,
267
+ "train_runtime": 5078.0822,
268
+ "train_samples_per_second": 775.13,
269
+ "train_steps_per_second": 2.019
270
+ },
271
+ {
272
+ "epoch": 9.0,
273
+ "grad_norm": 1.666390061378479,
274
+ "learning_rate": 1.507684480352292e-07,
275
+ "loss": 0.0457,
276
+ "step": 92259
277
+ },
278
+ {
279
+ "epoch": 9.0,
280
+ "eval_accuracy": 0.7320010034283803,
281
+ "eval_f1": 0.7378168031871946,
282
+ "eval_loss": 2.8211002349853516,
283
+ "eval_precision": 0.7595695441249473,
284
+ "eval_recall": 0.7320010034283803,
285
+ "eval_runtime": 31.2132,
286
+ "eval_samples_per_second": 766.278,
287
+ "eval_steps_per_second": 2.018,
288
+ "step": 92259
289
+ },
290
+ {
291
+ "epoch": 10.0,
292
+ "step": 102510,
293
+ "train_accuracy": 0.9510209660446921,
294
+ "train_f1": 0.9526073589638447,
295
+ "train_loss": 0.040298543870449066,
296
+ "train_precision": 0.9566123120238171,
297
+ "train_recall": 0.9510209660446921,
298
+ "train_runtime": 5078.6508,
299
+ "train_samples_per_second": 775.043,
300
+ "train_steps_per_second": 2.018
301
+ },
302
+ {
303
+ "epoch": 10.0,
304
+ "grad_norm": 1.6141560077667236,
305
+ "learning_rate": 0.0,
306
+ "loss": 0.0451,
307
+ "step": 102510
308
+ },
309
+ {
310
+ "epoch": 10.0,
311
+ "eval_accuracy": 0.7345095743791287,
312
+ "eval_f1": 0.7399751659163123,
313
+ "eval_loss": 2.884028911590576,
314
+ "eval_precision": 0.759520061975123,
315
+ "eval_recall": 0.7345095743791287,
316
+ "eval_runtime": 31.2146,
317
+ "eval_samples_per_second": 766.243,
318
+ "eval_steps_per_second": 2.018,
319
+ "step": 102510
320
  }
321
  ],
322
  "logging_steps": 500,
323
+ "max_steps": 102510,
324
  "num_input_tokens_seen": 0,
325
  "num_train_epochs": 10,
326
  "save_steps": 500,
 
331
  "should_evaluate": false,
332
  "should_log": false,
333
  "should_save": true,
334
+ "should_training_stop": true
335
  },
336
  "attributes": {}
337
  }
338
  },
339
+ "total_flos": 1.0357340434899927e+19,
340
+ "train_batch_size": 48,
341
  "trial_name": null,
342
  "trial_params": null
343
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d25bd04595533c4a434f13ac4e4cfbd167bcb4a00df5bd299061065f66a73363
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08fbbf6707e209de037bb4bc3ece3924dd1369ff37078a607f53ee446b0de5b1
3
  size 5304