Lech-Iyoko committed on
Commit
aa2a1c7
·
verified ·
1 Parent(s): bd0b073

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50) hide show
  1. checkpoint-1185/config.json +149 -0
  2. checkpoint-1185/model.safetensors +3 -0
  3. checkpoint-1185/optimizer.pt +3 -0
  4. checkpoint-1185/rng_state.pth +3 -0
  5. checkpoint-1185/scheduler.pt +3 -0
  6. checkpoint-1185/special_tokens_map.json +7 -0
  7. checkpoint-1185/tokenizer.json +0 -0
  8. checkpoint-1185/tokenizer_config.json +56 -0
  9. checkpoint-1185/trainer_state.json +234 -0
  10. checkpoint-1185/training_args.bin +3 -0
  11. checkpoint-1185/vocab.txt +0 -0
  12. checkpoint-237/config.json +149 -0
  13. checkpoint-237/model.safetensors +3 -0
  14. checkpoint-237/optimizer.pt +3 -0
  15. checkpoint-237/rng_state.pth +3 -0
  16. checkpoint-237/scheduler.pt +3 -0
  17. checkpoint-237/special_tokens_map.json +7 -0
  18. checkpoint-237/tokenizer.json +0 -0
  19. checkpoint-237/tokenizer_config.json +56 -0
  20. checkpoint-237/trainer_state.json +69 -0
  21. checkpoint-237/training_args.bin +3 -0
  22. checkpoint-237/vocab.txt +0 -0
  23. checkpoint-474/config.json +149 -0
  24. checkpoint-474/model.safetensors +3 -0
  25. checkpoint-474/optimizer.pt +3 -0
  26. checkpoint-474/rng_state.pth +3 -0
  27. checkpoint-474/scheduler.pt +3 -0
  28. checkpoint-474/special_tokens_map.json +7 -0
  29. checkpoint-474/tokenizer.json +0 -0
  30. checkpoint-474/tokenizer_config.json +56 -0
  31. checkpoint-474/trainer_state.json +112 -0
  32. checkpoint-474/training_args.bin +3 -0
  33. checkpoint-474/vocab.txt +0 -0
  34. checkpoint-711/config.json +149 -0
  35. checkpoint-711/model.safetensors +3 -0
  36. checkpoint-711/optimizer.pt +3 -0
  37. checkpoint-711/rng_state.pth +3 -0
  38. checkpoint-711/scheduler.pt +3 -0
  39. checkpoint-711/special_tokens_map.json +7 -0
  40. checkpoint-711/tokenizer.json +0 -0
  41. checkpoint-711/tokenizer_config.json +56 -0
  42. checkpoint-711/trainer_state.json +155 -0
  43. checkpoint-711/training_args.bin +3 -0
  44. checkpoint-711/vocab.txt +0 -0
  45. checkpoint-948/config.json +149 -0
  46. checkpoint-948/model.safetensors +3 -0
  47. checkpoint-948/optimizer.pt +3 -0
  48. checkpoint-948/rng_state.pth +3 -0
  49. checkpoint-948/scheduler.pt +3 -0
  50. checkpoint-948/special_tokens_map.json +7 -0
checkpoint-1185/config.json ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11",
25
+ "12": "LABEL_12",
26
+ "13": "LABEL_13",
27
+ "14": "LABEL_14",
28
+ "15": "LABEL_15",
29
+ "16": "LABEL_16",
30
+ "17": "LABEL_17",
31
+ "18": "LABEL_18",
32
+ "19": "LABEL_19",
33
+ "20": "LABEL_20",
34
+ "21": "LABEL_21",
35
+ "22": "LABEL_22",
36
+ "23": "LABEL_23",
37
+ "24": "LABEL_24",
38
+ "25": "LABEL_25",
39
+ "26": "LABEL_26",
40
+ "27": "LABEL_27",
41
+ "28": "LABEL_28",
42
+ "29": "LABEL_29",
43
+ "30": "LABEL_30",
44
+ "31": "LABEL_31",
45
+ "32": "LABEL_32",
46
+ "33": "LABEL_33",
47
+ "34": "LABEL_34",
48
+ "35": "LABEL_35",
49
+ "36": "LABEL_36",
50
+ "37": "LABEL_37",
51
+ "38": "LABEL_38",
52
+ "39": "LABEL_39",
53
+ "40": "LABEL_40",
54
+ "41": "LABEL_41",
55
+ "42": "LABEL_42",
56
+ "43": "LABEL_43",
57
+ "44": "LABEL_44",
58
+ "45": "LABEL_45",
59
+ "46": "LABEL_46",
60
+ "47": "LABEL_47",
61
+ "48": "LABEL_48",
62
+ "49": "LABEL_49",
63
+ "50": "LABEL_50",
64
+ "51": "LABEL_51",
65
+ "52": "LABEL_52",
66
+ "53": "LABEL_53",
67
+ "54": "LABEL_54",
68
+ "55": "LABEL_55",
69
+ "56": "LABEL_56",
70
+ "57": "LABEL_57",
71
+ "58": "LABEL_58"
72
+ },
73
+ "initializer_range": 0.02,
74
+ "intermediate_size": 3072,
75
+ "label2id": {
76
+ "LABEL_0": 0,
77
+ "LABEL_1": 1,
78
+ "LABEL_10": 10,
79
+ "LABEL_11": 11,
80
+ "LABEL_12": 12,
81
+ "LABEL_13": 13,
82
+ "LABEL_14": 14,
83
+ "LABEL_15": 15,
84
+ "LABEL_16": 16,
85
+ "LABEL_17": 17,
86
+ "LABEL_18": 18,
87
+ "LABEL_19": 19,
88
+ "LABEL_2": 2,
89
+ "LABEL_20": 20,
90
+ "LABEL_21": 21,
91
+ "LABEL_22": 22,
92
+ "LABEL_23": 23,
93
+ "LABEL_24": 24,
94
+ "LABEL_25": 25,
95
+ "LABEL_26": 26,
96
+ "LABEL_27": 27,
97
+ "LABEL_28": 28,
98
+ "LABEL_29": 29,
99
+ "LABEL_3": 3,
100
+ "LABEL_30": 30,
101
+ "LABEL_31": 31,
102
+ "LABEL_32": 32,
103
+ "LABEL_33": 33,
104
+ "LABEL_34": 34,
105
+ "LABEL_35": 35,
106
+ "LABEL_36": 36,
107
+ "LABEL_37": 37,
108
+ "LABEL_38": 38,
109
+ "LABEL_39": 39,
110
+ "LABEL_4": 4,
111
+ "LABEL_40": 40,
112
+ "LABEL_41": 41,
113
+ "LABEL_42": 42,
114
+ "LABEL_43": 43,
115
+ "LABEL_44": 44,
116
+ "LABEL_45": 45,
117
+ "LABEL_46": 46,
118
+ "LABEL_47": 47,
119
+ "LABEL_48": 48,
120
+ "LABEL_49": 49,
121
+ "LABEL_5": 5,
122
+ "LABEL_50": 50,
123
+ "LABEL_51": 51,
124
+ "LABEL_52": 52,
125
+ "LABEL_53": 53,
126
+ "LABEL_54": 54,
127
+ "LABEL_55": 55,
128
+ "LABEL_56": 56,
129
+ "LABEL_57": 57,
130
+ "LABEL_58": 58,
131
+ "LABEL_6": 6,
132
+ "LABEL_7": 7,
133
+ "LABEL_8": 8,
134
+ "LABEL_9": 9
135
+ },
136
+ "layer_norm_eps": 1e-12,
137
+ "max_position_embeddings": 512,
138
+ "model_type": "bert",
139
+ "num_attention_heads": 12,
140
+ "num_hidden_layers": 12,
141
+ "pad_token_id": 0,
142
+ "position_embedding_type": "absolute",
143
+ "problem_type": "single_label_classification",
144
+ "torch_dtype": "float32",
145
+ "transformers_version": "4.47.1",
146
+ "type_vocab_size": 2,
147
+ "use_cache": true,
148
+ "vocab_size": 30522
149
+ }
checkpoint-1185/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d94de6b4844e865dd1d2145f06c4908aa3003830866a882cf1964f8cc8e0b1f4
3
+ size 438133980
checkpoint-1185/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15b9a61dde14318aeaa62eb94a9116532fe1b83ea311e99a5d83c78b1019abb6
3
+ size 876383354
checkpoint-1185/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a3a9be2c29365ae4d84a07bdadde42bda71e7bea22de1149fb5d09bef45fda7
3
+ size 13990
checkpoint-1185/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8402780fadfeb49285d847a5e1c7e9bd248c0e333ebfaf21b5bdf5cd6d44570e
3
+ size 1064
checkpoint-1185/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
checkpoint-1185/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1185/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
checkpoint-1185/trainer_state.json ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1185,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.2109704641350211,
13
+ "grad_norm": 17.118331909179688,
14
+ "learning_rate": 1.9156118143459917e-05,
15
+ "loss": 4.0557,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 0.4219409282700422,
20
+ "grad_norm": 17.078466415405273,
21
+ "learning_rate": 1.8312236286919833e-05,
22
+ "loss": 4.018,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 0.6329113924050633,
27
+ "grad_norm": 18.140644073486328,
28
+ "learning_rate": 1.746835443037975e-05,
29
+ "loss": 3.9942,
30
+ "step": 150
31
+ },
32
+ {
33
+ "epoch": 0.8438818565400844,
34
+ "grad_norm": 14.235407829284668,
35
+ "learning_rate": 1.662447257383966e-05,
36
+ "loss": 3.9127,
37
+ "step": 200
38
+ },
39
+ {
40
+ "epoch": 1.0,
41
+ "eval_loss": 3.480090379714966,
42
+ "eval_runtime": 253.6051,
43
+ "eval_samples_per_second": 0.469,
44
+ "eval_steps_per_second": 0.237,
45
+ "step": 237
46
+ },
47
+ {
48
+ "epoch": 1.0548523206751055,
49
+ "grad_norm": 14.833487510681152,
50
+ "learning_rate": 1.578059071729958e-05,
51
+ "loss": 3.6803,
52
+ "step": 250
53
+ },
54
+ {
55
+ "epoch": 1.2658227848101267,
56
+ "grad_norm": 18.978660583496094,
57
+ "learning_rate": 1.4936708860759495e-05,
58
+ "loss": 3.4012,
59
+ "step": 300
60
+ },
61
+ {
62
+ "epoch": 1.4767932489451476,
63
+ "grad_norm": 18.297719955444336,
64
+ "learning_rate": 1.4092827004219412e-05,
65
+ "loss": 3.1245,
66
+ "step": 350
67
+ },
68
+ {
69
+ "epoch": 1.6877637130801688,
70
+ "grad_norm": 16.072620391845703,
71
+ "learning_rate": 1.3248945147679326e-05,
72
+ "loss": 3.122,
73
+ "step": 400
74
+ },
75
+ {
76
+ "epoch": 1.8987341772151898,
77
+ "grad_norm": 14.235777854919434,
78
+ "learning_rate": 1.240506329113924e-05,
79
+ "loss": 2.7533,
80
+ "step": 450
81
+ },
82
+ {
83
+ "epoch": 2.0,
84
+ "eval_loss": 2.393742084503174,
85
+ "eval_runtime": 230.7613,
86
+ "eval_samples_per_second": 0.516,
87
+ "eval_steps_per_second": 0.26,
88
+ "step": 474
89
+ },
90
+ {
91
+ "epoch": 2.109704641350211,
92
+ "grad_norm": 15.517921447753906,
93
+ "learning_rate": 1.1561181434599158e-05,
94
+ "loss": 2.5393,
95
+ "step": 500
96
+ },
97
+ {
98
+ "epoch": 2.320675105485232,
99
+ "grad_norm": 12.792672157287598,
100
+ "learning_rate": 1.0717299578059072e-05,
101
+ "loss": 2.4086,
102
+ "step": 550
103
+ },
104
+ {
105
+ "epoch": 2.5316455696202533,
106
+ "grad_norm": 15.125005722045898,
107
+ "learning_rate": 9.87341772151899e-06,
108
+ "loss": 2.3738,
109
+ "step": 600
110
+ },
111
+ {
112
+ "epoch": 2.742616033755274,
113
+ "grad_norm": 14.103851318359375,
114
+ "learning_rate": 9.029535864978903e-06,
115
+ "loss": 2.3799,
116
+ "step": 650
117
+ },
118
+ {
119
+ "epoch": 2.9535864978902953,
120
+ "grad_norm": 13.69605541229248,
121
+ "learning_rate": 8.18565400843882e-06,
122
+ "loss": 2.0113,
123
+ "step": 700
124
+ },
125
+ {
126
+ "epoch": 3.0,
127
+ "eval_loss": 1.8042149543762207,
128
+ "eval_runtime": 229.2696,
129
+ "eval_samples_per_second": 0.519,
130
+ "eval_steps_per_second": 0.262,
131
+ "step": 711
132
+ },
133
+ {
134
+ "epoch": 3.1645569620253164,
135
+ "grad_norm": 15.687886238098145,
136
+ "learning_rate": 7.341772151898735e-06,
137
+ "loss": 1.9995,
138
+ "step": 750
139
+ },
140
+ {
141
+ "epoch": 3.3755274261603376,
142
+ "grad_norm": 13.975310325622559,
143
+ "learning_rate": 6.49789029535865e-06,
144
+ "loss": 1.9129,
145
+ "step": 800
146
+ },
147
+ {
148
+ "epoch": 3.586497890295359,
149
+ "grad_norm": 14.94083309173584,
150
+ "learning_rate": 5.654008438818566e-06,
151
+ "loss": 2.0081,
152
+ "step": 850
153
+ },
154
+ {
155
+ "epoch": 3.7974683544303796,
156
+ "grad_norm": 16.014385223388672,
157
+ "learning_rate": 4.8101265822784815e-06,
158
+ "loss": 1.7606,
159
+ "step": 900
160
+ },
161
+ {
162
+ "epoch": 4.0,
163
+ "eval_loss": 1.5033947229385376,
164
+ "eval_runtime": 229.0556,
165
+ "eval_samples_per_second": 0.52,
166
+ "eval_steps_per_second": 0.262,
167
+ "step": 948
168
+ },
169
+ {
170
+ "epoch": 4.008438818565401,
171
+ "grad_norm": 10.119342803955078,
172
+ "learning_rate": 3.9662447257383965e-06,
173
+ "loss": 1.7556,
174
+ "step": 950
175
+ },
176
+ {
177
+ "epoch": 4.219409282700422,
178
+ "grad_norm": 16.55748748779297,
179
+ "learning_rate": 3.1223628691983127e-06,
180
+ "loss": 1.6124,
181
+ "step": 1000
182
+ },
183
+ {
184
+ "epoch": 4.430379746835443,
185
+ "grad_norm": 10.142475128173828,
186
+ "learning_rate": 2.278481012658228e-06,
187
+ "loss": 1.7171,
188
+ "step": 1050
189
+ },
190
+ {
191
+ "epoch": 4.641350210970464,
192
+ "grad_norm": 15.821284294128418,
193
+ "learning_rate": 1.4345991561181436e-06,
194
+ "loss": 1.7856,
195
+ "step": 1100
196
+ },
197
+ {
198
+ "epoch": 4.852320675105485,
199
+ "grad_norm": 11.253273963928223,
200
+ "learning_rate": 5.907172995780591e-07,
201
+ "loss": 1.7111,
202
+ "step": 1150
203
+ },
204
+ {
205
+ "epoch": 5.0,
206
+ "eval_loss": 1.4161142110824585,
207
+ "eval_runtime": 234.4231,
208
+ "eval_samples_per_second": 0.508,
209
+ "eval_steps_per_second": 0.256,
210
+ "step": 1185
211
+ }
212
+ ],
213
+ "logging_steps": 50,
214
+ "max_steps": 1185,
215
+ "num_input_tokens_seen": 0,
216
+ "num_train_epochs": 5,
217
+ "save_steps": 500,
218
+ "stateful_callbacks": {
219
+ "TrainerControl": {
220
+ "args": {
221
+ "should_epoch_stop": false,
222
+ "should_evaluate": false,
223
+ "should_log": false,
224
+ "should_save": true,
225
+ "should_training_stop": true
226
+ },
227
+ "attributes": {}
228
+ }
229
+ },
230
+ "total_flos": 622576104944640.0,
231
+ "train_batch_size": 2,
232
+ "trial_name": null,
233
+ "trial_params": null
234
+ }
checkpoint-1185/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c864801bcd65938a6635dc9ef295501c2002bc5cda8339db5ff3243768ab40f
3
+ size 5240
checkpoint-1185/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-237/config.json ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11",
25
+ "12": "LABEL_12",
26
+ "13": "LABEL_13",
27
+ "14": "LABEL_14",
28
+ "15": "LABEL_15",
29
+ "16": "LABEL_16",
30
+ "17": "LABEL_17",
31
+ "18": "LABEL_18",
32
+ "19": "LABEL_19",
33
+ "20": "LABEL_20",
34
+ "21": "LABEL_21",
35
+ "22": "LABEL_22",
36
+ "23": "LABEL_23",
37
+ "24": "LABEL_24",
38
+ "25": "LABEL_25",
39
+ "26": "LABEL_26",
40
+ "27": "LABEL_27",
41
+ "28": "LABEL_28",
42
+ "29": "LABEL_29",
43
+ "30": "LABEL_30",
44
+ "31": "LABEL_31",
45
+ "32": "LABEL_32",
46
+ "33": "LABEL_33",
47
+ "34": "LABEL_34",
48
+ "35": "LABEL_35",
49
+ "36": "LABEL_36",
50
+ "37": "LABEL_37",
51
+ "38": "LABEL_38",
52
+ "39": "LABEL_39",
53
+ "40": "LABEL_40",
54
+ "41": "LABEL_41",
55
+ "42": "LABEL_42",
56
+ "43": "LABEL_43",
57
+ "44": "LABEL_44",
58
+ "45": "LABEL_45",
59
+ "46": "LABEL_46",
60
+ "47": "LABEL_47",
61
+ "48": "LABEL_48",
62
+ "49": "LABEL_49",
63
+ "50": "LABEL_50",
64
+ "51": "LABEL_51",
65
+ "52": "LABEL_52",
66
+ "53": "LABEL_53",
67
+ "54": "LABEL_54",
68
+ "55": "LABEL_55",
69
+ "56": "LABEL_56",
70
+ "57": "LABEL_57",
71
+ "58": "LABEL_58"
72
+ },
73
+ "initializer_range": 0.02,
74
+ "intermediate_size": 3072,
75
+ "label2id": {
76
+ "LABEL_0": 0,
77
+ "LABEL_1": 1,
78
+ "LABEL_10": 10,
79
+ "LABEL_11": 11,
80
+ "LABEL_12": 12,
81
+ "LABEL_13": 13,
82
+ "LABEL_14": 14,
83
+ "LABEL_15": 15,
84
+ "LABEL_16": 16,
85
+ "LABEL_17": 17,
86
+ "LABEL_18": 18,
87
+ "LABEL_19": 19,
88
+ "LABEL_2": 2,
89
+ "LABEL_20": 20,
90
+ "LABEL_21": 21,
91
+ "LABEL_22": 22,
92
+ "LABEL_23": 23,
93
+ "LABEL_24": 24,
94
+ "LABEL_25": 25,
95
+ "LABEL_26": 26,
96
+ "LABEL_27": 27,
97
+ "LABEL_28": 28,
98
+ "LABEL_29": 29,
99
+ "LABEL_3": 3,
100
+ "LABEL_30": 30,
101
+ "LABEL_31": 31,
102
+ "LABEL_32": 32,
103
+ "LABEL_33": 33,
104
+ "LABEL_34": 34,
105
+ "LABEL_35": 35,
106
+ "LABEL_36": 36,
107
+ "LABEL_37": 37,
108
+ "LABEL_38": 38,
109
+ "LABEL_39": 39,
110
+ "LABEL_4": 4,
111
+ "LABEL_40": 40,
112
+ "LABEL_41": 41,
113
+ "LABEL_42": 42,
114
+ "LABEL_43": 43,
115
+ "LABEL_44": 44,
116
+ "LABEL_45": 45,
117
+ "LABEL_46": 46,
118
+ "LABEL_47": 47,
119
+ "LABEL_48": 48,
120
+ "LABEL_49": 49,
121
+ "LABEL_5": 5,
122
+ "LABEL_50": 50,
123
+ "LABEL_51": 51,
124
+ "LABEL_52": 52,
125
+ "LABEL_53": 53,
126
+ "LABEL_54": 54,
127
+ "LABEL_55": 55,
128
+ "LABEL_56": 56,
129
+ "LABEL_57": 57,
130
+ "LABEL_58": 58,
131
+ "LABEL_6": 6,
132
+ "LABEL_7": 7,
133
+ "LABEL_8": 8,
134
+ "LABEL_9": 9
135
+ },
136
+ "layer_norm_eps": 1e-12,
137
+ "max_position_embeddings": 512,
138
+ "model_type": "bert",
139
+ "num_attention_heads": 12,
140
+ "num_hidden_layers": 12,
141
+ "pad_token_id": 0,
142
+ "position_embedding_type": "absolute",
143
+ "problem_type": "single_label_classification",
144
+ "torch_dtype": "float32",
145
+ "transformers_version": "4.47.1",
146
+ "type_vocab_size": 2,
147
+ "use_cache": true,
148
+ "vocab_size": 30522
149
+ }
checkpoint-237/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9059cbc7406daa2de5b5a90a264f0fea948f38cd85f2dfaf56efeb9aad659707
3
+ size 438133980
checkpoint-237/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17fa4f33b492c8943a7ed225a6927f0f10ef578105c6645c2f19b4f8eb504a19
3
+ size 876383354
checkpoint-237/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:050f9824d04918fcf622694fba6928aa8c89accb27c09af953c866c57c937897
3
+ size 13990
checkpoint-237/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:679ef07df3089743e6bb549a1b1dfcdb043edc0dc3f913acf64397b23b940e7a
3
+ size 1064
checkpoint-237/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
checkpoint-237/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-237/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
checkpoint-237/trainer_state.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 237,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.2109704641350211,
13
+ "grad_norm": 17.118331909179688,
14
+ "learning_rate": 1.9156118143459917e-05,
15
+ "loss": 4.0557,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 0.4219409282700422,
20
+ "grad_norm": 17.078466415405273,
21
+ "learning_rate": 1.8312236286919833e-05,
22
+ "loss": 4.018,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 0.6329113924050633,
27
+ "grad_norm": 18.140644073486328,
28
+ "learning_rate": 1.746835443037975e-05,
29
+ "loss": 3.9942,
30
+ "step": 150
31
+ },
32
+ {
33
+ "epoch": 0.8438818565400844,
34
+ "grad_norm": 14.235407829284668,
35
+ "learning_rate": 1.662447257383966e-05,
36
+ "loss": 3.9127,
37
+ "step": 200
38
+ },
39
+ {
40
+ "epoch": 1.0,
41
+ "eval_loss": 3.480090379714966,
42
+ "eval_runtime": 253.6051,
43
+ "eval_samples_per_second": 0.469,
44
+ "eval_steps_per_second": 0.237,
45
+ "step": 237
46
+ }
47
+ ],
48
+ "logging_steps": 50,
49
+ "max_steps": 1185,
50
+ "num_input_tokens_seen": 0,
51
+ "num_train_epochs": 5,
52
+ "save_steps": 500,
53
+ "stateful_callbacks": {
54
+ "TrainerControl": {
55
+ "args": {
56
+ "should_epoch_stop": false,
57
+ "should_evaluate": false,
58
+ "should_log": false,
59
+ "should_save": true,
60
+ "should_training_stop": false
61
+ },
62
+ "attributes": {}
63
+ }
64
+ },
65
+ "total_flos": 124515220988928.0,
66
+ "train_batch_size": 2,
67
+ "trial_name": null,
68
+ "trial_params": null
69
+ }
checkpoint-237/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c864801bcd65938a6635dc9ef295501c2002bc5cda8339db5ff3243768ab40f
3
+ size 5240
checkpoint-237/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-474/config.json ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11",
25
+ "12": "LABEL_12",
26
+ "13": "LABEL_13",
27
+ "14": "LABEL_14",
28
+ "15": "LABEL_15",
29
+ "16": "LABEL_16",
30
+ "17": "LABEL_17",
31
+ "18": "LABEL_18",
32
+ "19": "LABEL_19",
33
+ "20": "LABEL_20",
34
+ "21": "LABEL_21",
35
+ "22": "LABEL_22",
36
+ "23": "LABEL_23",
37
+ "24": "LABEL_24",
38
+ "25": "LABEL_25",
39
+ "26": "LABEL_26",
40
+ "27": "LABEL_27",
41
+ "28": "LABEL_28",
42
+ "29": "LABEL_29",
43
+ "30": "LABEL_30",
44
+ "31": "LABEL_31",
45
+ "32": "LABEL_32",
46
+ "33": "LABEL_33",
47
+ "34": "LABEL_34",
48
+ "35": "LABEL_35",
49
+ "36": "LABEL_36",
50
+ "37": "LABEL_37",
51
+ "38": "LABEL_38",
52
+ "39": "LABEL_39",
53
+ "40": "LABEL_40",
54
+ "41": "LABEL_41",
55
+ "42": "LABEL_42",
56
+ "43": "LABEL_43",
57
+ "44": "LABEL_44",
58
+ "45": "LABEL_45",
59
+ "46": "LABEL_46",
60
+ "47": "LABEL_47",
61
+ "48": "LABEL_48",
62
+ "49": "LABEL_49",
63
+ "50": "LABEL_50",
64
+ "51": "LABEL_51",
65
+ "52": "LABEL_52",
66
+ "53": "LABEL_53",
67
+ "54": "LABEL_54",
68
+ "55": "LABEL_55",
69
+ "56": "LABEL_56",
70
+ "57": "LABEL_57",
71
+ "58": "LABEL_58"
72
+ },
73
+ "initializer_range": 0.02,
74
+ "intermediate_size": 3072,
75
+ "label2id": {
76
+ "LABEL_0": 0,
77
+ "LABEL_1": 1,
78
+ "LABEL_10": 10,
79
+ "LABEL_11": 11,
80
+ "LABEL_12": 12,
81
+ "LABEL_13": 13,
82
+ "LABEL_14": 14,
83
+ "LABEL_15": 15,
84
+ "LABEL_16": 16,
85
+ "LABEL_17": 17,
86
+ "LABEL_18": 18,
87
+ "LABEL_19": 19,
88
+ "LABEL_2": 2,
89
+ "LABEL_20": 20,
90
+ "LABEL_21": 21,
91
+ "LABEL_22": 22,
92
+ "LABEL_23": 23,
93
+ "LABEL_24": 24,
94
+ "LABEL_25": 25,
95
+ "LABEL_26": 26,
96
+ "LABEL_27": 27,
97
+ "LABEL_28": 28,
98
+ "LABEL_29": 29,
99
+ "LABEL_3": 3,
100
+ "LABEL_30": 30,
101
+ "LABEL_31": 31,
102
+ "LABEL_32": 32,
103
+ "LABEL_33": 33,
104
+ "LABEL_34": 34,
105
+ "LABEL_35": 35,
106
+ "LABEL_36": 36,
107
+ "LABEL_37": 37,
108
+ "LABEL_38": 38,
109
+ "LABEL_39": 39,
110
+ "LABEL_4": 4,
111
+ "LABEL_40": 40,
112
+ "LABEL_41": 41,
113
+ "LABEL_42": 42,
114
+ "LABEL_43": 43,
115
+ "LABEL_44": 44,
116
+ "LABEL_45": 45,
117
+ "LABEL_46": 46,
118
+ "LABEL_47": 47,
119
+ "LABEL_48": 48,
120
+ "LABEL_49": 49,
121
+ "LABEL_5": 5,
122
+ "LABEL_50": 50,
123
+ "LABEL_51": 51,
124
+ "LABEL_52": 52,
125
+ "LABEL_53": 53,
126
+ "LABEL_54": 54,
127
+ "LABEL_55": 55,
128
+ "LABEL_56": 56,
129
+ "LABEL_57": 57,
130
+ "LABEL_58": 58,
131
+ "LABEL_6": 6,
132
+ "LABEL_7": 7,
133
+ "LABEL_8": 8,
134
+ "LABEL_9": 9
135
+ },
136
+ "layer_norm_eps": 1e-12,
137
+ "max_position_embeddings": 512,
138
+ "model_type": "bert",
139
+ "num_attention_heads": 12,
140
+ "num_hidden_layers": 12,
141
+ "pad_token_id": 0,
142
+ "position_embedding_type": "absolute",
143
+ "problem_type": "single_label_classification",
144
+ "torch_dtype": "float32",
145
+ "transformers_version": "4.47.1",
146
+ "type_vocab_size": 2,
147
+ "use_cache": true,
148
+ "vocab_size": 30522
149
+ }
checkpoint-474/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6e18d1156429d6faa40c45b848fe79005ddba349136166b5640f806b100daa5
3
+ size 438133980
checkpoint-474/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcc70523e7c839c1cac5e389a25d089d093c61baf207f9df6fdddab828024c8f
3
+ size 876383354
checkpoint-474/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0f1be354591bb6634b2c5cc73ef8264c11fe2bdea540509de8564a55ee228ca
3
+ size 13990
checkpoint-474/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f6cb5aeaa4e6ea76c07158fd5faf33d3ddee75b9887090ff37cc434a28c9211
3
+ size 1064
checkpoint-474/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
checkpoint-474/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-474/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
checkpoint-474/trainer_state.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 474,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.2109704641350211,
13
+ "grad_norm": 17.118331909179688,
14
+ "learning_rate": 1.9156118143459917e-05,
15
+ "loss": 4.0557,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 0.4219409282700422,
20
+ "grad_norm": 17.078466415405273,
21
+ "learning_rate": 1.8312236286919833e-05,
22
+ "loss": 4.018,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 0.6329113924050633,
27
+ "grad_norm": 18.140644073486328,
28
+ "learning_rate": 1.746835443037975e-05,
29
+ "loss": 3.9942,
30
+ "step": 150
31
+ },
32
+ {
33
+ "epoch": 0.8438818565400844,
34
+ "grad_norm": 14.235407829284668,
35
+ "learning_rate": 1.662447257383966e-05,
36
+ "loss": 3.9127,
37
+ "step": 200
38
+ },
39
+ {
40
+ "epoch": 1.0,
41
+ "eval_loss": 3.480090379714966,
42
+ "eval_runtime": 253.6051,
43
+ "eval_samples_per_second": 0.469,
44
+ "eval_steps_per_second": 0.237,
45
+ "step": 237
46
+ },
47
+ {
48
+ "epoch": 1.0548523206751055,
49
+ "grad_norm": 14.833487510681152,
50
+ "learning_rate": 1.578059071729958e-05,
51
+ "loss": 3.6803,
52
+ "step": 250
53
+ },
54
+ {
55
+ "epoch": 1.2658227848101267,
56
+ "grad_norm": 18.978660583496094,
57
+ "learning_rate": 1.4936708860759495e-05,
58
+ "loss": 3.4012,
59
+ "step": 300
60
+ },
61
+ {
62
+ "epoch": 1.4767932489451476,
63
+ "grad_norm": 18.297719955444336,
64
+ "learning_rate": 1.4092827004219412e-05,
65
+ "loss": 3.1245,
66
+ "step": 350
67
+ },
68
+ {
69
+ "epoch": 1.6877637130801688,
70
+ "grad_norm": 16.072620391845703,
71
+ "learning_rate": 1.3248945147679326e-05,
72
+ "loss": 3.122,
73
+ "step": 400
74
+ },
75
+ {
76
+ "epoch": 1.8987341772151898,
77
+ "grad_norm": 14.235777854919434,
78
+ "learning_rate": 1.240506329113924e-05,
79
+ "loss": 2.7533,
80
+ "step": 450
81
+ },
82
+ {
83
+ "epoch": 2.0,
84
+ "eval_loss": 2.393742084503174,
85
+ "eval_runtime": 230.7613,
86
+ "eval_samples_per_second": 0.516,
87
+ "eval_steps_per_second": 0.26,
88
+ "step": 474
89
+ }
90
+ ],
91
+ "logging_steps": 50,
92
+ "max_steps": 1185,
93
+ "num_input_tokens_seen": 0,
94
+ "num_train_epochs": 5,
95
+ "save_steps": 500,
96
+ "stateful_callbacks": {
97
+ "TrainerControl": {
98
+ "args": {
99
+ "should_epoch_stop": false,
100
+ "should_evaluate": false,
101
+ "should_log": false,
102
+ "should_save": true,
103
+ "should_training_stop": false
104
+ },
105
+ "attributes": {}
106
+ }
107
+ },
108
+ "total_flos": 249030441977856.0,
109
+ "train_batch_size": 2,
110
+ "trial_name": null,
111
+ "trial_params": null
112
+ }
checkpoint-474/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c864801bcd65938a6635dc9ef295501c2002bc5cda8339db5ff3243768ab40f
3
+ size 5240
checkpoint-474/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-711/config.json ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11",
25
+ "12": "LABEL_12",
26
+ "13": "LABEL_13",
27
+ "14": "LABEL_14",
28
+ "15": "LABEL_15",
29
+ "16": "LABEL_16",
30
+ "17": "LABEL_17",
31
+ "18": "LABEL_18",
32
+ "19": "LABEL_19",
33
+ "20": "LABEL_20",
34
+ "21": "LABEL_21",
35
+ "22": "LABEL_22",
36
+ "23": "LABEL_23",
37
+ "24": "LABEL_24",
38
+ "25": "LABEL_25",
39
+ "26": "LABEL_26",
40
+ "27": "LABEL_27",
41
+ "28": "LABEL_28",
42
+ "29": "LABEL_29",
43
+ "30": "LABEL_30",
44
+ "31": "LABEL_31",
45
+ "32": "LABEL_32",
46
+ "33": "LABEL_33",
47
+ "34": "LABEL_34",
48
+ "35": "LABEL_35",
49
+ "36": "LABEL_36",
50
+ "37": "LABEL_37",
51
+ "38": "LABEL_38",
52
+ "39": "LABEL_39",
53
+ "40": "LABEL_40",
54
+ "41": "LABEL_41",
55
+ "42": "LABEL_42",
56
+ "43": "LABEL_43",
57
+ "44": "LABEL_44",
58
+ "45": "LABEL_45",
59
+ "46": "LABEL_46",
60
+ "47": "LABEL_47",
61
+ "48": "LABEL_48",
62
+ "49": "LABEL_49",
63
+ "50": "LABEL_50",
64
+ "51": "LABEL_51",
65
+ "52": "LABEL_52",
66
+ "53": "LABEL_53",
67
+ "54": "LABEL_54",
68
+ "55": "LABEL_55",
69
+ "56": "LABEL_56",
70
+ "57": "LABEL_57",
71
+ "58": "LABEL_58"
72
+ },
73
+ "initializer_range": 0.02,
74
+ "intermediate_size": 3072,
75
+ "label2id": {
76
+ "LABEL_0": 0,
77
+ "LABEL_1": 1,
78
+ "LABEL_10": 10,
79
+ "LABEL_11": 11,
80
+ "LABEL_12": 12,
81
+ "LABEL_13": 13,
82
+ "LABEL_14": 14,
83
+ "LABEL_15": 15,
84
+ "LABEL_16": 16,
85
+ "LABEL_17": 17,
86
+ "LABEL_18": 18,
87
+ "LABEL_19": 19,
88
+ "LABEL_2": 2,
89
+ "LABEL_20": 20,
90
+ "LABEL_21": 21,
91
+ "LABEL_22": 22,
92
+ "LABEL_23": 23,
93
+ "LABEL_24": 24,
94
+ "LABEL_25": 25,
95
+ "LABEL_26": 26,
96
+ "LABEL_27": 27,
97
+ "LABEL_28": 28,
98
+ "LABEL_29": 29,
99
+ "LABEL_3": 3,
100
+ "LABEL_30": 30,
101
+ "LABEL_31": 31,
102
+ "LABEL_32": 32,
103
+ "LABEL_33": 33,
104
+ "LABEL_34": 34,
105
+ "LABEL_35": 35,
106
+ "LABEL_36": 36,
107
+ "LABEL_37": 37,
108
+ "LABEL_38": 38,
109
+ "LABEL_39": 39,
110
+ "LABEL_4": 4,
111
+ "LABEL_40": 40,
112
+ "LABEL_41": 41,
113
+ "LABEL_42": 42,
114
+ "LABEL_43": 43,
115
+ "LABEL_44": 44,
116
+ "LABEL_45": 45,
117
+ "LABEL_46": 46,
118
+ "LABEL_47": 47,
119
+ "LABEL_48": 48,
120
+ "LABEL_49": 49,
121
+ "LABEL_5": 5,
122
+ "LABEL_50": 50,
123
+ "LABEL_51": 51,
124
+ "LABEL_52": 52,
125
+ "LABEL_53": 53,
126
+ "LABEL_54": 54,
127
+ "LABEL_55": 55,
128
+ "LABEL_56": 56,
129
+ "LABEL_57": 57,
130
+ "LABEL_58": 58,
131
+ "LABEL_6": 6,
132
+ "LABEL_7": 7,
133
+ "LABEL_8": 8,
134
+ "LABEL_9": 9
135
+ },
136
+ "layer_norm_eps": 1e-12,
137
+ "max_position_embeddings": 512,
138
+ "model_type": "bert",
139
+ "num_attention_heads": 12,
140
+ "num_hidden_layers": 12,
141
+ "pad_token_id": 0,
142
+ "position_embedding_type": "absolute",
143
+ "problem_type": "single_label_classification",
144
+ "torch_dtype": "float32",
145
+ "transformers_version": "4.47.1",
146
+ "type_vocab_size": 2,
147
+ "use_cache": true,
148
+ "vocab_size": 30522
149
+ }
checkpoint-711/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2402e33af9eefa597c98b6ea6e14f017676ee3c07d5bde19b6236a637c988790
3
+ size 438133980
checkpoint-711/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9244dba1a359bcdf74db68e199776e53c26a8c6be740932f0e588533fa62d800
3
+ size 876383354
checkpoint-711/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45cdde48674967d19dc253fa32d18dc2598d974a3692de61daf0d594c2e52b61
3
+ size 13990
checkpoint-711/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2b003a63e288fca8333868ee25acf375876bf835114b79379555b12de6dd948
3
+ size 1064
checkpoint-711/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
checkpoint-711/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-711/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
checkpoint-711/trainer_state.json ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 711,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.2109704641350211,
13
+ "grad_norm": 17.118331909179688,
14
+ "learning_rate": 1.9156118143459917e-05,
15
+ "loss": 4.0557,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 0.4219409282700422,
20
+ "grad_norm": 17.078466415405273,
21
+ "learning_rate": 1.8312236286919833e-05,
22
+ "loss": 4.018,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 0.6329113924050633,
27
+ "grad_norm": 18.140644073486328,
28
+ "learning_rate": 1.746835443037975e-05,
29
+ "loss": 3.9942,
30
+ "step": 150
31
+ },
32
+ {
33
+ "epoch": 0.8438818565400844,
34
+ "grad_norm": 14.235407829284668,
35
+ "learning_rate": 1.662447257383966e-05,
36
+ "loss": 3.9127,
37
+ "step": 200
38
+ },
39
+ {
40
+ "epoch": 1.0,
41
+ "eval_loss": 3.480090379714966,
42
+ "eval_runtime": 253.6051,
43
+ "eval_samples_per_second": 0.469,
44
+ "eval_steps_per_second": 0.237,
45
+ "step": 237
46
+ },
47
+ {
48
+ "epoch": 1.0548523206751055,
49
+ "grad_norm": 14.833487510681152,
50
+ "learning_rate": 1.578059071729958e-05,
51
+ "loss": 3.6803,
52
+ "step": 250
53
+ },
54
+ {
55
+ "epoch": 1.2658227848101267,
56
+ "grad_norm": 18.978660583496094,
57
+ "learning_rate": 1.4936708860759495e-05,
58
+ "loss": 3.4012,
59
+ "step": 300
60
+ },
61
+ {
62
+ "epoch": 1.4767932489451476,
63
+ "grad_norm": 18.297719955444336,
64
+ "learning_rate": 1.4092827004219412e-05,
65
+ "loss": 3.1245,
66
+ "step": 350
67
+ },
68
+ {
69
+ "epoch": 1.6877637130801688,
70
+ "grad_norm": 16.072620391845703,
71
+ "learning_rate": 1.3248945147679326e-05,
72
+ "loss": 3.122,
73
+ "step": 400
74
+ },
75
+ {
76
+ "epoch": 1.8987341772151898,
77
+ "grad_norm": 14.235777854919434,
78
+ "learning_rate": 1.240506329113924e-05,
79
+ "loss": 2.7533,
80
+ "step": 450
81
+ },
82
+ {
83
+ "epoch": 2.0,
84
+ "eval_loss": 2.393742084503174,
85
+ "eval_runtime": 230.7613,
86
+ "eval_samples_per_second": 0.516,
87
+ "eval_steps_per_second": 0.26,
88
+ "step": 474
89
+ },
90
+ {
91
+ "epoch": 2.109704641350211,
92
+ "grad_norm": 15.517921447753906,
93
+ "learning_rate": 1.1561181434599158e-05,
94
+ "loss": 2.5393,
95
+ "step": 500
96
+ },
97
+ {
98
+ "epoch": 2.320675105485232,
99
+ "grad_norm": 12.792672157287598,
100
+ "learning_rate": 1.0717299578059072e-05,
101
+ "loss": 2.4086,
102
+ "step": 550
103
+ },
104
+ {
105
+ "epoch": 2.5316455696202533,
106
+ "grad_norm": 15.125005722045898,
107
+ "learning_rate": 9.87341772151899e-06,
108
+ "loss": 2.3738,
109
+ "step": 600
110
+ },
111
+ {
112
+ "epoch": 2.742616033755274,
113
+ "grad_norm": 14.103851318359375,
114
+ "learning_rate": 9.029535864978903e-06,
115
+ "loss": 2.3799,
116
+ "step": 650
117
+ },
118
+ {
119
+ "epoch": 2.9535864978902953,
120
+ "grad_norm": 13.69605541229248,
121
+ "learning_rate": 8.18565400843882e-06,
122
+ "loss": 2.0113,
123
+ "step": 700
124
+ },
125
+ {
126
+ "epoch": 3.0,
127
+ "eval_loss": 1.8042149543762207,
128
+ "eval_runtime": 229.2696,
129
+ "eval_samples_per_second": 0.519,
130
+ "eval_steps_per_second": 0.262,
131
+ "step": 711
132
+ }
133
+ ],
134
+ "logging_steps": 50,
135
+ "max_steps": 1185,
136
+ "num_input_tokens_seen": 0,
137
+ "num_train_epochs": 5,
138
+ "save_steps": 500,
139
+ "stateful_callbacks": {
140
+ "TrainerControl": {
141
+ "args": {
142
+ "should_epoch_stop": false,
143
+ "should_evaluate": false,
144
+ "should_log": false,
145
+ "should_save": true,
146
+ "should_training_stop": false
147
+ },
148
+ "attributes": {}
149
+ }
150
+ },
151
+ "total_flos": 373545662966784.0,
152
+ "train_batch_size": 2,
153
+ "trial_name": null,
154
+ "trial_params": null
155
+ }
checkpoint-711/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c864801bcd65938a6635dc9ef295501c2002bc5cda8339db5ff3243768ab40f
3
+ size 5240
checkpoint-711/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-948/config.json ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11",
25
+ "12": "LABEL_12",
26
+ "13": "LABEL_13",
27
+ "14": "LABEL_14",
28
+ "15": "LABEL_15",
29
+ "16": "LABEL_16",
30
+ "17": "LABEL_17",
31
+ "18": "LABEL_18",
32
+ "19": "LABEL_19",
33
+ "20": "LABEL_20",
34
+ "21": "LABEL_21",
35
+ "22": "LABEL_22",
36
+ "23": "LABEL_23",
37
+ "24": "LABEL_24",
38
+ "25": "LABEL_25",
39
+ "26": "LABEL_26",
40
+ "27": "LABEL_27",
41
+ "28": "LABEL_28",
42
+ "29": "LABEL_29",
43
+ "30": "LABEL_30",
44
+ "31": "LABEL_31",
45
+ "32": "LABEL_32",
46
+ "33": "LABEL_33",
47
+ "34": "LABEL_34",
48
+ "35": "LABEL_35",
49
+ "36": "LABEL_36",
50
+ "37": "LABEL_37",
51
+ "38": "LABEL_38",
52
+ "39": "LABEL_39",
53
+ "40": "LABEL_40",
54
+ "41": "LABEL_41",
55
+ "42": "LABEL_42",
56
+ "43": "LABEL_43",
57
+ "44": "LABEL_44",
58
+ "45": "LABEL_45",
59
+ "46": "LABEL_46",
60
+ "47": "LABEL_47",
61
+ "48": "LABEL_48",
62
+ "49": "LABEL_49",
63
+ "50": "LABEL_50",
64
+ "51": "LABEL_51",
65
+ "52": "LABEL_52",
66
+ "53": "LABEL_53",
67
+ "54": "LABEL_54",
68
+ "55": "LABEL_55",
69
+ "56": "LABEL_56",
70
+ "57": "LABEL_57",
71
+ "58": "LABEL_58"
72
+ },
73
+ "initializer_range": 0.02,
74
+ "intermediate_size": 3072,
75
+ "label2id": {
76
+ "LABEL_0": 0,
77
+ "LABEL_1": 1,
78
+ "LABEL_10": 10,
79
+ "LABEL_11": 11,
80
+ "LABEL_12": 12,
81
+ "LABEL_13": 13,
82
+ "LABEL_14": 14,
83
+ "LABEL_15": 15,
84
+ "LABEL_16": 16,
85
+ "LABEL_17": 17,
86
+ "LABEL_18": 18,
87
+ "LABEL_19": 19,
88
+ "LABEL_2": 2,
89
+ "LABEL_20": 20,
90
+ "LABEL_21": 21,
91
+ "LABEL_22": 22,
92
+ "LABEL_23": 23,
93
+ "LABEL_24": 24,
94
+ "LABEL_25": 25,
95
+ "LABEL_26": 26,
96
+ "LABEL_27": 27,
97
+ "LABEL_28": 28,
98
+ "LABEL_29": 29,
99
+ "LABEL_3": 3,
100
+ "LABEL_30": 30,
101
+ "LABEL_31": 31,
102
+ "LABEL_32": 32,
103
+ "LABEL_33": 33,
104
+ "LABEL_34": 34,
105
+ "LABEL_35": 35,
106
+ "LABEL_36": 36,
107
+ "LABEL_37": 37,
108
+ "LABEL_38": 38,
109
+ "LABEL_39": 39,
110
+ "LABEL_4": 4,
111
+ "LABEL_40": 40,
112
+ "LABEL_41": 41,
113
+ "LABEL_42": 42,
114
+ "LABEL_43": 43,
115
+ "LABEL_44": 44,
116
+ "LABEL_45": 45,
117
+ "LABEL_46": 46,
118
+ "LABEL_47": 47,
119
+ "LABEL_48": 48,
120
+ "LABEL_49": 49,
121
+ "LABEL_5": 5,
122
+ "LABEL_50": 50,
123
+ "LABEL_51": 51,
124
+ "LABEL_52": 52,
125
+ "LABEL_53": 53,
126
+ "LABEL_54": 54,
127
+ "LABEL_55": 55,
128
+ "LABEL_56": 56,
129
+ "LABEL_57": 57,
130
+ "LABEL_58": 58,
131
+ "LABEL_6": 6,
132
+ "LABEL_7": 7,
133
+ "LABEL_8": 8,
134
+ "LABEL_9": 9
135
+ },
136
+ "layer_norm_eps": 1e-12,
137
+ "max_position_embeddings": 512,
138
+ "model_type": "bert",
139
+ "num_attention_heads": 12,
140
+ "num_hidden_layers": 12,
141
+ "pad_token_id": 0,
142
+ "position_embedding_type": "absolute",
143
+ "problem_type": "single_label_classification",
144
+ "torch_dtype": "float32",
145
+ "transformers_version": "4.47.1",
146
+ "type_vocab_size": 2,
147
+ "use_cache": true,
148
+ "vocab_size": 30522
149
+ }
checkpoint-948/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f2d6af8c7c51f1752fc7cf9179ffe579f68c9822fe9c0b4cdfeae91b98a55c5
3
+ size 438133980
checkpoint-948/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f04f91cbe88376dd48c769222d9e827667fb9e17ddf9a7d471b3149bf487d09b
3
+ size 876383354
checkpoint-948/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ace1ae70ed3086e737c887e839b0768fa97104ac3fcc4aaaf0b060c61d1cba45
3
+ size 13990
checkpoint-948/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0783dd16df486c722a3ebe892adbfc984d3c78d06305f35ef88f1a6417c97a03
3
+ size 1064
checkpoint-948/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }