vaatsav06 commited on
Commit
fc817dc
·
verified ·
1 Parent(s): fd6485d

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:700837625208b7724bad50aacfe3ec0899ed845676d6567b103150efb717c031
3
  size 268858112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cb5c823ef74dfadc52b27febc38a2ac3a875bb51704bb51249b3924e6ee6f2b
3
  size 268858112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94d2f236b773db6cf924287d656356a621b077f1b9db874b98dd46fbac4e2d3c
3
  size 137668197
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35bbf0d8ee6289b2e585cda8bdd7541a68981e8fbe801a50960d7cef7275b8b9
3
  size 137668197
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33dca605be33ed4ace196e2854e478111341ce60f041bccf2a2bda0cd9a6448a
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfad65ecdebf4d2a4ef23aa53c40f8dd23f710c224d047e20d32dc51c4015de2
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.7677543186180422,
6
  "eval_steps": 500,
7
- "global_step": 800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1128,6 +1128,286 @@
1128
  "learning_rate": 0.0001,
1129
  "loss": 0.2739,
1130
  "step": 800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1131
  }
1132
  ],
1133
  "logging_steps": 5,
@@ -1147,7 +1427,7 @@
1147
  "attributes": {}
1148
  }
1149
  },
1150
- "total_flos": 2.655694975843308e+17,
1151
  "train_batch_size": 24,
1152
  "trial_name": null,
1153
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.9596928982725528,
6
  "eval_steps": 500,
7
+ "global_step": 1000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1128
  "learning_rate": 0.0001,
1129
  "loss": 0.2739,
1130
  "step": 800
1131
+ },
1132
+ {
1133
+ "epoch": 0.772552783109405,
1134
+ "grad_norm": 0.14281855523586273,
1135
+ "learning_rate": 0.0001,
1136
+ "loss": 0.2711,
1137
+ "step": 805
1138
+ },
1139
+ {
1140
+ "epoch": 0.7773512476007678,
1141
+ "grad_norm": 0.13634523749351501,
1142
+ "learning_rate": 0.0001,
1143
+ "loss": 0.267,
1144
+ "step": 810
1145
+ },
1146
+ {
1147
+ "epoch": 0.7821497120921305,
1148
+ "grad_norm": 0.14958547055721283,
1149
+ "learning_rate": 0.0001,
1150
+ "loss": 0.2809,
1151
+ "step": 815
1152
+ },
1153
+ {
1154
+ "epoch": 0.7869481765834933,
1155
+ "grad_norm": 0.16246724128723145,
1156
+ "learning_rate": 0.0001,
1157
+ "loss": 0.2632,
1158
+ "step": 820
1159
+ },
1160
+ {
1161
+ "epoch": 0.791746641074856,
1162
+ "grad_norm": 0.15865352749824524,
1163
+ "learning_rate": 0.0001,
1164
+ "loss": 0.2669,
1165
+ "step": 825
1166
+ },
1167
+ {
1168
+ "epoch": 0.7965451055662188,
1169
+ "grad_norm": 0.16149087250232697,
1170
+ "learning_rate": 0.0001,
1171
+ "loss": 0.2708,
1172
+ "step": 830
1173
+ },
1174
+ {
1175
+ "epoch": 0.8013435700575816,
1176
+ "grad_norm": 0.15393655002117157,
1177
+ "learning_rate": 0.0001,
1178
+ "loss": 0.2736,
1179
+ "step": 835
1180
+ },
1181
+ {
1182
+ "epoch": 0.8061420345489443,
1183
+ "grad_norm": 0.16198311746120453,
1184
+ "learning_rate": 0.0001,
1185
+ "loss": 0.2627,
1186
+ "step": 840
1187
+ },
1188
+ {
1189
+ "epoch": 0.8109404990403071,
1190
+ "grad_norm": 0.18130268156528473,
1191
+ "learning_rate": 0.0001,
1192
+ "loss": 0.2663,
1193
+ "step": 845
1194
+ },
1195
+ {
1196
+ "epoch": 0.8157389635316699,
1197
+ "grad_norm": 0.15879245102405548,
1198
+ "learning_rate": 0.0001,
1199
+ "loss": 0.2525,
1200
+ "step": 850
1201
+ },
1202
+ {
1203
+ "epoch": 0.8205374280230326,
1204
+ "grad_norm": 0.15435881912708282,
1205
+ "learning_rate": 0.0001,
1206
+ "loss": 0.277,
1207
+ "step": 855
1208
+ },
1209
+ {
1210
+ "epoch": 0.8253358925143954,
1211
+ "grad_norm": 0.14485138654708862,
1212
+ "learning_rate": 0.0001,
1213
+ "loss": 0.2902,
1214
+ "step": 860
1215
+ },
1216
+ {
1217
+ "epoch": 0.8301343570057581,
1218
+ "grad_norm": 0.15166470408439636,
1219
+ "learning_rate": 0.0001,
1220
+ "loss": 0.2703,
1221
+ "step": 865
1222
+ },
1223
+ {
1224
+ "epoch": 0.8349328214971209,
1225
+ "grad_norm": 0.14248280227184296,
1226
+ "learning_rate": 0.0001,
1227
+ "loss": 0.2635,
1228
+ "step": 870
1229
+ },
1230
+ {
1231
+ "epoch": 0.8397312859884837,
1232
+ "grad_norm": 0.14291894435882568,
1233
+ "learning_rate": 0.0001,
1234
+ "loss": 0.2505,
1235
+ "step": 875
1236
+ },
1237
+ {
1238
+ "epoch": 0.8445297504798465,
1239
+ "grad_norm": 0.16025425493717194,
1240
+ "learning_rate": 0.0001,
1241
+ "loss": 0.2591,
1242
+ "step": 880
1243
+ },
1244
+ {
1245
+ "epoch": 0.8493282149712092,
1246
+ "grad_norm": 0.15063312649726868,
1247
+ "learning_rate": 0.0001,
1248
+ "loss": 0.2767,
1249
+ "step": 885
1250
+ },
1251
+ {
1252
+ "epoch": 0.8541266794625719,
1253
+ "grad_norm": 0.14704886078834534,
1254
+ "learning_rate": 0.0001,
1255
+ "loss": 0.2615,
1256
+ "step": 890
1257
+ },
1258
+ {
1259
+ "epoch": 0.8589251439539347,
1260
+ "grad_norm": 0.1524520218372345,
1261
+ "learning_rate": 0.0001,
1262
+ "loss": 0.2771,
1263
+ "step": 895
1264
+ },
1265
+ {
1266
+ "epoch": 0.8637236084452975,
1267
+ "grad_norm": 0.15311211347579956,
1268
+ "learning_rate": 0.0001,
1269
+ "loss": 0.2779,
1270
+ "step": 900
1271
+ },
1272
+ {
1273
+ "epoch": 0.8685220729366603,
1274
+ "grad_norm": 0.19531571865081787,
1275
+ "learning_rate": 0.0001,
1276
+ "loss": 0.256,
1277
+ "step": 905
1278
+ },
1279
+ {
1280
+ "epoch": 0.8733205374280231,
1281
+ "grad_norm": 0.15908968448638916,
1282
+ "learning_rate": 0.0001,
1283
+ "loss": 0.2598,
1284
+ "step": 910
1285
+ },
1286
+ {
1287
+ "epoch": 0.8781190019193857,
1288
+ "grad_norm": 0.14221200346946716,
1289
+ "learning_rate": 0.0001,
1290
+ "loss": 0.2554,
1291
+ "step": 915
1292
+ },
1293
+ {
1294
+ "epoch": 0.8829174664107485,
1295
+ "grad_norm": 0.16132907569408417,
1296
+ "learning_rate": 0.0001,
1297
+ "loss": 0.2885,
1298
+ "step": 920
1299
+ },
1300
+ {
1301
+ "epoch": 0.8877159309021113,
1302
+ "grad_norm": 0.14751212298870087,
1303
+ "learning_rate": 0.0001,
1304
+ "loss": 0.2716,
1305
+ "step": 925
1306
+ },
1307
+ {
1308
+ "epoch": 0.8925143953934741,
1309
+ "grad_norm": 0.146012082695961,
1310
+ "learning_rate": 0.0001,
1311
+ "loss": 0.2558,
1312
+ "step": 930
1313
+ },
1314
+ {
1315
+ "epoch": 0.8973128598848369,
1316
+ "grad_norm": 0.16232919692993164,
1317
+ "learning_rate": 0.0001,
1318
+ "loss": 0.2805,
1319
+ "step": 935
1320
+ },
1321
+ {
1322
+ "epoch": 0.9021113243761996,
1323
+ "grad_norm": 0.16521847248077393,
1324
+ "learning_rate": 0.0001,
1325
+ "loss": 0.2669,
1326
+ "step": 940
1327
+ },
1328
+ {
1329
+ "epoch": 0.9069097888675623,
1330
+ "grad_norm": 0.14723201096057892,
1331
+ "learning_rate": 0.0001,
1332
+ "loss": 0.2642,
1333
+ "step": 945
1334
+ },
1335
+ {
1336
+ "epoch": 0.9117082533589251,
1337
+ "grad_norm": 0.15053531527519226,
1338
+ "learning_rate": 0.0001,
1339
+ "loss": 0.2698,
1340
+ "step": 950
1341
+ },
1342
+ {
1343
+ "epoch": 0.9165067178502879,
1344
+ "grad_norm": 0.14634476602077484,
1345
+ "learning_rate": 0.0001,
1346
+ "loss": 0.2615,
1347
+ "step": 955
1348
+ },
1349
+ {
1350
+ "epoch": 0.9213051823416507,
1351
+ "grad_norm": 0.1575053334236145,
1352
+ "learning_rate": 0.0001,
1353
+ "loss": 0.2789,
1354
+ "step": 960
1355
+ },
1356
+ {
1357
+ "epoch": 0.9261036468330134,
1358
+ "grad_norm": 0.15231551229953766,
1359
+ "learning_rate": 0.0001,
1360
+ "loss": 0.2577,
1361
+ "step": 965
1362
+ },
1363
+ {
1364
+ "epoch": 0.9309021113243762,
1365
+ "grad_norm": 0.15277941524982452,
1366
+ "learning_rate": 0.0001,
1367
+ "loss": 0.2658,
1368
+ "step": 970
1369
+ },
1370
+ {
1371
+ "epoch": 0.935700575815739,
1372
+ "grad_norm": 0.1474364995956421,
1373
+ "learning_rate": 0.0001,
1374
+ "loss": 0.2771,
1375
+ "step": 975
1376
+ },
1377
+ {
1378
+ "epoch": 0.9404990403071017,
1379
+ "grad_norm": 0.14509518444538116,
1380
+ "learning_rate": 0.0001,
1381
+ "loss": 0.2587,
1382
+ "step": 980
1383
+ },
1384
+ {
1385
+ "epoch": 0.9452975047984645,
1386
+ "grad_norm": 0.146579310297966,
1387
+ "learning_rate": 0.0001,
1388
+ "loss": 0.2662,
1389
+ "step": 985
1390
+ },
1391
+ {
1392
+ "epoch": 0.9500959692898272,
1393
+ "grad_norm": 0.1470819115638733,
1394
+ "learning_rate": 0.0001,
1395
+ "loss": 0.2691,
1396
+ "step": 990
1397
+ },
1398
+ {
1399
+ "epoch": 0.95489443378119,
1400
+ "grad_norm": 0.161437526345253,
1401
+ "learning_rate": 0.0001,
1402
+ "loss": 0.2681,
1403
+ "step": 995
1404
+ },
1405
+ {
1406
+ "epoch": 0.9596928982725528,
1407
+ "grad_norm": 0.1448318362236023,
1408
+ "learning_rate": 0.0001,
1409
+ "loss": 0.2554,
1410
+ "step": 1000
1411
  }
1412
  ],
1413
  "logging_steps": 5,
 
1427
  "attributes": {}
1428
  }
1429
  },
1430
+ "total_flos": 3.323888912649138e+17,
1431
  "train_batch_size": 24,
1432
  "trial_name": null,
1433
  "trial_params": null