mDeBERTa-v3-multi-sent / trainer_state.json
alexander-sh's picture
Upload 14 files
f3047c0 verified
{
"best_metric": 0.7399751659163123,
"best_model_checkpoint": "/opt/dlami/nvme/shevtsov/sent_checkpoints/checkpoint-102510",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 102510,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"step": 10251,
"train_accuracy": 0.9232076630758701,
"train_f1": 0.9284970394554347,
"train_loss": 0.08324012160301208,
"train_precision": 0.9395995876182259,
"train_recall": 0.9232076630758701,
"train_runtime": 5077.4566,
"train_samples_per_second": 775.226,
"train_steps_per_second": 2.019
},
{
"epoch": 1.0,
"grad_norm": 2.202436923980713,
"learning_rate": 5e-06,
"loss": 0.1512,
"step": 10251
},
{
"epoch": 1.0,
"eval_accuracy": 0.5906430303537085,
"eval_f1": 0.5953154182610875,
"eval_loss": 1.1822575330734253,
"eval_precision": 0.7605817989645098,
"eval_recall": 0.5906430303537085,
"eval_runtime": 31.2064,
"eval_samples_per_second": 766.446,
"eval_steps_per_second": 2.019,
"step": 10251
},
{
"epoch": 2.0,
"step": 20502,
"train_accuracy": 0.9251504125579815,
"train_f1": 0.9318479879378413,
"train_loss": 0.06029369682073593,
"train_precision": 0.9457024638382566,
"train_recall": 0.9251504125579815,
"train_runtime": 5078.2486,
"train_samples_per_second": 775.105,
"train_steps_per_second": 2.019
},
{
"epoch": 2.0,
"grad_norm": 2.3391969203948975,
"learning_rate": 4.849231551964771e-06,
"loss": 0.0727,
"step": 20502
},
{
"epoch": 2.0,
"eval_accuracy": 0.5809850321933272,
"eval_f1": 0.5827904211311477,
"eval_loss": 0.8601031303405762,
"eval_precision": 0.7707426917608643,
"eval_recall": 0.5809850321933272,
"eval_runtime": 31.1947,
"eval_samples_per_second": 766.734,
"eval_steps_per_second": 2.02,
"step": 20502
},
{
"epoch": 3.0,
"step": 30753,
"train_accuracy": 0.9311338878819889,
"train_f1": 0.9367318532276315,
"train_loss": 0.05340421944856644,
"train_precision": 0.948615352312463,
"train_recall": 0.9311338878819889,
"train_runtime": 5078.9472,
"train_samples_per_second": 774.998,
"train_steps_per_second": 2.018
},
{
"epoch": 3.0,
"grad_norm": 3.368744134902954,
"learning_rate": 4.415111107797445e-06,
"loss": 0.064,
"step": 30753
},
{
"epoch": 3.0,
"eval_accuracy": 0.6280207375198595,
"eval_f1": 0.6341902420521265,
"eval_loss": 1.0681450366973877,
"eval_precision": 0.768741241964947,
"eval_recall": 0.6280207375198595,
"eval_runtime": 31.2028,
"eval_samples_per_second": 766.535,
"eval_steps_per_second": 2.019,
"step": 30753
},
{
"epoch": 4.0,
"step": 41004,
"train_accuracy": 0.941354218588914,
"train_f1": 0.9444982035100681,
"train_loss": 0.0493415892124176,
"train_precision": 0.9518003283876015,
"train_recall": 0.941354218588914,
"train_runtime": 5080.4688,
"train_samples_per_second": 774.766,
"train_steps_per_second": 2.018
},
{
"epoch": 4.0,
"grad_norm": 7.862049579620361,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.0585,
"step": 41004
},
{
"epoch": 4.0,
"eval_accuracy": 0.6900660590350364,
"eval_f1": 0.6986283009597273,
"eval_loss": 1.4099539518356323,
"eval_precision": 0.7671384440115526,
"eval_recall": 0.6900660590350364,
"eval_runtime": 31.2008,
"eval_samples_per_second": 766.584,
"eval_steps_per_second": 2.019,
"step": 41004
},
{
"epoch": 5.0,
"step": 51255,
"train_accuracy": 0.9479113474150279,
"train_f1": 0.9498162368391557,
"train_loss": 0.04905932769179344,
"train_precision": 0.9544620214289802,
"train_recall": 0.9479113474150279,
"train_runtime": 5088.1804,
"train_samples_per_second": 773.592,
"train_steps_per_second": 2.015
},
{
"epoch": 5.0,
"grad_norm": 1.6737421751022339,
"learning_rate": 2.9341204441673267e-06,
"loss": 0.0543,
"step": 51255
},
{
"epoch": 5.0,
"eval_accuracy": 0.7279454803913371,
"eval_f1": 0.7344324841621389,
"eval_loss": 2.154353618621826,
"eval_precision": 0.7651689935562233,
"eval_recall": 0.7279454803913371,
"eval_runtime": 31.2388,
"eval_samples_per_second": 765.65,
"eval_steps_per_second": 2.017,
"step": 51255
},
{
"epoch": 6.0,
"step": 61506,
"train_accuracy": 0.944803507162031,
"train_f1": 0.9475462857180277,
"train_loss": 0.0426737517118454,
"train_precision": 0.9540200071722176,
"train_recall": 0.944803507162031,
"train_runtime": 5078.4022,
"train_samples_per_second": 775.081,
"train_steps_per_second": 2.019
},
{
"epoch": 6.0,
"grad_norm": 2.879870653152466,
"learning_rate": 2.0658795558326745e-06,
"loss": 0.051,
"step": 61506
},
{
"epoch": 6.0,
"eval_accuracy": 0.7079187223011958,
"eval_f1": 0.7155283465045912,
"eval_loss": 2.2507946491241455,
"eval_precision": 0.7570941064333973,
"eval_recall": 0.7079187223011958,
"eval_runtime": 31.2026,
"eval_samples_per_second": 766.538,
"eval_steps_per_second": 2.019,
"step": 61506
},
{
"epoch": 7.0,
"step": 71757,
"train_accuracy": 0.9474675154096338,
"train_f1": 0.9497025418465336,
"train_loss": 0.0410270020365715,
"train_precision": 0.9551267616744933,
"train_recall": 0.9474675154096338,
"train_runtime": 5080.7556,
"train_samples_per_second": 774.722,
"train_steps_per_second": 2.018
},
{
"epoch": 7.0,
"grad_norm": 1.4426418542861938,
"learning_rate": 1.2500000000000007e-06,
"loss": 0.0486,
"step": 71757
},
{
"epoch": 7.0,
"eval_accuracy": 0.7198344343172506,
"eval_f1": 0.726834352850576,
"eval_loss": 2.45487380027771,
"eval_precision": 0.7590566392960729,
"eval_recall": 0.7198344343172506,
"eval_runtime": 31.2049,
"eval_samples_per_second": 766.482,
"eval_steps_per_second": 2.019,
"step": 71757
},
{
"epoch": 8.0,
"step": 82008,
"train_accuracy": 0.9504046315025708,
"train_f1": 0.9520763661297328,
"train_loss": 0.0410199835896492,
"train_precision": 0.9562815819971588,
"train_recall": 0.9504046315025708,
"train_runtime": 5079.0768,
"train_samples_per_second": 774.978,
"train_steps_per_second": 2.018
},
{
"epoch": 8.0,
"grad_norm": 1.1270148754119873,
"learning_rate": 5.848888922025553e-07,
"loss": 0.0468,
"step": 82008
},
{
"epoch": 8.0,
"eval_accuracy": 0.7316665273016139,
"eval_f1": 0.737640105520052,
"eval_loss": 2.602576494216919,
"eval_precision": 0.7612609852108917,
"eval_recall": 0.7316665273016139,
"eval_runtime": 31.1916,
"eval_samples_per_second": 766.809,
"eval_steps_per_second": 2.02,
"step": 82008
},
{
"epoch": 9.0,
"step": 92259,
"train_accuracy": 0.9505296259769004,
"train_f1": 0.9522083394688067,
"train_loss": 0.03998752683401108,
"train_precision": 0.9564247374560226,
"train_recall": 0.9505296259769004,
"train_runtime": 5078.0822,
"train_samples_per_second": 775.13,
"train_steps_per_second": 2.019
},
{
"epoch": 9.0,
"grad_norm": 1.666390061378479,
"learning_rate": 1.507684480352292e-07,
"loss": 0.0457,
"step": 92259
},
{
"epoch": 9.0,
"eval_accuracy": 0.7320010034283803,
"eval_f1": 0.7378168031871946,
"eval_loss": 2.8211002349853516,
"eval_precision": 0.7595695441249473,
"eval_recall": 0.7320010034283803,
"eval_runtime": 31.2132,
"eval_samples_per_second": 766.278,
"eval_steps_per_second": 2.018,
"step": 92259
},
{
"epoch": 10.0,
"step": 102510,
"train_accuracy": 0.9510209660446921,
"train_f1": 0.9526073589638447,
"train_loss": 0.040298543870449066,
"train_precision": 0.9566123120238171,
"train_recall": 0.9510209660446921,
"train_runtime": 5078.6508,
"train_samples_per_second": 775.043,
"train_steps_per_second": 2.018
},
{
"epoch": 10.0,
"grad_norm": 1.6141560077667236,
"learning_rate": 0.0,
"loss": 0.0451,
"step": 102510
},
{
"epoch": 10.0,
"eval_accuracy": 0.7345095743791287,
"eval_f1": 0.7399751659163123,
"eval_loss": 2.884028911590576,
"eval_precision": 0.759520061975123,
"eval_recall": 0.7345095743791287,
"eval_runtime": 31.2146,
"eval_samples_per_second": 766.243,
"eval_steps_per_second": 2.018,
"step": 102510
}
],
"logging_steps": 500,
"max_steps": 102510,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.0357340434899927e+19,
"train_batch_size": 48,
"trial_name": null,
"trial_params": null
}