|
{ |
|
"best_metric": 0.7399751659163123, |
|
"best_model_checkpoint": "/opt/dlami/nvme/shevtsov/sent_checkpoints/checkpoint-102510", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 102510, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"step": 10251, |
|
"train_accuracy": 0.9232076630758701, |
|
"train_f1": 0.9284970394554347, |
|
"train_loss": 0.08324012160301208, |
|
"train_precision": 0.9395995876182259, |
|
"train_recall": 0.9232076630758701, |
|
"train_runtime": 5077.4566, |
|
"train_samples_per_second": 775.226, |
|
"train_steps_per_second": 2.019 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.202436923980713, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1512, |
|
"step": 10251 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5906430303537085, |
|
"eval_f1": 0.5953154182610875, |
|
"eval_loss": 1.1822575330734253, |
|
"eval_precision": 0.7605817989645098, |
|
"eval_recall": 0.5906430303537085, |
|
"eval_runtime": 31.2064, |
|
"eval_samples_per_second": 766.446, |
|
"eval_steps_per_second": 2.019, |
|
"step": 10251 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 20502, |
|
"train_accuracy": 0.9251504125579815, |
|
"train_f1": 0.9318479879378413, |
|
"train_loss": 0.06029369682073593, |
|
"train_precision": 0.9457024638382566, |
|
"train_recall": 0.9251504125579815, |
|
"train_runtime": 5078.2486, |
|
"train_samples_per_second": 775.105, |
|
"train_steps_per_second": 2.019 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.3391969203948975, |
|
"learning_rate": 4.849231551964771e-06, |
|
"loss": 0.0727, |
|
"step": 20502 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5809850321933272, |
|
"eval_f1": 0.5827904211311477, |
|
"eval_loss": 0.8601031303405762, |
|
"eval_precision": 0.7707426917608643, |
|
"eval_recall": 0.5809850321933272, |
|
"eval_runtime": 31.1947, |
|
"eval_samples_per_second": 766.734, |
|
"eval_steps_per_second": 2.02, |
|
"step": 20502 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 30753, |
|
"train_accuracy": 0.9311338878819889, |
|
"train_f1": 0.9367318532276315, |
|
"train_loss": 0.05340421944856644, |
|
"train_precision": 0.948615352312463, |
|
"train_recall": 0.9311338878819889, |
|
"train_runtime": 5078.9472, |
|
"train_samples_per_second": 774.998, |
|
"train_steps_per_second": 2.018 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.368744134902954, |
|
"learning_rate": 4.415111107797445e-06, |
|
"loss": 0.064, |
|
"step": 30753 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6280207375198595, |
|
"eval_f1": 0.6341902420521265, |
|
"eval_loss": 1.0681450366973877, |
|
"eval_precision": 0.768741241964947, |
|
"eval_recall": 0.6280207375198595, |
|
"eval_runtime": 31.2028, |
|
"eval_samples_per_second": 766.535, |
|
"eval_steps_per_second": 2.019, |
|
"step": 30753 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 41004, |
|
"train_accuracy": 0.941354218588914, |
|
"train_f1": 0.9444982035100681, |
|
"train_loss": 0.0493415892124176, |
|
"train_precision": 0.9518003283876015, |
|
"train_recall": 0.941354218588914, |
|
"train_runtime": 5080.4688, |
|
"train_samples_per_second": 774.766, |
|
"train_steps_per_second": 2.018 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 7.862049579620361, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 0.0585, |
|
"step": 41004 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6900660590350364, |
|
"eval_f1": 0.6986283009597273, |
|
"eval_loss": 1.4099539518356323, |
|
"eval_precision": 0.7671384440115526, |
|
"eval_recall": 0.6900660590350364, |
|
"eval_runtime": 31.2008, |
|
"eval_samples_per_second": 766.584, |
|
"eval_steps_per_second": 2.019, |
|
"step": 41004 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 51255, |
|
"train_accuracy": 0.9479113474150279, |
|
"train_f1": 0.9498162368391557, |
|
"train_loss": 0.04905932769179344, |
|
"train_precision": 0.9544620214289802, |
|
"train_recall": 0.9479113474150279, |
|
"train_runtime": 5088.1804, |
|
"train_samples_per_second": 773.592, |
|
"train_steps_per_second": 2.015 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.6737421751022339, |
|
"learning_rate": 2.9341204441673267e-06, |
|
"loss": 0.0543, |
|
"step": 51255 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7279454803913371, |
|
"eval_f1": 0.7344324841621389, |
|
"eval_loss": 2.154353618621826, |
|
"eval_precision": 0.7651689935562233, |
|
"eval_recall": 0.7279454803913371, |
|
"eval_runtime": 31.2388, |
|
"eval_samples_per_second": 765.65, |
|
"eval_steps_per_second": 2.017, |
|
"step": 51255 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"step": 61506, |
|
"train_accuracy": 0.944803507162031, |
|
"train_f1": 0.9475462857180277, |
|
"train_loss": 0.0426737517118454, |
|
"train_precision": 0.9540200071722176, |
|
"train_recall": 0.944803507162031, |
|
"train_runtime": 5078.4022, |
|
"train_samples_per_second": 775.081, |
|
"train_steps_per_second": 2.019 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.879870653152466, |
|
"learning_rate": 2.0658795558326745e-06, |
|
"loss": 0.051, |
|
"step": 61506 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7079187223011958, |
|
"eval_f1": 0.7155283465045912, |
|
"eval_loss": 2.2507946491241455, |
|
"eval_precision": 0.7570941064333973, |
|
"eval_recall": 0.7079187223011958, |
|
"eval_runtime": 31.2026, |
|
"eval_samples_per_second": 766.538, |
|
"eval_steps_per_second": 2.019, |
|
"step": 61506 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 71757, |
|
"train_accuracy": 0.9474675154096338, |
|
"train_f1": 0.9497025418465336, |
|
"train_loss": 0.0410270020365715, |
|
"train_precision": 0.9551267616744933, |
|
"train_recall": 0.9474675154096338, |
|
"train_runtime": 5080.7556, |
|
"train_samples_per_second": 774.722, |
|
"train_steps_per_second": 2.018 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.4426418542861938, |
|
"learning_rate": 1.2500000000000007e-06, |
|
"loss": 0.0486, |
|
"step": 71757 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7198344343172506, |
|
"eval_f1": 0.726834352850576, |
|
"eval_loss": 2.45487380027771, |
|
"eval_precision": 0.7590566392960729, |
|
"eval_recall": 0.7198344343172506, |
|
"eval_runtime": 31.2049, |
|
"eval_samples_per_second": 766.482, |
|
"eval_steps_per_second": 2.019, |
|
"step": 71757 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 82008, |
|
"train_accuracy": 0.9504046315025708, |
|
"train_f1": 0.9520763661297328, |
|
"train_loss": 0.0410199835896492, |
|
"train_precision": 0.9562815819971588, |
|
"train_recall": 0.9504046315025708, |
|
"train_runtime": 5079.0768, |
|
"train_samples_per_second": 774.978, |
|
"train_steps_per_second": 2.018 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.1270148754119873, |
|
"learning_rate": 5.848888922025553e-07, |
|
"loss": 0.0468, |
|
"step": 82008 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7316665273016139, |
|
"eval_f1": 0.737640105520052, |
|
"eval_loss": 2.602576494216919, |
|
"eval_precision": 0.7612609852108917, |
|
"eval_recall": 0.7316665273016139, |
|
"eval_runtime": 31.1916, |
|
"eval_samples_per_second": 766.809, |
|
"eval_steps_per_second": 2.02, |
|
"step": 82008 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"step": 92259, |
|
"train_accuracy": 0.9505296259769004, |
|
"train_f1": 0.9522083394688067, |
|
"train_loss": 0.03998752683401108, |
|
"train_precision": 0.9564247374560226, |
|
"train_recall": 0.9505296259769004, |
|
"train_runtime": 5078.0822, |
|
"train_samples_per_second": 775.13, |
|
"train_steps_per_second": 2.019 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 1.666390061378479, |
|
"learning_rate": 1.507684480352292e-07, |
|
"loss": 0.0457, |
|
"step": 92259 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7320010034283803, |
|
"eval_f1": 0.7378168031871946, |
|
"eval_loss": 2.8211002349853516, |
|
"eval_precision": 0.7595695441249473, |
|
"eval_recall": 0.7320010034283803, |
|
"eval_runtime": 31.2132, |
|
"eval_samples_per_second": 766.278, |
|
"eval_steps_per_second": 2.018, |
|
"step": 92259 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 102510, |
|
"train_accuracy": 0.9510209660446921, |
|
"train_f1": 0.9526073589638447, |
|
"train_loss": 0.040298543870449066, |
|
"train_precision": 0.9566123120238171, |
|
"train_recall": 0.9510209660446921, |
|
"train_runtime": 5078.6508, |
|
"train_samples_per_second": 775.043, |
|
"train_steps_per_second": 2.018 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 1.6141560077667236, |
|
"learning_rate": 0.0, |
|
"loss": 0.0451, |
|
"step": 102510 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7345095743791287, |
|
"eval_f1": 0.7399751659163123, |
|
"eval_loss": 2.884028911590576, |
|
"eval_precision": 0.759520061975123, |
|
"eval_recall": 0.7345095743791287, |
|
"eval_runtime": 31.2146, |
|
"eval_samples_per_second": 766.243, |
|
"eval_steps_per_second": 2.018, |
|
"step": 102510 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 102510, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0357340434899927e+19, |
|
"train_batch_size": 48, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|