|
{ |
|
"best_metric": 19.566831683168317, |
|
"best_model_checkpoint": "o0dimplz0o/Whisper-Large-v3-turbo-STT-Zeroth-KO-v2/checkpoint-7500", |
|
"epoch": 1.7872161480235493, |
|
"eval_steps": 500, |
|
"global_step": 8500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00021026072329688813, |
|
"grad_norm": 0.9765625, |
|
"learning_rate": 2e-08, |
|
"loss": 0.0149, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.10513036164844407, |
|
"grad_norm": 4.0, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0107, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.10513036164844407, |
|
"eval_cer": 0.10140471258415329, |
|
"eval_loss": 0.01961674913764, |
|
"eval_runtime": 150.7373, |
|
"eval_samples_per_second": 4.75, |
|
"eval_steps_per_second": 0.597, |
|
"eval_wer": 28.849009900990097, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.21026072329688814, |
|
"grad_norm": 1.8984375, |
|
"learning_rate": 9.698463103929542e-06, |
|
"loss": 0.0202, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.21026072329688814, |
|
"eval_cer": 0.0718539616778871, |
|
"eval_loss": 0.018409011885523796, |
|
"eval_runtime": 146.6316, |
|
"eval_samples_per_second": 4.883, |
|
"eval_steps_per_second": 0.614, |
|
"eval_wer": 21.806930693069308, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3153910849453322, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 8.83022221559489e-06, |
|
"loss": 0.0197, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3153910849453322, |
|
"eval_cer": 0.06975012946659762, |
|
"eval_loss": 0.017869649454951286, |
|
"eval_runtime": 146.8815, |
|
"eval_samples_per_second": 4.875, |
|
"eval_steps_per_second": 0.613, |
|
"eval_wer": 21.633663366336634, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.42052144659377627, |
|
"grad_norm": 1.5546875, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.0177, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.42052144659377627, |
|
"eval_cer": 0.11176204039357845, |
|
"eval_loss": 0.017242593690752983, |
|
"eval_runtime": 154.9659, |
|
"eval_samples_per_second": 4.62, |
|
"eval_steps_per_second": 0.581, |
|
"eval_wer": 26.683168316831683, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5256518082422204, |
|
"grad_norm": 0.6953125, |
|
"learning_rate": 5.8682408883346535e-06, |
|
"loss": 0.0182, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.5256518082422204, |
|
"eval_cer": 0.10477084412221647, |
|
"eval_loss": 0.016888286918401718, |
|
"eval_runtime": 155.4479, |
|
"eval_samples_per_second": 4.606, |
|
"eval_steps_per_second": 0.579, |
|
"eval_wer": 24.047029702970296, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6307821698906644, |
|
"grad_norm": 1.859375, |
|
"learning_rate": 4.131759111665349e-06, |
|
"loss": 0.017, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6307821698906644, |
|
"eval_cer": 0.08761651993785603, |
|
"eval_loss": 0.0166668388992548, |
|
"eval_runtime": 158.8576, |
|
"eval_samples_per_second": 4.507, |
|
"eval_steps_per_second": 0.567, |
|
"eval_wer": 22.834158415841586, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7359125315391085, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 2.5000000000000015e-06, |
|
"loss": 0.0174, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.7359125315391085, |
|
"eval_cer": 0.08716338684619368, |
|
"eval_loss": 0.01654214970767498, |
|
"eval_runtime": 151.2266, |
|
"eval_samples_per_second": 4.735, |
|
"eval_steps_per_second": 0.595, |
|
"eval_wer": 22.636138613861387, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.8410428931875525, |
|
"grad_norm": 1.734375, |
|
"learning_rate": 1.1697777844051105e-06, |
|
"loss": 0.0183, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.8410428931875525, |
|
"eval_cer": 0.08654842050750906, |
|
"eval_loss": 0.01647871918976307, |
|
"eval_runtime": 151.8997, |
|
"eval_samples_per_second": 4.714, |
|
"eval_steps_per_second": 0.592, |
|
"eval_wer": 22.5990099009901, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9461732548359967, |
|
"grad_norm": 1.453125, |
|
"learning_rate": 3.015368960704584e-07, |
|
"loss": 0.0168, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.9461732548359967, |
|
"eval_cer": 0.06573666494044536, |
|
"eval_loss": 0.016473352909088135, |
|
"eval_runtime": 147.2823, |
|
"eval_samples_per_second": 4.861, |
|
"eval_steps_per_second": 0.611, |
|
"eval_wer": 19.93811881188119, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.0513036164844407, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 0.0, |
|
"loss": 0.015, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.0513036164844407, |
|
"eval_cer": 0.08683972035214915, |
|
"eval_loss": 0.016479654237627983, |
|
"eval_runtime": 151.4679, |
|
"eval_samples_per_second": 4.727, |
|
"eval_steps_per_second": 0.594, |
|
"eval_wer": 22.54950495049505, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.1564339781328847, |
|
"grad_norm": 0.6484375, |
|
"learning_rate": 1.257446259144494e-06, |
|
"loss": 0.0135, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.1564339781328847, |
|
"eval_cer": 0.08606292076644226, |
|
"eval_loss": 0.016473721712827682, |
|
"eval_runtime": 150.5023, |
|
"eval_samples_per_second": 4.757, |
|
"eval_steps_per_second": 0.598, |
|
"eval_wer": 22.462871287128714, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.2615643397813288, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 2.771308221117309e-06, |
|
"loss": 0.013, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.2615643397813288, |
|
"eval_cer": 0.08635422061108235, |
|
"eval_loss": 0.016464611515402794, |
|
"eval_runtime": 150.7962, |
|
"eval_samples_per_second": 4.748, |
|
"eval_steps_per_second": 0.597, |
|
"eval_wer": 22.66089108910891, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.3666947014297728, |
|
"grad_norm": 0.55859375, |
|
"learning_rate": 1.9868268181037186e-06, |
|
"loss": 0.0146, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.3666947014297728, |
|
"eval_cer": 0.08230838943552564, |
|
"eval_loss": 0.016470087692141533, |
|
"eval_runtime": 151.3657, |
|
"eval_samples_per_second": 4.73, |
|
"eval_steps_per_second": 0.595, |
|
"eval_wer": 20.977722772277225, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.471825063078217, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 1.3049554138967052e-06, |
|
"loss": 0.0122, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.471825063078217, |
|
"eval_cer": 0.06544536509580529, |
|
"eval_loss": 0.01648491993546486, |
|
"eval_runtime": 146.5932, |
|
"eval_samples_per_second": 4.884, |
|
"eval_steps_per_second": 0.614, |
|
"eval_wer": 19.814356435643564, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.576955424726661, |
|
"grad_norm": 2.265625, |
|
"learning_rate": 7.489143213519301e-07, |
|
"loss": 0.0131, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.576955424726661, |
|
"eval_cer": 0.06457146556188503, |
|
"eval_loss": 0.016461558640003204, |
|
"eval_runtime": 147.4455, |
|
"eval_samples_per_second": 4.856, |
|
"eval_steps_per_second": 0.61, |
|
"eval_wer": 19.566831683168317, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.682085786375105, |
|
"grad_norm": 0.88671875, |
|
"learning_rate": 3.3763885297822153e-07, |
|
"loss": 0.0139, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.682085786375105, |
|
"eval_cer": 0.08590108751942, |
|
"eval_loss": 0.01645829528570175, |
|
"eval_runtime": 150.1841, |
|
"eval_samples_per_second": 4.767, |
|
"eval_steps_per_second": 0.599, |
|
"eval_wer": 22.425742574257427, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.7872161480235493, |
|
"grad_norm": 0.90234375, |
|
"learning_rate": 8.513450158049109e-08, |
|
"loss": 0.0134, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.7872161480235493, |
|
"eval_cer": 0.06470093215950284, |
|
"eval_loss": 0.016449948772788048, |
|
"eval_runtime": 147.6753, |
|
"eval_samples_per_second": 4.848, |
|
"eval_steps_per_second": 0.609, |
|
"eval_wer": 19.64108910891089, |
|
"step": 8500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 9000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.898246464372736e+19, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|