|
{ |
|
"best_metric": 0.6313058518304987, |
|
"best_model_checkpoint": "logs/indian_ner/roberta-base/seed_1/checkpoint-1376", |
|
"epoch": 19.0, |
|
"eval_steps": 500, |
|
"global_step": 1634, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7997455985661011, |
|
"eval_f1": 0.14692321878650028, |
|
"eval_loss": 0.35509759187698364, |
|
"eval_precision": 0.08916579614982838, |
|
"eval_recall": 0.41710296684118675, |
|
"eval_runtime": 3.1945, |
|
"eval_samples_per_second": 336.207, |
|
"eval_steps_per_second": 2.817, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8326539658678076, |
|
"eval_f1": 0.2069869669160176, |
|
"eval_loss": 0.23829466104507446, |
|
"eval_precision": 0.1328449811918432, |
|
"eval_recall": 0.46841186736474694, |
|
"eval_runtime": 3.449, |
|
"eval_samples_per_second": 311.39, |
|
"eval_steps_per_second": 2.609, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8921588467134997, |
|
"eval_f1": 0.29748962245503063, |
|
"eval_loss": 0.21593649685382843, |
|
"eval_precision": 0.20750034468495795, |
|
"eval_recall": 0.525305410122164, |
|
"eval_runtime": 2.5646, |
|
"eval_samples_per_second": 418.778, |
|
"eval_steps_per_second": 3.509, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.902517996010523, |
|
"eval_f1": 0.325260250690461, |
|
"eval_loss": 0.2013314664363861, |
|
"eval_precision": 0.23377614903038632, |
|
"eval_recall": 0.5343804537521815, |
|
"eval_runtime": 2.5624, |
|
"eval_samples_per_second": 419.138, |
|
"eval_steps_per_second": 3.512, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9131180557563142, |
|
"eval_f1": 0.3645869640989892, |
|
"eval_loss": 0.1926085650920868, |
|
"eval_precision": 0.27324973876698017, |
|
"eval_recall": 0.5476439790575917, |
|
"eval_runtime": 2.6037, |
|
"eval_samples_per_second": 412.492, |
|
"eval_steps_per_second": 3.457, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 5.813953488372093, |
|
"grad_norm": 3.6987311840057373, |
|
"learning_rate": 2.1313953488372093e-05, |
|
"loss": 0.396, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9134071482948358, |
|
"eval_f1": 0.37776752767527666, |
|
"eval_loss": 0.20021429657936096, |
|
"eval_precision": 0.28207335973824693, |
|
"eval_recall": 0.5717277486910994, |
|
"eval_runtime": 6.6862, |
|
"eval_samples_per_second": 160.629, |
|
"eval_steps_per_second": 1.346, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9267054050668285, |
|
"eval_f1": 0.440271772699197, |
|
"eval_loss": 0.21028228104114532, |
|
"eval_precision": 0.3407265774378585, |
|
"eval_recall": 0.6219895287958115, |
|
"eval_runtime": 2.6496, |
|
"eval_samples_per_second": 405.343, |
|
"eval_steps_per_second": 3.397, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9255972170024959, |
|
"eval_f1": 0.4397892931520274, |
|
"eval_loss": 0.1943674236536026, |
|
"eval_precision": 0.33880709701774253, |
|
"eval_recall": 0.6265270506108203, |
|
"eval_runtime": 2.5685, |
|
"eval_samples_per_second": 418.146, |
|
"eval_steps_per_second": 3.504, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9290759638827055, |
|
"eval_f1": 0.4493577075098814, |
|
"eval_loss": 0.21183457970619202, |
|
"eval_precision": 0.3477346587650545, |
|
"eval_recall": 0.6349040139616056, |
|
"eval_runtime": 6.3809, |
|
"eval_samples_per_second": 168.314, |
|
"eval_steps_per_second": 1.41, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9396182051207925, |
|
"eval_f1": 0.5092445853143158, |
|
"eval_loss": 0.22744828462600708, |
|
"eval_precision": 0.4096027193541534, |
|
"eval_recall": 0.6729493891797557, |
|
"eval_runtime": 2.6036, |
|
"eval_samples_per_second": 412.506, |
|
"eval_steps_per_second": 3.457, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9450338720090968, |
|
"eval_f1": 0.5512627986348123, |
|
"eval_loss": 0.23177774250507355, |
|
"eval_precision": 0.45269058295964126, |
|
"eval_recall": 0.7047120418848167, |
|
"eval_runtime": 2.5671, |
|
"eval_samples_per_second": 418.363, |
|
"eval_steps_per_second": 3.506, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 11.627906976744185, |
|
"grad_norm": 2.9459869861602783, |
|
"learning_rate": 1.2593023255813954e-05, |
|
"loss": 0.0715, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9442533221550885, |
|
"eval_f1": 0.541422935655013, |
|
"eval_loss": 0.2438974231481552, |
|
"eval_precision": 0.44360231832367364, |
|
"eval_recall": 0.6945898778359512, |
|
"eval_runtime": 2.5676, |
|
"eval_samples_per_second": 418.287, |
|
"eval_steps_per_second": 3.505, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9460360594759716, |
|
"eval_f1": 0.5802113352545629, |
|
"eval_loss": 0.2385331243276596, |
|
"eval_precision": 0.47806422433288104, |
|
"eval_recall": 0.737870855148342, |
|
"eval_runtime": 2.5597, |
|
"eval_samples_per_second": 419.581, |
|
"eval_steps_per_second": 3.516, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9460071502221195, |
|
"eval_f1": 0.5560439560439561, |
|
"eval_loss": 0.24203675985336304, |
|
"eval_precision": 0.45843714609286523, |
|
"eval_recall": 0.706457242582897, |
|
"eval_runtime": 2.5735, |
|
"eval_samples_per_second": 417.328, |
|
"eval_steps_per_second": 3.497, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9501604463588795, |
|
"eval_f1": 0.5943502824858757, |
|
"eval_loss": 0.245487779378891, |
|
"eval_precision": 0.4991696322657177, |
|
"eval_recall": 0.7343804537521815, |
|
"eval_runtime": 2.5639, |
|
"eval_samples_per_second": 418.898, |
|
"eval_steps_per_second": 3.51, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9571950314629046, |
|
"eval_f1": 0.6313058518304987, |
|
"eval_loss": 0.25131794810295105, |
|
"eval_precision": 0.537687208445863, |
|
"eval_recall": 0.7643979057591623, |
|
"eval_runtime": 2.5566, |
|
"eval_samples_per_second": 420.083, |
|
"eval_steps_per_second": 3.52, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9557784780241488, |
|
"eval_f1": 0.6291390728476821, |
|
"eval_loss": 0.26701298356056213, |
|
"eval_precision": 0.5354079882381769, |
|
"eval_recall": 0.7626527050610821, |
|
"eval_runtime": 2.5575, |
|
"eval_samples_per_second": 419.947, |
|
"eval_steps_per_second": 3.519, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 17.441860465116278, |
|
"grad_norm": 0.8599265813827515, |
|
"learning_rate": 3.8720930232558145e-06, |
|
"loss": 0.0344, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9505266302410068, |
|
"eval_f1": 0.596600566572238, |
|
"eval_loss": 0.26870501041412354, |
|
"eval_precision": 0.5020262216924911, |
|
"eval_recall": 0.7350785340314137, |
|
"eval_runtime": 4.3379, |
|
"eval_samples_per_second": 247.585, |
|
"eval_steps_per_second": 2.075, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9547473813034218, |
|
"eval_f1": 0.619456366237482, |
|
"eval_loss": 0.2665688395500183, |
|
"eval_precision": 0.5248484848484849, |
|
"eval_recall": 0.7556719022687609, |
|
"eval_runtime": 2.5569, |
|
"eval_samples_per_second": 420.037, |
|
"eval_steps_per_second": 3.52, |
|
"step": 1634 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"step": 1634, |
|
"total_flos": 5.459952374908416e+16, |
|
"train_loss": 0.15556327351158078, |
|
"train_runtime": 746.2434, |
|
"train_samples_per_second": 294.676, |
|
"train_steps_per_second": 2.305 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1720, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.459952374908416e+16, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|