roberta-base-sst2 / trainer_state.json
Jeremiah Zhou
End of training
1c05f36
{
"best_metric": 0.9357798165137615,
"best_model_checkpoint": "./fine-tune/roberta-base/sst2/checkpoint-4210",
"epoch": 4.0,
"global_step": 16840,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12,
"learning_rate": 3.9588281868566905e-06,
"loss": 0.5645,
"step": 500
},
{
"epoch": 0.24,
"learning_rate": 7.917656373713381e-06,
"loss": 0.3021,
"step": 1000
},
{
"epoch": 0.36,
"learning_rate": 1.1876484560570072e-05,
"loss": 0.2658,
"step": 1500
},
{
"epoch": 0.48,
"learning_rate": 1.5835312747426762e-05,
"loss": 0.2706,
"step": 2000
},
{
"epoch": 0.59,
"learning_rate": 1.9794140934283453e-05,
"loss": 0.2508,
"step": 2500
},
{
"epoch": 0.71,
"learning_rate": 1.9760448779501697e-05,
"loss": 0.2557,
"step": 3000
},
{
"epoch": 0.83,
"learning_rate": 1.95077576186385e-05,
"loss": 0.2578,
"step": 3500
},
{
"epoch": 0.95,
"learning_rate": 1.925506645777531e-05,
"loss": 0.2287,
"step": 4000
},
{
"epoch": 1.0,
"eval_accuracy": 0.9357798165137615,
"eval_loss": 0.23143097758293152,
"eval_runtime": 1.9026,
"eval_samples_per_second": 458.311,
"eval_steps_per_second": 57.289,
"step": 4210
},
{
"epoch": 1.07,
"learning_rate": 1.9002375296912114e-05,
"loss": 0.2229,
"step": 4500
},
{
"epoch": 1.19,
"learning_rate": 1.874968413604892e-05,
"loss": 0.2084,
"step": 5000
},
{
"epoch": 1.31,
"learning_rate": 1.849699297518573e-05,
"loss": 0.2047,
"step": 5500
},
{
"epoch": 1.43,
"learning_rate": 1.8244301814322537e-05,
"loss": 0.189,
"step": 6000
},
{
"epoch": 1.54,
"learning_rate": 1.7991610653459345e-05,
"loss": 0.1914,
"step": 6500
},
{
"epoch": 1.66,
"learning_rate": 1.773891949259615e-05,
"loss": 0.1874,
"step": 7000
},
{
"epoch": 1.78,
"learning_rate": 1.7486228331732958e-05,
"loss": 0.1942,
"step": 7500
},
{
"epoch": 1.9,
"learning_rate": 1.7233537170869766e-05,
"loss": 0.1959,
"step": 8000
},
{
"epoch": 2.0,
"eval_accuracy": 0.926605504587156,
"eval_loss": 0.30272433161735535,
"eval_runtime": 1.8407,
"eval_samples_per_second": 473.726,
"eval_steps_per_second": 59.216,
"step": 8420
},
{
"epoch": 2.02,
"learning_rate": 1.698084601000657e-05,
"loss": 0.1783,
"step": 8500
},
{
"epoch": 2.14,
"learning_rate": 1.672815484914338e-05,
"loss": 0.1453,
"step": 9000
},
{
"epoch": 2.26,
"learning_rate": 1.6475463688280183e-05,
"loss": 0.1481,
"step": 9500
},
{
"epoch": 2.38,
"learning_rate": 1.622277252741699e-05,
"loss": 0.1564,
"step": 10000
},
{
"epoch": 2.49,
"learning_rate": 1.59700813665538e-05,
"loss": 0.1457,
"step": 10500
},
{
"epoch": 2.61,
"learning_rate": 1.5717390205690607e-05,
"loss": 0.15,
"step": 11000
},
{
"epoch": 2.73,
"learning_rate": 1.5464699044827415e-05,
"loss": 0.157,
"step": 11500
},
{
"epoch": 2.85,
"learning_rate": 1.521200788396422e-05,
"loss": 0.1531,
"step": 12000
},
{
"epoch": 2.97,
"learning_rate": 1.4959316723101027e-05,
"loss": 0.1635,
"step": 12500
},
{
"epoch": 3.0,
"eval_accuracy": 0.930045871559633,
"eval_loss": 0.30217334628105164,
"eval_runtime": 1.7605,
"eval_samples_per_second": 495.309,
"eval_steps_per_second": 61.914,
"step": 12630
},
{
"epoch": 3.09,
"learning_rate": 1.4706625562237835e-05,
"loss": 0.1167,
"step": 13000
},
{
"epoch": 3.21,
"learning_rate": 1.4453934401374641e-05,
"loss": 0.1147,
"step": 13500
},
{
"epoch": 3.33,
"learning_rate": 1.420124324051145e-05,
"loss": 0.1123,
"step": 14000
},
{
"epoch": 3.44,
"learning_rate": 1.3948552079648254e-05,
"loss": 0.1173,
"step": 14500
},
{
"epoch": 3.56,
"learning_rate": 1.3695860918785062e-05,
"loss": 0.1105,
"step": 15000
},
{
"epoch": 3.68,
"learning_rate": 1.344316975792187e-05,
"loss": 0.1148,
"step": 15500
},
{
"epoch": 3.8,
"learning_rate": 1.3190478597058676e-05,
"loss": 0.1335,
"step": 16000
},
{
"epoch": 3.92,
"learning_rate": 1.2937787436195484e-05,
"loss": 0.1148,
"step": 16500
},
{
"epoch": 4.0,
"eval_accuracy": 0.9288990825688074,
"eval_loss": 0.31621453166007996,
"eval_runtime": 1.6764,
"eval_samples_per_second": 520.163,
"eval_steps_per_second": 65.02,
"step": 16840
},
{
"epoch": 4.0,
"step": 16840,
"total_flos": 1.772026646744064e+16,
"train_loss": 0.19015042130567683,
"train_runtime": 1734.8381,
"train_samples_per_second": 388.215,
"train_steps_per_second": 24.267
}
],
"max_steps": 42100,
"num_train_epochs": 10,
"total_flos": 1.772026646744064e+16,
"trial_name": null,
"trial_params": null
}