robiual-awal's picture
Training in progress, step 200, checkpoint
91e3313 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.0396000396000396,
"eval_steps": 50,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.000198000198000198,
"eval_loss": 1.1046350002288818,
"eval_runtime": 177.7012,
"eval_samples_per_second": 11.97,
"eval_steps_per_second": 5.988,
"step": 1
},
{
"epoch": 0.00198000198000198,
"grad_norm": 1.1289453506469727,
"learning_rate": 0.0002,
"loss": 4.1006,
"step": 10
},
{
"epoch": 0.00396000396000396,
"grad_norm": 1.3300156593322754,
"learning_rate": 0.0002,
"loss": 3.7418,
"step": 20
},
{
"epoch": 0.00594000594000594,
"grad_norm": 1.6589006185531616,
"learning_rate": 0.0002,
"loss": 3.7895,
"step": 30
},
{
"epoch": 0.00792000792000792,
"grad_norm": 1.0909771919250488,
"learning_rate": 0.0002,
"loss": 3.6444,
"step": 40
},
{
"epoch": 0.0099000099000099,
"grad_norm": 1.8193849325180054,
"learning_rate": 0.0002,
"loss": 3.92,
"step": 50
},
{
"epoch": 0.0099000099000099,
"eval_loss": 0.8933662176132202,
"eval_runtime": 177.8394,
"eval_samples_per_second": 11.96,
"eval_steps_per_second": 5.983,
"step": 50
},
{
"epoch": 0.01188001188001188,
"grad_norm": 1.5985904932022095,
"learning_rate": 0.0002,
"loss": 3.7766,
"step": 60
},
{
"epoch": 0.01386001386001386,
"grad_norm": 1.0298365354537964,
"learning_rate": 0.0002,
"loss": 3.9574,
"step": 70
},
{
"epoch": 0.01584001584001584,
"grad_norm": 1.3167791366577148,
"learning_rate": 0.0002,
"loss": 3.5841,
"step": 80
},
{
"epoch": 0.01782001782001782,
"grad_norm": 1.6314549446105957,
"learning_rate": 0.0002,
"loss": 3.5142,
"step": 90
},
{
"epoch": 0.0198000198000198,
"grad_norm": 0.9245526790618896,
"learning_rate": 0.0002,
"loss": 3.4552,
"step": 100
},
{
"epoch": 0.0198000198000198,
"eval_loss": 0.8583095669746399,
"eval_runtime": 177.6926,
"eval_samples_per_second": 11.97,
"eval_steps_per_second": 5.988,
"step": 100
},
{
"epoch": 0.02178002178002178,
"grad_norm": 0.9592312574386597,
"learning_rate": 0.0002,
"loss": 3.1673,
"step": 110
},
{
"epoch": 0.02376002376002376,
"grad_norm": 1.070432186126709,
"learning_rate": 0.0002,
"loss": 3.1173,
"step": 120
},
{
"epoch": 0.02574002574002574,
"grad_norm": 2.247570753097534,
"learning_rate": 0.0002,
"loss": 3.7768,
"step": 130
},
{
"epoch": 0.02772002772002772,
"grad_norm": 1.5730074644088745,
"learning_rate": 0.0002,
"loss": 3.3516,
"step": 140
},
{
"epoch": 0.0297000297000297,
"grad_norm": 1.3724550008773804,
"learning_rate": 0.0002,
"loss": 3.2577,
"step": 150
},
{
"epoch": 0.0297000297000297,
"eval_loss": 0.8379923701286316,
"eval_runtime": 177.6782,
"eval_samples_per_second": 11.971,
"eval_steps_per_second": 5.988,
"step": 150
},
{
"epoch": 0.03168003168003168,
"grad_norm": 1.7976691722869873,
"learning_rate": 0.0002,
"loss": 3.2878,
"step": 160
},
{
"epoch": 0.03366003366003366,
"grad_norm": 1.4198904037475586,
"learning_rate": 0.0002,
"loss": 3.3432,
"step": 170
},
{
"epoch": 0.03564003564003564,
"grad_norm": 1.6277859210968018,
"learning_rate": 0.0002,
"loss": 3.8561,
"step": 180
},
{
"epoch": 0.03762003762003762,
"grad_norm": 1.1398273706436157,
"learning_rate": 0.0002,
"loss": 3.5024,
"step": 190
},
{
"epoch": 0.0396000396000396,
"grad_norm": 1.2557786703109741,
"learning_rate": 0.0002,
"loss": 3.0593,
"step": 200
},
{
"epoch": 0.0396000396000396,
"eval_loss": 0.8267537951469421,
"eval_runtime": 177.798,
"eval_samples_per_second": 11.963,
"eval_steps_per_second": 5.984,
"step": 200
}
],
"logging_steps": 10,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.680730813628416e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}