{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0035537865595792317, "eval_steps": 10, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.107573119158464e-05, "eval_loss": 2.738691806793213, "eval_runtime": 395.8619, "eval_samples_per_second": 14.965, "eval_steps_per_second": 7.482, "step": 1 }, { "epoch": 0.0003553786559579232, "grad_norm": 83.14938354492188, "learning_rate": 5e-05, "loss": 11.2519, "step": 5 }, { "epoch": 0.0007107573119158464, "grad_norm": 47.0615234375, "learning_rate": 0.0001, "loss": 6.3236, "step": 10 }, { "epoch": 0.0007107573119158464, "eval_loss": 1.474220633506775, "eval_runtime": 396.755, "eval_samples_per_second": 14.931, "eval_steps_per_second": 7.466, "step": 10 }, { "epoch": 0.0010661359678737696, "grad_norm": 58.601806640625, "learning_rate": 9.619397662556435e-05, "loss": 5.6438, "step": 15 }, { "epoch": 0.0014215146238316927, "grad_norm": 38.64629364013672, "learning_rate": 8.535533905932738e-05, "loss": 4.9843, "step": 20 }, { "epoch": 0.0014215146238316927, "eval_loss": 1.0445603132247925, "eval_runtime": 395.5604, "eval_samples_per_second": 14.976, "eval_steps_per_second": 7.488, "step": 20 }, { "epoch": 0.0017768932797896158, "grad_norm": 37.211456298828125, "learning_rate": 6.91341716182545e-05, "loss": 3.3916, "step": 25 }, { "epoch": 0.002132271935747539, "grad_norm": 32.019588470458984, "learning_rate": 5e-05, "loss": 3.6857, "step": 30 }, { "epoch": 0.002132271935747539, "eval_loss": 0.9380649328231812, "eval_runtime": 396.5294, "eval_samples_per_second": 14.94, "eval_steps_per_second": 7.47, "step": 30 }, { "epoch": 0.002487650591705462, "grad_norm": 14.430593490600586, "learning_rate": 3.086582838174551e-05, "loss": 2.3664, "step": 35 }, { "epoch": 0.0028430292476633854, "grad_norm": 56.26362991333008, "learning_rate": 1.4644660940672627e-05, "loss": 5.3614, "step": 40 }, { "epoch": 0.0028430292476633854, "eval_loss": 0.9288716912269592, "eval_runtime": 397.2443, "eval_samples_per_second": 14.913, "eval_steps_per_second": 7.456, "step": 40 }, { "epoch": 0.0031984079036213083, "grad_norm": 25.66505241394043, "learning_rate": 3.8060233744356633e-06, "loss": 4.2842, "step": 45 }, { "epoch": 0.0035537865595792317, "grad_norm": 42.632633209228516, "learning_rate": 0.0, "loss": 3.7495, "step": 50 }, { "epoch": 0.0035537865595792317, "eval_loss": 0.9020283222198486, "eval_runtime": 395.2916, "eval_samples_per_second": 14.986, "eval_steps_per_second": 7.493, "step": 50 } ], "logging_steps": 5, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 13, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9070232840699904.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }