{ "best_metric": 2.4360692501068115, "best_model_checkpoint": "miner_id_24/checkpoint-30", "epoch": 0.029977516862353237, "eval_steps": 5, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0009992505620784412, "eval_loss": 3.334719657897949, "eval_runtime": 150.4317, "eval_samples_per_second": 2.805, "eval_steps_per_second": 1.403, "step": 1 }, { "epoch": 0.0029977516862353237, "grad_norm": 3.9588115215301514, "learning_rate": 0.00012, "loss": 9.0256, "step": 3 }, { "epoch": 0.004996252810392206, "eval_loss": 3.162672281265259, "eval_runtime": 163.1181, "eval_samples_per_second": 2.587, "eval_steps_per_second": 1.294, "step": 5 }, { "epoch": 0.005995503372470647, "grad_norm": 3.516874313354492, "learning_rate": 0.0001992114701314478, "loss": 10.0092, "step": 6 }, { "epoch": 0.008993255058705971, "grad_norm": 3.2138047218322754, "learning_rate": 0.00018763066800438636, "loss": 9.5826, "step": 9 }, { "epoch": 0.009992505620784412, "eval_loss": 2.834376811981201, "eval_runtime": 162.8008, "eval_samples_per_second": 2.592, "eval_steps_per_second": 1.296, "step": 10 }, { "epoch": 0.011991006744941295, "grad_norm": 4.357197284698486, "learning_rate": 0.000163742398974869, "loss": 9.6877, "step": 12 }, { "epoch": 0.014988758431176618, "grad_norm": 5.791210174560547, "learning_rate": 0.00013090169943749476, "loss": 10.6052, "step": 15 }, { "epoch": 0.014988758431176618, "eval_loss": 2.6564278602600098, "eval_runtime": 162.1433, "eval_samples_per_second": 2.603, "eval_steps_per_second": 1.301, "step": 15 }, { "epoch": 0.017986510117411942, "grad_norm": 7.677189350128174, "learning_rate": 9.372094804706867e-05, "loss": 10.0151, "step": 18 }, { "epoch": 0.019985011241568824, "eval_loss": 2.5079739093780518, "eval_runtime": 162.1387, "eval_samples_per_second": 2.603, "eval_steps_per_second": 1.301, "step": 20 }, { "epoch": 0.020984261803647264, "grad_norm": 8.353514671325684, "learning_rate": 5.7422070843492734e-05, "loss": 10.3783, "step": 21 }, { "epoch": 0.02398201348988259, "grad_norm": 11.086196899414062, "learning_rate": 2.7103137257858868e-05, "loss": 9.4603, "step": 24 }, { "epoch": 0.02498126405196103, "eval_loss": 2.4464001655578613, "eval_runtime": 162.0333, "eval_samples_per_second": 2.604, "eval_steps_per_second": 1.302, "step": 25 }, { "epoch": 0.02697976517611791, "grad_norm": 6.061897277832031, "learning_rate": 7.022351411174866e-06, "loss": 9.5595, "step": 27 }, { "epoch": 0.029977516862353237, "grad_norm": 5.1886305809021, "learning_rate": 0.0, "loss": 9.5828, "step": 30 }, { "epoch": 0.029977516862353237, "eval_loss": 2.4360692501068115, "eval_runtime": 162.3182, "eval_samples_per_second": 2.6, "eval_steps_per_second": 1.3, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.90434898870272e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }