{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.3823704586063132, "eval_steps": 500, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11911852293031566, "grad_norm": 0.3590008318424225, "learning_rate": 1.6666666666666667e-05, "loss": 0.7627, "step": 100 }, { "epoch": 0.23823704586063132, "grad_norm": 0.5137978196144104, "learning_rate": 3.3333333333333335e-05, "loss": 0.6067, "step": 200 }, { "epoch": 0.357355568790947, "grad_norm": 0.46571800112724304, "learning_rate": 5e-05, "loss": 0.5131, "step": 300 }, { "epoch": 0.47647409172126265, "grad_norm": 0.48380178213119507, "learning_rate": 4.957432749209755e-05, "loss": 0.4791, "step": 400 }, { "epoch": 0.5955926146515783, "grad_norm": 0.5578784942626953, "learning_rate": 4.8311805735108894e-05, "loss": 0.4766, "step": 500 }, { "epoch": 0.714711137581894, "grad_norm": 0.4985765516757965, "learning_rate": 4.625542839324036e-05, "loss": 0.4803, "step": 600 }, { "epoch": 0.8338296605122096, "grad_norm": 0.5892139673233032, "learning_rate": 4.347522293051648e-05, "loss": 0.4718, "step": 700 }, { "epoch": 0.9529481834425253, "grad_norm": 0.5380077362060547, "learning_rate": 4.0065865909481417e-05, "loss": 0.4626, "step": 800 }, { "epoch": 1.072066706372841, "grad_norm": 0.576122522354126, "learning_rate": 3.6143458894413465e-05, "loss": 0.4564, "step": 900 }, { "epoch": 1.1911852293031566, "grad_norm": 0.543662428855896, "learning_rate": 3.1841574751802076e-05, "loss": 0.4526, "step": 1000 }, { "epoch": 1.3103037522334722, "grad_norm": 0.6222002506256104, "learning_rate": 2.7306708986582553e-05, "loss": 0.4626, "step": 1100 }, { "epoch": 1.429422275163788, "grad_norm": 0.5597625374794006, "learning_rate": 2.2693291013417453e-05, "loss": 0.4581, "step": 1200 }, { "epoch": 1.5485407980941037, "grad_norm": 0.8017860651016235, "learning_rate": 1.815842524819793e-05, "loss": 0.4646, "step": 1300 }, { "epoch": 1.6676593210244193, "grad_norm": 0.696183443069458, "learning_rate": 1.3856541105586545e-05, "loss": 0.4548, "step": 1400 }, { "epoch": 1.7867778439547348, "grad_norm": 0.6249508261680603, "learning_rate": 9.934134090518593e-06, "loss": 0.4668, "step": 1500 }, { "epoch": 1.9058963668850506, "grad_norm": 0.679349422454834, "learning_rate": 6.524777069483526e-06, "loss": 0.4519, "step": 1600 }, { "epoch": 2.0250148898153664, "grad_norm": 0.6183383464813232, "learning_rate": 3.7445716067596503e-06, "loss": 0.4554, "step": 1700 }, { "epoch": 2.144133412745682, "grad_norm": 0.7539538145065308, "learning_rate": 1.6881942648911076e-06, "loss": 0.4568, "step": 1800 }, { "epoch": 2.2632519356759975, "grad_norm": 0.7621870636940002, "learning_rate": 4.256725079024554e-07, "loss": 0.4577, "step": 1900 }, { "epoch": 2.3823704586063132, "grad_norm": 0.45056673884391785, "learning_rate": 0.0, "loss": 0.4445, "step": 2000 } ], "logging_steps": 100, "max_steps": 2000, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.15307218125783e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }