{ "best_metric": 0.9973708987236023, "best_model_checkpoint": "th_cl_28epochs_lora_pos_neg/checkpoint-96", "epoch": 5.0, "eval_steps": 500, "global_step": 160, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3125, "grad_norm": 11.923697471618652, "learning_rate": 9.375e-05, "loss": 0.2148, "step": 10 }, { "epoch": 0.625, "grad_norm": 27.81431770324707, "learning_rate": 8.75e-05, "loss": 0.3845, "step": 20 }, { "epoch": 0.9375, "grad_norm": 12.166078567504883, "learning_rate": 8.125000000000001e-05, "loss": 0.4669, "step": 30 }, { "epoch": 1.0, "eval_accuracy": 0.5111111111111111, "eval_balanced_accuracy": 0.515, "eval_loss": 1.022578477859497, "eval_runtime": 84.4402, "eval_samples_per_second": 0.533, "eval_steps_per_second": 0.071, "step": 32 }, { "epoch": 1.25, "grad_norm": 23.626405715942383, "learning_rate": 7.500000000000001e-05, "loss": 0.2006, "step": 40 }, { "epoch": 1.5625, "grad_norm": 22.549772262573242, "learning_rate": 6.875e-05, "loss": 0.1906, "step": 50 }, { "epoch": 1.875, "grad_norm": 21.862329483032227, "learning_rate": 6.25e-05, "loss": 0.2575, "step": 60 }, { "epoch": 2.0, "eval_accuracy": 0.5333333333333333, "eval_balanced_accuracy": 0.5394736842105263, "eval_loss": 1.006407618522644, "eval_runtime": 84.7861, "eval_samples_per_second": 0.531, "eval_steps_per_second": 0.071, "step": 64 }, { "epoch": 2.1875, "grad_norm": 8.992438316345215, "learning_rate": 5.6250000000000005e-05, "loss": 0.1647, "step": 70 }, { "epoch": 2.5, "grad_norm": 11.991353988647461, "learning_rate": 5e-05, "loss": 0.157, "step": 80 }, { "epoch": 2.8125, "grad_norm": 5.575663089752197, "learning_rate": 4.375e-05, "loss": 0.0998, "step": 90 }, { "epoch": 3.0, "eval_accuracy": 0.5777777777777777, "eval_balanced_accuracy": 0.5743534482758621, "eval_loss": 0.9973708987236023, "eval_runtime": 84.5235, "eval_samples_per_second": 0.532, "eval_steps_per_second": 0.071, "step": 96 }, { "epoch": 3.125, "grad_norm": 2.0587921142578125, "learning_rate": 3.7500000000000003e-05, "loss": 0.0978, "step": 100 }, { "epoch": 3.4375, "grad_norm": 24.552135467529297, "learning_rate": 3.125e-05, "loss": 0.0775, "step": 110 }, { "epoch": 3.75, "grad_norm": 6.25897741317749, "learning_rate": 2.5e-05, "loss": 0.0339, "step": 120 }, { "epoch": 4.0, "eval_accuracy": 0.5777777777777777, "eval_balanced_accuracy": 0.5740740740740741, "eval_loss": 1.1087063550949097, "eval_runtime": 86.0452, "eval_samples_per_second": 0.523, "eval_steps_per_second": 0.07, "step": 128 }, { "epoch": 4.0625, "grad_norm": 0.7533628940582275, "learning_rate": 1.8750000000000002e-05, "loss": 0.0369, "step": 130 }, { "epoch": 4.375, "grad_norm": 1.4770331382751465, "learning_rate": 1.25e-05, "loss": 0.0142, "step": 140 }, { "epoch": 4.6875, "grad_norm": 4.743549823760986, "learning_rate": 6.25e-06, "loss": 0.0246, "step": 150 }, { "epoch": 5.0, "grad_norm": 9.260613441467285, "learning_rate": 0.0, "loss": 0.048, "step": 160 }, { "epoch": 5.0, "eval_accuracy": 0.5111111111111111, "eval_balanced_accuracy": 0.5118577075098814, "eval_loss": 1.11983060836792, "eval_runtime": 85.4666, "eval_samples_per_second": 0.527, "eval_steps_per_second": 0.07, "step": 160 }, { "epoch": 5.0, "step": 160, "total_flos": 2.696144222158848e+16, "train_loss": 0.15434152334928514, "train_runtime": 7802.4738, "train_samples_per_second": 0.161, "train_steps_per_second": 0.021 } ], "logging_steps": 10, "max_steps": 160, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 
2.696144222158848e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }