{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.13199142055766375,
  "eval_steps": 13,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002639828411153275,
      "grad_norm": 22.346176147460938,
      "learning_rate": 0.00015,
      "loss": 10.0087,
      "step": 1
    },
    {
      "epoch": 0.002639828411153275,
      "eval_loss": 10.013830184936523,
      "eval_runtime": 229.1847,
      "eval_samples_per_second": 5.572,
      "eval_steps_per_second": 2.788,
      "step": 1
    },
    {
      "epoch": 0.00527965682230655,
      "grad_norm": 22.456666946411133,
      "learning_rate": 0.0003,
      "loss": 10.0246,
      "step": 2
    },
    {
      "epoch": 0.007919485233459825,
      "grad_norm": 19.28063201904297,
      "learning_rate": 0.0002996788384857905,
      "loss": 7.27,
      "step": 3
    },
    {
      "epoch": 0.0105593136446131,
      "grad_norm": 16.660444259643555,
      "learning_rate": 0.00029871672920607153,
      "loss": 2.6075,
      "step": 4
    },
    {
      "epoch": 0.013199142055766375,
      "grad_norm": 6.3326215744018555,
      "learning_rate": 0.00029711779206048454,
      "loss": 0.4204,
      "step": 5
    },
    {
      "epoch": 0.01583897046691965,
      "grad_norm": 0.7938358783721924,
      "learning_rate": 0.0002948888739433602,
      "loss": 0.0209,
      "step": 6
    },
    {
      "epoch": 0.018478798878072925,
      "grad_norm": 0.027728406712412834,
      "learning_rate": 0.0002920395194242658,
      "loss": 0.0006,
      "step": 7
    },
    {
      "epoch": 0.0211186272892262,
      "grad_norm": 0.004830565769225359,
      "learning_rate": 0.000288581929876693,
      "loss": 0.0001,
      "step": 8
    },
    {
      "epoch": 0.023758455700379474,
      "grad_norm": 0.0023810293059796095,
      "learning_rate": 0.00028453091122990323,
      "loss": 0.0,
      "step": 9
    },
    {
      "epoch": 0.02639828411153275,
      "grad_norm": 0.035360973328351974,
      "learning_rate": 0.0002799038105676658,
      "loss": 0.0001,
      "step": 10
    },
    {
      "epoch": 0.029038112522686024,
      "grad_norm": 0.0016157212667167187,
      "learning_rate": 0.0002747204418453818,
      "loss": 0.0,
      "step": 11
    },
    {
      "epoch": 0.0316779409338393,
      "grad_norm": 0.0013053239090368152,
      "learning_rate": 0.00026900300104368524,
      "loss": 0.0,
      "step": 12
    },
    {
      "epoch": 0.034317769344992574,
      "grad_norm": 0.0008068020106293261,
      "learning_rate": 0.0002627759711218466,
      "loss": 0.0,
      "step": 13
    },
    {
      "epoch": 0.034317769344992574,
      "eval_loss": 4.657953468267806e-06,
      "eval_runtime": 229.4053,
      "eval_samples_per_second": 5.567,
      "eval_steps_per_second": 2.785,
      "step": 13
    },
    {
      "epoch": 0.03695759775614585,
      "grad_norm": 0.0009434845414943993,
      "learning_rate": 0.00025606601717798207,
      "loss": 0.0,
      "step": 14
    },
    {
      "epoch": 0.039597426167299124,
      "grad_norm": 0.0005841734819114208,
      "learning_rate": 0.0002489018722650103,
      "loss": 0.0,
      "step": 15
    },
    {
      "epoch": 0.0422372545784524,
      "grad_norm": 0.003180427709594369,
      "learning_rate": 0.00024131421435130807,
      "loss": 0.0,
      "step": 16
    },
    {
      "epoch": 0.044877082989605674,
      "grad_norm": 0.004339354578405619,
      "learning_rate": 0.0002333355349529403,
      "loss": 0.0,
      "step": 17
    },
    {
      "epoch": 0.04751691140075895,
      "grad_norm": 0.006588978227227926,
      "learning_rate": 0.000225,
      "loss": 0.0,
      "step": 18
    },
    {
      "epoch": 0.050156739811912224,
      "grad_norm": 0.0036406053695827723,
      "learning_rate": 0.00021634330353285017,
      "loss": 0.0,
      "step": 19
    },
    {
      "epoch": 0.0527965682230655,
      "grad_norm": 0.00797521322965622,
      "learning_rate": 0.00020740251485476345,
      "loss": 0.0,
      "step": 20
    },
    {
      "epoch": 0.055436396634218774,
      "grad_norm": 0.0004899859195575118,
      "learning_rate": 0.00019821591979547423,
      "loss": 0.0,
      "step": 21
    },
    {
      "epoch": 0.05807622504537205,
      "grad_norm": 0.0003694019978865981,
      "learning_rate": 0.0001888228567653781,
      "loss": 0.0,
      "step": 22
    },
    {
      "epoch": 0.060716053456525323,
      "grad_norm": 0.00016751833027228713,
      "learning_rate": 0.00017926354830241924,
      "loss": 0.0,
      "step": 23
    },
    {
      "epoch": 0.0633558818676786,
      "grad_norm": 0.00011875380005221814,
      "learning_rate": 0.00016957892883300775,
      "loss": 0.0,
      "step": 24
    },
    {
      "epoch": 0.06599571027883187,
      "grad_norm": 5.093684740131721e-05,
      "learning_rate": 0.00015981046938452146,
      "loss": 0.0,
      "step": 25
    },
    {
      "epoch": 0.06863553868998515,
      "grad_norm": 3.9787148125469685e-05,
      "learning_rate": 0.00015,
      "loss": 0.0,
      "step": 26
    },
    {
      "epoch": 0.06863553868998515,
      "eval_loss": 2.810793660046329e-07,
      "eval_runtime": 229.3285,
      "eval_samples_per_second": 5.568,
      "eval_steps_per_second": 2.786,
      "step": 26
    },
    {
      "epoch": 0.07127536710113842,
      "grad_norm": 3.462208769633435e-05,
      "learning_rate": 0.0001401895306154785,
      "loss": 0.0,
      "step": 27
    },
    {
      "epoch": 0.0739151955122917,
      "grad_norm": 2.7388192393118516e-05,
      "learning_rate": 0.00013042107116699228,
      "loss": 0.0,
      "step": 28
    },
    {
      "epoch": 0.07655502392344497,
      "grad_norm": 3.9361602830467746e-05,
      "learning_rate": 0.00012073645169758076,
      "loss": 0.0,
      "step": 29
    },
    {
      "epoch": 0.07919485233459825,
      "grad_norm": 2.9102855478413403e-05,
      "learning_rate": 0.00011117714323462186,
      "loss": 0.0,
      "step": 30
    },
    {
      "epoch": 0.08183468074575152,
      "grad_norm": 2.171610321966e-05,
      "learning_rate": 0.00010178408020452579,
      "loss": 0.0,
      "step": 31
    },
    {
      "epoch": 0.0844745091569048,
      "grad_norm": 2.040547587967012e-05,
      "learning_rate": 9.259748514523653e-05,
      "loss": 0.0,
      "step": 32
    },
    {
      "epoch": 0.08711433756805807,
      "grad_norm": 1.634558975638356e-05,
      "learning_rate": 8.365669646714983e-05,
      "loss": 0.0,
      "step": 33
    },
    {
      "epoch": 0.08975416597921135,
      "grad_norm": 2.2022310076863505e-05,
      "learning_rate": 7.500000000000002e-05,
      "loss": 0.0,
      "step": 34
    },
    {
      "epoch": 0.09239399439036462,
      "grad_norm": 1.6614567357464693e-05,
      "learning_rate": 6.66644650470597e-05,
      "loss": 0.0,
      "step": 35
    },
    {
      "epoch": 0.0950338228015179,
      "grad_norm": 1.5442792573594488e-05,
      "learning_rate": 5.8685785648691894e-05,
      "loss": 0.0,
      "step": 36
    },
    {
      "epoch": 0.09767365121267117,
      "grad_norm": 1.3897730241296813e-05,
      "learning_rate": 5.109812773498967e-05,
      "loss": 0.0,
      "step": 37
    },
    {
      "epoch": 0.10031347962382445,
      "grad_norm": 1.476151192036923e-05,
      "learning_rate": 4.3933982822017876e-05,
      "loss": 0.0,
      "step": 38
    },
    {
      "epoch": 0.10295330803497772,
      "grad_norm": 1.7724401914165355e-05,
      "learning_rate": 3.72240288781534e-05,
      "loss": 0.0,
      "step": 39
    },
    {
      "epoch": 0.10295330803497772,
      "eval_loss": 1.1696874935296364e-07,
      "eval_runtime": 229.2689,
      "eval_samples_per_second": 5.57,
      "eval_steps_per_second": 2.787,
      "step": 39
    },
    {
      "epoch": 0.105593136446131,
      "grad_norm": 1.175746729131788e-05,
      "learning_rate": 3.099699895631474e-05,
      "loss": 0.0,
      "step": 40
    },
    {
      "epoch": 0.10823296485728427,
      "grad_norm": 1.0341846973460633e-05,
      "learning_rate": 2.5279558154618197e-05,
      "loss": 0.0,
      "step": 41
    },
    {
      "epoch": 0.11087279326843755,
      "grad_norm": 1.2265077202755492e-05,
      "learning_rate": 2.009618943233419e-05,
      "loss": 0.0,
      "step": 42
    },
    {
      "epoch": 0.11351262167959082,
      "grad_norm": 1.1335401723044924e-05,
      "learning_rate": 1.546908877009676e-05,
      "loss": 0.0,
      "step": 43
    },
    {
      "epoch": 0.1161524500907441,
      "grad_norm": 1.6442403648397885e-05,
      "learning_rate": 1.1418070123306989e-05,
      "loss": 0.0,
      "step": 44
    },
    {
      "epoch": 0.11879227850189737,
      "grad_norm": 2.3461549062631093e-05,
      "learning_rate": 7.960480575734162e-06,
      "loss": 0.0,
      "step": 45
    },
    {
      "epoch": 0.12143210691305065,
      "grad_norm": 1.1922435987798963e-05,
      "learning_rate": 5.11112605663977e-06,
      "loss": 0.0,
      "step": 46
    },
    {
      "epoch": 0.12407193532420392,
      "grad_norm": 1.07811547422898e-05,
      "learning_rate": 2.882207939515435e-06,
      "loss": 0.0,
      "step": 47
    },
    {
      "epoch": 0.1267117637353572,
      "grad_norm": 4.0074228309094906e-05,
      "learning_rate": 1.2832707939284427e-06,
      "loss": 0.0,
      "step": 48
    },
    {
      "epoch": 0.12935159214651049,
      "grad_norm": 1.022117976390291e-05,
      "learning_rate": 3.211615142094781e-07,
      "loss": 0.0,
      "step": 49
    },
    {
      "epoch": 0.13199142055766375,
      "grad_norm": 9.782426786841825e-06,
      "learning_rate": 0.0,
      "loss": 0.0,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 13,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.935098268483584e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}