{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 26004,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00011536686663590217,
      "grad_norm": 0.36804234981536865,
      "learning_rate": 7.689350249903884e-08,
      "loss": 1.1237,
      "step": 1
    },
    {
      "epoch": 0.0005768343331795108,
      "grad_norm": 0.3614605665206909,
      "learning_rate": 3.844675124951942e-07,
      "loss": 1.1698,
      "step": 5
    },
    {
      "epoch": 0.0011536686663590216,
      "grad_norm": 0.4128214120864868,
      "learning_rate": 7.689350249903884e-07,
      "loss": 1.2018,
      "step": 10
    },
    {
      "epoch": 0.0017305029995385325,
      "grad_norm": 0.38955822587013245,
      "learning_rate": 1.1534025374855826e-06,
      "loss": 1.1142,
      "step": 15
    },
    {
      "epoch": 0.0023073373327180432,
      "grad_norm": 0.3124810457229614,
      "learning_rate": 1.5378700499807767e-06,
      "loss": 1.1988,
      "step": 20
    },
    {
      "epoch": 0.002884171665897554,
      "grad_norm": 0.37434640526771545,
      "learning_rate": 1.9223375624759706e-06,
      "loss": 1.1647,
      "step": 25
    },
    {
      "epoch": 0.003461005999077065,
      "grad_norm": 0.3723091781139374,
      "learning_rate": 2.3068050749711653e-06,
      "loss": 1.153,
      "step": 30
    },
    {
      "epoch": 0.0040378403322565756,
      "grad_norm": 0.29967185854911804,
      "learning_rate": 2.691272587466359e-06,
      "loss": 1.1444,
      "step": 35
    },
    {
      "epoch": 0.0046146746654360865,
      "grad_norm": 0.3371390402317047,
      "learning_rate": 3.0757400999615534e-06,
      "loss": 1.1453,
      "step": 40
    },
    {
      "epoch": 0.005191508998615597,
      "grad_norm": 0.3575346767902374,
      "learning_rate": 3.4602076124567477e-06,
      "loss": 1.1349,
      "step": 45
    },
    {
      "epoch": 0.005768343331795108,
      "grad_norm": 0.3574388027191162,
      "learning_rate": 3.844675124951941e-06,
      "loss": 1.11,
      "step": 50
    },
    {
      "epoch": 0.006345177664974619,
      "grad_norm": 0.28124046325683594,
      "learning_rate": 4.229142637447136e-06,
      "loss": 1.0939,
      "step": 55
    },
    {
      "epoch": 0.00692201199815413,
      "grad_norm": 0.348366379737854,
      "learning_rate": 4.6136101499423305e-06,
      "loss": 1.0845,
      "step": 60
    },
    {
      "epoch": 0.007498846331333641,
      "grad_norm": 0.28888341784477234,
      "learning_rate": 4.998077662437525e-06,
      "loss": 1.1139,
      "step": 65
    },
    {
      "epoch": 0.008075680664513151,
      "grad_norm": 0.3280262351036072,
      "learning_rate": 5.382545174932718e-06,
      "loss": 1.124,
      "step": 70
    },
    {
      "epoch": 0.008652514997692663,
      "grad_norm": 0.3112921118736267,
      "learning_rate": 5.7670126874279126e-06,
      "loss": 1.1424,
      "step": 75
    },
    {
      "epoch": 0.009229349330872173,
      "grad_norm": 0.31750813126564026,
      "learning_rate": 6.151480199923107e-06,
      "loss": 1.0663,
      "step": 80
    },
    {
      "epoch": 0.009806183664051685,
      "grad_norm": 0.2857876718044281,
      "learning_rate": 6.535947712418301e-06,
      "loss": 1.0595,
      "step": 85
    },
    {
      "epoch": 0.010383017997231195,
      "grad_norm": 0.2947840094566345,
      "learning_rate": 6.920415224913495e-06,
      "loss": 1.1298,
      "step": 90
    },
    {
      "epoch": 0.010959852330410707,
      "grad_norm": 0.3226536810398102,
      "learning_rate": 7.304882737408689e-06,
      "loss": 1.0671,
      "step": 95
    },
    {
      "epoch": 0.011536686663590217,
      "grad_norm": 0.2899499237537384,
      "learning_rate": 7.689350249903882e-06,
      "loss": 1.0859,
      "step": 100
    },
    {
      "epoch": 0.012113520996769728,
      "grad_norm": 0.2774418294429779,
      "learning_rate": 8.073817762399077e-06,
      "loss": 1.0811,
      "step": 105
    },
    {
      "epoch": 0.012690355329949238,
      "grad_norm": 0.2815534174442291,
      "learning_rate": 8.458285274894273e-06,
      "loss": 1.0238,
      "step": 110
    },
    {
      "epoch": 0.01326718966312875,
      "grad_norm": 0.28051742911338806,
      "learning_rate": 8.842752787389466e-06,
      "loss": 1.0586,
      "step": 115
    },
    {
      "epoch": 0.01384402399630826,
      "grad_norm": 0.263121098279953,
      "learning_rate": 9.227220299884661e-06,
      "loss": 1.026,
      "step": 120
    },
    {
      "epoch": 0.01442085832948777,
      "grad_norm": 0.3776288330554962,
      "learning_rate": 9.611687812379855e-06,
      "loss": 1.0285,
      "step": 125
    },
    {
      "epoch": 0.014997692662667282,
      "grad_norm": 0.2619612514972687,
      "learning_rate": 9.99615532487505e-06,
      "loss": 1.0555,
      "step": 130
    },
    {
      "epoch": 0.015574526995846792,
      "grad_norm": 0.2866584062576294,
      "learning_rate": 1.0380622837370241e-05,
      "loss": 1.029,
      "step": 135
    },
    {
      "epoch": 0.016151361329026302,
      "grad_norm": 0.27494966983795166,
      "learning_rate": 1.0765090349865437e-05,
      "loss": 0.9903,
      "step": 140
    },
    {
      "epoch": 0.016728195662205816,
      "grad_norm": 0.2557440996170044,
      "learning_rate": 1.1149557862360632e-05,
      "loss": 0.9651,
      "step": 145
    },
    {
      "epoch": 0.017305029995385326,
      "grad_norm": 0.27979356050491333,
      "learning_rate": 1.1534025374855825e-05,
      "loss": 0.9943,
      "step": 150
    },
    {
      "epoch": 0.017881864328564836,
      "grad_norm": 0.2777612507343292,
      "learning_rate": 1.191849288735102e-05,
      "loss": 1.0984,
      "step": 155
    },
    {
      "epoch": 0.018458698661744346,
      "grad_norm": 0.28315240144729614,
      "learning_rate": 1.2302960399846214e-05,
      "loss": 1.049,
      "step": 160
    },
    {
      "epoch": 0.01903553299492386,
      "grad_norm": 0.2681189477443695,
      "learning_rate": 1.2687427912341407e-05,
      "loss": 1.0103,
      "step": 165
    },
    {
      "epoch": 0.01961236732810337,
      "grad_norm": 0.2712986171245575,
      "learning_rate": 1.3071895424836602e-05,
      "loss": 1.0019,
      "step": 170
    },
    {
      "epoch": 0.02018920166128288,
      "grad_norm": 0.3040061295032501,
      "learning_rate": 1.3456362937331796e-05,
      "loss": 0.9938,
      "step": 175
    },
    {
      "epoch": 0.02076603599446239,
      "grad_norm": 0.2698034346103668,
      "learning_rate": 1.384083044982699e-05,
      "loss": 1.0235,
      "step": 180
    },
    {
      "epoch": 0.0213428703276419,
      "grad_norm": 0.2864539921283722,
      "learning_rate": 1.4225297962322184e-05,
      "loss": 1.0175,
      "step": 185
    },
    {
      "epoch": 0.021919704660821413,
      "grad_norm": 0.2855101227760315,
      "learning_rate": 1.4609765474817378e-05,
      "loss": 1.0239,
      "step": 190
    },
    {
      "epoch": 0.022496538994000923,
      "grad_norm": 0.30460137128829956,
      "learning_rate": 1.4994232987312573e-05,
      "loss": 0.9947,
      "step": 195
    },
    {
      "epoch": 0.023073373327180433,
      "grad_norm": 0.3101496398448944,
      "learning_rate": 1.5378700499807765e-05,
      "loss": 1.0407,
      "step": 200
    },
    {
      "epoch": 0.023650207660359943,
      "grad_norm": 0.29148516058921814,
      "learning_rate": 1.576316801230296e-05,
      "loss": 1.0206,
      "step": 205
    },
    {
      "epoch": 0.024227041993539457,
      "grad_norm": 0.3594810664653778,
      "learning_rate": 1.6147635524798155e-05,
      "loss": 1.0045,
      "step": 210
    },
    {
      "epoch": 0.024803876326718967,
      "grad_norm": 0.28806623816490173,
      "learning_rate": 1.653210303729335e-05,
      "loss": 1.0107,
      "step": 215
    },
    {
      "epoch": 0.025380710659898477,
      "grad_norm": 0.2913208603858948,
      "learning_rate": 1.6916570549788545e-05,
      "loss": 0.9894,
      "step": 220
    },
    {
      "epoch": 0.025957544993077987,
      "grad_norm": 0.30780768394470215,
      "learning_rate": 1.7301038062283735e-05,
      "loss": 1.0328,
      "step": 225
    },
    {
      "epoch": 0.0265343793262575,
      "grad_norm": 0.3564111292362213,
      "learning_rate": 1.7685505574778932e-05,
      "loss": 1.0326,
      "step": 230
    },
    {
      "epoch": 0.02711121365943701,
      "grad_norm": 0.3441677689552307,
      "learning_rate": 1.8069973087274125e-05,
      "loss": 0.9712,
      "step": 235
    },
    {
      "epoch": 0.02768804799261652,
      "grad_norm": 0.3250250518321991,
      "learning_rate": 1.8454440599769322e-05,
      "loss": 0.9905,
      "step": 240
    },
    {
      "epoch": 0.02826488232579603,
      "grad_norm": 0.30477702617645264,
      "learning_rate": 1.8838908112264512e-05,
      "loss": 0.9891,
      "step": 245
    },
    {
      "epoch": 0.02884171665897554,
      "grad_norm": 0.3071553707122803,
      "learning_rate": 1.922337562475971e-05,
      "loss": 1.0037,
      "step": 250
    },
    {
      "epoch": 0.029418550992155054,
      "grad_norm": 0.304791659116745,
      "learning_rate": 1.9607843137254903e-05,
      "loss": 0.9961,
      "step": 255
    },
    {
      "epoch": 0.029995385325334564,
      "grad_norm": 0.4236905872821808,
      "learning_rate": 1.99923106497501e-05,
      "loss": 0.9813,
      "step": 260
    },
    {
      "epoch": 0.030572219658514074,
      "grad_norm": 0.30435487627983093,
      "learning_rate": 2.0376778162245293e-05,
      "loss": 1.0412,
      "step": 265
    },
    {
      "epoch": 0.031149053991693584,
      "grad_norm": 0.30698785185813904,
      "learning_rate": 2.0761245674740483e-05,
      "loss": 1.0284,
      "step": 270
    },
    {
      "epoch": 0.031725888324873094,
      "grad_norm": 0.289407879114151,
      "learning_rate": 2.114571318723568e-05,
      "loss": 0.9807,
      "step": 275
    },
    {
      "epoch": 0.032302722658052604,
      "grad_norm": 0.3242761492729187,
      "learning_rate": 2.1530180699730873e-05,
      "loss": 1.0088,
      "step": 280
    },
    {
      "epoch": 0.03287955699123212,
      "grad_norm": 0.2820676863193512,
      "learning_rate": 2.191464821222607e-05,
      "loss": 0.9437,
      "step": 285
    },
    {
      "epoch": 0.03345639132441163,
      "grad_norm": 0.30823391675949097,
      "learning_rate": 2.2299115724721263e-05,
      "loss": 0.9973,
      "step": 290
    },
    {
      "epoch": 0.03403322565759114,
      "grad_norm": 0.32123836874961853,
      "learning_rate": 2.2683583237216457e-05,
      "loss": 0.9786,
      "step": 295
    },
    {
      "epoch": 0.03461005999077065,
      "grad_norm": 0.30905139446258545,
      "learning_rate": 2.306805074971165e-05,
      "loss": 1.0316,
      "step": 300
    },
    {
      "epoch": 0.03518689432395016,
      "grad_norm": 0.36113137006759644,
      "learning_rate": 2.3452518262206844e-05,
      "loss": 1.0258,
      "step": 305
    },
    {
      "epoch": 0.03576372865712967,
      "grad_norm": 0.3342955708503723,
      "learning_rate": 2.383698577470204e-05,
      "loss": 1.0542,
      "step": 310
    },
    {
      "epoch": 0.03634056299030918,
      "grad_norm": 0.3617742359638214,
      "learning_rate": 2.422145328719723e-05,
      "loss": 0.9642,
      "step": 315
    },
    {
      "epoch": 0.03691739732348869,
      "grad_norm": 0.3330320417881012,
      "learning_rate": 2.4605920799692427e-05,
      "loss": 0.9997,
      "step": 320
    },
    {
      "epoch": 0.0374942316566682,
      "grad_norm": 0.3109203279018402,
      "learning_rate": 2.499038831218762e-05,
      "loss": 1.0317,
      "step": 325
    },
    {
      "epoch": 0.03807106598984772,
      "grad_norm": 0.3200637400150299,
      "learning_rate": 2.5374855824682814e-05,
      "loss": 1.0115,
      "step": 330
    },
    {
      "epoch": 0.03864790032302723,
      "grad_norm": 0.3082106411457062,
      "learning_rate": 2.575932333717801e-05,
      "loss": 0.9983,
      "step": 335
    },
    {
      "epoch": 0.03922473465620674,
      "grad_norm": 0.31465351581573486,
      "learning_rate": 2.6143790849673204e-05,
      "loss": 0.9985,
      "step": 340
    },
    {
      "epoch": 0.03980156898938625,
      "grad_norm": 0.2957000434398651,
      "learning_rate": 2.6528258362168395e-05,
      "loss": 0.9736,
      "step": 345
    },
    {
      "epoch": 0.04037840332256576,
      "grad_norm": 0.32760462164878845,
      "learning_rate": 2.691272587466359e-05,
      "loss": 0.9602,
      "step": 350
    },
    {
      "epoch": 0.04095523765574527,
      "grad_norm": 0.3450353145599365,
      "learning_rate": 2.7297193387158788e-05,
      "loss": 1.049,
      "step": 355
    },
    {
      "epoch": 0.04153207198892478,
      "grad_norm": 1.0949097871780396,
      "learning_rate": 2.768166089965398e-05,
      "loss": 0.9967,
      "step": 360
    },
    {
      "epoch": 0.04210890632210429,
      "grad_norm": 0.3299037218093872,
      "learning_rate": 2.806612841214917e-05,
      "loss": 1.0066,
      "step": 365
    },
    {
      "epoch": 0.0426857406552838,
      "grad_norm": 0.32893791794776917,
      "learning_rate": 2.845059592464437e-05,
      "loss": 0.9696,
      "step": 370
    },
    {
      "epoch": 0.043262574988463316,
      "grad_norm": 0.3077922463417053,
      "learning_rate": 2.8835063437139565e-05,
      "loss": 1.0444,
      "step": 375
    },
    {
      "epoch": 0.043839409321642826,
      "grad_norm": 0.33225706219673157,
      "learning_rate": 2.9219530949634755e-05,
      "loss": 1.0146,
      "step": 380
    },
    {
      "epoch": 0.044416243654822336,
      "grad_norm": 0.3136041462421417,
      "learning_rate": 2.960399846212995e-05,
      "loss": 0.9937,
      "step": 385
    },
    {
      "epoch": 0.044993077988001846,
      "grad_norm": 0.3218904137611389,
      "learning_rate": 2.9988465974625146e-05,
      "loss": 0.9399,
      "step": 390
    },
    {
      "epoch": 0.045569912321181356,
      "grad_norm": 0.32936355471611023,
      "learning_rate": 3.0372933487120342e-05,
      "loss": 1.0736,
      "step": 395
    },
    {
      "epoch": 0.046146746654360866,
      "grad_norm": 0.29534292221069336,
      "learning_rate": 3.075740099961553e-05,
      "loss": 1.0335,
      "step": 400
    },
    {
      "epoch": 0.046723580987540377,
      "grad_norm": 0.33607929944992065,
      "learning_rate": 3.1141868512110726e-05,
      "loss": 0.9879,
      "step": 405
    },
    {
      "epoch": 0.04730041532071989,
      "grad_norm": 0.31074193120002747,
      "learning_rate": 3.152633602460592e-05,
      "loss": 1.0075,
      "step": 410
    },
    {
      "epoch": 0.0478772496538994,
      "grad_norm": 0.2880527675151825,
      "learning_rate": 3.191080353710111e-05,
      "loss": 1.0603,
      "step": 415
    },
    {
      "epoch": 0.048454083987078914,
      "grad_norm": 0.36315208673477173,
      "learning_rate": 3.229527104959631e-05,
      "loss": 1.0167,
      "step": 420
    },
    {
      "epoch": 0.049030918320258424,
      "grad_norm": 0.3171013295650482,
      "learning_rate": 3.2679738562091506e-05,
      "loss": 0.9625,
      "step": 425
    },
    {
      "epoch": 0.049607752653437934,
      "grad_norm": 0.32077813148498535,
      "learning_rate": 3.30642060745867e-05,
      "loss": 0.9845,
      "step": 430
    },
    {
      "epoch": 0.050184586986617444,
      "grad_norm": 0.3356740176677704,
      "learning_rate": 3.344867358708189e-05,
      "loss": 0.9825,
      "step": 435
    },
    {
      "epoch": 0.050761421319796954,
      "grad_norm": 0.2968508005142212,
      "learning_rate": 3.383314109957709e-05,
      "loss": 0.9812,
      "step": 440
    },
    {
      "epoch": 0.051338255652976464,
      "grad_norm": 0.30198368430137634,
      "learning_rate": 3.421760861207228e-05,
      "loss": 1.034,
      "step": 445
    },
    {
      "epoch": 0.051915089986155974,
      "grad_norm": 0.2889617681503296,
      "learning_rate": 3.460207612456747e-05,
      "loss": 1.0387,
      "step": 450
    },
    {
      "epoch": 0.052491924319335484,
      "grad_norm": 0.2965385615825653,
      "learning_rate": 3.498654363706267e-05,
      "loss": 0.9649,
      "step": 455
    },
    {
      "epoch": 0.053068758652515,
      "grad_norm": 0.2926345765590668,
      "learning_rate": 3.5371011149557864e-05,
      "loss": 0.981,
      "step": 460
    },
    {
      "epoch": 0.05364559298569451,
      "grad_norm": 0.2872851490974426,
      "learning_rate": 3.575547866205306e-05,
      "loss": 0.9766,
      "step": 465
    },
    {
      "epoch": 0.05422242731887402,
      "grad_norm": 0.2824111878871918,
      "learning_rate": 3.613994617454825e-05,
      "loss": 0.9689,
      "step": 470
    },
    {
      "epoch": 0.05479926165205353,
      "grad_norm": 0.29697051644325256,
      "learning_rate": 3.652441368704345e-05,
      "loss": 1.02,
      "step": 475
    },
    {
      "epoch": 0.05537609598523304,
      "grad_norm": 0.31594452261924744,
      "learning_rate": 3.6908881199538644e-05,
      "loss": 0.9976,
      "step": 480
    },
    {
      "epoch": 0.05595293031841255,
      "grad_norm": 0.3125144839286804,
      "learning_rate": 3.7293348712033834e-05,
      "loss": 0.9638,
      "step": 485
    },
    {
      "epoch": 0.05652976465159206,
      "grad_norm": 0.3013385832309723,
      "learning_rate": 3.7677816224529024e-05,
      "loss": 0.9993,
      "step": 490
    },
    {
      "epoch": 0.05710659898477157,
      "grad_norm": 0.2799026370048523,
      "learning_rate": 3.806228373702422e-05,
      "loss": 1.0197,
      "step": 495
    },
    {
      "epoch": 0.05768343331795108,
      "grad_norm": 0.3162376880645752,
      "learning_rate": 3.844675124951942e-05,
      "loss": 0.9985,
      "step": 500
    },
    {
      "epoch": 0.0582602676511306,
      "grad_norm": 0.30490589141845703,
      "learning_rate": 3.883121876201461e-05,
      "loss": 0.9825,
      "step": 505
    },
    {
      "epoch": 0.05883710198431011,
      "grad_norm": 0.2959028482437134,
      "learning_rate": 3.9215686274509805e-05,
      "loss": 1.0566,
      "step": 510
    },
    {
      "epoch": 0.05941393631748962,
      "grad_norm": 0.29472246766090393,
      "learning_rate": 3.9600153787005e-05,
      "loss": 0.9864,
      "step": 515
    },
    {
      "epoch": 0.05999077065066913,
      "grad_norm": 0.28552672266960144,
      "learning_rate": 3.99846212995002e-05,
      "loss": 1.0143,
      "step": 520
    },
    {
      "epoch": 0.06056760498384864,
      "grad_norm": 0.27690058946609497,
      "learning_rate": 4.036908881199539e-05,
      "loss": 0.9875,
      "step": 525
    },
    {
      "epoch": 0.06114443931702815,
      "grad_norm": 0.269163578748703,
      "learning_rate": 4.0753556324490586e-05,
      "loss": 0.9781,
      "step": 530
    },
    {
      "epoch": 0.06172127365020766,
      "grad_norm": 0.27882060408592224,
      "learning_rate": 4.1138023836985776e-05,
      "loss": 1.0087,
      "step": 535
    },
    {
      "epoch": 0.06229810798338717,
      "grad_norm": 0.28865769505500793,
      "learning_rate": 4.1522491349480966e-05,
      "loss": 0.9878,
      "step": 540
    },
    {
      "epoch": 0.06287494231656668,
      "grad_norm": 0.29274407029151917,
      "learning_rate": 4.190695886197616e-05,
      "loss": 0.9748,
      "step": 545
    },
    {
      "epoch": 0.06345177664974619,
      "grad_norm": 0.2818305194377899,
      "learning_rate": 4.229142637447136e-05,
      "loss": 0.9791,
      "step": 550
    },
    {
      "epoch": 0.0640286109829257,
      "grad_norm": 0.29222431778907776,
      "learning_rate": 4.2675893886966556e-05,
      "loss": 0.9484,
      "step": 555
    },
    {
      "epoch": 0.06460544531610521,
      "grad_norm": 0.301176518201828,
      "learning_rate": 4.3060361399461746e-05,
      "loss": 0.9872,
      "step": 560
    },
    {
      "epoch": 0.06518227964928472,
      "grad_norm": 0.2671689987182617,
      "learning_rate": 4.344482891195694e-05,
      "loss": 0.9763,
      "step": 565
    },
    {
      "epoch": 0.06575911398246424,
      "grad_norm": 0.2748938202857971,
      "learning_rate": 4.382929642445214e-05,
      "loss": 1.0321,
      "step": 570
    },
    {
      "epoch": 0.06633594831564375,
      "grad_norm": 0.2703133821487427,
      "learning_rate": 4.421376393694733e-05,
      "loss": 1.0098,
      "step": 575
    },
    {
      "epoch": 0.06691278264882326,
      "grad_norm": 0.4125090539455414,
      "learning_rate": 4.459823144944253e-05,
      "loss": 0.9726,
      "step": 580
    },
    {
      "epoch": 0.06748961698200277,
      "grad_norm": 0.30746424198150635,
      "learning_rate": 4.498269896193772e-05,
      "loss": 0.9914,
      "step": 585
    },
    {
      "epoch": 0.06806645131518228,
      "grad_norm": 0.2738732695579529,
      "learning_rate": 4.5367166474432914e-05,
      "loss": 0.9805,
      "step": 590
    },
    {
      "epoch": 0.0686432856483618,
      "grad_norm": 0.2670000195503235,
      "learning_rate": 4.5751633986928104e-05,
      "loss": 1.0024,
      "step": 595
    },
    {
      "epoch": 0.0692201199815413,
      "grad_norm": 0.27094706892967224,
      "learning_rate": 4.61361014994233e-05,
      "loss": 1.0052,
      "step": 600
    },
    {
      "epoch": 0.06979695431472081,
      "grad_norm": 0.2630774974822998,
      "learning_rate": 4.65205690119185e-05,
      "loss": 1.0215,
      "step": 605
    },
    {
      "epoch": 0.07037378864790032,
      "grad_norm": 0.70416659116745,
      "learning_rate": 4.690503652441369e-05,
      "loss": 1.0201,
      "step": 610
    },
    {
      "epoch": 0.07095062298107983,
      "grad_norm": 0.2626412808895111,
      "learning_rate": 4.7289504036908884e-05,
      "loss": 0.9938,
      "step": 615
    },
    {
      "epoch": 0.07152745731425934,
      "grad_norm": 0.3165227472782135,
      "learning_rate": 4.767397154940408e-05,
      "loss": 1.0007,
      "step": 620
    },
    {
      "epoch": 0.07210429164743885,
      "grad_norm": 0.26682278513908386,
      "learning_rate": 4.805843906189927e-05,
      "loss": 0.9994,
      "step": 625
    },
    {
      "epoch": 0.07268112598061836,
      "grad_norm": 0.26086610555648804,
      "learning_rate": 4.844290657439446e-05,
      "loss": 1.0282,
      "step": 630
    },
    {
      "epoch": 0.07325796031379787,
      "grad_norm": 0.2658156454563141,
      "learning_rate": 4.882737408688966e-05,
      "loss": 0.9934,
      "step": 635
    },
    {
      "epoch": 0.07383479464697738,
      "grad_norm": 0.2602865695953369,
      "learning_rate": 4.9211841599384855e-05,
      "loss": 0.992,
      "step": 640
    },
    {
      "epoch": 0.0744116289801569,
      "grad_norm": 0.2830445170402527,
      "learning_rate": 4.9596309111880045e-05,
      "loss": 0.9853,
      "step": 645
    },
    {
      "epoch": 0.0749884633133364,
      "grad_norm": 0.27104446291923523,
      "learning_rate": 4.998077662437524e-05,
      "loss": 1.0037,
      "step": 650
    },
    {
      "epoch": 0.07556529764651591,
      "grad_norm": 0.28188735246658325,
      "learning_rate": 5.036524413687044e-05,
      "loss": 0.9755,
      "step": 655
    },
    {
      "epoch": 0.07614213197969544,
      "grad_norm": 0.2641187012195587,
      "learning_rate": 5.074971164936563e-05,
      "loss": 0.963,
      "step": 660
    },
    {
      "epoch": 0.07671896631287495,
      "grad_norm": 0.2338995337486267,
      "learning_rate": 5.113417916186083e-05,
      "loss": 0.986,
      "step": 665
    },
    {
      "epoch": 0.07729580064605446,
      "grad_norm": 0.2605430483818054,
      "learning_rate": 5.151864667435602e-05,
      "loss": 0.9924,
      "step": 670
    },
    {
      "epoch": 0.07787263497923397,
      "grad_norm": 0.252963662147522,
      "learning_rate": 5.190311418685121e-05,
      "loss": 0.9906,
      "step": 675
    },
    {
      "epoch": 0.07844946931241348,
      "grad_norm": 0.2503387928009033,
      "learning_rate": 5.228758169934641e-05,
      "loss": 1.0003,
      "step": 680
    },
    {
      "epoch": 0.07902630364559299,
      "grad_norm": 0.26554322242736816,
      "learning_rate": 5.26720492118416e-05,
      "loss": 0.9846,
      "step": 685
    },
    {
      "epoch": 0.0796031379787725,
      "grad_norm": 0.28086698055267334,
      "learning_rate": 5.305651672433679e-05,
      "loss": 1.0031,
      "step": 690
    },
    {
      "epoch": 0.08017997231195201,
      "grad_norm": 0.25612834095954895,
      "learning_rate": 5.344098423683199e-05,
      "loss": 0.9386,
      "step": 695
    },
    {
      "epoch": 0.08075680664513152,
      "grad_norm": 0.2532890737056732,
      "learning_rate": 5.382545174932718e-05,
      "loss": 0.9487,
      "step": 700
    },
    {
      "epoch": 0.08133364097831103,
      "grad_norm": 0.2668962776660919,
      "learning_rate": 5.4209919261822386e-05,
      "loss": 0.9955,
      "step": 705
    },
    {
      "epoch": 0.08191047531149054,
      "grad_norm": 0.2815825939178467,
      "learning_rate": 5.4594386774317576e-05,
      "loss": 0.941,
      "step": 710
    },
    {
      "epoch": 0.08248730964467005,
      "grad_norm": 0.25937941670417786,
      "learning_rate": 5.4978854286812766e-05,
      "loss": 0.9959,
      "step": 715
    },
    {
      "epoch": 0.08306414397784956,
      "grad_norm": 0.26474928855895996,
      "learning_rate": 5.536332179930796e-05,
      "loss": 1.0061,
      "step": 720
    },
    {
      "epoch": 0.08364097831102907,
      "grad_norm": 0.24899785220623016,
      "learning_rate": 5.574778931180315e-05,
      "loss": 0.9811,
      "step": 725
    },
    {
      "epoch": 0.08421781264420858,
      "grad_norm": 0.2696942389011383,
      "learning_rate": 5.613225682429834e-05,
      "loss": 0.9386,
      "step": 730
    },
    {
      "epoch": 0.08479464697738809,
      "grad_norm": 0.25111255049705505,
      "learning_rate": 5.651672433679355e-05,
      "loss": 0.9834,
      "step": 735
    },
    {
      "epoch": 0.0853714813105676,
      "grad_norm": 0.25918304920196533,
      "learning_rate": 5.690119184928874e-05,
      "loss": 0.952,
      "step": 740
    },
    {
      "epoch": 0.08594831564374712,
      "grad_norm": 0.2587113380432129,
      "learning_rate": 5.728565936178393e-05,
      "loss": 0.9776,
      "step": 745
    },
    {
      "epoch": 0.08652514997692663,
      "grad_norm": 0.23401151597499847,
      "learning_rate": 5.767012687427913e-05,
      "loss": 0.9666,
      "step": 750
    },
    {
      "epoch": 0.08710198431010614,
      "grad_norm": 0.23831018805503845,
      "learning_rate": 5.805459438677432e-05,
      "loss": 0.9519,
      "step": 755
    },
    {
      "epoch": 0.08767881864328565,
      "grad_norm": 0.25140026211738586,
      "learning_rate": 5.843906189926951e-05,
      "loss": 0.949,
      "step": 760
    },
    {
      "epoch": 0.08825565297646516,
      "grad_norm": 0.23894332349300385,
      "learning_rate": 5.882352941176471e-05,
      "loss": 1.0335,
      "step": 765
    },
    {
      "epoch": 0.08883248730964467,
      "grad_norm": 0.2703632414340973,
      "learning_rate": 5.92079969242599e-05,
      "loss": 1.0152,
      "step": 770
    },
    {
      "epoch": 0.08940932164282418,
      "grad_norm": 0.2679862380027771,
      "learning_rate": 5.95924644367551e-05,
      "loss": 1.0539,
      "step": 775
    },
    {
      "epoch": 0.08998615597600369,
      "grad_norm": 0.24022874236106873,
      "learning_rate": 5.997693194925029e-05,
      "loss": 0.9764,
      "step": 780
    },
    {
      "epoch": 0.0905629903091832,
      "grad_norm": 0.25160306692123413,
      "learning_rate": 6.036139946174548e-05,
      "loss": 1.0094,
      "step": 785
    },
    {
      "epoch": 0.09113982464236271,
      "grad_norm": 0.25750991702079773,
      "learning_rate": 6.0745866974240685e-05,
      "loss": 1.0163,
      "step": 790
    },
    {
      "epoch": 0.09171665897554222,
      "grad_norm": 0.24511879682540894,
      "learning_rate": 6.113033448673587e-05,
      "loss": 0.9777,
      "step": 795
    },
    {
      "epoch": 0.09229349330872173,
      "grad_norm": 0.23882803320884705,
      "learning_rate": 6.151480199923106e-05,
      "loss": 0.9944,
      "step": 800
    },
    {
      "epoch": 0.09287032764190124,
      "grad_norm": 0.235401913523674,
      "learning_rate": 6.189926951172626e-05,
      "loss": 0.9443,
      "step": 805
    },
    {
      "epoch": 0.09344716197508075,
      "grad_norm": 0.22409269213676453,
      "learning_rate": 6.228373702422145e-05,
      "loss": 0.9946,
      "step": 810
    },
    {
      "epoch": 0.09402399630826026,
      "grad_norm": 0.26367950439453125,
      "learning_rate": 6.266820453671664e-05,
      "loss": 1.0017,
      "step": 815
    },
    {
      "epoch": 0.09460083064143977,
      "grad_norm": 0.24628588557243347,
      "learning_rate": 6.305267204921185e-05,
      "loss": 0.9653,
      "step": 820
    },
    {
      "epoch": 0.09517766497461928,
      "grad_norm": 0.24282719194889069,
      "learning_rate": 6.343713956170704e-05,
      "loss": 0.9741,
      "step": 825
    },
    {
      "epoch": 0.0957544993077988,
      "grad_norm": 0.22998395562171936,
      "learning_rate": 6.382160707420223e-05,
      "loss": 0.9731,
      "step": 830
    },
    {
      "epoch": 0.09633133364097832,
      "grad_norm": 0.23305819928646088,
      "learning_rate": 6.420607458669743e-05,
      "loss": 0.9533,
      "step": 835
    },
    {
      "epoch": 0.09690816797415783,
      "grad_norm": 0.2432815134525299,
      "learning_rate": 6.459054209919262e-05,
      "loss": 1.0331,
      "step": 840
    },
    {
      "epoch": 0.09748500230733734,
      "grad_norm": 0.25727975368499756,
      "learning_rate": 6.497500961168782e-05,
      "loss": 0.9055,
      "step": 845
    },
    {
      "epoch": 0.09806183664051685,
      "grad_norm": 0.2272929549217224,
      "learning_rate": 6.535947712418301e-05,
      "loss": 1.004,
      "step": 850
    },
    {
      "epoch": 0.09863867097369636,
      "grad_norm": 0.24411216378211975,
      "learning_rate": 6.57439446366782e-05,
      "loss": 1.0313,
      "step": 855
    },
    {
      "epoch": 0.09921550530687587,
      "grad_norm": 0.23192369937896729,
      "learning_rate": 6.61284121491734e-05,
      "loss": 0.9813,
      "step": 860
    },
    {
      "epoch": 0.09979233964005538,
      "grad_norm": 0.2519132196903229,
      "learning_rate": 6.65128796616686e-05,
      "loss": 0.9928,
      "step": 865
    },
    {
      "epoch": 0.10036917397323489,
      "grad_norm": 0.2485574334859848,
      "learning_rate": 6.689734717416379e-05,
      "loss": 0.9619,
      "step": 870
    },
    {
      "epoch": 0.1009460083064144,
      "grad_norm": 0.21956461668014526,
      "learning_rate": 6.728181468665899e-05,
      "loss": 0.9608,
      "step": 875
    },
    {
      "epoch": 0.10152284263959391,
      "grad_norm": 0.2283376157283783,
      "learning_rate": 6.766628219915418e-05,
      "loss": 0.9964,
      "step": 880
    },
    {
      "epoch": 0.10209967697277342,
      "grad_norm": 0.25438618659973145,
      "learning_rate": 6.805074971164937e-05,
      "loss": 0.9941,
      "step": 885
    },
    {
      "epoch": 0.10267651130595293,
      "grad_norm": 0.2448102980852127,
      "learning_rate": 6.843521722414456e-05,
      "loss": 0.9948,
      "step": 890
    },
    {
      "epoch": 0.10325334563913244,
      "grad_norm": 0.22878648340702057,
      "learning_rate": 6.881968473663975e-05,
      "loss": 0.941,
      "step": 895
    },
    {
      "epoch": 0.10383017997231195,
      "grad_norm": 0.2290370762348175,
      "learning_rate": 6.920415224913494e-05,
      "loss": 0.9948,
      "step": 900
    },
    {
      "epoch": 0.10440701430549146,
      "grad_norm": 0.2316291630268097,
      "learning_rate": 6.958861976163014e-05,
      "loss": 1.0563,
      "step": 905
    },
    {
      "epoch": 0.10498384863867097,
      "grad_norm": 0.2231028527021408,
      "learning_rate": 6.997308727412533e-05,
      "loss": 0.9321,
      "step": 910
    },
    {
      "epoch": 0.10556068297185048,
      "grad_norm": 0.24470089375972748,
      "learning_rate": 7.035755478662054e-05,
      "loss": 0.988,
      "step": 915
    },
    {
      "epoch": 0.10613751730503,
      "grad_norm": 0.23039510846138,
      "learning_rate": 7.074202229911573e-05,
      "loss": 0.9818,
      "step": 920
    },
    {
      "epoch": 0.10671435163820951,
      "grad_norm": 0.23940788209438324,
      "learning_rate": 7.112648981161092e-05,
      "loss": 0.9494,
      "step": 925
    },
    {
      "epoch": 0.10729118597138902,
      "grad_norm": 0.24120782315731049,
      "learning_rate": 7.151095732410612e-05,
      "loss": 1.0259,
      "step": 930
    },
    {
      "epoch": 0.10786802030456853,
      "grad_norm": 0.24399179220199585,
      "learning_rate": 7.189542483660131e-05,
      "loss": 0.9675,
      "step": 935
    },
    {
      "epoch": 0.10844485463774804,
      "grad_norm": 0.23285731673240662,
      "learning_rate": 7.22798923490965e-05,
      "loss": 0.9579,
      "step": 940
    },
    {
      "epoch": 0.10902168897092755,
      "grad_norm": 0.22285297513008118,
      "learning_rate": 7.26643598615917e-05,
      "loss": 0.9986,
      "step": 945
    },
    {
      "epoch": 0.10959852330410706,
      "grad_norm": 0.24459075927734375,
      "learning_rate": 7.30488273740869e-05,
      "loss": 0.9707,
      "step": 950
    },
    {
      "epoch": 0.11017535763728657,
      "grad_norm": 0.23282331228256226,
      "learning_rate": 7.343329488658209e-05,
      "loss": 0.9682,
      "step": 955
    },
    {
      "epoch": 0.11075219197046608,
      "grad_norm": 0.21960249543190002,
      "learning_rate": 7.381776239907729e-05,
      "loss": 0.9469,
      "step": 960
    },
    {
      "epoch": 0.11132902630364559,
      "grad_norm": 0.2371433526277542,
      "learning_rate": 7.420222991157248e-05,
      "loss": 0.9583,
      "step": 965
    },
    {
      "epoch": 0.1119058606368251,
      "grad_norm": 0.21388335525989532,
      "learning_rate": 7.458669742406767e-05,
      "loss": 0.9888,
      "step": 970
    },
    {
      "epoch": 0.11248269497000461,
      "grad_norm": 0.23984219133853912,
      "learning_rate": 7.497116493656286e-05,
      "loss": 0.9845,
      "step": 975
    },
    {
      "epoch": 0.11305952930318412,
      "grad_norm": 0.2341049760580063,
      "learning_rate": 7.535563244905805e-05,
      "loss": 0.9941,
      "step": 980
    },
    {
      "epoch": 0.11363636363636363,
      "grad_norm": 0.23360998928546906,
      "learning_rate": 7.574009996155325e-05,
      "loss": 1.0182,
      "step": 985
    },
    {
      "epoch": 0.11421319796954314,
      "grad_norm": 0.216692715883255,
      "learning_rate": 7.612456747404844e-05,
      "loss": 0.9683,
      "step": 990
    },
    {
      "epoch": 0.11479003230272265,
      "grad_norm": 0.2707868814468384,
      "learning_rate": 7.650903498654363e-05,
      "loss": 0.9892,
      "step": 995
    },
    {
      "epoch": 0.11536686663590216,
      "grad_norm": 0.2180621176958084,
      "learning_rate": 7.689350249903884e-05,
      "loss": 1.0335,
      "step": 1000
    },
    {
      "epoch": 0.11594370096908169,
      "grad_norm": 0.22857269644737244,
      "learning_rate": 7.727797001153403e-05,
      "loss": 1.003,
      "step": 1005
    },
    {
      "epoch": 0.1165205353022612,
      "grad_norm": 0.2268066108226776,
      "learning_rate": 7.766243752402922e-05,
      "loss": 1.0238,
      "step": 1010
    },
    {
      "epoch": 0.1170973696354407,
      "grad_norm": 0.2323949635028839,
      "learning_rate": 7.804690503652442e-05,
      "loss": 0.9725,
      "step": 1015
    },
    {
      "epoch": 0.11767420396862022,
      "grad_norm": 0.229869082570076,
      "learning_rate": 7.843137254901961e-05,
      "loss": 0.9143,
      "step": 1020
    },
    {
      "epoch": 0.11825103830179973,
      "grad_norm": 0.22364865243434906,
      "learning_rate": 7.88158400615148e-05,
      "loss": 0.9522,
      "step": 1025
    },
    {
      "epoch": 0.11882787263497924,
      "grad_norm": 0.2270103394985199,
      "learning_rate": 7.920030757401e-05,
      "loss": 1.0134,
      "step": 1030
    },
    {
      "epoch": 0.11940470696815875,
      "grad_norm": 0.22710810601711273,
      "learning_rate": 7.95847750865052e-05,
      "loss": 0.9782,
      "step": 1035
    },
    {
      "epoch": 0.11998154130133826,
      "grad_norm": 0.21234725415706635,
      "learning_rate": 7.99692425990004e-05,
      "loss": 0.9913,
      "step": 1040
    },
    {
      "epoch": 0.12055837563451777,
      "grad_norm": 0.22006261348724365,
      "learning_rate": 8.035371011149559e-05,
      "loss": 0.952,
      "step": 1045
    },
    {
      "epoch": 0.12113520996769728,
      "grad_norm": 0.21777155995368958,
      "learning_rate": 8.073817762399078e-05,
      "loss": 1.0183,
      "step": 1050
    },
    {
      "epoch": 0.12171204430087679,
      "grad_norm": 0.24173501133918762,
      "learning_rate": 8.112264513648598e-05,
      "loss": 1.0323,
      "step": 1055
    },
    {
      "epoch": 0.1222888786340563,
      "grad_norm": 0.21458600461483002,
      "learning_rate": 8.150711264898117e-05,
      "loss": 0.9759,
      "step": 1060
    },
    {
      "epoch": 0.12286571296723581,
      "grad_norm": 0.24267593026161194,
      "learning_rate": 8.189158016147636e-05,
      "loss": 0.9292,
      "step": 1065
    },
    {
      "epoch": 0.12344254730041532,
      "grad_norm": 0.23081839084625244,
      "learning_rate": 8.227604767397155e-05,
      "loss": 0.9736,
      "step": 1070
    },
    {
      "epoch": 0.12401938163359483,
      "grad_norm": 0.21484199166297913,
      "learning_rate": 8.266051518646674e-05,
      "loss": 1.005,
      "step": 1075
    },
    {
      "epoch": 0.12459621596677434,
      "grad_norm": 0.22842317819595337,
      "learning_rate": 8.304498269896193e-05,
      "loss": 0.9796,
      "step": 1080
    },
    {
      "epoch": 0.12517305029995385,
      "grad_norm": 0.21950694918632507,
      "learning_rate": 8.342945021145713e-05,
      "loss": 0.9551,
      "step": 1085
    },
    {
      "epoch": 0.12574988463313336,
      "grad_norm": 0.22929993271827698,
      "learning_rate": 8.381391772395232e-05,
      "loss": 1.016,
      "step": 1090
    },
    {
      "epoch": 0.12632671896631287,
      "grad_norm": 0.25066328048706055,
      "learning_rate": 8.419838523644751e-05,
      "loss": 0.9864,
      "step": 1095
    },
    {
      "epoch": 0.12690355329949238,
      "grad_norm": 0.22298945486545563,
      "learning_rate": 8.458285274894272e-05,
      "loss": 1.0026,
      "step": 1100
    },
    {
      "epoch": 0.1274803876326719,
      "grad_norm": 0.22116802632808685,
      "learning_rate": 8.496732026143791e-05,
      "loss": 0.9444,
      "step": 1105
    },
    {
      "epoch": 0.1280572219658514,
      "grad_norm": 0.22058875858783722,
      "learning_rate": 8.535178777393311e-05,
      "loss": 0.9712,
      "step": 1110
    },
    {
      "epoch": 0.1286340562990309,
      "grad_norm": 0.22648410499095917,
      "learning_rate": 8.57362552864283e-05,
      "loss": 0.9756,
      "step": 1115
    },
    {
      "epoch": 0.12921089063221042,
      "grad_norm": 0.21567432582378387,
      "learning_rate": 8.612072279892349e-05,
      "loss": 0.9689,
      "step": 1120
    },
    {
      "epoch": 0.12978772496538993,
      "grad_norm": 0.23186016082763672,
      "learning_rate": 8.65051903114187e-05,
      "loss": 0.9337,
      "step": 1125
    },
    {
      "epoch": 0.13036455929856944,
      "grad_norm": 0.22326567769050598,
      "learning_rate": 8.688965782391389e-05,
      "loss": 1.0075,
      "step": 1130
    },
    {
      "epoch": 0.13094139363174895,
      "grad_norm": 0.22077631950378418,
      "learning_rate": 8.727412533640908e-05,
      "loss": 0.9971,
      "step": 1135
    },
    {
      "epoch": 0.13151822796492849,
      "grad_norm": 0.23452219367027283,
      "learning_rate": 8.765859284890428e-05,
      "loss": 0.9606,
      "step": 1140
    },
    {
      "epoch": 0.132095062298108,
      "grad_norm": 0.22227661311626434,
      "learning_rate": 8.804306036139947e-05,
      "loss": 1.0054,
      "step": 1145
    },
    {
      "epoch": 0.1326718966312875,
      "grad_norm": 0.2087729573249817,
      "learning_rate": 8.842752787389466e-05,
      "loss": 0.9683,
      "step": 1150
    },
    {
      "epoch": 0.13324873096446702,
      "grad_norm": 0.2098720818758011,
      "learning_rate": 8.881199538638986e-05,
      "loss": 0.9824,
      "step": 1155
    },
    {
      "epoch": 0.13382556529764653,
      "grad_norm": 0.2236039638519287,
      "learning_rate": 8.919646289888505e-05,
      "loss": 0.9713,
      "step": 1160
    },
    {
      "epoch": 0.13440239963082604,
      "grad_norm": 0.2002977877855301,
      "learning_rate": 8.958093041138024e-05,
      "loss": 0.985,
      "step": 1165
    },
    {
      "epoch": 0.13497923396400555,
      "grad_norm": 0.2261250764131546,
      "learning_rate": 8.996539792387543e-05,
      "loss": 0.983,
      "step": 1170
    },
    {
      "epoch": 0.13555606829718506,
      "grad_norm": 0.21874304115772247,
      "learning_rate": 9.034986543637062e-05,
      "loss": 0.9592,
      "step": 1175
    },
    {
      "epoch": 0.13613290263036457,
      "grad_norm": 0.20477834343910217,
      "learning_rate": 9.073433294886583e-05,
      "loss": 0.939,
      "step": 1180
    },
    {
      "epoch": 0.13670973696354408,
      "grad_norm": 0.2184436321258545,
      "learning_rate": 9.111880046136102e-05,
      "loss": 1.0011,
      "step": 1185
    },
    {
      "epoch": 0.1372865712967236,
      "grad_norm": 0.2131531685590744,
      "learning_rate": 9.150326797385621e-05,
      "loss": 0.9775,
      "step": 1190
    },
    {
      "epoch": 0.1378634056299031,
      "grad_norm": 0.23714293539524078,
      "learning_rate": 9.188773548635141e-05,
      "loss": 1.0107,
      "step": 1195
    },
    {
      "epoch": 0.1384402399630826,
      "grad_norm": 0.21136564016342163,
      "learning_rate": 9.22722029988466e-05,
      "loss": 1.0216,
      "step": 1200
    },
    {
      "epoch": 0.13901707429626212,
      "grad_norm": 0.20974573493003845,
      "learning_rate": 9.265667051134179e-05,
      "loss": 0.9754,
      "step": 1205
    },
    {
      "epoch": 0.13959390862944163,
      "grad_norm": 0.21065163612365723,
      "learning_rate": 9.3041138023837e-05,
      "loss": 0.9459,
      "step": 1210
    },
    {
      "epoch": 0.14017074296262114,
      "grad_norm": 0.22418978810310364,
      "learning_rate": 9.342560553633218e-05,
      "loss": 0.998,
      "step": 1215
    },
    {
      "epoch": 0.14074757729580065,
      "grad_norm": 0.19713211059570312,
      "learning_rate": 9.381007304882737e-05,
      "loss": 1.0003,
      "step": 1220
    },
    {
      "epoch": 0.14132441162898016,
      "grad_norm": 0.21748420596122742,
      "learning_rate": 9.419454056132258e-05,
      "loss": 0.9521,
      "step": 1225
    },
    {
      "epoch": 0.14190124596215967,
      "grad_norm": 0.23637205362319946,
      "learning_rate": 9.457900807381777e-05,
      "loss": 0.9304,
      "step": 1230
    },
    {
      "epoch": 0.14247808029533918,
      "grad_norm": 0.2121867686510086,
      "learning_rate": 9.496347558631297e-05,
      "loss": 0.964,
      "step": 1235
    },
    {
      "epoch": 0.1430549146285187,
      "grad_norm": 0.22569303214550018,
      "learning_rate": 9.534794309880816e-05,
      "loss": 0.9457,
      "step": 1240
    },
    {
      "epoch": 0.1436317489616982,
      "grad_norm": 0.21256321668624878,
      "learning_rate": 9.573241061130335e-05,
      "loss": 0.9737,
      "step": 1245
    },
    {
      "epoch": 0.1442085832948777,
      "grad_norm": 0.21022173762321472,
      "learning_rate": 9.611687812379854e-05,
      "loss": 0.9996,
      "step": 1250
    },
    {
      "epoch": 0.14478541762805722,
      "grad_norm": 0.20462080836296082,
      "learning_rate": 9.650134563629373e-05,
      "loss": 0.9736,
      "step": 1255
    },
    {
      "epoch": 0.14536225196123673,
      "grad_norm": 0.21609050035476685,
      "learning_rate": 9.688581314878892e-05,
      "loss": 0.9608,
      "step": 1260
    },
    {
      "epoch": 0.14593908629441624,
      "grad_norm": 0.2156708985567093,
      "learning_rate": 9.727028066128413e-05,
      "loss": 0.9268,
      "step": 1265
    },
    {
      "epoch": 0.14651592062759575,
      "grad_norm": 0.21314367651939392,
      "learning_rate": 9.765474817377932e-05,
      "loss": 1.035,
      "step": 1270
    },
    {
      "epoch": 0.14709275496077526,
      "grad_norm": 0.23611678183078766,
      "learning_rate": 9.80392156862745e-05,
      "loss": 0.9169,
      "step": 1275
    },
    {
      "epoch": 0.14766958929395477,
      "grad_norm": 0.20264959335327148,
      "learning_rate": 9.842368319876971e-05,
      "loss": 0.9841,
      "step": 1280
    },
    {
      "epoch": 0.14824642362713428,
      "grad_norm": 0.21544356644153595,
      "learning_rate": 9.88081507112649e-05,
      "loss": 0.9589,
      "step": 1285
    },
    {
      "epoch": 0.1488232579603138,
      "grad_norm": 0.2142428606748581,
      "learning_rate": 9.919261822376009e-05,
      "loss": 0.9514,
      "step": 1290
    },
    {
      "epoch": 0.1494000922934933,
      "grad_norm": 0.22194676101207733,
      "learning_rate": 9.957708573625529e-05,
      "loss": 0.9047,
      "step": 1295
    },
    {
      "epoch": 0.1499769266266728,
      "grad_norm": 0.2201799899339676,
      "learning_rate": 9.996155324875048e-05,
      "loss": 1.0036,
      "step": 1300
    },
    {
      "epoch": 0.15055376095985232,
      "grad_norm": 0.2207772582769394,
      "learning_rate": 0.00010034602076124569,
      "loss": 1.025,
      "step": 1305
    },
    {
      "epoch": 0.15113059529303183,
      "grad_norm": 0.20949603617191315,
      "learning_rate": 0.00010073048827374088,
      "loss": 0.9697,
      "step": 1310
    },
    {
      "epoch": 0.15170742962621137,
      "grad_norm": 0.20334427058696747,
      "learning_rate": 0.00010111495578623607,
      "loss": 0.9643,
      "step": 1315
    },
    {
      "epoch": 0.15228426395939088,
      "grad_norm": 0.20708300173282623,
      "learning_rate": 0.00010149942329873126,
      "loss": 0.9753,
      "step": 1320
    },
    {
      "epoch": 0.15286109829257039,
      "grad_norm": 0.22487087547779083,
      "learning_rate": 0.00010188389081122645,
      "loss": 1.0286,
      "step": 1325
    },
    {
      "epoch": 0.1534379326257499,
      "grad_norm": 0.2103419154882431,
      "learning_rate": 0.00010226835832372166,
      "loss": 0.9356,
      "step": 1330
    },
    {
      "epoch": 0.1540147669589294,
      "grad_norm": 0.22619712352752686,
      "learning_rate": 0.00010265282583621685,
      "loss": 0.9911,
      "step": 1335
    },
    {
      "epoch": 0.15459160129210892,
      "grad_norm": 0.2243778258562088,
      "learning_rate": 0.00010303729334871204,
      "loss": 1.0075,
      "step": 1340
    },
    {
      "epoch": 0.15516843562528843,
      "grad_norm": 0.20728860795497894,
      "learning_rate": 0.00010342176086120723,
      "loss": 0.9891,
      "step": 1345
    },
    {
      "epoch": 0.15574526995846794,
      "grad_norm": 0.21606509387493134,
      "learning_rate": 0.00010380622837370242,
      "loss": 0.9393,
      "step": 1350
    },
    {
      "epoch": 0.15632210429164745,
      "grad_norm": 0.21745528280735016,
      "learning_rate": 0.00010419069588619763,
      "loss": 0.9348,
      "step": 1355
    },
    {
      "epoch": 0.15689893862482696,
      "grad_norm": 0.21311575174331665,
      "learning_rate": 0.00010457516339869282,
      "loss": 0.9448,
      "step": 1360
    },
    {
      "epoch": 0.15747577295800647,
      "grad_norm": 0.2084248661994934,
      "learning_rate": 0.00010495963091118801,
      "loss": 0.9728,
      "step": 1365
    },
    {
      "epoch": 0.15805260729118598,
      "grad_norm": 0.21669849753379822,
      "learning_rate": 0.0001053440984236832,
      "loss": 0.9356,
      "step": 1370
    },
    {
      "epoch": 0.15862944162436549,
      "grad_norm": 0.22640497982501984,
      "learning_rate": 0.00010572856593617839,
      "loss": 0.9547,
      "step": 1375
    },
    {
      "epoch": 0.159206275957545,
      "grad_norm": 0.2224353551864624,
      "learning_rate": 0.00010611303344867358,
      "loss": 1.0064,
      "step": 1380
    },
    {
      "epoch": 0.1597831102907245,
      "grad_norm": 0.22003819048404694,
      "learning_rate": 0.0001064975009611688,
      "loss": 0.9927,
      "step": 1385
    },
    {
      "epoch": 0.16035994462390402,
      "grad_norm": 0.23084236681461334,
      "learning_rate": 0.00010688196847366399,
      "loss": 0.9538,
      "step": 1390
    },
    {
      "epoch": 0.16093677895708353,
      "grad_norm": 0.20851434767246246,
      "learning_rate": 0.00010726643598615918,
      "loss": 0.9822,
      "step": 1395
    },
    {
      "epoch": 0.16151361329026304,
      "grad_norm": 0.21839269995689392,
      "learning_rate": 0.00010765090349865437,
      "loss": 1.0137,
      "step": 1400
    },
    {
      "epoch": 0.16209044762344255,
      "grad_norm": 0.2095949798822403,
      "learning_rate": 0.00010803537101114956,
      "loss": 0.959,
      "step": 1405
    },
    {
      "epoch": 0.16266728195662206,
      "grad_norm": 0.20211191475391388,
      "learning_rate": 0.00010841983852364477,
      "loss": 0.9657,
      "step": 1410
    },
    {
      "epoch": 0.16324411628980157,
      "grad_norm": 0.22096802294254303,
      "learning_rate": 0.00010880430603613996,
      "loss": 0.9896,
      "step": 1415
    },
    {
      "epoch": 0.16382095062298108,
      "grad_norm": 0.30525097250938416,
      "learning_rate": 0.00010918877354863515,
      "loss": 0.956,
      "step": 1420
    },
    {
      "epoch": 0.1643977849561606,
      "grad_norm": 0.22134092450141907,
      "learning_rate": 0.00010957324106113034,
      "loss": 1.0124,
      "step": 1425
    },
    {
      "epoch": 0.1649746192893401,
      "grad_norm": 0.270163357257843,
      "learning_rate": 0.00010995770857362553,
      "loss": 0.9718,
      "step": 1430
    },
    {
      "epoch": 0.1655514536225196,
      "grad_norm": 0.21976105868816376,
      "learning_rate": 0.00011034217608612072,
      "loss": 0.9658,
      "step": 1435
    },
    {
      "epoch": 0.16612828795569912,
      "grad_norm": 0.20475271344184875,
      "learning_rate": 0.00011072664359861593,
      "loss": 1.024,
      "step": 1440
    },
    {
      "epoch": 0.16670512228887863,
      "grad_norm": 0.22362278401851654,
      "learning_rate": 0.00011111111111111112,
      "loss": 1.0096,
      "step": 1445
    },
    {
      "epoch": 0.16728195662205814,
      "grad_norm": 0.2165699005126953,
      "learning_rate": 0.0001114955786236063,
      "loss": 0.9666,
      "step": 1450
    },
    {
      "epoch": 0.16785879095523765,
      "grad_norm": 0.2233060896396637,
      "learning_rate": 0.0001118800461361015,
      "loss": 0.9615,
      "step": 1455
    },
    {
      "epoch": 0.16843562528841716,
      "grad_norm": 0.22626008093357086,
      "learning_rate": 0.00011226451364859669,
      "loss": 0.9821,
      "step": 1460
    },
    {
      "epoch": 0.16901245962159667,
      "grad_norm": 0.21532657742500305,
      "learning_rate": 0.00011264898116109188,
      "loss": 0.9635,
      "step": 1465
    },
    {
      "epoch": 0.16958929395477618,
      "grad_norm": 0.2170170545578003,
      "learning_rate": 0.0001130334486735871,
      "loss": 0.9817,
      "step": 1470
    },
    {
      "epoch": 0.1701661282879557,
      "grad_norm": 0.2258489727973938,
      "learning_rate": 0.00011341791618608228,
      "loss": 0.9803,
      "step": 1475
    },
    {
      "epoch": 0.1707429626211352,
      "grad_norm": 0.2402639538049698,
      "learning_rate": 0.00011380238369857747,
      "loss": 1.0435,
      "step": 1480
    },
    {
      "epoch": 0.1713197969543147,
      "grad_norm": 0.22203443944454193,
      "learning_rate": 0.00011418685121107266,
      "loss": 0.9883,
      "step": 1485
    },
    {
      "epoch": 0.17189663128749424,
      "grad_norm": 0.22157324850559235,
      "learning_rate": 0.00011457131872356785,
      "loss": 0.975,
      "step": 1490
    },
    {
      "epoch": 0.17247346562067375,
      "grad_norm": 0.2377525120973587,
      "learning_rate": 0.00011495578623606307,
      "loss": 0.9843,
      "step": 1495
    },
    {
      "epoch": 0.17305029995385326,
      "grad_norm": 0.29224807024002075,
      "learning_rate": 0.00011534025374855826,
      "loss": 0.974,
      "step": 1500
    },
    {
      "epoch": 0.17362713428703277,
      "grad_norm": 0.21139585971832275,
      "learning_rate": 0.00011572472126105345,
      "loss": 0.9682,
      "step": 1505
    },
    {
      "epoch": 0.17420396862021229,
      "grad_norm": 0.24163000285625458,
      "learning_rate": 0.00011610918877354864,
      "loss": 0.9543,
      "step": 1510
    },
    {
      "epoch": 0.1747808029533918,
      "grad_norm": 0.2104686051607132,
      "learning_rate": 0.00011649365628604383,
      "loss": 0.9524,
      "step": 1515
    },
    {
      "epoch": 0.1753576372865713,
      "grad_norm": 0.22780703008174896,
      "learning_rate": 0.00011687812379853902,
      "loss": 0.969,
      "step": 1520
    },
    {
      "epoch": 0.17593447161975082,
      "grad_norm": 0.22077947854995728,
      "learning_rate": 0.00011726259131103422,
      "loss": 0.9561,
      "step": 1525
    },
    {
      "epoch": 0.17651130595293033,
      "grad_norm": 0.21336746215820312,
      "learning_rate": 0.00011764705882352942,
      "loss": 0.9994,
      "step": 1530
    },
    {
      "epoch": 0.17708814028610984,
      "grad_norm": 0.2229880541563034,
      "learning_rate": 0.0001180315263360246,
      "loss": 0.9907,
      "step": 1535
    },
    {
      "epoch": 0.17766497461928935,
      "grad_norm": 0.21756049990653992,
      "learning_rate": 0.0001184159938485198,
      "loss": 1.006,
      "step": 1540
    },
    {
      "epoch": 0.17824180895246886,
      "grad_norm": 0.2166442722082138,
      "learning_rate": 0.00011880046136101499,
      "loss": 0.9598,
      "step": 1545
    },
    {
      "epoch": 0.17881864328564837,
      "grad_norm": 0.21556849777698517,
      "learning_rate": 0.0001191849288735102,
      "loss": 1.0291,
      "step": 1550
    },
    {
      "epoch": 0.17939547761882788,
      "grad_norm": 0.24204514920711517,
      "learning_rate": 0.00011956939638600539,
      "loss": 0.9589,
      "step": 1555
    },
    {
      "epoch": 0.17997231195200739,
      "grad_norm": 0.21335336565971375,
      "learning_rate": 0.00011995386389850058,
      "loss": 1.0487,
      "step": 1560
    },
    {
      "epoch": 0.1805491462851869,
      "grad_norm": 0.2127862423658371,
      "learning_rate": 0.00012033833141099577,
      "loss": 0.9542,
      "step": 1565
    },
    {
      "epoch": 0.1811259806183664,
      "grad_norm": 0.28124332427978516,
      "learning_rate": 0.00012072279892349096,
      "loss": 0.966,
      "step": 1570
    },
    {
      "epoch": 0.18170281495154592,
      "grad_norm": 0.2275981307029724,
      "learning_rate": 0.00012110726643598615,
      "loss": 0.8945,
      "step": 1575
    },
    {
      "epoch": 0.18227964928472543,
      "grad_norm": 0.20791442692279816,
      "learning_rate": 0.00012149173394848137,
      "loss": 1.0404,
      "step": 1580
    },
    {
      "epoch": 0.18285648361790494,
      "grad_norm": 0.21446120738983154,
      "learning_rate": 0.00012187620146097656,
      "loss": 0.9633,
      "step": 1585
    },
    {
      "epoch": 0.18343331795108445,
      "grad_norm": 0.2326110452413559,
      "learning_rate": 0.00012226066897347174,
      "loss": 1.0091,
      "step": 1590
    },
    {
      "epoch": 0.18401015228426396,
      "grad_norm": 0.2202981859445572,
      "learning_rate": 0.00012264513648596693,
      "loss": 0.9841,
      "step": 1595
    },
    {
      "epoch": 0.18458698661744347,
      "grad_norm": 0.22043539583683014,
      "learning_rate": 0.00012302960399846212,
      "loss": 1.0092,
      "step": 1600
    },
    {
      "epoch": 0.18516382095062298,
      "grad_norm": 0.23187553882598877,
      "learning_rate": 0.00012341407151095733,
      "loss": 0.999,
      "step": 1605
    },
    {
      "epoch": 0.18574065528380249,
      "grad_norm": 0.210253044962883,
      "learning_rate": 0.00012379853902345252,
      "loss": 0.9328,
      "step": 1610
    },
    {
      "epoch": 0.186317489616982,
      "grad_norm": 0.21368882060050964,
      "learning_rate": 0.00012418300653594771,
      "loss": 0.9898,
      "step": 1615
    },
    {
      "epoch": 0.1868943239501615,
      "grad_norm": 0.20804187655448914,
      "learning_rate": 0.0001245674740484429,
      "loss": 0.9704,
      "step": 1620
    },
    {
      "epoch": 0.18747115828334102,
      "grad_norm": 0.2148929387331009,
      "learning_rate": 0.0001249519415609381,
      "loss": 0.9855,
      "step": 1625
    },
    {
      "epoch": 0.18804799261652053,
      "grad_norm": 0.21504488587379456,
      "learning_rate": 0.00012533640907343328,
      "loss": 0.957,
      "step": 1630
    },
    {
      "epoch": 0.18862482694970004,
      "grad_norm": 0.21804533898830414,
      "learning_rate": 0.0001257208765859285,
      "loss": 1.0268,
      "step": 1635
    },
    {
      "epoch": 0.18920166128287955,
      "grad_norm": 0.2117997258901596,
      "learning_rate": 0.0001261053440984237,
      "loss": 1.0142,
      "step": 1640
    },
    {
      "epoch": 0.18977849561605906,
      "grad_norm": 0.2191164642572403,
      "learning_rate": 0.00012648981161091888,
      "loss": 0.9501,
      "step": 1645
    },
    {
      "epoch": 0.19035532994923857,
      "grad_norm": 0.21926717460155487,
      "learning_rate": 0.00012687427912341407,
      "loss": 0.9199,
      "step": 1650
    },
    {
      "epoch": 0.19093216428241808,
      "grad_norm": 0.22619302570819855,
      "learning_rate": 0.00012725874663590926,
      "loss": 0.9813,
      "step": 1655
    },
    {
      "epoch": 0.1915089986155976,
      "grad_norm": 0.2379409521818161,
      "learning_rate": 0.00012764321414840445,
      "loss": 0.9914,
      "step": 1660
    },
    {
      "epoch": 0.19208583294877712,
      "grad_norm": 0.20328597724437714,
      "learning_rate": 0.00012802768166089967,
      "loss": 0.9643,
      "step": 1665
    },
    {
      "epoch": 0.19266266728195663,
      "grad_norm": 0.22967956960201263,
      "learning_rate": 0.00012841214917339486,
      "loss": 0.9425,
      "step": 1670
    },
    {
      "epoch": 0.19323950161513614,
      "grad_norm": 0.22374680638313293,
      "learning_rate": 0.00012879661668589005,
      "loss": 0.9906,
      "step": 1675
    },
    {
      "epoch": 0.19381633594831565,
      "grad_norm": 0.20658764243125916,
      "learning_rate": 0.00012918108419838524,
      "loss": 0.9612,
      "step": 1680
    },
    {
      "epoch": 0.19439317028149516,
      "grad_norm": 0.22407005727291107,
      "learning_rate": 0.00012956555171088043,
      "loss": 1.0238,
      "step": 1685
    },
    {
      "epoch": 0.19497000461467467,
      "grad_norm": 0.2127981185913086,
      "learning_rate": 0.00012995001922337565,
      "loss": 0.9385,
      "step": 1690
    },
    {
      "epoch": 0.19554683894785418,
      "grad_norm": 0.23836196959018707,
      "learning_rate": 0.00013033448673587084,
      "loss": 0.9823,
      "step": 1695
    },
    {
      "epoch": 0.1961236732810337,
      "grad_norm": 0.21363188326358795,
      "learning_rate": 0.00013071895424836603,
      "loss": 0.9546,
      "step": 1700
    },
    {
      "epoch": 0.1967005076142132,
      "grad_norm": 0.22047366201877594,
      "learning_rate": 0.00013110342176086122,
      "loss": 0.9882,
      "step": 1705
    },
    {
      "epoch": 0.19727734194739271,
      "grad_norm": 0.2106313556432724,
      "learning_rate": 0.0001314878892733564,
      "loss": 0.9714,
      "step": 1710
    },
    {
      "epoch": 0.19785417628057222,
      "grad_norm": 0.2216782122850418,
      "learning_rate": 0.0001318723567858516,
      "loss": 0.9775,
      "step": 1715
    },
    {
      "epoch": 0.19843101061375173,
      "grad_norm": 0.2969755232334137,
      "learning_rate": 0.0001322568242983468,
      "loss": 0.9424,
      "step": 1720
    },
    {
      "epoch": 0.19900784494693124,
      "grad_norm": 0.218833327293396,
      "learning_rate": 0.000132641291810842,
      "loss": 0.9197,
      "step": 1725
    },
    {
      "epoch": 0.19958467928011075,
      "grad_norm": 0.2189982384443283,
      "learning_rate": 0.0001330257593233372,
      "loss": 1.0039,
      "step": 1730
    },
    {
      "epoch": 0.20016151361329027,
      "grad_norm": 0.22366592288017273,
      "learning_rate": 0.00013341022683583238,
      "loss": 1.0028,
      "step": 1735
    },
    {
      "epoch": 0.20073834794646978,
      "grad_norm": 0.22287939488887787,
      "learning_rate": 0.00013379469434832757,
      "loss": 0.9895,
      "step": 1740
    },
    {
      "epoch": 0.20131518227964929,
      "grad_norm": 0.22727978229522705,
      "learning_rate": 0.0001341791618608228,
      "loss": 0.99,
      "step": 1745
    },
    {
      "epoch": 0.2018920166128288,
      "grad_norm": 0.2164393663406372,
      "learning_rate": 0.00013456362937331798,
      "loss": 0.9771,
      "step": 1750
    },
    {
      "epoch": 0.2024688509460083,
      "grad_norm": 0.20931628346443176,
      "learning_rate": 0.00013494809688581317,
      "loss": 0.975,
      "step": 1755
    },
    {
      "epoch": 0.20304568527918782,
      "grad_norm": 0.22553858160972595,
      "learning_rate": 0.00013533256439830836,
      "loss": 0.9175,
      "step": 1760
    },
    {
      "epoch": 0.20362251961236733,
      "grad_norm": 0.21317711472511292,
      "learning_rate": 0.00013571703191080355,
      "loss": 0.9971,
      "step": 1765
    },
    {
      "epoch": 0.20419935394554684,
      "grad_norm": 0.21764598786830902,
      "learning_rate": 0.00013610149942329874,
      "loss": 1.0036,
      "step": 1770
    },
    {
      "epoch": 0.20477618827872635,
      "grad_norm": 0.21724803745746613,
      "learning_rate": 0.00013648596693579393,
      "loss": 0.9264,
      "step": 1775
    },
    {
      "epoch": 0.20535302261190586,
      "grad_norm": 0.21628911793231964,
      "learning_rate": 0.00013687043444828912,
      "loss": 0.9324,
      "step": 1780
    },
    {
      "epoch": 0.20592985694508537,
      "grad_norm": 0.23706841468811035,
      "learning_rate": 0.0001372549019607843,
      "loss": 1.0163,
      "step": 1785
    },
    {
      "epoch": 0.20650669127826488,
      "grad_norm": 0.2230122983455658,
      "learning_rate": 0.0001376393694732795,
      "loss": 0.9531,
      "step": 1790
    },
    {
      "epoch": 0.20708352561144439,
      "grad_norm": 0.2194584757089615,
      "learning_rate": 0.0001380238369857747,
      "loss": 0.9271,
      "step": 1795
    },
    {
      "epoch": 0.2076603599446239,
      "grad_norm": 0.22293421626091003,
      "learning_rate": 0.00013840830449826988,
      "loss": 0.983,
      "step": 1800
    },
    {
      "epoch": 0.2082371942778034,
      "grad_norm": 0.23112259805202484,
      "learning_rate": 0.0001387927720107651,
      "loss": 0.9551,
      "step": 1805
    },
    {
      "epoch": 0.20881402861098292,
      "grad_norm": 0.2262236773967743,
      "learning_rate": 0.0001391772395232603,
      "loss": 0.9971,
      "step": 1810
    },
    {
      "epoch": 0.20939086294416243,
      "grad_norm": 0.23392175137996674,
      "learning_rate": 0.00013956170703575548,
      "loss": 0.9363,
      "step": 1815
    },
    {
      "epoch": 0.20996769727734194,
      "grad_norm": 0.2209835797548294,
      "learning_rate": 0.00013994617454825067,
      "loss": 0.9597,
      "step": 1820
    },
    {
      "epoch": 0.21054453161052145,
      "grad_norm": 0.21317939460277557,
      "learning_rate": 0.00014033064206074586,
      "loss": 0.9425,
      "step": 1825
    },
    {
      "epoch": 0.21112136594370096,
      "grad_norm": 0.23041388392448425,
      "learning_rate": 0.00014071510957324108,
      "loss": 0.9675,
      "step": 1830
    },
    {
      "epoch": 0.2116982002768805,
      "grad_norm": 0.2254861295223236,
      "learning_rate": 0.00014109957708573627,
      "loss": 0.994,
      "step": 1835
    },
    {
      "epoch": 0.21227503461006,
      "grad_norm": 0.21605949103832245,
      "learning_rate": 0.00014148404459823146,
      "loss": 0.9866,
      "step": 1840
    },
    {
      "epoch": 0.21285186894323951,
      "grad_norm": 0.22147153317928314,
      "learning_rate": 0.00014186851211072665,
      "loss": 1.0008,
      "step": 1845
    },
    {
      "epoch": 0.21342870327641902,
      "grad_norm": 0.2367514967918396,
      "learning_rate": 0.00014225297962322184,
      "loss": 0.9128,
      "step": 1850
    },
    {
      "epoch": 0.21400553760959853,
      "grad_norm": 0.23112766444683075,
      "learning_rate": 0.00014263744713571703,
      "loss": 1.0061,
      "step": 1855
    },
    {
      "epoch": 0.21458237194277804,
      "grad_norm": 0.21282008290290833,
      "learning_rate": 0.00014302191464821224,
      "loss": 0.9833,
      "step": 1860
    },
    {
      "epoch": 0.21515920627595755,
      "grad_norm": 0.22202390432357788,
      "learning_rate": 0.00014340638216070743,
      "loss": 0.9929,
      "step": 1865
    },
    {
      "epoch": 0.21573604060913706,
      "grad_norm": 0.21571186184883118,
      "learning_rate": 0.00014379084967320262,
      "loss": 0.9666,
      "step": 1870
    },
    {
      "epoch": 0.21631287494231657,
      "grad_norm": 0.24291980266571045,
      "learning_rate": 0.0001441753171856978,
      "loss": 0.9818,
      "step": 1875
    },
    {
      "epoch": 0.21688970927549608,
      "grad_norm": 0.21617773175239563,
      "learning_rate": 0.000144559784698193,
      "loss": 0.9596,
      "step": 1880
    },
    {
      "epoch": 0.2174665436086756,
      "grad_norm": 0.22341042757034302,
      "learning_rate": 0.00014494425221068822,
      "loss": 1.0061,
      "step": 1885
    },
    {
      "epoch": 0.2180433779418551,
      "grad_norm": 0.20930033922195435,
      "learning_rate": 0.0001453287197231834,
      "loss": 0.9514,
      "step": 1890
    },
    {
      "epoch": 0.21862021227503461,
      "grad_norm": 0.26263025403022766,
      "learning_rate": 0.0001457131872356786,
      "loss": 0.9916,
      "step": 1895
    },
    {
      "epoch": 0.21919704660821412,
      "grad_norm": 0.21907363831996918,
      "learning_rate": 0.0001460976547481738,
      "loss": 0.9689,
      "step": 1900
    },
    {
      "epoch": 0.21977388094139363,
      "grad_norm": 0.20985926687717438,
      "learning_rate": 0.00014648212226066898,
      "loss": 0.932,
      "step": 1905
    },
    {
      "epoch": 0.22035071527457314,
      "grad_norm": 0.2179040014743805,
      "learning_rate": 0.00014686658977316417,
      "loss": 0.9683,
      "step": 1910
    },
    {
      "epoch": 0.22092754960775265,
      "grad_norm": 0.22273583710193634,
      "learning_rate": 0.0001472510572856594,
      "loss": 1.0013,
      "step": 1915
    },
    {
      "epoch": 0.22150438394093216,
      "grad_norm": 0.2257058471441269,
      "learning_rate": 0.00014763552479815458,
      "loss": 0.9805,
      "step": 1920
    },
    {
      "epoch": 0.22208121827411167,
      "grad_norm": 0.230184406042099,
      "learning_rate": 0.00014801999231064977,
      "loss": 0.9983,
      "step": 1925
    },
    {
      "epoch": 0.22265805260729118,
      "grad_norm": 0.22738578915596008,
      "learning_rate": 0.00014840445982314496,
      "loss": 0.9552,
      "step": 1930
    },
    {
      "epoch": 0.2232348869404707,
      "grad_norm": 0.2206430733203888,
      "learning_rate": 0.00014878892733564015,
      "loss": 0.9695,
      "step": 1935
    },
    {
      "epoch": 0.2238117212736502,
      "grad_norm": 0.22058050334453583,
      "learning_rate": 0.00014917339484813534,
      "loss": 0.9474,
      "step": 1940
    },
    {
      "epoch": 0.22438855560682971,
      "grad_norm": 0.20870976150035858,
      "learning_rate": 0.00014955786236063053,
      "loss": 0.9606,
      "step": 1945
    },
    {
      "epoch": 0.22496538994000922,
      "grad_norm": 0.21419864892959595,
      "learning_rate": 0.00014994232987312572,
      "loss": 0.9374,
      "step": 1950
    },
    {
      "epoch": 0.22554222427318874,
      "grad_norm": 0.24207691848278046,
      "learning_rate": 0.0001503267973856209,
      "loss": 1.0411,
      "step": 1955
    },
    {
      "epoch": 0.22611905860636825,
      "grad_norm": 0.2237478345632553,
      "learning_rate": 0.0001507112648981161,
      "loss": 0.9959,
      "step": 1960
    },
    {
      "epoch": 0.22669589293954776,
      "grad_norm": 0.23635807633399963,
      "learning_rate": 0.0001510957324106113,
      "loss": 0.9434,
      "step": 1965
    },
    {
      "epoch": 0.22727272727272727,
      "grad_norm": 0.245008185505867,
      "learning_rate": 0.0001514801999231065,
      "loss": 0.963,
      "step": 1970
    },
    {
      "epoch": 0.22784956160590678,
      "grad_norm": 0.2118360847234726,
      "learning_rate": 0.0001518646674356017,
      "loss": 0.9645,
      "step": 1975
    },
    {
      "epoch": 0.22842639593908629,
      "grad_norm": 0.22724303603172302,
      "learning_rate": 0.00015224913494809689,
      "loss": 0.9884,
      "step": 1980
    },
    {
      "epoch": 0.2290032302722658,
      "grad_norm": 0.24408259987831116,
      "learning_rate": 0.00015263360246059208,
      "loss": 1.008,
      "step": 1985
    },
    {
      "epoch": 0.2295800646054453,
      "grad_norm": 0.22420641779899597,
      "learning_rate": 0.00015301806997308727,
      "loss": 0.9636,
      "step": 1990
    },
    {
      "epoch": 0.23015689893862482,
      "grad_norm": 0.20731568336486816,
      "learning_rate": 0.00015340253748558246,
      "loss": 0.965,
      "step": 1995
    },
    {
      "epoch": 0.23073373327180433,
      "grad_norm": 0.2164364755153656,
      "learning_rate": 0.00015378700499807767,
      "loss": 0.9844,
      "step": 2000
    },
    {
      "epoch": 0.23131056760498384,
      "grad_norm": 0.22473865747451782,
      "learning_rate": 0.00015417147251057286,
      "loss": 1.019,
      "step": 2005
    },
    {
      "epoch": 0.23188740193816337,
      "grad_norm": 0.2263941615819931,
      "learning_rate": 0.00015455594002306805,
      "loss": 1.0051,
      "step": 2010
    },
    {
      "epoch": 0.23246423627134288,
      "grad_norm": 0.2233802229166031,
      "learning_rate": 0.00015494040753556324,
      "loss": 1.0199,
      "step": 2015
    },
    {
      "epoch": 0.2330410706045224,
      "grad_norm": 0.22767069935798645,
      "learning_rate": 0.00015532487504805843,
      "loss": 0.9763,
      "step": 2020
    },
    {
      "epoch": 0.2336179049377019,
      "grad_norm": 0.2226293981075287,
      "learning_rate": 0.00015570934256055365,
      "loss": 0.9811,
      "step": 2025
    },
    {
      "epoch": 0.2341947392708814,
      "grad_norm": 0.24052846431732178,
      "learning_rate": 0.00015609381007304884,
      "loss": 1.0216,
      "step": 2030
    },
    {
      "epoch": 0.23477157360406092,
      "grad_norm": 0.2248363345861435,
      "learning_rate": 0.00015647827758554403,
      "loss": 0.9654,
      "step": 2035
    },
    {
      "epoch": 0.23534840793724043,
      "grad_norm": 0.2263503223657608,
      "learning_rate": 0.00015686274509803922,
      "loss": 0.9874,
      "step": 2040
    },
    {
      "epoch": 0.23592524227041994,
      "grad_norm": 0.23036567866802216,
      "learning_rate": 0.0001572472126105344,
      "loss": 0.9362,
      "step": 2045
    },
    {
      "epoch": 0.23650207660359945,
      "grad_norm": 0.2279501110315323,
      "learning_rate": 0.0001576316801230296,
      "loss": 0.9495,
      "step": 2050
    },
    {
      "epoch": 0.23707891093677896,
      "grad_norm": 0.23070128262043,
      "learning_rate": 0.00015801614763552482,
      "loss": 0.975,
      "step": 2055
    },
    {
      "epoch": 0.23765574526995847,
      "grad_norm": 0.22231556475162506,
      "learning_rate": 0.00015840061514802,
      "loss": 1.0113,
      "step": 2060
    },
    {
      "epoch": 0.23823257960313798,
      "grad_norm": 0.2362755835056305,
      "learning_rate": 0.0001587850826605152,
      "loss": 0.977,
      "step": 2065
    },
    {
      "epoch": 0.2388094139363175,
      "grad_norm": 0.21221700310707092,
      "learning_rate": 0.0001591695501730104,
      "loss": 0.8838,
      "step": 2070
    },
    {
      "epoch": 0.239386248269497,
      "grad_norm": 0.23924385011196136,
      "learning_rate": 0.00015955401768550558,
      "loss": 0.9988,
      "step": 2075
    },
    {
      "epoch": 0.23996308260267651,
      "grad_norm": 0.21702693402767181,
      "learning_rate": 0.0001599384851980008,
      "loss": 0.9345,
      "step": 2080
    },
    {
      "epoch": 0.24053991693585602,
      "grad_norm": 0.2238760143518448,
      "learning_rate": 0.00016032295271049598,
      "loss": 0.9751,
      "step": 2085
    },
    {
      "epoch": 0.24111675126903553,
      "grad_norm": 0.2292429357767105,
      "learning_rate": 0.00016070742022299117,
      "loss": 0.9696,
      "step": 2090
    },
    {
      "epoch": 0.24169358560221504,
      "grad_norm": 0.33746978640556335,
      "learning_rate": 0.00016109188773548636,
      "loss": 0.9767,
      "step": 2095
    },
    {
      "epoch": 0.24227041993539455,
      "grad_norm": 0.22750449180603027,
      "learning_rate": 0.00016147635524798155,
      "loss": 0.9648,
      "step": 2100
    },
    {
      "epoch": 0.24284725426857406,
      "grad_norm": 0.2371540665626526,
      "learning_rate": 0.00016186082276047674,
      "loss": 0.9723,
      "step": 2105
    },
    {
      "epoch": 0.24342408860175357,
      "grad_norm": 0.22886401414871216,
      "learning_rate": 0.00016224529027297196,
      "loss": 0.9615,
      "step": 2110
    },
    {
      "epoch": 0.24400092293493308,
      "grad_norm": 0.23935814201831818,
      "learning_rate": 0.00016262975778546715,
      "loss": 0.9645,
      "step": 2115
    },
    {
      "epoch": 0.2445777572681126,
      "grad_norm": 0.23147407174110413,
      "learning_rate": 0.00016301422529796234,
      "loss": 0.9454,
      "step": 2120
    },
    {
      "epoch": 0.2451545916012921,
      "grad_norm": 0.23258179426193237,
      "learning_rate": 0.00016339869281045753,
      "loss": 0.947,
      "step": 2125
    },
    {
      "epoch": 0.24573142593447161,
      "grad_norm": 0.23673345148563385,
      "learning_rate": 0.00016378316032295272,
      "loss": 0.9649,
      "step": 2130
    },
    {
      "epoch": 0.24630826026765112,
      "grad_norm": 0.2366418093442917,
      "learning_rate": 0.0001641676278354479,
      "loss": 0.995,
      "step": 2135
    },
    {
      "epoch": 0.24688509460083063,
      "grad_norm": 0.23460763692855835,
      "learning_rate": 0.0001645520953479431,
      "loss": 1.0027,
      "step": 2140
    },
    {
      "epoch": 0.24746192893401014,
      "grad_norm": 0.2338368147611618,
      "learning_rate": 0.0001649365628604383,
      "loss": 0.9455,
      "step": 2145
    },
    {
      "epoch": 0.24803876326718965,
      "grad_norm": 0.23727886378765106,
      "learning_rate": 0.00016532103037293348,
      "loss": 0.9894,
      "step": 2150
    },
    {
      "epoch": 0.24861559760036916,
      "grad_norm": 0.23833225667476654,
      "learning_rate": 0.00016570549788542867,
      "loss": 1.0365,
      "step": 2155
    },
    {
      "epoch": 0.24919243193354867,
      "grad_norm": 0.21977971494197845,
      "learning_rate": 0.00016608996539792386,
      "loss": 0.9554,
      "step": 2160
    },
    {
      "epoch": 0.24976926626672818,
      "grad_norm": 0.22417233884334564,
      "learning_rate": 0.00016647443291041908,
      "loss": 0.9308,
      "step": 2165
    },
    {
      "epoch": 0.2503461005999077,
      "grad_norm": 0.2396731674671173,
      "learning_rate": 0.00016685890042291427,
      "loss": 0.9998,
      "step": 2170
    },
    {
      "epoch": 0.2509229349330872,
      "grad_norm": 0.25424474477767944,
      "learning_rate": 0.00016724336793540946,
      "loss": 1.0282,
      "step": 2175
    },
    {
      "epoch": 0.2514997692662667,
      "grad_norm": 0.24737310409545898,
      "learning_rate": 0.00016762783544790465,
      "loss": 0.9514,
      "step": 2180
    },
    {
      "epoch": 0.2520766035994462,
      "grad_norm": 0.22595183551311493,
      "learning_rate": 0.00016801230296039984,
      "loss": 0.9732,
      "step": 2185
    },
    {
      "epoch": 0.25265343793262574,
      "grad_norm": 0.22247906029224396,
      "learning_rate": 0.00016839677047289503,
      "loss": 0.9478,
      "step": 2190
    },
    {
      "epoch": 0.25323027226580525,
      "grad_norm": 0.23501867055892944,
      "learning_rate": 0.00016878123798539025,
      "loss": 0.961,
      "step": 2195
    },
    {
      "epoch": 0.25380710659898476,
      "grad_norm": 0.22328057885169983,
      "learning_rate": 0.00016916570549788544,
      "loss": 0.9561,
      "step": 2200
    },
    {
      "epoch": 0.25438394093216427,
      "grad_norm": 0.23454934358596802,
      "learning_rate": 0.00016955017301038063,
      "loss": 0.8989,
      "step": 2205
    },
    {
      "epoch": 0.2549607752653438,
      "grad_norm": 0.2128182202577591,
      "learning_rate": 0.00016993464052287582,
      "loss": 0.9227,
      "step": 2210
    },
    {
      "epoch": 0.2555376095985233,
      "grad_norm": 0.22926746308803558,
      "learning_rate": 0.000170319108035371,
      "loss": 0.9504,
      "step": 2215
    },
    {
      "epoch": 0.2561144439317028,
      "grad_norm": 0.2354438304901123,
      "learning_rate": 0.00017070357554786622,
      "loss": 0.9596,
      "step": 2220
    },
    {
      "epoch": 0.2566912782648823,
      "grad_norm": 0.22310538589954376,
      "learning_rate": 0.00017108804306036141,
      "loss": 0.9727,
      "step": 2225
    },
    {
      "epoch": 0.2572681125980618,
      "grad_norm": 0.24562855064868927,
      "learning_rate": 0.0001714725105728566,
      "loss": 1.0191,
      "step": 2230
    },
    {
      "epoch": 0.2578449469312413,
      "grad_norm": 0.23208841681480408,
      "learning_rate": 0.0001718569780853518,
      "loss": 1.0146,
      "step": 2235
    },
    {
      "epoch": 0.25842178126442084,
      "grad_norm": 0.2338448017835617,
      "learning_rate": 0.00017224144559784698,
      "loss": 0.9586,
      "step": 2240
    },
    {
      "epoch": 0.25899861559760035,
      "grad_norm": 0.22157028317451477,
      "learning_rate": 0.00017262591311034217,
      "loss": 1.0005,
      "step": 2245
    },
    {
      "epoch": 0.25957544993077986,
      "grad_norm": 0.23284341394901276,
      "learning_rate": 0.0001730103806228374,
      "loss": 0.9742,
      "step": 2250
    },
    {
      "epoch": 0.26015228426395937,
      "grad_norm": 0.2183004915714264,
      "learning_rate": 0.00017339484813533258,
      "loss": 0.9649,
      "step": 2255
    },
    {
      "epoch": 0.2607291185971389,
      "grad_norm": 0.2606106996536255,
      "learning_rate": 0.00017377931564782777,
      "loss": 1.0056,
      "step": 2260
    },
    {
      "epoch": 0.2613059529303184,
      "grad_norm": 0.2305658459663391,
      "learning_rate": 0.00017416378316032296,
      "loss": 0.958,
      "step": 2265
    },
    {
      "epoch": 0.2618827872634979,
      "grad_norm": 0.23455555737018585,
      "learning_rate": 0.00017454825067281815,
      "loss": 0.966,
      "step": 2270
    },
    {
      "epoch": 0.26245962159667746,
      "grad_norm": 0.22993336617946625,
      "learning_rate": 0.00017493271818531337,
      "loss": 0.955,
      "step": 2275
    },
    {
      "epoch": 0.26303645592985697,
      "grad_norm": 0.22990167140960693,
      "learning_rate": 0.00017531718569780856,
      "loss": 1.0025,
      "step": 2280
    },
    {
      "epoch": 0.2636132902630365,
      "grad_norm": 0.23351961374282837,
      "learning_rate": 0.00017570165321030375,
      "loss": 1.0094,
      "step": 2285
    },
    {
      "epoch": 0.264190124596216,
      "grad_norm": 0.24218833446502686,
      "learning_rate": 0.00017608612072279894,
      "loss": 0.9458,
      "step": 2290
    },
    {
      "epoch": 0.2647669589293955,
      "grad_norm": 0.23496471345424652,
      "learning_rate": 0.00017647058823529413,
      "loss": 0.999,
      "step": 2295
    },
    {
      "epoch": 0.265343793262575,
      "grad_norm": 0.24932752549648285,
      "learning_rate": 0.00017685505574778932,
      "loss": 1.0319,
      "step": 2300
    },
    {
      "epoch": 0.2659206275957545,
      "grad_norm": 0.2334304302930832,
      "learning_rate": 0.00017723952326028454,
      "loss": 1.0447,
      "step": 2305
    },
    {
      "epoch": 0.26649746192893403,
      "grad_norm": 0.23765085637569427,
      "learning_rate": 0.00017762399077277973,
      "loss": 0.9873,
      "step": 2310
    },
    {
      "epoch": 0.26707429626211354,
      "grad_norm": 0.22014391422271729,
      "learning_rate": 0.00017800845828527492,
      "loss": 0.9984,
      "step": 2315
    },
    {
      "epoch": 0.26765113059529305,
      "grad_norm": 0.23627543449401855,
      "learning_rate": 0.0001783929257977701,
      "loss": 0.9945,
      "step": 2320
    },
    {
      "epoch": 0.26822796492847256,
      "grad_norm": 0.24393457174301147,
      "learning_rate": 0.0001787773933102653,
      "loss": 1.002,
      "step": 2325
    },
    {
      "epoch": 0.26880479926165207,
      "grad_norm": 0.2255178838968277,
      "learning_rate": 0.0001791618608227605,
      "loss": 0.9723,
      "step": 2330
    },
    {
      "epoch": 0.2693816335948316,
      "grad_norm": 0.2558629512786865,
      "learning_rate": 0.00017954632833525568,
      "loss": 1.0223,
      "step": 2335
    },
    {
      "epoch": 0.2699584679280111,
      "grad_norm": 0.23998694121837616,
      "learning_rate": 0.00017993079584775087,
      "loss": 0.9723,
      "step": 2340
    },
    {
      "epoch": 0.2705353022611906,
      "grad_norm": 0.24648216366767883,
      "learning_rate": 0.00018031526336024606,
      "loss": 0.9943,
      "step": 2345
    },
    {
      "epoch": 0.2711121365943701,
      "grad_norm": 0.26392728090286255,
      "learning_rate": 0.00018069973087274125,
      "loss": 0.9565,
      "step": 2350
    },
    {
      "epoch": 0.2716889709275496,
      "grad_norm": 0.24391917884349823,
      "learning_rate": 0.00018108419838523644,
      "loss": 1.0053,
      "step": 2355
    },
    {
      "epoch": 0.27226580526072913,
      "grad_norm": 0.23389026522636414,
      "learning_rate": 0.00018146866589773165,
      "loss": 0.9856,
      "step": 2360
    },
    {
      "epoch": 0.27284263959390864,
      "grad_norm": 0.23027820885181427,
      "learning_rate": 0.00018185313341022684,
      "loss": 0.9421,
      "step": 2365
    },
    {
      "epoch": 0.27341947392708815,
      "grad_norm": 0.21990667283535004,
      "learning_rate": 0.00018223760092272203,
      "loss": 0.9932,
      "step": 2370
    },
    {
      "epoch": 0.27399630826026766,
      "grad_norm": 0.23242932558059692,
      "learning_rate": 0.00018262206843521722,
      "loss": 0.9354,
      "step": 2375
    },
    {
      "epoch": 0.2745731425934472,
      "grad_norm": 0.23268820345401764,
      "learning_rate": 0.00018300653594771241,
      "loss": 0.9453,
      "step": 2380
    },
    {
      "epoch": 0.2751499769266267,
      "grad_norm": 0.2401697039604187,
      "learning_rate": 0.0001833910034602076,
      "loss": 0.9552,
      "step": 2385
    },
    {
      "epoch": 0.2757268112598062,
      "grad_norm": 0.22992229461669922,
      "learning_rate": 0.00018377547097270282,
      "loss": 0.9592,
      "step": 2390
    },
    {
      "epoch": 0.2763036455929857,
      "grad_norm": 0.2419811487197876,
      "learning_rate": 0.000184159938485198,
      "loss": 0.9724,
      "step": 2395
    },
    {
      "epoch": 0.2768804799261652,
      "grad_norm": 0.23714175820350647,
      "learning_rate": 0.0001845444059976932,
      "loss": 0.9591,
      "step": 2400
    },
    {
      "epoch": 0.2774573142593447,
      "grad_norm": 0.23658522963523865,
      "learning_rate": 0.0001849288735101884,
      "loss": 0.9249,
      "step": 2405
    },
    {
      "epoch": 0.27803414859252423,
      "grad_norm": 0.25390779972076416,
      "learning_rate": 0.00018531334102268358,
      "loss": 0.9518,
      "step": 2410
    },
    {
      "epoch": 0.27861098292570374,
      "grad_norm": 0.24043525755405426,
      "learning_rate": 0.0001856978085351788,
      "loss": 0.9804,
      "step": 2415
    },
    {
      "epoch": 0.27918781725888325,
      "grad_norm": 0.24651384353637695,
      "learning_rate": 0.000186082276047674,
      "loss": 0.9305,
      "step": 2420
    },
    {
      "epoch": 0.27976465159206276,
      "grad_norm": 0.2362329661846161,
      "learning_rate": 0.00018646674356016918,
      "loss": 1.0093,
      "step": 2425
    },
    {
      "epoch": 0.2803414859252423,
      "grad_norm": 0.23057129979133606,
      "learning_rate": 0.00018685121107266437,
      "loss": 0.9448,
      "step": 2430
    },
    {
      "epoch": 0.2809183202584218,
      "grad_norm": 0.24105559289455414,
      "learning_rate": 0.00018723567858515956,
      "loss": 0.9549,
      "step": 2435
    },
    {
      "epoch": 0.2814951545916013,
      "grad_norm": 0.2542194128036499,
      "learning_rate": 0.00018762014609765475,
      "loss": 0.9946,
      "step": 2440
    },
    {
      "epoch": 0.2820719889247808,
      "grad_norm": 0.25193148851394653,
      "learning_rate": 0.00018800461361014997,
      "loss": 0.9697,
      "step": 2445
    },
    {
      "epoch": 0.2826488232579603,
      "grad_norm": 0.25175783038139343,
      "learning_rate": 0.00018838908112264516,
      "loss": 0.947,
      "step": 2450
    },
    {
      "epoch": 0.2832256575911398,
      "grad_norm": 0.24037009477615356,
      "learning_rate": 0.00018877354863514035,
      "loss": 0.9593,
      "step": 2455
    },
    {
      "epoch": 0.28380249192431933,
      "grad_norm": 0.2611483633518219,
      "learning_rate": 0.00018915801614763554,
      "loss": 0.9462,
      "step": 2460
    },
    {
      "epoch": 0.28437932625749884,
      "grad_norm": 0.23972219228744507,
      "learning_rate": 0.00018954248366013073,
      "loss": 0.9776,
      "step": 2465
    },
    {
      "epoch": 0.28495616059067835,
      "grad_norm": 0.24529783427715302,
      "learning_rate": 0.00018992695117262594,
      "loss": 0.9445,
      "step": 2470
    },
    {
      "epoch": 0.28553299492385786,
      "grad_norm": 0.2517401874065399,
      "learning_rate": 0.00019031141868512113,
      "loss": 0.9944,
      "step": 2475
    },
    {
      "epoch": 0.2861098292570374,
      "grad_norm": 0.25316643714904785,
      "learning_rate": 0.00019069588619761632,
      "loss": 1.0143,
      "step": 2480
    },
    {
      "epoch": 0.2866866635902169,
      "grad_norm": 0.23407518863677979,
      "learning_rate": 0.00019108035371011151,
      "loss": 1.0202,
      "step": 2485
    },
    {
      "epoch": 0.2872634979233964,
      "grad_norm": 0.251371830701828,
      "learning_rate": 0.0001914648212226067,
      "loss": 1.0087,
      "step": 2490
    },
    {
      "epoch": 0.2878403322565759,
      "grad_norm": 0.2522146701812744,
      "learning_rate": 0.0001918492887351019,
      "loss": 0.9761,
      "step": 2495
    },
    {
      "epoch": 0.2884171665897554,
      "grad_norm": 0.24245892465114594,
      "learning_rate": 0.00019223375624759708,
      "loss": 0.9882,
      "step": 2500
    },
    {
      "epoch": 0.2889940009229349,
      "grad_norm": 0.23890967667102814,
      "learning_rate": 0.00019261822376009227,
      "loss": 0.9504,
      "step": 2505
    },
    {
      "epoch": 0.28957083525611443,
      "grad_norm": 0.2405172884464264,
      "learning_rate": 0.00019300269127258746,
      "loss": 1.014,
      "step": 2510
    },
    {
      "epoch": 0.29014766958929394,
      "grad_norm": 0.2515077590942383,
      "learning_rate": 0.00019338715878508265,
      "loss": 0.9936,
      "step": 2515
    },
    {
      "epoch": 0.29072450392247345,
      "grad_norm": 0.23620381951332092,
      "learning_rate": 0.00019377162629757784,
      "loss": 0.9828,
      "step": 2520
    },
    {
      "epoch": 0.29130133825565296,
      "grad_norm": 0.25077781081199646,
      "learning_rate": 0.00019415609381007303,
      "loss": 0.9711,
      "step": 2525
    },
    {
      "epoch": 0.2918781725888325,
      "grad_norm": 0.23658686876296997,
      "learning_rate": 0.00019454056132256825,
      "loss": 1.0081,
      "step": 2530
    },
    {
      "epoch": 0.292455006922012,
      "grad_norm": 0.24098335206508636,
      "learning_rate": 0.00019492502883506344,
      "loss": 0.9102,
      "step": 2535
    },
    {
      "epoch": 0.2930318412551915,
      "grad_norm": 0.2481927126646042,
      "learning_rate": 0.00019530949634755863,
      "loss": 0.9652,
      "step": 2540
    },
    {
      "epoch": 0.293608675588371,
      "grad_norm": 0.23471227288246155,
      "learning_rate": 0.00019569396386005382,
      "loss": 0.9101,
      "step": 2545
    },
    {
      "epoch": 0.2941855099215505,
      "grad_norm": 0.24416758120059967,
      "learning_rate": 0.000196078431372549,
      "loss": 0.975,
      "step": 2550
    },
    {
      "epoch": 0.29476234425473,
      "grad_norm": 0.2367262840270996,
      "learning_rate": 0.00019646289888504423,
      "loss": 0.9632,
      "step": 2555
    },
    {
      "epoch": 0.29533917858790953,
      "grad_norm": 0.24644485116004944,
      "learning_rate": 0.00019684736639753942,
      "loss": 0.9472,
      "step": 2560
    },
    {
      "epoch": 0.29591601292108904,
      "grad_norm": 0.26747334003448486,
      "learning_rate": 0.0001972318339100346,
      "loss": 0.93,
      "step": 2565
    },
    {
      "epoch": 0.29649284725426855,
      "grad_norm": 0.24286748468875885,
      "learning_rate": 0.0001976163014225298,
      "loss": 0.9563,
      "step": 2570
    },
    {
      "epoch": 0.29706968158744806,
      "grad_norm": 0.24571438133716583,
      "learning_rate": 0.000198000768935025,
      "loss": 0.9762,
      "step": 2575
    },
    {
      "epoch": 0.2976465159206276,
      "grad_norm": 0.2596196234226227,
      "learning_rate": 0.00019838523644752018,
      "loss": 1.0187,
      "step": 2580
    },
    {
      "epoch": 0.2982233502538071,
      "grad_norm": 0.2407630831003189,
      "learning_rate": 0.0001987697039600154,
      "loss": 0.9882,
      "step": 2585
    },
    {
      "epoch": 0.2988001845869866,
      "grad_norm": 0.24728403985500336,
      "learning_rate": 0.00019915417147251059,
      "loss": 0.9734,
      "step": 2590
    },
    {
      "epoch": 0.2993770189201661,
      "grad_norm": 0.2402627021074295,
      "learning_rate": 0.00019953863898500578,
      "loss": 1.0186,
      "step": 2595
    },
    {
      "epoch": 0.2999538532533456,
      "grad_norm": 0.24274969100952148,
      "learning_rate": 0.00019992310649750097,
      "loss": 0.9835,
      "step": 2600
    },
    {
      "epoch": 0.3005306875865251,
      "grad_norm": 0.26291441917419434,
      "learning_rate": 0.00019999998558393748,
      "loss": 0.997,
      "step": 2605
    },
    {
      "epoch": 0.30110752191970463,
      "grad_norm": 0.2553878724575043,
      "learning_rate": 0.0001999999270186907,
      "loss": 1.0249,
      "step": 2610
    },
    {
      "epoch": 0.30168435625288414,
      "grad_norm": 0.24096915125846863,
      "learning_rate": 0.00019999982340328205,
      "loss": 1.0262,
      "step": 2615
    },
    {
      "epoch": 0.30226119058606365,
      "grad_norm": 0.2416466474533081,
      "learning_rate": 0.0001999996747377582,
      "loss": 1.0066,
      "step": 2620
    },
    {
      "epoch": 0.3028380249192432,
      "grad_norm": 0.24086306989192963,
      "learning_rate": 0.0001999994810221862,
      "loss": 0.9956,
      "step": 2625
    },
    {
      "epoch": 0.30341485925242273,
      "grad_norm": 0.27510523796081543,
      "learning_rate": 0.00019999924225665326,
      "loss": 1.0013,
      "step": 2630
    },
    {
      "epoch": 0.30399169358560224,
      "grad_norm": 0.24594350159168243,
      "learning_rate": 0.00019999895844126695,
      "loss": 0.9678,
      "step": 2635
    },
    {
      "epoch": 0.30456852791878175,
      "grad_norm": 0.2500142753124237,
      "learning_rate": 0.00019999862957615513,
      "loss": 1.0076,
      "step": 2640
    },
    {
      "epoch": 0.30514536225196126,
      "grad_norm": 0.2603604197502136,
      "learning_rate": 0.000199998255661466,
      "loss": 1.0059,
      "step": 2645
    },
    {
      "epoch": 0.30572219658514077,
      "grad_norm": 0.24498620629310608,
      "learning_rate": 0.00019999783669736795,
      "loss": 0.9441,
      "step": 2650
    },
    {
      "epoch": 0.3062990309183203,
      "grad_norm": 0.2519979774951935,
      "learning_rate": 0.00019999737268404973,
      "loss": 0.9315,
      "step": 2655
    },
    {
      "epoch": 0.3068758652514998,
      "grad_norm": 0.24573497474193573,
      "learning_rate": 0.00019999686362172044,
      "loss": 0.9806,
      "step": 2660
    },
    {
      "epoch": 0.3074526995846793,
      "grad_norm": 0.24200966954231262,
      "learning_rate": 0.00019999630951060934,
      "loss": 1.0077,
      "step": 2665
    },
    {
      "epoch": 0.3080295339178588,
      "grad_norm": 0.26442426443099976,
      "learning_rate": 0.00019999571035096608,
      "loss": 0.975,
      "step": 2670
    },
    {
      "epoch": 0.3086063682510383,
      "grad_norm": 0.2420646846294403,
      "learning_rate": 0.0001999950661430606,
      "loss": 0.9541,
      "step": 2675
    },
    {
      "epoch": 0.30918320258421783,
      "grad_norm": 0.2571803331375122,
      "learning_rate": 0.00019999437688718313,
      "loss": 1.0074,
      "step": 2680
    },
    {
      "epoch": 0.30976003691739734,
      "grad_norm": 0.2612910568714142,
      "learning_rate": 0.00019999364258364413,
      "loss": 1.0123,
      "step": 2685
    },
    {
      "epoch": 0.31033687125057685,
      "grad_norm": 0.2611737549304962,
      "learning_rate": 0.00019999286323277445,
      "loss": 1.0179,
      "step": 2690
    },
    {
      "epoch": 0.31091370558375636,
      "grad_norm": 0.24607665836811066,
      "learning_rate": 0.00019999203883492515,
      "loss": 0.9675,
      "step": 2695
    },
    {
      "epoch": 0.31149053991693587,
      "grad_norm": 0.24605637788772583,
      "learning_rate": 0.00019999116939046764,
      "loss": 0.9357,
      "step": 2700
    },
    {
      "epoch": 0.3120673742501154,
      "grad_norm": 0.24667702615261078,
      "learning_rate": 0.00019999025489979367,
      "loss": 0.9726,
      "step": 2705
    },
    {
      "epoch": 0.3126442085832949,
      "grad_norm": 0.24471449851989746,
      "learning_rate": 0.0001999892953633151,
      "loss": 0.9708,
      "step": 2710
    },
    {
      "epoch": 0.3132210429164744,
      "grad_norm": 0.2780803442001343,
      "learning_rate": 0.0001999882907814643,
      "loss": 0.9675,
      "step": 2715
    },
    {
      "epoch": 0.3137978772496539,
      "grad_norm": 0.264237642288208,
      "learning_rate": 0.00019998724115469378,
      "loss": 1.0132,
      "step": 2720
    },
    {
      "epoch": 0.3143747115828334,
      "grad_norm": 0.2395864874124527,
      "learning_rate": 0.00019998614648347642,
      "loss": 1.0061,
      "step": 2725
    },
    {
      "epoch": 0.31495154591601293,
      "grad_norm": 0.2584296762943268,
      "learning_rate": 0.0001999850067683054,
      "loss": 0.9822,
      "step": 2730
    },
    {
      "epoch": 0.31552838024919244,
      "grad_norm": 0.24742144346237183,
      "learning_rate": 0.0001999838220096941,
      "loss": 0.9794,
      "step": 2735
    },
    {
      "epoch": 0.31610521458237195,
      "grad_norm": 41.201873779296875,
      "learning_rate": 0.0001999825922081763,
      "loss": 1.0792,
      "step": 2740
    },
    {
      "epoch": 0.31668204891555146,
      "grad_norm": 0.2647322714328766,
      "learning_rate": 0.00019998131736430604,
      "loss": 0.9826,
      "step": 2745
    },
    {
      "epoch": 0.31725888324873097,
      "grad_norm": 0.3127318322658539,
      "learning_rate": 0.0001999799974786576,
      "loss": 1.0236,
      "step": 2750
    },
    {
      "epoch": 0.3178357175819105,
      "grad_norm": 0.23633776605129242,
      "learning_rate": 0.0001999786325518256,
      "loss": 0.9661,
      "step": 2755
    },
    {
      "epoch": 0.31841255191509,
      "grad_norm": 0.25068148970603943,
      "learning_rate": 0.00019997722258442499,
      "loss": 1.0099,
      "step": 2760
    },
    {
      "epoch": 0.3189893862482695,
      "grad_norm": 0.26392409205436707,
      "learning_rate": 0.00019997576757709089,
      "loss": 0.9987,
      "step": 2765
    },
    {
      "epoch": 0.319566220581449,
      "grad_norm": 8.13176155090332,
      "learning_rate": 0.00019997426753047882,
      "loss": 1.0128,
      "step": 2770
    },
    {
      "epoch": 0.3201430549146285,
      "grad_norm": 2.1088223457336426,
      "learning_rate": 0.00019997272244526456,
      "loss": 0.9769,
      "step": 2775
    },
    {
      "epoch": 0.32071988924780803,
      "grad_norm": 0.7730352878570557,
      "learning_rate": 0.00019997113232214417,
      "loss": 1.0047,
      "step": 2780
    },
    {
      "epoch": 0.32129672358098754,
      "grad_norm": 0.272935688495636,
      "learning_rate": 0.000199969497161834,
      "loss": 0.9996,
      "step": 2785
    },
    {
      "epoch": 0.32187355791416705,
      "grad_norm": 5.394055366516113,
      "learning_rate": 0.00019996781696507069,
      "loss": 1.0325,
      "step": 2790
    },
    {
      "epoch": 0.32245039224734656,
      "grad_norm": 0.26502957940101624,
      "learning_rate": 0.00019996609173261116,
      "loss": 0.9684,
      "step": 2795
    },
    {
      "epoch": 0.3230272265805261,
      "grad_norm": 0.25611191987991333,
      "learning_rate": 0.00019996432146523267,
      "loss": 0.9542,
      "step": 2800
    },
    {
      "epoch": 0.3236040609137056,
      "grad_norm": 0.2904262840747833,
      "learning_rate": 0.00019996250616373268,
      "loss": 0.9687,
      "step": 2805
    },
    {
      "epoch": 0.3241808952468851,
      "grad_norm": 0.33797159790992737,
      "learning_rate": 0.00019996064582892905,
      "loss": 0.9814,
      "step": 2810
    },
    {
      "epoch": 0.3247577295800646,
      "grad_norm": 0.2576303482055664,
      "learning_rate": 0.00019995874046165981,
      "loss": 0.9391,
      "step": 2815
    },
    {
      "epoch": 0.3253345639132441,
      "grad_norm": 0.2760027050971985,
      "learning_rate": 0.0001999567900627833,
      "loss": 0.9729,
      "step": 2820
    },
    {
      "epoch": 0.3259113982464236,
      "grad_norm": 0.2771868109703064,
      "learning_rate": 0.0001999547946331783,
      "loss": 1.0334,
      "step": 2825
    },
    {
      "epoch": 0.32648823257960313,
      "grad_norm": 0.28071510791778564,
      "learning_rate": 0.00019995275417374365,
      "loss": 0.9437,
      "step": 2830
    },
    {
      "epoch": 0.32706506691278264,
      "grad_norm": 0.25998392701148987,
      "learning_rate": 0.0001999506686853986,
      "loss": 1.0159,
      "step": 2835
    },
    {
      "epoch": 0.32764190124596215,
      "grad_norm": 0.2917766273021698,
      "learning_rate": 0.0001999485381690827,
      "loss": 1.0045,
      "step": 2840
    },
    {
      "epoch": 0.32821873557914166,
      "grad_norm": 0.24891650676727295,
      "learning_rate": 0.0001999463626257557,
      "loss": 0.9768,
      "step": 2845
    },
    {
      "epoch": 0.3287955699123212,
      "grad_norm": 0.26400068402290344,
      "learning_rate": 0.00019994414205639775,
      "loss": 1.002,
      "step": 2850
    },
    {
      "epoch": 0.3293724042455007,
      "grad_norm": 0.266400545835495,
      "learning_rate": 0.00019994187646200917,
      "loss": 1.0267,
      "step": 2855
    },
    {
      "epoch": 0.3299492385786802,
      "grad_norm": 0.25532013177871704,
      "learning_rate": 0.00019993956584361063,
      "loss": 0.9929,
      "step": 2860
    },
    {
      "epoch": 0.3305260729118597,
      "grad_norm": 0.24749302864074707,
      "learning_rate": 0.00019993721020224308,
      "loss": 0.9693,
      "step": 2865
    },
    {
      "epoch": 0.3311029072450392,
      "grad_norm": 0.2967720925807953,
      "learning_rate": 0.0001999348095389677,
      "loss": 0.9674,
      "step": 2870
    },
    {
      "epoch": 0.3316797415782187,
      "grad_norm": 0.25716492533683777,
      "learning_rate": 0.00019993236385486607,
      "loss": 0.9448,
      "step": 2875
    },
    {
      "epoch": 0.33225657591139823,
      "grad_norm": 0.265095978975296,
      "learning_rate": 0.0001999298731510399,
      "loss": 0.9434,
      "step": 2880
    },
    {
      "epoch": 0.33283341024457774,
      "grad_norm": 0.25619763135910034,
      "learning_rate": 0.00019992733742861128,
      "loss": 0.9688,
      "step": 2885
    },
    {
      "epoch": 0.33341024457775725,
      "grad_norm": 0.25639405846595764,
      "learning_rate": 0.0001999247566887226,
      "loss": 0.9769,
      "step": 2890
    },
    {
      "epoch": 0.33398707891093676,
      "grad_norm": 0.29292336106300354,
      "learning_rate": 0.00019992213093253643,
      "loss": 0.9579,
      "step": 2895
    },
    {
      "epoch": 0.3345639132441163,
      "grad_norm": 0.3063664436340332,
      "learning_rate": 0.0001999194601612357,
      "loss": 1.0057,
      "step": 2900
    },
    {
      "epoch": 0.3351407475772958,
      "grad_norm": 0.26349523663520813,
      "learning_rate": 0.00019991674437602362,
      "loss": 1.0265,
      "step": 2905
    },
    {
      "epoch": 0.3357175819104753,
      "grad_norm": 0.26573020219802856,
      "learning_rate": 0.0001999139835781236,
      "loss": 1.0405,
      "step": 2910
    },
    {
      "epoch": 0.3362944162436548,
      "grad_norm": 0.2737710177898407,
      "learning_rate": 0.00019991117776877942,
      "loss": 0.9219,
      "step": 2915
    },
    {
      "epoch": 0.3368712505768343,
      "grad_norm": 0.26778504252433777,
      "learning_rate": 0.00019990832694925513,
      "loss": 1.0212,
      "step": 2920
    },
    {
      "epoch": 0.3374480849100138,
      "grad_norm": 0.26119446754455566,
      "learning_rate": 0.00019990543112083503,
      "loss": 0.9696,
      "step": 2925
    },
    {
      "epoch": 0.33802491924319333,
      "grad_norm": 0.256541907787323,
      "learning_rate": 0.00019990249028482363,
      "loss": 1.0118,
      "step": 2930
    },
    {
      "epoch": 0.33860175357637284,
      "grad_norm": 0.2648811936378479,
      "learning_rate": 0.0001998995044425458,
      "loss": 0.9676,
      "step": 2935
    },
    {
      "epoch": 0.33917858790955235,
      "grad_norm": 0.254853755235672,
      "learning_rate": 0.00019989647359534672,
      "loss": 0.9455,
      "step": 2940
    },
    {
      "epoch": 0.33975542224273186,
      "grad_norm": 0.26572519540786743,
      "learning_rate": 0.00019989339774459177,
      "loss": 0.9875,
      "step": 2945
    },
    {
      "epoch": 0.3403322565759114,
      "grad_norm": 0.27205315232276917,
      "learning_rate": 0.00019989027689166662,
      "loss": 0.9661,
      "step": 2950
    },
    {
      "epoch": 0.3409090909090909,
      "grad_norm": 0.2578754723072052,
      "learning_rate": 0.0001998871110379772,
      "loss": 0.976,
      "step": 2955
    },
    {
      "epoch": 0.3414859252422704,
      "grad_norm": 0.2672814130783081,
      "learning_rate": 0.00019988390018494976,
      "loss": 0.9705,
      "step": 2960
    },
    {
      "epoch": 0.3420627595754499,
      "grad_norm": 0.2663863003253937,
      "learning_rate": 0.00019988064433403078,
      "loss": 0.976,
      "step": 2965
    },
    {
      "epoch": 0.3426395939086294,
      "grad_norm": 0.2686617970466614,
      "learning_rate": 0.00019987734348668706,
      "loss": 0.9612,
      "step": 2970
    },
    {
      "epoch": 0.343216428241809,
      "grad_norm": 0.24667376279830933,
      "learning_rate": 0.00019987399764440558,
      "loss": 0.9562,
      "step": 2975
    },
    {
      "epoch": 0.3437932625749885,
      "grad_norm": 0.2621404528617859,
      "learning_rate": 0.0001998706068086937,
      "loss": 1.0241,
      "step": 2980
    },
    {
      "epoch": 0.344370096908168,
      "grad_norm": 0.2822721600532532,
      "learning_rate": 0.00019986717098107896,
      "loss": 0.993,
      "step": 2985
    },
    {
      "epoch": 0.3449469312413475,
      "grad_norm": 0.26043906807899475,
      "learning_rate": 0.00019986369016310925,
      "loss": 0.9727,
      "step": 2990
    },
    {
      "epoch": 0.345523765574527,
      "grad_norm": 0.27216097712516785,
      "learning_rate": 0.00019986016435635264,
      "loss": 0.9866,
      "step": 2995
    },
    {
      "epoch": 0.34610059990770653,
      "grad_norm": 0.26094967126846313,
      "learning_rate": 0.00019985659356239758,
      "loss": 0.925,
      "step": 3000
    },
    {
      "epoch": 0.34667743424088604,
      "grad_norm": 0.28546351194381714,
      "learning_rate": 0.0001998529777828526,
      "loss": 0.9614,
      "step": 3005
    },
    {
      "epoch": 0.34725426857406555,
      "grad_norm": 0.2722310423851013,
      "learning_rate": 0.00019984931701934677,
      "loss": 0.9827,
      "step": 3010
    },
    {
      "epoch": 0.34783110290724506,
      "grad_norm": 0.2718545198440552,
      "learning_rate": 0.00019984561127352914,
      "loss": 0.9642,
      "step": 3015
    },
    {
      "epoch": 0.34840793724042457,
      "grad_norm": 0.2546035349369049,
      "learning_rate": 0.00019984186054706923,
      "loss": 0.9957,
      "step": 3020
    },
    {
      "epoch": 0.3489847715736041,
      "grad_norm": 0.2482123076915741,
      "learning_rate": 0.00019983806484165674,
      "loss": 0.9924,
      "step": 3025
    },
    {
      "epoch": 0.3495616059067836,
      "grad_norm": 0.2893499732017517,
      "learning_rate": 0.00019983422415900158,
      "loss": 1.0,
      "step": 3030
    },
    {
      "epoch": 0.3501384402399631,
      "grad_norm": 0.2613871693611145,
      "learning_rate": 0.00019983033850083407,
      "loss": 1.0196,
      "step": 3035
    },
    {
      "epoch": 0.3507152745731426,
      "grad_norm": 0.27375757694244385,
      "learning_rate": 0.00019982640786890465,
      "loss": 0.9422,
      "step": 3040
    },
    {
      "epoch": 0.3512921089063221,
      "grad_norm": 0.2552187442779541,
      "learning_rate": 0.00019982243226498411,
      "loss": 0.9861,
      "step": 3045
    },
    {
      "epoch": 0.35186894323950163,
      "grad_norm": 0.2758137583732605,
      "learning_rate": 0.00019981841169086346,
      "loss": 0.9763,
      "step": 3050
    },
    {
      "epoch": 0.35244577757268114,
      "grad_norm": 0.26991990208625793,
      "learning_rate": 0.00019981434614835397,
      "loss": 0.9611,
      "step": 3055
    },
    {
      "epoch": 0.35302261190586065,
      "grad_norm": 0.27569013833999634,
      "learning_rate": 0.00019981023563928716,
      "loss": 0.9829,
      "step": 3060
    },
    {
      "epoch": 0.35359944623904016,
      "grad_norm": 0.27400723099708557,
      "learning_rate": 0.00019980608016551487,
      "loss": 0.9673,
      "step": 3065
    },
    {
      "epoch": 0.35417628057221967,
      "grad_norm": 0.2885724902153015,
      "learning_rate": 0.0001998018797289091,
      "loss": 0.9851,
      "step": 3070
    },
    {
      "epoch": 0.3547531149053992,
      "grad_norm": 0.2800985276699066,
      "learning_rate": 0.00019979763433136216,
      "loss": 0.9221,
      "step": 3075
    },
    {
      "epoch": 0.3553299492385787,
      "grad_norm": 0.28481200337409973,
      "learning_rate": 0.00019979334397478665,
      "loss": 0.9812,
      "step": 3080
    },
    {
      "epoch": 0.3559067835717582,
      "grad_norm": 0.2865158021450043,
      "learning_rate": 0.00019978900866111533,
      "loss": 1.0323,
      "step": 3085
    },
    {
      "epoch": 0.3564836179049377,
      "grad_norm": 0.2730359733104706,
      "learning_rate": 0.00019978462839230133,
      "loss": 0.9722,
      "step": 3090
    },
    {
      "epoch": 0.3570604522381172,
      "grad_norm": 0.2704940736293793,
      "learning_rate": 0.0001997802031703179,
      "loss": 1.0168,
      "step": 3095
    },
    {
      "epoch": 0.35763728657129673,
      "grad_norm": 0.26255276799201965,
      "learning_rate": 0.00019977573299715865,
      "loss": 0.9872,
      "step": 3100
    },
    {
      "epoch": 0.35821412090447624,
      "grad_norm": 0.275738924741745,
      "learning_rate": 0.0001997712178748374,
      "loss": 0.9876,
      "step": 3105
    },
    {
      "epoch": 0.35879095523765575,
      "grad_norm": 0.24914264678955078,
      "learning_rate": 0.00019976665780538824,
      "loss": 0.8764,
      "step": 3110
    },
    {
      "epoch": 0.35936778957083526,
      "grad_norm": 0.26297125220298767,
      "learning_rate": 0.0001997620527908654,
      "loss": 1.0151,
      "step": 3115
    },
    {
      "epoch": 0.35994462390401477,
      "grad_norm": 0.2562413811683655,
      "learning_rate": 0.0001997574028333436,
      "loss": 0.9859,
      "step": 3120
    },
    {
      "epoch": 0.3605214582371943,
      "grad_norm": 0.2676263451576233,
      "learning_rate": 0.0001997527079349175,
      "loss": 0.9625,
      "step": 3125
    },
    {
      "epoch": 0.3610982925703738,
      "grad_norm": 0.2798160910606384,
      "learning_rate": 0.0001997479680977023,
      "loss": 1.0202,
      "step": 3130
    },
    {
      "epoch": 0.3616751269035533,
      "grad_norm": 0.3188830018043518,
      "learning_rate": 0.0001997431833238332,
      "loss": 0.9762,
      "step": 3135
    },
    {
      "epoch": 0.3622519612367328,
      "grad_norm": 0.2774507403373718,
      "learning_rate": 0.00019973835361546577,
      "loss": 0.9795,
      "step": 3140
    },
    {
      "epoch": 0.3628287955699123,
      "grad_norm": 0.2787860631942749,
      "learning_rate": 0.0001997334789747759,
      "loss": 0.9737,
      "step": 3145
    },
    {
      "epoch": 0.36340562990309183,
      "grad_norm": 0.3146364390850067,
      "learning_rate": 0.00019972855940395947,
      "loss": 0.935,
      "step": 3150
    },
    {
      "epoch": 0.36398246423627134,
      "grad_norm": 0.26312851905822754,
      "learning_rate": 0.00019972359490523284,
      "loss": 0.9587,
      "step": 3155
    },
    {
      "epoch": 0.36455929856945085,
      "grad_norm": 0.2730996906757355,
      "learning_rate": 0.0001997185854808325,
      "loss": 0.9661,
      "step": 3160
    },
    {
      "epoch": 0.36513613290263036,
      "grad_norm": 0.28980138897895813,
      "learning_rate": 0.00019971353113301527,
      "loss": 0.9461,
      "step": 3165
    },
    {
      "epoch": 0.36571296723580987,
      "grad_norm": 0.2760608494281769,
      "learning_rate": 0.00019970843186405807,
      "loss": 1.0146,
      "step": 3170
    },
    {
      "epoch": 0.3662898015689894,
      "grad_norm": 0.27623140811920166,
      "learning_rate": 0.0001997032876762582,
      "loss": 0.9888,
      "step": 3175
    },
    {
      "epoch": 0.3668666359021689,
      "grad_norm": 0.2842963635921478,
      "learning_rate": 0.00019969809857193306,
      "loss": 0.9676,
      "step": 3180
    },
    {
      "epoch": 0.3674434702353484,
      "grad_norm": 0.29437965154647827,
      "learning_rate": 0.00019969286455342035,
      "loss": 1.0161,
      "step": 3185
    },
    {
      "epoch": 0.3680203045685279,
      "grad_norm": 0.2872377336025238,
      "learning_rate": 0.00019968758562307807,
      "loss": 0.9173,
      "step": 3190
    },
    {
      "epoch": 0.3685971389017074,
      "grad_norm": 0.2813303768634796,
      "learning_rate": 0.0001996822617832843,
      "loss": 0.9569,
      "step": 3195
    },
    {
      "epoch": 0.36917397323488693,
      "grad_norm": 0.2630009353160858,
      "learning_rate": 0.00019967689303643753,
      "loss": 0.919,
      "step": 3200
    },
    {
      "epoch": 0.36975080756806644,
      "grad_norm": 0.28360888361930847,
      "learning_rate": 0.00019967147938495635,
      "loss": 0.9788,
      "step": 3205
    },
    {
      "epoch": 0.37032764190124595,
      "grad_norm": 0.2732391655445099,
      "learning_rate": 0.0001996660208312796,
      "loss": 1.0405,
      "step": 3210
    },
    {
      "epoch": 0.37090447623442546,
      "grad_norm": 0.28286460041999817,
      "learning_rate": 0.00019966051737786643,
      "loss": 1.0152,
      "step": 3215
    },
    {
      "epoch": 0.37148131056760497,
      "grad_norm": 0.31790444254875183,
      "learning_rate": 0.0001996549690271961,
      "loss": 1.0018,
      "step": 3220
    },
    {
      "epoch": 0.3720581449007845,
      "grad_norm": 0.28235310316085815,
      "learning_rate": 0.00019964937578176816,
      "loss": 1.0148,
      "step": 3225
    },
    {
      "epoch": 0.372634979233964,
      "grad_norm": 0.29751458764076233,
      "learning_rate": 0.00019964373764410237,
      "loss": 0.9432,
      "step": 3230
    },
    {
      "epoch": 0.3732118135671435,
      "grad_norm": 0.2763459086418152,
      "learning_rate": 0.00019963805461673876,
      "loss": 0.9836,
      "step": 3235
    },
    {
      "epoch": 0.373788647900323,
      "grad_norm": 0.2639414966106415,
      "learning_rate": 0.00019963232670223752,
      "loss": 0.9687,
      "step": 3240
    },
    {
      "epoch": 0.3743654822335025,
      "grad_norm": 0.25585541129112244,
      "learning_rate": 0.0001996265539031791,
      "loss": 0.9544,
      "step": 3245
    },
    {
      "epoch": 0.37494231656668203,
      "grad_norm": 0.30802932381629944,
      "learning_rate": 0.00019962073622216417,
      "loss": 1.0018,
      "step": 3250
    },
    {
      "epoch": 0.37551915089986154,
      "grad_norm": 0.27707305550575256,
      "learning_rate": 0.00019961487366181355,
      "loss": 0.948,
      "step": 3255
    },
    {
      "epoch": 0.37609598523304105,
      "grad_norm": 0.26873978972435,
      "learning_rate": 0.0001996089662247684,
      "loss": 0.9899,
      "step": 3260
    },
    {
      "epoch": 0.37667281956622056,
      "grad_norm": 0.2803768217563629,
      "learning_rate": 0.00019960301391368996,
      "loss": 0.9381,
      "step": 3265
    },
    {
      "epoch": 0.3772496538994001,
      "grad_norm": 0.2769775688648224,
      "learning_rate": 0.00019959701673125983,
      "loss": 0.9207,
      "step": 3270
    },
    {
      "epoch": 0.3778264882325796,
      "grad_norm": 0.27256807684898376,
      "learning_rate": 0.0001995909746801797,
      "loss": 1.0029,
      "step": 3275
    },
    {
      "epoch": 0.3784033225657591,
      "grad_norm": 0.2660912573337555,
      "learning_rate": 0.0001995848877631716,
      "loss": 0.9918,
      "step": 3280
    },
    {
      "epoch": 0.3789801568989386,
      "grad_norm": 0.28680184483528137,
      "learning_rate": 0.00019957875598297759,
      "loss": 0.9344,
      "step": 3285
    },
    {
      "epoch": 0.3795569912321181,
      "grad_norm": 0.2940070331096649,
      "learning_rate": 0.00019957257934236013,
      "loss": 0.9575,
      "step": 3290
    },
    {
      "epoch": 0.3801338255652976,
      "grad_norm": 0.25873056054115295,
      "learning_rate": 0.00019956635784410177,
      "loss": 0.981,
      "step": 3295
    },
    {
      "epoch": 0.38071065989847713,
      "grad_norm": 0.2866104245185852,
      "learning_rate": 0.00019956009149100533,
      "loss": 0.9889,
      "step": 3300
    },
    {
      "epoch": 0.38128749423165664,
      "grad_norm": 0.2744695246219635,
      "learning_rate": 0.00019955378028589383,
      "loss": 0.9321,
      "step": 3305
    },
    {
      "epoch": 0.38186432856483615,
      "grad_norm": 0.2819940745830536,
      "learning_rate": 0.0001995474242316104,
      "loss": 0.9603,
      "step": 3310
    },
    {
      "epoch": 0.38244116289801566,
      "grad_norm": 0.2561120390892029,
      "learning_rate": 0.00019954102333101856,
      "loss": 0.9994,
      "step": 3315
    },
    {
      "epoch": 0.3830179972311952,
      "grad_norm": 0.2975214123725891,
      "learning_rate": 0.00019953457758700184,
      "loss": 1.0012,
      "step": 3320
    },
    {
      "epoch": 0.38359483156437474,
      "grad_norm": 0.26979345083236694,
      "learning_rate": 0.00019952808700246413,
      "loss": 0.9865,
      "step": 3325
    },
    {
      "epoch": 0.38417166589755425,
      "grad_norm": 0.2876468002796173,
      "learning_rate": 0.0001995215515803294,
      "loss": 0.973,
      "step": 3330
    },
    {
      "epoch": 0.38474850023073376,
      "grad_norm": 0.2840271592140198,
      "learning_rate": 0.0001995149713235419,
      "loss": 1.0173,
      "step": 3335
    },
    {
      "epoch": 0.38532533456391327,
      "grad_norm": 0.27148565649986267,
      "learning_rate": 0.00019950834623506602,
      "loss": 0.9508,
      "step": 3340
    },
    {
      "epoch": 0.3859021688970928,
      "grad_norm": 0.38565149903297424,
      "learning_rate": 0.00019950167631788642,
      "loss": 1.0152,
      "step": 3345
    },
    {
      "epoch": 0.3864790032302723,
      "grad_norm": 0.2814615070819855,
      "learning_rate": 0.00019949496157500786,
      "loss": 0.9375,
      "step": 3350
    },
    {
      "epoch": 0.3870558375634518,
      "grad_norm": 0.29191091656684875,
      "learning_rate": 0.00019948820200945536,
      "loss": 1.0029,
      "step": 3355
    },
    {
      "epoch": 0.3876326718966313,
      "grad_norm": 0.29751864075660706,
      "learning_rate": 0.00019948139762427416,
      "loss": 1.0162,
      "step": 3360
    },
    {
      "epoch": 0.3882095062298108,
      "grad_norm": 0.2734597623348236,
      "learning_rate": 0.0001994745484225296,
      "loss": 0.9674,
      "step": 3365
    },
    {
      "epoch": 0.38878634056299033,
      "grad_norm": 0.2579993009567261,
      "learning_rate": 0.0001994676544073073,
      "loss": 0.9396,
      "step": 3370
    },
    {
      "epoch": 0.38936317489616984,
      "grad_norm": 0.2908860743045807,
      "learning_rate": 0.000199460715581713,
      "loss": 1.0213,
      "step": 3375
    },
    {
      "epoch": 0.38994000922934935,
      "grad_norm": 0.3159235417842865,
      "learning_rate": 0.0001994537319488726,
      "loss": 0.9886,
      "step": 3380
    },
    {
      "epoch": 0.39051684356252886,
      "grad_norm": 0.27023908495903015,
      "learning_rate": 0.00019944670351193232,
      "loss": 0.9804,
      "step": 3385
    },
    {
      "epoch": 0.39109367789570837,
      "grad_norm": 0.2643144428730011,
      "learning_rate": 0.0001994396302740585,
      "loss": 0.9912,
      "step": 3390
    },
    {
      "epoch": 0.3916705122288879,
      "grad_norm": 0.3253761827945709,
      "learning_rate": 0.00019943251223843755,
      "loss": 1.0102,
      "step": 3395
    },
    {
      "epoch": 0.3922473465620674,
      "grad_norm": 0.2623632848262787,
      "learning_rate": 0.00019942534940827625,
      "loss": 0.9324,
      "step": 3400
    },
    {
      "epoch": 0.3928241808952469,
      "grad_norm": 0.27166181802749634,
      "learning_rate": 0.00019941814178680144,
      "loss": 0.9578,
      "step": 3405
    },
    {
      "epoch": 0.3934010152284264,
      "grad_norm": 0.28351280093193054,
      "learning_rate": 0.00019941088937726011,
      "loss": 0.9684,
      "step": 3410
    },
    {
      "epoch": 0.3939778495616059,
      "grad_norm": 0.2918001413345337,
      "learning_rate": 0.0001994035921829196,
      "loss": 0.9833,
      "step": 3415
    },
    {
      "epoch": 0.39455468389478543,
      "grad_norm": 0.2910713255405426,
      "learning_rate": 0.00019939625020706724,
      "loss": 0.9937,
      "step": 3420
    },
    {
      "epoch": 0.39513151822796494,
      "grad_norm": 0.27372169494628906,
      "learning_rate": 0.0001993888634530106,
      "loss": 0.9769,
      "step": 3425
    },
    {
      "epoch": 0.39570835256114445,
      "grad_norm": 0.298957884311676,
      "learning_rate": 0.00019938143192407744,
      "loss": 0.991,
      "step": 3430
    },
    {
      "epoch": 0.39628518689432396,
      "grad_norm": 0.2799958884716034,
      "learning_rate": 0.00019937395562361564,
      "loss": 1.0079,
      "step": 3435
    },
    {
      "epoch": 0.39686202122750347,
      "grad_norm": 0.2845383882522583,
      "learning_rate": 0.00019936643455499336,
      "loss": 1.0427,
      "step": 3440
    },
    {
      "epoch": 0.397438855560683,
      "grad_norm": 0.2783012390136719,
      "learning_rate": 0.00019935886872159885,
      "loss": 0.9672,
      "step": 3445
    },
    {
      "epoch": 0.3980156898938625,
      "grad_norm": 0.27244484424591064,
      "learning_rate": 0.00019935125812684047,
      "loss": 0.9309,
      "step": 3450
    },
    {
      "epoch": 0.398592524227042,
      "grad_norm": 0.3226234018802643,
      "learning_rate": 0.00019934360277414686,
      "loss": 0.9441,
      "step": 3455
    },
    {
      "epoch": 0.3991693585602215,
      "grad_norm": 0.2866813540458679,
      "learning_rate": 0.00019933590266696673,
      "loss": 1.0051,
      "step": 3460
    },
    {
      "epoch": 0.399746192893401,
      "grad_norm": 0.2691948413848877,
      "learning_rate": 0.00019932815780876904,
      "loss": 0.9913,
      "step": 3465
    },
    {
      "epoch": 0.40032302722658053,
      "grad_norm": 0.2774306833744049,
      "learning_rate": 0.0001993203682030428,
      "loss": 0.9852,
      "step": 3470
    },
    {
      "epoch": 0.40089986155976004,
      "grad_norm": 0.2922816276550293,
      "learning_rate": 0.00019931253385329734,
      "loss": 0.9824,
      "step": 3475
    },
    {
      "epoch": 0.40147669589293955,
      "grad_norm": 0.26545560359954834,
      "learning_rate": 0.00019930465476306197,
      "loss": 0.9951,
      "step": 3480
    },
    {
      "epoch": 0.40205353022611906,
      "grad_norm": 0.27202633023262024,
      "learning_rate": 0.00019929673093588624,
      "loss": 0.9971,
      "step": 3485
    },
    {
      "epoch": 0.40263036455929857,
      "grad_norm": 0.29674404859542847,
      "learning_rate": 0.00019928876237533988,
      "loss": 1.0056,
      "step": 3490
    },
    {
      "epoch": 0.4032071988924781,
      "grad_norm": 0.276904821395874,
      "learning_rate": 0.00019928074908501272,
      "loss": 0.976,
      "step": 3495
    },
    {
      "epoch": 0.4037840332256576,
      "grad_norm": 0.26667892932891846,
      "learning_rate": 0.00019927269106851482,
      "loss": 0.9564,
      "step": 3500
    },
    {
      "epoch": 0.4043608675588371,
      "grad_norm": 0.31080129742622375,
      "learning_rate": 0.00019926458832947622,
      "loss": 1.0365,
      "step": 3505
    },
    {
      "epoch": 0.4049377018920166,
      "grad_norm": 0.278057724237442,
      "learning_rate": 0.00019925644087154734,
      "loss": 0.9868,
      "step": 3510
    },
    {
      "epoch": 0.4055145362251961,
      "grad_norm": 0.2963494658470154,
      "learning_rate": 0.00019924824869839853,
      "loss": 0.9729,
      "step": 3515
    },
    {
      "epoch": 0.40609137055837563,
      "grad_norm": 0.27625954151153564,
      "learning_rate": 0.00019924001181372046,
      "loss": 0.9466,
      "step": 3520
    },
    {
      "epoch": 0.40666820489155514,
      "grad_norm": 0.31308531761169434,
      "learning_rate": 0.00019923173022122378,
      "loss": 1.0257,
      "step": 3525
    },
    {
      "epoch": 0.40724503922473465,
      "grad_norm": 0.32162705063819885,
      "learning_rate": 0.0001992234039246394,
      "loss": 0.9872,
      "step": 3530
    },
    {
      "epoch": 0.40782187355791416,
      "grad_norm": 0.281136691570282,
      "learning_rate": 0.0001992150329277184,
      "loss": 1.0024,
      "step": 3535
    },
    {
      "epoch": 0.40839870789109367,
      "grad_norm": 0.27098140120506287,
      "learning_rate": 0.00019920661723423183,
      "loss": 0.9851,
      "step": 3540
    },
    {
      "epoch": 0.4089755422242732,
      "grad_norm": 0.28920409083366394,
      "learning_rate": 0.000199198156847971,
      "loss": 1.0035,
      "step": 3545
    },
    {
      "epoch": 0.4095523765574527,
      "grad_norm": 0.2802961766719818,
      "learning_rate": 0.00019918965177274735,
      "loss": 1.016,
      "step": 3550
    },
    {
      "epoch": 0.4101292108906322,
      "grad_norm": 0.2619479298591614,
      "learning_rate": 0.00019918110201239247,
      "loss": 0.9942,
      "step": 3555
    },
    {
      "epoch": 0.4107060452238117,
      "grad_norm": 0.26170656085014343,
      "learning_rate": 0.00019917250757075795,
      "loss": 0.9494,
      "step": 3560
    },
    {
      "epoch": 0.4112828795569912,
      "grad_norm": 0.2886107265949249,
      "learning_rate": 0.00019916386845171568,
      "loss": 0.9857,
      "step": 3565
    },
    {
      "epoch": 0.41185971389017073,
      "grad_norm": 0.26587212085723877,
      "learning_rate": 0.00019915518465915758,
      "loss": 0.9708,
      "step": 3570
    },
    {
      "epoch": 0.41243654822335024,
      "grad_norm": 0.2751521170139313,
      "learning_rate": 0.00019914645619699571,
      "loss": 0.9622,
      "step": 3575
    },
    {
      "epoch": 0.41301338255652975,
      "grad_norm": 0.28140199184417725,
      "learning_rate": 0.00019913768306916227,
      "loss": 1.0042,
      "step": 3580
    },
    {
      "epoch": 0.41359021688970926,
      "grad_norm": 0.2568947374820709,
      "learning_rate": 0.00019912886527960954,
      "loss": 0.9312,
      "step": 3585
    },
    {
      "epoch": 0.41416705122288877,
      "grad_norm": 0.27960750460624695,
      "learning_rate": 0.00019912000283231,
      "loss": 0.9689,
      "step": 3590
    },
    {
      "epoch": 0.4147438855560683,
      "grad_norm": 0.30046120285987854,
      "learning_rate": 0.00019911109573125617,
      "loss": 1.0254,
      "step": 3595
    },
    {
      "epoch": 0.4153207198892478,
      "grad_norm": 0.2881747782230377,
      "learning_rate": 0.0001991021439804607,
      "loss": 1.0188,
      "step": 3600
    },
    {
      "epoch": 0.4158975542224273,
      "grad_norm": 0.28633803129196167,
      "learning_rate": 0.00019909314758395638,
      "loss": 0.9999,
      "step": 3605
    },
    {
      "epoch": 0.4164743885556068,
      "grad_norm": 0.27391380071640015,
      "learning_rate": 0.00019908410654579615,
      "loss": 0.9707,
      "step": 3610
    },
    {
      "epoch": 0.4170512228887863,
      "grad_norm": 0.2890860438346863,
      "learning_rate": 0.00019907502087005297,
      "loss": 0.998,
      "step": 3615
    },
    {
      "epoch": 0.41762805722196583,
      "grad_norm": 0.28422412276268005,
      "learning_rate": 0.00019906589056081995,
      "loss": 0.9878,
      "step": 3620
    },
    {
      "epoch": 0.41820489155514534,
      "grad_norm": 0.26787862181663513,
      "learning_rate": 0.0001990567156222103,
      "loss": 0.9707,
      "step": 3625
    },
    {
      "epoch": 0.41878172588832485,
      "grad_norm": 0.28404876589775085,
      "learning_rate": 0.00019904749605835742,
      "loss": 0.9737,
      "step": 3630
    },
    {
      "epoch": 0.41935856022150436,
      "grad_norm": 0.2951257824897766,
      "learning_rate": 0.0001990382318734147,
      "loss": 0.9745,
      "step": 3635
    },
    {
      "epoch": 0.41993539455468387,
      "grad_norm": 0.28763440251350403,
      "learning_rate": 0.00019902892307155563,
      "loss": 0.946,
      "step": 3640
    },
    {
      "epoch": 0.4205122288878634,
      "grad_norm": 0.3106132447719574,
      "learning_rate": 0.00019901956965697387,
      "loss": 0.9918,
      "step": 3645
    },
    {
      "epoch": 0.4210890632210429,
      "grad_norm": 0.29146450757980347,
      "learning_rate": 0.00019901017163388322,
      "loss": 0.9604,
      "step": 3650
    },
    {
      "epoch": 0.4216658975542224,
      "grad_norm": 0.2769738733768463,
      "learning_rate": 0.00019900072900651744,
      "loss": 0.9916,
      "step": 3655
    },
    {
      "epoch": 0.4222427318874019,
      "grad_norm": 0.5984246730804443,
      "learning_rate": 0.00019899124177913041,
      "loss": 0.9699,
      "step": 3660
    },
    {
      "epoch": 0.4228195662205814,
      "grad_norm": 0.2717759907245636,
      "learning_rate": 0.00019898170995599627,
      "loss": 0.9808,
      "step": 3665
    },
    {
      "epoch": 0.423396400553761,
      "grad_norm": 0.292111337184906,
      "learning_rate": 0.00019897213354140903,
      "loss": 0.9985,
      "step": 3670
    },
    {
      "epoch": 0.4239732348869405,
      "grad_norm": 0.2680363655090332,
      "learning_rate": 0.00019896251253968288,
      "loss": 1.0368,
      "step": 3675
    },
    {
      "epoch": 0.42455006922012,
      "grad_norm": 0.2782565653324127,
      "learning_rate": 0.00019895284695515213,
      "loss": 0.9531,
      "step": 3680
    },
    {
      "epoch": 0.4251269035532995,
      "grad_norm": 0.290493369102478,
      "learning_rate": 0.00019894313679217116,
      "loss": 0.948,
      "step": 3685
    },
    {
      "epoch": 0.42570373788647903,
      "grad_norm": 0.2900492250919342,
      "learning_rate": 0.0001989333820551144,
      "loss": 1.0143,
      "step": 3690
    },
    {
      "epoch": 0.42628057221965854,
      "grad_norm": 0.2770122289657593,
      "learning_rate": 0.00019892358274837638,
      "loss": 0.9729,
      "step": 3695
    },
    {
      "epoch": 0.42685740655283805,
      "grad_norm": 0.28434714674949646,
      "learning_rate": 0.00019891373887637168,
      "loss": 0.9791,
      "step": 3700
    },
    {
      "epoch": 0.42743424088601756,
      "grad_norm": 0.28020331263542175,
      "learning_rate": 0.00019890385044353501,
      "loss": 0.9201,
      "step": 3705
    },
    {
      "epoch": 0.42801107521919707,
      "grad_norm": 0.28748229146003723,
      "learning_rate": 0.00019889391745432113,
      "loss": 0.9768,
      "step": 3710
    },
    {
      "epoch": 0.4285879095523766,
      "grad_norm": 0.28073740005493164,
      "learning_rate": 0.00019888393991320487,
      "loss": 0.9962,
      "step": 3715
    },
    {
      "epoch": 0.4291647438855561,
      "grad_norm": 0.2757289409637451,
      "learning_rate": 0.00019887391782468113,
      "loss": 1.0037,
      "step": 3720
    },
    {
      "epoch": 0.4297415782187356,
      "grad_norm": 0.2921489179134369,
      "learning_rate": 0.00019886385119326488,
      "loss": 0.9662,
      "step": 3725
    },
    {
      "epoch": 0.4303184125519151,
      "grad_norm": 0.2666405141353607,
      "learning_rate": 0.0001988537400234911,
      "loss": 0.9689,
      "step": 3730
    },
    {
      "epoch": 0.4308952468850946,
      "grad_norm": 0.3027278780937195,
      "learning_rate": 0.000198843584319915,
      "loss": 0.9806,
      "step": 3735
    },
    {
      "epoch": 0.43147208121827413,
      "grad_norm": 0.28119519352912903,
      "learning_rate": 0.00019883338408711168,
      "loss": 1.0291,
      "step": 3740
    },
    {
      "epoch": 0.43204891555145364,
      "grad_norm": 0.2693753242492676,
      "learning_rate": 0.0001988231393296764,
      "loss": 1.0011,
      "step": 3745
    },
    {
      "epoch": 0.43262574988463315,
      "grad_norm": 0.2855510413646698,
      "learning_rate": 0.0001988128500522244,
      "loss": 1.0099,
      "step": 3750
    },
    {
      "epoch": 0.43320258421781266,
      "grad_norm": 0.2987017035484314,
      "learning_rate": 0.00019880251625939104,
      "loss": 1.0431,
      "step": 3755
    },
    {
      "epoch": 0.43377941855099217,
      "grad_norm": 0.29469966888427734,
      "learning_rate": 0.0001987921379558317,
      "loss": 0.9965,
      "step": 3760
    },
    {
      "epoch": 0.4343562528841717,
      "grad_norm": 0.30328744649887085,
      "learning_rate": 0.00019878171514622187,
      "loss": 0.9773,
      "step": 3765
    },
    {
      "epoch": 0.4349330872173512,
      "grad_norm": 0.2655210494995117,
      "learning_rate": 0.00019877124783525697,
      "loss": 0.9963,
      "step": 3770
    },
    {
      "epoch": 0.4355099215505307,
      "grad_norm": 0.27883434295654297,
      "learning_rate": 0.00019876073602765262,
      "loss": 1.0189,
      "step": 3775
    },
    {
      "epoch": 0.4360867558837102,
      "grad_norm": 0.3105422556400299,
      "learning_rate": 0.00019875017972814435,
      "loss": 0.931,
      "step": 3780
    },
    {
      "epoch": 0.4366635902168897,
      "grad_norm": 0.2792617678642273,
      "learning_rate": 0.00019873957894148782,
      "loss": 0.9956,
      "step": 3785
    },
    {
      "epoch": 0.43724042455006923,
      "grad_norm": 0.2996716797351837,
      "learning_rate": 0.00019872893367245875,
      "loss": 0.9462,
      "step": 3790
    },
    {
      "epoch": 0.43781725888324874,
      "grad_norm": 0.29715242981910706,
      "learning_rate": 0.00019871824392585276,
      "loss": 0.941,
      "step": 3795
    },
    {
      "epoch": 0.43839409321642825,
      "grad_norm": 0.2681552767753601,
      "learning_rate": 0.00019870750970648568,
      "loss": 0.9552,
      "step": 3800
    },
    {
      "epoch": 0.43897092754960776,
      "grad_norm": 0.2906797528266907,
      "learning_rate": 0.00019869673101919325,
      "loss": 0.974,
      "step": 3805
    },
    {
      "epoch": 0.43954776188278727,
      "grad_norm": 0.3114708960056305,
      "learning_rate": 0.00019868590786883134,
      "loss": 0.9228,
      "step": 3810
    },
    {
      "epoch": 0.4401245962159668,
      "grad_norm": 0.30557358264923096,
      "learning_rate": 0.00019867504026027576,
      "loss": 0.991,
      "step": 3815
    },
    {
      "epoch": 0.4407014305491463,
      "grad_norm": 0.31230202317237854,
      "learning_rate": 0.00019866412819842237,
      "loss": 0.9541,
      "step": 3820
    },
    {
      "epoch": 0.4412782648823258,
      "grad_norm": 0.2935572564601898,
      "learning_rate": 0.00019865317168818713,
      "loss": 0.951,
      "step": 3825
    },
    {
      "epoch": 0.4418550992155053,
      "grad_norm": 0.2815152406692505,
      "learning_rate": 0.00019864217073450595,
      "loss": 0.966,
      "step": 3830
    },
    {
      "epoch": 0.4424319335486848,
      "grad_norm": 0.2667374312877655,
      "learning_rate": 0.00019863112534233474,
      "loss": 0.9722,
      "step": 3835
    },
    {
      "epoch": 0.44300876788186433,
      "grad_norm": 0.28506049513816833,
      "learning_rate": 0.0001986200355166495,
      "loss": 0.9854,
      "step": 3840
    },
    {
      "epoch": 0.44358560221504384,
      "grad_norm": 0.28640660643577576,
      "learning_rate": 0.00019860890126244626,
      "loss": 1.0193,
      "step": 3845
    },
    {
      "epoch": 0.44416243654822335,
      "grad_norm": 0.27358683943748474,
      "learning_rate": 0.000198597722584741,
      "loss": 1.0066,
      "step": 3850
    },
    {
      "epoch": 0.44473927088140286,
      "grad_norm": 0.29887109994888306,
      "learning_rate": 0.0001985864994885697,
      "loss": 0.997,
      "step": 3855
    },
    {
      "epoch": 0.44531610521458237,
      "grad_norm": 0.27347439527511597,
      "learning_rate": 0.00019857523197898836,
      "loss": 0.9212,
      "step": 3860
    },
    {
      "epoch": 0.4458929395477619,
      "grad_norm": 0.26219651103019714,
      "learning_rate": 0.0001985639200610731,
      "loss": 0.9621,
      "step": 3865
    },
    {
      "epoch": 0.4464697738809414,
      "grad_norm": 0.3028033673763275,
      "learning_rate": 0.00019855256373991993,
      "loss": 0.9535,
      "step": 3870
    },
    {
      "epoch": 0.4470466082141209,
      "grad_norm": 0.31690603494644165,
      "learning_rate": 0.00019854116302064488,
      "loss": 1.0083,
      "step": 3875
    },
    {
      "epoch": 0.4476234425473004,
      "grad_norm": 0.3070354759693146,
      "learning_rate": 0.00019852971790838402,
      "loss": 0.99,
      "step": 3880
    },
    {
      "epoch": 0.4482002768804799,
      "grad_norm": 0.26783743500709534,
      "learning_rate": 0.00019851822840829338,
      "loss": 1.0102,
      "step": 3885
    },
    {
      "epoch": 0.44877711121365943,
      "grad_norm": 0.315676748752594,
      "learning_rate": 0.00019850669452554898,
      "loss": 0.9339,
      "step": 3890
    },
    {
      "epoch": 0.44935394554683894,
      "grad_norm": 0.2907865345478058,
      "learning_rate": 0.00019849511626534688,
      "loss": 0.973,
      "step": 3895
    },
    {
      "epoch": 0.44993077988001845,
      "grad_norm": 0.2988938093185425,
      "learning_rate": 0.0001984834936329031,
      "loss": 0.9626,
      "step": 3900
    },
    {
      "epoch": 0.45050761421319796,
      "grad_norm": 0.27597156167030334,
      "learning_rate": 0.00019847182663345372,
      "loss": 0.968,
      "step": 3905
    },
    {
      "epoch": 0.45108444854637747,
      "grad_norm": 0.28698641061782837,
      "learning_rate": 0.00019846011527225463,
      "loss": 0.9504,
      "step": 3910
    },
    {
      "epoch": 0.451661282879557,
      "grad_norm": 0.31607502698898315,
      "learning_rate": 0.00019844835955458193,
      "loss": 1.0293,
      "step": 3915
    },
    {
      "epoch": 0.4522381172127365,
      "grad_norm": 0.28895917534828186,
      "learning_rate": 0.00019843655948573153,
      "loss": 0.9979,
      "step": 3920
    },
    {
      "epoch": 0.452814951545916,
      "grad_norm": 0.2905782461166382,
      "learning_rate": 0.00019842471507101937,
      "loss": 0.9544,
      "step": 3925
    },
    {
      "epoch": 0.4533917858790955,
      "grad_norm": 0.3004554510116577,
      "learning_rate": 0.00019841282631578145,
      "loss": 0.9802,
      "step": 3930
    },
    {
      "epoch": 0.453968620212275,
      "grad_norm": 0.2894156277179718,
      "learning_rate": 0.00019840089322537363,
      "loss": 0.996,
      "step": 3935
    },
    {
      "epoch": 0.45454545454545453,
      "grad_norm": 0.30515992641448975,
      "learning_rate": 0.0001983889158051718,
      "loss": 0.9647,
      "step": 3940
    },
    {
      "epoch": 0.45512228887863404,
      "grad_norm": 0.2915753126144409,
      "learning_rate": 0.00019837689406057183,
      "loss": 0.9816,
      "step": 3945
    },
    {
      "epoch": 0.45569912321181355,
      "grad_norm": 0.29045170545578003,
      "learning_rate": 0.0001983648279969895,
      "loss": 0.9855,
      "step": 3950
    },
    {
      "epoch": 0.45627595754499306,
      "grad_norm": 0.31583306193351746,
      "learning_rate": 0.00019835271761986062,
      "loss": 0.9932,
      "step": 3955
    },
    {
      "epoch": 0.45685279187817257,
      "grad_norm": 0.2948096990585327,
      "learning_rate": 0.00019834056293464093,
      "loss": 0.9854,
      "step": 3960
    },
    {
      "epoch": 0.4574296262113521,
      "grad_norm": 0.29406291246414185,
      "learning_rate": 0.00019832836394680615,
      "loss": 0.9591,
      "step": 3965
    },
    {
      "epoch": 0.4580064605445316,
      "grad_norm": 0.36170923709869385,
      "learning_rate": 0.00019831612066185193,
      "loss": 0.9963,
      "step": 3970
    },
    {
      "epoch": 0.4585832948777111,
      "grad_norm": 0.29276424646377563,
      "learning_rate": 0.00019830383308529393,
      "loss": 0.9363,
      "step": 3975
    },
    {
      "epoch": 0.4591601292108906,
      "grad_norm": 0.33209964632987976,
      "learning_rate": 0.0001982915012226677,
      "loss": 0.937,
      "step": 3980
    },
    {
      "epoch": 0.4597369635440701,
      "grad_norm": 0.2749769389629364,
      "learning_rate": 0.00019827912507952876,
      "loss": 0.968,
      "step": 3985
    },
    {
      "epoch": 0.46031379787724963,
      "grad_norm": 0.29793781042099,
      "learning_rate": 0.00019826670466145262,
      "loss": 0.9565,
      "step": 3990
    },
    {
      "epoch": 0.46089063221042914,
      "grad_norm": 0.2820662260055542,
      "learning_rate": 0.00019825423997403462,
      "loss": 1.0132,
      "step": 3995
    },
    {
      "epoch": 0.46146746654360865,
      "grad_norm": 0.2783183157444,
      "learning_rate": 0.00019824173102289027,
      "loss": 1.0136,
      "step": 4000
    },
    {
      "epoch": 0.46204430087678816,
      "grad_norm": 0.29929864406585693,
      "learning_rate": 0.00019822917781365474,
      "loss": 0.9783,
      "step": 4005
    },
    {
      "epoch": 0.46262113520996767,
      "grad_norm": 0.278143048286438,
      "learning_rate": 0.00019821658035198332,
      "loss": 0.9579,
      "step": 4010
    },
    {
      "epoch": 0.4631979695431472,
      "grad_norm": 0.28964924812316895,
      "learning_rate": 0.00019820393864355122,
      "loss": 0.9698,
      "step": 4015
    },
    {
      "epoch": 0.46377480387632675,
      "grad_norm": 0.29153019189834595,
      "learning_rate": 0.00019819125269405352,
      "loss": 0.9622,
      "step": 4020
    },
    {
      "epoch": 0.46435163820950626,
      "grad_norm": 0.28546687960624695,
      "learning_rate": 0.0001981785225092053,
      "loss": 1.0262,
      "step": 4025
    },
    {
      "epoch": 0.46492847254268577,
      "grad_norm": 0.30681976675987244,
      "learning_rate": 0.00019816574809474152,
      "loss": 0.9656,
      "step": 4030
    },
    {
      "epoch": 0.4655053068758653,
      "grad_norm": 0.29933658242225647,
      "learning_rate": 0.00019815292945641705,
      "loss": 1.0036,
      "step": 4035
    },
    {
      "epoch": 0.4660821412090448,
      "grad_norm": 0.2728285491466522,
      "learning_rate": 0.0001981400666000067,
      "loss": 0.9178,
      "step": 4040
    },
    {
      "epoch": 0.4666589755422243,
      "grad_norm": 0.27215078473091125,
      "learning_rate": 0.0001981271595313053,
      "loss": 0.9528,
      "step": 4045
    },
    {
      "epoch": 0.4672358098754038,
      "grad_norm": 0.29951199889183044,
      "learning_rate": 0.0001981142082561274,
      "loss": 1.0329,
      "step": 4050
    },
    {
      "epoch": 0.4678126442085833,
      "grad_norm": 0.3168124556541443,
      "learning_rate": 0.00019810121278030768,
      "loss": 0.9542,
      "step": 4055
    },
    {
      "epoch": 0.4683894785417628,
      "grad_norm": 0.31161144375801086,
      "learning_rate": 0.00019808817310970053,
      "loss": 0.9796,
      "step": 4060
    },
    {
      "epoch": 0.46896631287494234,
      "grad_norm": 0.32224801182746887,
      "learning_rate": 0.0001980750892501804,
      "loss": 1.0188,
      "step": 4065
    },
    {
      "epoch": 0.46954314720812185,
      "grad_norm": 0.30205318331718445,
      "learning_rate": 0.0001980619612076416,
      "loss": 0.9637,
      "step": 4070
    },
    {
      "epoch": 0.47011998154130136,
      "grad_norm": 0.28752511739730835,
      "learning_rate": 0.00019804878898799835,
      "loss": 0.9901,
      "step": 4075
    },
    {
      "epoch": 0.47069681587448087,
      "grad_norm": 0.2915705740451813,
      "learning_rate": 0.0001980355725971847,
      "loss": 0.9823,
      "step": 4080
    },
    {
      "epoch": 0.4712736502076604,
      "grad_norm": 0.29493892192840576,
      "learning_rate": 0.00019802231204115472,
      "loss": 1.0158,
      "step": 4085
    },
    {
      "epoch": 0.4718504845408399,
      "grad_norm": 0.30375000834465027,
      "learning_rate": 0.00019800900732588227,
      "loss": 0.9978,
      "step": 4090
    },
    {
      "epoch": 0.4724273188740194,
      "grad_norm": 0.30259644985198975,
      "learning_rate": 0.0001979956584573612,
      "loss": 0.9674,
      "step": 4095
    },
    {
      "epoch": 0.4730041532071989,
      "grad_norm": 0.2954089939594269,
      "learning_rate": 0.00019798226544160511,
      "loss": 0.9954,
      "step": 4100
    },
    {
      "epoch": 0.4735809875403784,
      "grad_norm": 0.3070552349090576,
      "learning_rate": 0.00019796882828464768,
      "loss": 0.9597,
      "step": 4105
    },
    {
      "epoch": 0.4741578218735579,
      "grad_norm": 0.3012619614601135,
      "learning_rate": 0.00019795534699254238,
      "loss": 0.9749,
      "step": 4110
    },
    {
      "epoch": 0.47473465620673744,
      "grad_norm": 0.30565857887268066,
      "learning_rate": 0.00019794182157136246,
      "loss": 0.946,
      "step": 4115
    },
    {
      "epoch": 0.47531149053991695,
      "grad_norm": 0.2922807037830353,
      "learning_rate": 0.0001979282520272012,
      "loss": 0.9334,
      "step": 4120
    },
    {
      "epoch": 0.47588832487309646,
      "grad_norm": 0.2870953381061554,
      "learning_rate": 0.00019791463836617176,
      "loss": 1.0199,
      "step": 4125
    },
    {
      "epoch": 0.47646515920627597,
      "grad_norm": 0.29794153571128845,
      "learning_rate": 0.00019790098059440704,
      "loss": 1.0163,
      "step": 4130
    },
    {
      "epoch": 0.4770419935394555,
      "grad_norm": 0.26819470524787903,
      "learning_rate": 0.00019788727871805994,
      "loss": 0.988,
      "step": 4135
    },
    {
      "epoch": 0.477618827872635,
      "grad_norm": 0.28677570819854736,
      "learning_rate": 0.00019787353274330313,
      "loss": 0.9704,
      "step": 4140
    },
    {
      "epoch": 0.4781956622058145,
      "grad_norm": 0.30988967418670654,
      "learning_rate": 0.00019785974267632928,
      "loss": 0.9583,
      "step": 4145
    },
    {
      "epoch": 0.478772496538994,
      "grad_norm": 0.29228535294532776,
      "learning_rate": 0.00019784590852335078,
      "loss": 0.9948,
      "step": 4150
    },
    {
      "epoch": 0.4793493308721735,
      "grad_norm": 0.29787692427635193,
      "learning_rate": 0.00019783203029059997,
      "loss": 1.0165,
      "step": 4155
    },
    {
      "epoch": 0.47992616520535303,
      "grad_norm": 0.29377439618110657,
      "learning_rate": 0.000197818107984329,
      "loss": 0.9344,
      "step": 4160
    },
    {
      "epoch": 0.48050299953853254,
      "grad_norm": 0.26499155163764954,
      "learning_rate": 0.0001978041416108099,
      "loss": 0.9622,
      "step": 4165
    },
    {
      "epoch": 0.48107983387171205,
      "grad_norm": 0.2975757122039795,
      "learning_rate": 0.00019779013117633454,
      "loss": 0.9544,
      "step": 4170
    },
    {
      "epoch": 0.48165666820489156,
      "grad_norm": 0.2974706292152405,
      "learning_rate": 0.00019777607668721467,
      "loss": 0.9379,
      "step": 4175
    },
    {
      "epoch": 0.48223350253807107,
      "grad_norm": 0.29165658354759216,
      "learning_rate": 0.00019776197814978187,
      "loss": 0.9735,
      "step": 4180
    },
    {
      "epoch": 0.4828103368712506,
      "grad_norm": 0.29878005385398865,
      "learning_rate": 0.00019774783557038755,
      "loss": 0.9626,
      "step": 4185
    },
    {
      "epoch": 0.4833871712044301,
      "grad_norm": 0.2908392548561096,
      "learning_rate": 0.00019773364895540296,
      "loss": 0.9788,
      "step": 4190
    },
    {
      "epoch": 0.4839640055376096,
      "grad_norm": 0.328003466129303,
      "learning_rate": 0.00019771941831121922,
      "loss": 0.9278,
      "step": 4195
    },
    {
      "epoch": 0.4845408398707891,
      "grad_norm": 0.30477482080459595,
      "learning_rate": 0.00019770514364424725,
      "loss": 0.9544,
      "step": 4200
    },
    {
      "epoch": 0.4851176742039686,
      "grad_norm": 0.2799585461616516,
      "learning_rate": 0.0001976908249609178,
      "loss": 0.9221,
      "step": 4205
    },
    {
      "epoch": 0.48569450853714813,
      "grad_norm": 0.2844686210155487,
      "learning_rate": 0.00019767646226768147,
      "loss": 0.9881,
      "step": 4210
    },
    {
      "epoch": 0.48627134287032764,
      "grad_norm": 0.3142208158969879,
      "learning_rate": 0.00019766205557100868,
      "loss": 0.9814,
      "step": 4215
    },
    {
      "epoch": 0.48684817720350715,
      "grad_norm": 0.3052613139152527,
      "learning_rate": 0.0001976476048773897,
      "loss": 1.0083,
      "step": 4220
    },
    {
      "epoch": 0.48742501153668666,
      "grad_norm": 0.2918776869773865,
      "learning_rate": 0.00019763311019333456,
      "loss": 1.0219,
      "step": 4225
    },
    {
      "epoch": 0.48800184586986617,
      "grad_norm": 0.30725133419036865,
      "learning_rate": 0.0001976185715253732,
      "loss": 1.0352,
      "step": 4230
    },
    {
      "epoch": 0.4885786802030457,
      "grad_norm": 0.3034648895263672,
      "learning_rate": 0.00019760398888005526,
      "loss": 0.9231,
      "step": 4235
    },
    {
      "epoch": 0.4891555145362252,
      "grad_norm": 0.30674096941947937,
      "learning_rate": 0.00019758936226395025,
      "loss": 1.0671,
      "step": 4240
    },
    {
      "epoch": 0.4897323488694047,
      "grad_norm": 0.3024204969406128,
      "learning_rate": 0.0001975746916836475,
      "loss": 0.9656,
      "step": 4245
    },
    {
      "epoch": 0.4903091832025842,
      "grad_norm": 0.26722845435142517,
      "learning_rate": 0.0001975599771457562,
      "loss": 0.9844,
      "step": 4250
    },
    {
      "epoch": 0.4908860175357637,
      "grad_norm": 0.29083022475242615,
      "learning_rate": 0.00019754521865690517,
      "loss": 0.9373,
      "step": 4255
    },
    {
      "epoch": 0.49146285186894323,
      "grad_norm": 0.3160659670829773,
      "learning_rate": 0.0001975304162237432,
      "loss": 1.0018,
      "step": 4260
    },
    {
      "epoch": 0.49203968620212274,
      "grad_norm": 0.2868952453136444,
      "learning_rate": 0.0001975155698529388,
      "loss": 0.9585,
      "step": 4265
    },
    {
      "epoch": 0.49261652053530225,
      "grad_norm": 0.309334397315979,
      "learning_rate": 0.00019750067955118033,
      "loss": 0.9586,
      "step": 4270
    },
    {
      "epoch": 0.49319335486848176,
      "grad_norm": 0.3061840832233429,
      "learning_rate": 0.00019748574532517586,
      "loss": 0.945,
      "step": 4275
    },
    {
      "epoch": 0.49377018920166127,
      "grad_norm": 0.2853107452392578,
      "learning_rate": 0.00019747076718165324,
      "loss": 0.8572,
      "step": 4280
    },
    {
      "epoch": 0.4943470235348408,
      "grad_norm": 0.30467069149017334,
      "learning_rate": 0.00019745574512736026,
      "loss": 0.9617,
      "step": 4285
    },
    {
      "epoch": 0.4949238578680203,
      "grad_norm": 0.3257976472377777,
      "learning_rate": 0.0001974406791690643,
      "loss": 0.9749,
      "step": 4290
    },
    {
      "epoch": 0.4955006922011998,
      "grad_norm": 0.2866266369819641,
      "learning_rate": 0.00019742556931355261,
      "loss": 0.9973,
      "step": 4295
    },
    {
      "epoch": 0.4960775265343793,
      "grad_norm": 0.2891503572463989,
      "learning_rate": 0.0001974104155676323,
      "loss": 1.0119,
      "step": 4300
    },
    {
      "epoch": 0.4966543608675588,
      "grad_norm": 0.27966293692588806,
      "learning_rate": 0.00019739521793813006,
      "loss": 0.8837,
      "step": 4305
    },
    {
      "epoch": 0.49723119520073833,
      "grad_norm": 0.30494049191474915,
      "learning_rate": 0.00019737997643189248,
      "loss": 0.9661,
      "step": 4310
    },
    {
      "epoch": 0.49780802953391784,
      "grad_norm": 0.3047969937324524,
      "learning_rate": 0.0001973646910557859,
      "loss": 1.004,
      "step": 4315
    },
    {
      "epoch": 0.49838486386709735,
      "grad_norm": 0.2946149706840515,
      "learning_rate": 0.00019734936181669638,
      "loss": 0.9053,
      "step": 4320
    },
    {
      "epoch": 0.49896169820027686,
      "grad_norm": 0.3126681447029114,
      "learning_rate": 0.00019733398872152984,
      "loss": 0.9533,
      "step": 4325
    },
    {
      "epoch": 0.49953853253345637,
      "grad_norm": 0.30080464482307434,
      "learning_rate": 0.00019731857177721182,
      "loss": 1.0052,
      "step": 4330
    },
    {
      "epoch": 0.5001153668666359,
      "grad_norm": 0.2873613238334656,
      "learning_rate": 0.00019730311099068771,
      "loss": 0.9475,
      "step": 4335
    },
    {
      "epoch": 0.5006922011998154,
      "grad_norm": 0.3170192539691925,
      "learning_rate": 0.00019728760636892267,
      "loss": 0.9549,
      "step": 4340
    },
    {
      "epoch": 0.501269035532995,
      "grad_norm": 0.3064529299736023,
      "learning_rate": 0.0001972720579189015,
      "loss": 0.9708,
      "step": 4345
    },
    {
      "epoch": 0.5018458698661744,
      "grad_norm": 0.30155086517333984,
      "learning_rate": 0.00019725646564762878,
      "loss": 0.9321,
      "step": 4350
    },
    {
      "epoch": 0.502422704199354,
      "grad_norm": 0.3222599923610687,
      "learning_rate": 0.00019724082956212895,
      "loss": 0.9894,
      "step": 4355
    },
    {
      "epoch": 0.5029995385325334,
      "grad_norm": 0.30701473355293274,
      "learning_rate": 0.00019722514966944604,
      "loss": 0.9928,
      "step": 4360
    },
    {
      "epoch": 0.503576372865713,
      "grad_norm": 0.32665449380874634,
      "learning_rate": 0.00019720942597664385,
      "loss": 1.0055,
      "step": 4365
    },
    {
      "epoch": 0.5041532071988925,
      "grad_norm": 0.2928261458873749,
      "learning_rate": 0.00019719365849080598,
      "loss": 0.9232,
      "step": 4370
    },
    {
      "epoch": 0.504730041532072,
      "grad_norm": 0.3082767128944397,
      "learning_rate": 0.00019717784721903572,
      "loss": 1.019,
      "step": 4375
    },
    {
      "epoch": 0.5053068758652515,
      "grad_norm": 0.2902364432811737,
      "learning_rate": 0.00019716199216845604,
      "loss": 1.0212,
      "step": 4380
    },
    {
      "epoch": 0.505883710198431,
      "grad_norm": 0.30737555027008057,
      "learning_rate": 0.0001971460933462097,
      "loss": 0.9832,
      "step": 4385
    },
    {
      "epoch": 0.5064605445316105,
      "grad_norm": 0.27934834361076355,
      "learning_rate": 0.00019713015075945912,
      "loss": 0.9313,
      "step": 4390
    },
    {
      "epoch": 0.5070373788647901,
      "grad_norm": 0.29655081033706665,
      "learning_rate": 0.00019711416441538652,
      "loss": 0.9946,
      "step": 4395
    },
    {
      "epoch": 0.5076142131979695,
      "grad_norm": 0.31587305665016174,
      "learning_rate": 0.00019709813432119372,
      "loss": 0.9534,
      "step": 4400
    },
    {
      "epoch": 0.5081910475311491,
      "grad_norm": 0.3084726631641388,
      "learning_rate": 0.00019708206048410233,
      "loss": 1.0091,
      "step": 4405
    },
    {
      "epoch": 0.5087678818643285,
      "grad_norm": 0.3127329647541046,
      "learning_rate": 0.00019706594291135366,
      "loss": 0.9801,
      "step": 4410
    },
    {
      "epoch": 0.5093447161975081,
      "grad_norm": 0.30119284987449646,
      "learning_rate": 0.00019704978161020871,
      "loss": 0.9964,
      "step": 4415
    },
    {
      "epoch": 0.5099215505306876,
      "grad_norm": 0.31809040904045105,
      "learning_rate": 0.00019703357658794817,
      "loss": 1.0082,
      "step": 4420
    },
    {
      "epoch": 0.5104983848638671,
      "grad_norm": 0.2844845652580261,
      "learning_rate": 0.0001970173278518724,
      "loss": 0.961,
      "step": 4425
    },
    {
      "epoch": 0.5110752191970466,
      "grad_norm": 0.3252412676811218,
      "learning_rate": 0.00019700103540930153,
      "loss": 0.9484,
      "step": 4430
    },
    {
      "epoch": 0.5116520535302261,
      "grad_norm": 0.32319343090057373,
      "learning_rate": 0.00019698469926757533,
      "loss": 0.9962,
      "step": 4435
    },
    {
      "epoch": 0.5122288878634056,
      "grad_norm": 0.2903590202331543,
      "learning_rate": 0.00019696831943405324,
      "loss": 0.971,
      "step": 4440
    },
    {
      "epoch": 0.5128057221965852,
      "grad_norm": 0.30612602829933167,
      "learning_rate": 0.00019695189591611441,
      "loss": 1.0329,
      "step": 4445
    },
    {
      "epoch": 0.5133825565297646,
      "grad_norm": 0.3082347810268402,
      "learning_rate": 0.00019693542872115772,
      "loss": 0.9756,
      "step": 4450
    },
    {
      "epoch": 0.5139593908629442,
      "grad_norm": 0.31226348876953125,
      "learning_rate": 0.0001969189178566016,
      "loss": 0.9693,
      "step": 4455
    },
    {
      "epoch": 0.5145362251961236,
      "grad_norm": 0.30261221528053284,
      "learning_rate": 0.00019690236332988427,
      "loss": 0.9734,
      "step": 4460
    },
    {
      "epoch": 0.5151130595293032,
      "grad_norm": 0.301273375749588,
      "learning_rate": 0.00019688576514846357,
      "loss": 1.0081,
      "step": 4465
    },
    {
      "epoch": 0.5156898938624827,
      "grad_norm": 0.27689802646636963,
      "learning_rate": 0.00019686912331981702,
      "loss": 1.0053,
      "step": 4470
    },
    {
      "epoch": 0.5162667281956622,
      "grad_norm": 0.3226846158504486,
      "learning_rate": 0.00019685243785144175,
      "loss": 0.9996,
      "step": 4475
    },
    {
      "epoch": 0.5168435625288417,
      "grad_norm": 0.29088863730430603,
      "learning_rate": 0.00019683570875085469,
      "loss": 0.9376,
      "step": 4480
    },
    {
      "epoch": 0.5174203968620212,
      "grad_norm": 0.3116596043109894,
      "learning_rate": 0.00019681893602559224,
      "loss": 0.9622,
      "step": 4485
    },
    {
      "epoch": 0.5179972311952007,
      "grad_norm": 0.302528977394104,
      "learning_rate": 0.00019680211968321057,
      "loss": 1.002,
      "step": 4490
    },
    {
      "epoch": 0.5185740655283803,
      "grad_norm": 0.30661019682884216,
      "learning_rate": 0.0001967852597312855,
      "loss": 0.9535,
      "step": 4495
    },
    {
      "epoch": 0.5191508998615597,
      "grad_norm": 0.31948038935661316,
      "learning_rate": 0.00019676835617741249,
      "loss": 0.9869,
      "step": 4500
    },
    {
      "epoch": 0.5197277341947393,
      "grad_norm": 0.32470494508743286,
      "learning_rate": 0.0001967514090292065,
      "loss": 1.0095,
      "step": 4505
    },
    {
      "epoch": 0.5203045685279187,
      "grad_norm": 0.3115653395652771,
      "learning_rate": 0.0001967344182943024,
      "loss": 0.9293,
      "step": 4510
    },
    {
      "epoch": 0.5208814028610983,
      "grad_norm": 0.2958611845970154,
      "learning_rate": 0.0001967173839803545,
      "loss": 0.9401,
      "step": 4515
    },
    {
      "epoch": 0.5214582371942778,
      "grad_norm": 0.30965474247932434,
      "learning_rate": 0.00019670030609503678,
      "loss": 0.967,
      "step": 4520
    },
    {
      "epoch": 0.5220350715274573,
      "grad_norm": 0.3066132068634033,
      "learning_rate": 0.00019668318464604285,
      "loss": 0.9297,
      "step": 4525
    },
    {
      "epoch": 0.5226119058606368,
      "grad_norm": 0.29480454325675964,
      "learning_rate": 0.00019666601964108598,
      "loss": 1.0089,
      "step": 4530
    },
    {
      "epoch": 0.5231887401938163,
      "grad_norm": 0.3011881709098816,
      "learning_rate": 0.000196648811087899,
      "loss": 0.9437,
      "step": 4535
    },
    {
      "epoch": 0.5237655745269958,
      "grad_norm": 0.3015040159225464,
      "learning_rate": 0.00019663155899423445,
      "loss": 1.0103,
      "step": 4540
    },
    {
      "epoch": 0.5243424088601754,
      "grad_norm": 0.2976595163345337,
      "learning_rate": 0.00019661426336786445,
      "loss": 1.0073,
      "step": 4545
    },
    {
      "epoch": 0.5249192431933549,
      "grad_norm": 0.2739868760108948,
      "learning_rate": 0.0001965969242165806,
      "loss": 0.9681,
      "step": 4550
    },
    {
      "epoch": 0.5254960775265344,
      "grad_norm": 0.3420281708240509,
      "learning_rate": 0.00019657954154819434,
      "loss": 0.9661,
      "step": 4555
    },
    {
      "epoch": 0.5260729118597139,
      "grad_norm": 0.2705232501029968,
      "learning_rate": 0.00019656211537053654,
      "loss": 0.9766,
      "step": 4560
    },
    {
      "epoch": 0.5266497461928934,
      "grad_norm": 0.3390316069126129,
      "learning_rate": 0.0001965446456914577,
      "loss": 0.9616,
      "step": 4565
    },
    {
      "epoch": 0.527226580526073,
      "grad_norm": 0.31515753269195557,
      "learning_rate": 0.00019652713251882802,
      "loss": 0.928,
      "step": 4570
    },
    {
      "epoch": 0.5278034148592524,
      "grad_norm": 0.31040945649147034,
      "learning_rate": 0.00019650957586053716,
      "loss": 1.0218,
      "step": 4575
    },
    {
      "epoch": 0.528380249192432,
      "grad_norm": 0.3205651342868805,
      "learning_rate": 0.00019649197572449442,
      "loss": 1.0069,
      "step": 4580
    },
    {
      "epoch": 0.5289570835256114,
      "grad_norm": 0.32492998242378235,
      "learning_rate": 0.00019647433211862877,
      "loss": 0.9838,
      "step": 4585
    },
    {
      "epoch": 0.529533917858791,
      "grad_norm": 0.33265379071235657,
      "learning_rate": 0.00019645664505088864,
      "loss": 0.9834,
      "step": 4590
    },
    {
      "epoch": 0.5301107521919705,
      "grad_norm": 0.2898595631122589,
      "learning_rate": 0.00019643891452924205,
      "loss": 0.9763,
      "step": 4595
    },
    {
      "epoch": 0.53068758652515,
      "grad_norm": 0.3158058226108551,
      "learning_rate": 0.0001964211405616767,
      "loss": 0.9989,
      "step": 4600
    },
    {
      "epoch": 0.5312644208583295,
      "grad_norm": 0.3105302155017853,
      "learning_rate": 0.00019640332315619977,
      "loss": 0.9999,
      "step": 4605
    },
    {
      "epoch": 0.531841255191509,
      "grad_norm": 0.3675985634326935,
      "learning_rate": 0.000196385462320838,
      "loss": 0.9716,
      "step": 4610
    },
    {
      "epoch": 0.5324180895246885,
      "grad_norm": 0.3245541453361511,
      "learning_rate": 0.00019636755806363783,
      "loss": 0.9309,
      "step": 4615
    },
    {
      "epoch": 0.5329949238578681,
      "grad_norm": 0.39721789956092834,
      "learning_rate": 0.00019634961039266506,
      "loss": 0.9995,
      "step": 4620
    },
    {
      "epoch": 0.5335717581910475,
      "grad_norm": 0.32997554540634155,
      "learning_rate": 0.00019633161931600522,
      "loss": 1.0062,
      "step": 4625
    },
    {
      "epoch": 0.5341485925242271,
      "grad_norm": 0.30046844482421875,
      "learning_rate": 0.00019631358484176325,
      "loss": 0.9584,
      "step": 4630
    },
    {
      "epoch": 0.5347254268574065,
      "grad_norm": 0.3374291658401489,
      "learning_rate": 0.0001962955069780638,
      "loss": 1.0601,
      "step": 4635
    },
    {
      "epoch": 0.5353022611905861,
      "grad_norm": 0.3347518742084503,
      "learning_rate": 0.00019627738573305093,
      "loss": 0.9493,
      "step": 4640
    },
    {
      "epoch": 0.5358790955237656,
      "grad_norm": 0.3199424147605896,
      "learning_rate": 0.00019625922111488831,
      "loss": 0.9892,
      "step": 4645
    },
    {
      "epoch": 0.5364559298569451,
      "grad_norm": 0.30987706780433655,
      "learning_rate": 0.00019624101313175918,
      "loss": 0.9624,
      "step": 4650
    },
    {
      "epoch": 0.5370327641901246,
      "grad_norm": 0.2967313826084137,
      "learning_rate": 0.00019622276179186615,
      "loss": 0.977,
      "step": 4655
    },
    {
      "epoch": 0.5376095985233041,
      "grad_norm": 0.32611724734306335,
      "learning_rate": 0.00019620446710343162,
      "loss": 0.994,
      "step": 4660
    },
    {
      "epoch": 0.5381864328564836,
      "grad_norm": 0.32929539680480957,
      "learning_rate": 0.00019618612907469732,
      "loss": 0.9307,
      "step": 4665
    },
    {
      "epoch": 0.5387632671896632,
      "grad_norm": 0.36718735098838806,
      "learning_rate": 0.00019616774771392457,
      "loss": 0.9736,
      "step": 4670
    },
    {
      "epoch": 0.5393401015228426,
      "grad_norm": 0.3094222843647003,
      "learning_rate": 0.0001961493230293942,
      "loss": 1.0393,
      "step": 4675
    },
    {
      "epoch": 0.5399169358560222,
      "grad_norm": 0.30510690808296204,
      "learning_rate": 0.00019613085502940658,
      "loss": 0.9187,
      "step": 4680
    },
    {
      "epoch": 0.5404937701892016,
      "grad_norm": 0.302712082862854,
      "learning_rate": 0.0001961123437222816,
      "loss": 0.991,
      "step": 4685
    },
    {
      "epoch": 0.5410706045223812,
      "grad_norm": 0.3168560266494751,
      "learning_rate": 0.0001960937891163586,
      "loss": 0.9739,
      "step": 4690
    },
    {
      "epoch": 0.5416474388555607,
      "grad_norm": 0.3183704614639282,
      "learning_rate": 0.00019607519121999647,
      "loss": 0.9794,
      "step": 4695
    },
    {
      "epoch": 0.5422242731887402,
      "grad_norm": 0.2977442741394043,
      "learning_rate": 0.00019605655004157363,
      "loss": 0.9596,
      "step": 4700
    },
    {
      "epoch": 0.5428011075219197,
      "grad_norm": 0.34837374091148376,
      "learning_rate": 0.00019603786558948795,
      "loss": 1.0085,
      "step": 4705
    },
    {
      "epoch": 0.5433779418550992,
      "grad_norm": 0.29827648401260376,
      "learning_rate": 0.00019601913787215683,
      "loss": 0.9767,
      "step": 4710
    },
    {
      "epoch": 0.5439547761882787,
      "grad_norm": 0.3032287061214447,
      "learning_rate": 0.0001960003668980171,
      "loss": 0.9961,
      "step": 4715
    },
    {
      "epoch": 0.5445316105214583,
      "grad_norm": 0.3106001019477844,
      "learning_rate": 0.00019598155267552513,
      "loss": 1.0001,
      "step": 4720
    },
    {
      "epoch": 0.5451084448546377,
      "grad_norm": 0.3098970055580139,
      "learning_rate": 0.0001959626952131568,
      "loss": 0.9958,
      "step": 4725
    },
    {
      "epoch": 0.5456852791878173,
      "grad_norm": 0.3304188847541809,
      "learning_rate": 0.00019594379451940742,
      "loss": 0.9547,
      "step": 4730
    },
    {
      "epoch": 0.5462621135209967,
      "grad_norm": 0.2954466938972473,
      "learning_rate": 0.0001959248506027918,
      "loss": 0.9614,
      "step": 4735
    },
    {
      "epoch": 0.5468389478541763,
      "grad_norm": 0.3072229325771332,
      "learning_rate": 0.00019590586347184417,
      "loss": 0.931,
      "step": 4740
    },
    {
      "epoch": 0.5474157821873558,
      "grad_norm": 0.2952127754688263,
      "learning_rate": 0.00019588683313511828,
      "loss": 0.9975,
      "step": 4745
    },
    {
      "epoch": 0.5479926165205353,
      "grad_norm": 0.27312129735946655,
      "learning_rate": 0.00019586775960118738,
      "loss": 0.9927,
      "step": 4750
    },
    {
      "epoch": 0.5485694508537148,
      "grad_norm": 0.30434802174568176,
      "learning_rate": 0.00019584864287864408,
      "loss": 1.0163,
      "step": 4755
    },
    {
      "epoch": 0.5491462851868943,
      "grad_norm": 0.3188215494155884,
      "learning_rate": 0.00019582948297610053,
      "loss": 1.0148,
      "step": 4760
    },
    {
      "epoch": 0.5497231195200738,
      "grad_norm": 0.3095898926258087,
      "learning_rate": 0.00019581027990218827,
      "loss": 0.9767,
      "step": 4765
    },
    {
      "epoch": 0.5502999538532534,
      "grad_norm": 0.2899004817008972,
      "learning_rate": 0.0001957910336655584,
      "loss": 1.0051,
      "step": 4770
    },
    {
      "epoch": 0.5508767881864328,
      "grad_norm": 0.3027417063713074,
      "learning_rate": 0.00019577174427488128,
      "loss": 0.9726,
      "step": 4775
    },
    {
      "epoch": 0.5514536225196124,
      "grad_norm": 0.319034218788147,
      "learning_rate": 0.00019575241173884692,
      "loss": 0.9724,
      "step": 4780
    },
    {
      "epoch": 0.5520304568527918,
      "grad_norm": 0.3255804479122162,
      "learning_rate": 0.00019573303606616459,
      "loss": 0.9959,
      "step": 4785
    },
    {
      "epoch": 0.5526072911859714,
      "grad_norm": 0.31864824891090393,
      "learning_rate": 0.00019571361726556307,
      "loss": 0.9799,
      "step": 4790
    },
    {
      "epoch": 0.5531841255191509,
      "grad_norm": 0.3090226352214813,
      "learning_rate": 0.00019569415534579062,
      "loss": 0.9547,
      "step": 4795
    },
    {
      "epoch": 0.5537609598523304,
      "grad_norm": 0.342669814825058,
      "learning_rate": 0.00019567465031561487,
      "loss": 0.9725,
      "step": 4800
    },
    {
      "epoch": 0.5543377941855099,
      "grad_norm": 0.3012937903404236,
      "learning_rate": 0.0001956551021838228,
      "loss": 1.0521,
      "step": 4805
    },
    {
      "epoch": 0.5549146285186894,
      "grad_norm": 0.30988457798957825,
      "learning_rate": 0.000195635510959221,
      "loss": 0.9583,
      "step": 4810
    },
    {
      "epoch": 0.5554914628518689,
      "grad_norm": 0.29991039633750916,
      "learning_rate": 0.0001956158766506352,
      "loss": 0.976,
      "step": 4815
    },
    {
      "epoch": 0.5560682971850485,
      "grad_norm": 0.300327867269516,
      "learning_rate": 0.00019559619926691086,
      "loss": 0.9673,
      "step": 4820
    },
    {
      "epoch": 0.5566451315182279,
      "grad_norm": 0.3010301887989044,
      "learning_rate": 0.00019557647881691254,
      "loss": 0.9791,
      "step": 4825
    },
    {
      "epoch": 0.5572219658514075,
      "grad_norm": 0.3046521246433258,
      "learning_rate": 0.00019555671530952445,
      "loss": 0.9923,
      "step": 4830
    },
    {
      "epoch": 0.5577988001845869,
      "grad_norm": 0.2898954451084137,
      "learning_rate": 0.00019553690875365,
      "loss": 0.9813,
      "step": 4835
    },
    {
      "epoch": 0.5583756345177665,
      "grad_norm": 0.30507126450538635,
      "learning_rate": 0.00019551705915821216,
      "loss": 0.9931,
      "step": 4840
    },
    {
      "epoch": 0.558952468850946,
      "grad_norm": 0.3123108446598053,
      "learning_rate": 0.00019549716653215318,
      "loss": 0.9476,
      "step": 4845
    },
    {
      "epoch": 0.5595293031841255,
      "grad_norm": 0.2869836986064911,
      "learning_rate": 0.00019547723088443467,
      "loss": 0.9031,
      "step": 4850
    },
    {
      "epoch": 0.560106137517305,
      "grad_norm": 0.4195156991481781,
      "learning_rate": 0.00019545725222403775,
      "loss": 0.9947,
      "step": 4855
    },
    {
      "epoch": 0.5606829718504845,
      "grad_norm": 0.3179481029510498,
      "learning_rate": 0.00019543723055996282,
      "loss": 1.0052,
      "step": 4860
    },
    {
      "epoch": 0.561259806183664,
      "grad_norm": 0.30937254428863525,
      "learning_rate": 0.00019541716590122971,
      "loss": 0.9722,
      "step": 4865
    },
    {
      "epoch": 0.5618366405168436,
      "grad_norm": 0.3025272488594055,
      "learning_rate": 0.00019539705825687755,
      "loss": 0.9893,
      "step": 4870
    },
    {
      "epoch": 0.562413474850023,
      "grad_norm": 0.28743186593055725,
      "learning_rate": 0.00019537690763596487,
      "loss": 0.9719,
      "step": 4875
    },
    {
      "epoch": 0.5629903091832026,
      "grad_norm": 0.3209276795387268,
      "learning_rate": 0.00019535671404756957,
      "loss": 1.0181,
      "step": 4880
    },
    {
      "epoch": 0.563567143516382,
      "grad_norm": 0.3113163411617279,
      "learning_rate": 0.0001953364775007889,
      "loss": 1.0092,
      "step": 4885
    },
    {
      "epoch": 0.5641439778495616,
      "grad_norm": 0.30241066217422485,
      "learning_rate": 0.00019531619800473952,
      "loss": 0.9724,
      "step": 4890
    },
    {
      "epoch": 0.5647208121827412,
      "grad_norm": 0.3012004792690277,
      "learning_rate": 0.0001952958755685573,
      "loss": 1.0069,
      "step": 4895
    },
    {
      "epoch": 0.5652976465159206,
      "grad_norm": 4.258053302764893,
      "learning_rate": 0.00019527551020139759,
      "loss": 0.9895,
      "step": 4900
    },
    {
      "epoch": 0.5658744808491002,
      "grad_norm": 0.3009074926376343,
      "learning_rate": 0.00019525510191243498,
      "loss": 1.0587,
      "step": 4905
    },
    {
      "epoch": 0.5664513151822796,
      "grad_norm": 0.2939055860042572,
      "learning_rate": 0.0001952346507108635,
      "loss": 0.9872,
      "step": 4910
    },
    {
      "epoch": 0.5670281495154592,
      "grad_norm": 0.2984018623828888,
      "learning_rate": 0.00019521415660589644,
      "loss": 0.9779,
      "step": 4915
    },
    {
      "epoch": 0.5676049838486387,
      "grad_norm": 0.30733126401901245,
      "learning_rate": 0.0001951936196067664,
      "loss": 0.9718,
      "step": 4920
    },
    {
      "epoch": 0.5681818181818182,
      "grad_norm": 0.28078576922416687,
      "learning_rate": 0.00019517303972272536,
      "loss": 0.9951,
      "step": 4925
    },
    {
      "epoch": 0.5687586525149977,
      "grad_norm": 0.31320542097091675,
      "learning_rate": 0.0001951524169630446,
      "loss": 1.0027,
      "step": 4930
    },
    {
      "epoch": 0.5693354868481773,
      "grad_norm": 0.2962873578071594,
      "learning_rate": 0.00019513175133701474,
      "loss": 0.9434,
      "step": 4935
    },
    {
      "epoch": 0.5699123211813567,
      "grad_norm": 0.31314268708229065,
      "learning_rate": 0.0001951110428539456,
      "loss": 0.9575,
      "step": 4940
    },
    {
      "epoch": 0.5704891555145363,
      "grad_norm": 0.30815884470939636,
      "learning_rate": 0.00019509029152316648,
      "loss": 1.0102,
      "step": 4945
    },
    {
      "epoch": 0.5710659898477157,
      "grad_norm": 0.31001782417297363,
      "learning_rate": 0.00019506949735402588,
      "loss": 0.9899,
      "step": 4950
    },
    {
      "epoch": 0.5716428241808953,
      "grad_norm": 0.2955753803253174,
      "learning_rate": 0.0001950486603558916,
      "loss": 0.9249,
      "step": 4955
    },
    {
      "epoch": 0.5722196585140747,
      "grad_norm": 0.28989970684051514,
      "learning_rate": 0.00019502778053815073,
      "loss": 0.9685,
      "step": 4960
    },
    {
      "epoch": 0.5727964928472543,
      "grad_norm": 0.3111148178577423,
      "learning_rate": 0.00019500685791020968,
      "loss": 0.9826,
      "step": 4965
    },
    {
      "epoch": 0.5733733271804338,
      "grad_norm": 0.32383227348327637,
      "learning_rate": 0.00019498589248149415,
      "loss": 0.9688,
      "step": 4970
    },
    {
      "epoch": 0.5739501615136133,
      "grad_norm": 0.29687562584877014,
      "learning_rate": 0.0001949648842614491,
      "loss": 0.924,
      "step": 4975
    },
    {
      "epoch": 0.5745269958467928,
      "grad_norm": 0.3350512683391571,
      "learning_rate": 0.00019494383325953875,
      "loss": 1.0041,
      "step": 4980
    },
    {
      "epoch": 0.5751038301799724,
      "grad_norm": 0.2898659110069275,
      "learning_rate": 0.00019492273948524665,
      "loss": 0.9846,
      "step": 4985
    },
    {
      "epoch": 0.5756806645131518,
      "grad_norm": 0.31808215379714966,
      "learning_rate": 0.00019490160294807556,
      "loss": 0.977,
      "step": 4990
    },
    {
      "epoch": 0.5762574988463314,
      "grad_norm": 0.3810345530509949,
      "learning_rate": 0.00019488042365754758,
      "loss": 1.0013,
      "step": 4995
    },
    {
      "epoch": 0.5768343331795108,
      "grad_norm": 0.27463066577911377,
      "learning_rate": 0.00019485920162320394,
      "loss": 0.947,
      "step": 5000
    },
    {
      "epoch": 0.5774111675126904,
      "grad_norm": 0.30058354139328003,
      "learning_rate": 0.0001948379368546053,
      "loss": 0.936,
      "step": 5005
    },
    {
      "epoch": 0.5779880018458698,
      "grad_norm": 0.34601083397865295,
      "learning_rate": 0.0001948166293613314,
      "loss": 1.0091,
      "step": 5010
    },
    {
      "epoch": 0.5785648361790494,
      "grad_norm": 0.2960231602191925,
      "learning_rate": 0.00019479527915298135,
      "loss": 0.9458,
      "step": 5015
    },
    {
      "epoch": 0.5791416705122289,
      "grad_norm": 0.2993627190589905,
      "learning_rate": 0.00019477388623917344,
      "loss": 0.9932,
      "step": 5020
    },
    {
      "epoch": 0.5797185048454084,
      "grad_norm": 0.3270370066165924,
      "learning_rate": 0.00019475245062954523,
      "loss": 0.9759,
      "step": 5025
    },
    {
      "epoch": 0.5802953391785879,
      "grad_norm": 0.32682403922080994,
      "learning_rate": 0.00019473097233375355,
      "loss": 0.9963,
      "step": 5030
    },
    {
      "epoch": 0.5808721735117675,
      "grad_norm": 0.29264119267463684,
      "learning_rate": 0.00019470945136147431,
      "loss": 0.9417,
      "step": 5035
    },
    {
      "epoch": 0.5814490078449469,
      "grad_norm": 0.3218499720096588,
      "learning_rate": 0.00019468788772240286,
      "loss": 0.962,
      "step": 5040
    },
    {
      "epoch": 0.5820258421781265,
      "grad_norm": 0.2924599349498749,
      "learning_rate": 0.00019466628142625358,
      "loss": 0.9967,
      "step": 5045
    },
    {
      "epoch": 0.5826026765113059,
      "grad_norm": 0.3072007894515991,
      "learning_rate": 0.00019464463248276018,
      "loss": 0.9936,
      "step": 5050
    },
    {
      "epoch": 0.5831795108444855,
      "grad_norm": 0.33545422554016113,
      "learning_rate": 0.00019462294090167554,
      "loss": 0.9554,
      "step": 5055
    },
    {
      "epoch": 0.583756345177665,
      "grad_norm": 0.3114611506462097,
      "learning_rate": 0.0001946012066927718,
      "loss": 0.9685,
      "step": 5060
    },
    {
      "epoch": 0.5843331795108445,
      "grad_norm": 0.3209623396396637,
      "learning_rate": 0.00019457942986584022,
      "loss": 0.9987,
      "step": 5065
    },
    {
      "epoch": 0.584910013844024,
      "grad_norm": 0.3333243131637573,
      "learning_rate": 0.0001945576104306913,
      "loss": 1.0107,
      "step": 5070
    },
    {
      "epoch": 0.5854868481772035,
      "grad_norm": 0.31956392526626587,
      "learning_rate": 0.0001945357483971548,
      "loss": 0.9694,
      "step": 5075
    },
    {
      "epoch": 0.586063682510383,
      "grad_norm": 0.31336089968681335,
      "learning_rate": 0.0001945138437750795,
      "loss": 0.9743,
      "step": 5080
    },
    {
      "epoch": 0.5866405168435626,
      "grad_norm": 0.311122328042984,
      "learning_rate": 0.00019449189657433358,
      "loss": 0.9676,
      "step": 5085
    },
    {
      "epoch": 0.587217351176742,
      "grad_norm": 0.30517736077308655,
      "learning_rate": 0.00019446990680480424,
      "loss": 1.0225,
      "step": 5090
    },
    {
      "epoch": 0.5877941855099216,
      "grad_norm": 0.3143622875213623,
      "learning_rate": 0.00019444787447639791,
      "loss": 0.9529,
      "step": 5095
    },
    {
      "epoch": 0.588371019843101,
      "grad_norm": 0.29679304361343384,
      "learning_rate": 0.00019442579959904024,
      "loss": 0.984,
      "step": 5100
    },
    {
      "epoch": 0.5889478541762806,
      "grad_norm": 0.32181552052497864,
      "learning_rate": 0.00019440368218267596,
      "loss": 0.9447,
      "step": 5105
    },
    {
      "epoch": 0.58952468850946,
      "grad_norm": 0.3025820851325989,
      "learning_rate": 0.00019438152223726904,
      "loss": 0.98,
      "step": 5110
    },
    {
      "epoch": 0.5901015228426396,
      "grad_norm": 0.3288577198982239,
      "learning_rate": 0.0001943593197728026,
      "loss": 0.9507,
      "step": 5115
    },
    {
      "epoch": 0.5906783571758191,
      "grad_norm": 0.30060875415802,
      "learning_rate": 0.0001943370747992788,
      "loss": 0.9607,
      "step": 5120
    },
    {
      "epoch": 0.5912551915089986,
      "grad_norm": 0.34586185216903687,
      "learning_rate": 0.00019431478732671916,
      "loss": 0.9845,
      "step": 5125
    },
    {
      "epoch": 0.5918320258421781,
      "grad_norm": 0.32572728395462036,
      "learning_rate": 0.00019429245736516415,
      "loss": 1.0146,
      "step": 5130
    },
    {
      "epoch": 0.5924088601753577,
      "grad_norm": 0.39712613821029663,
      "learning_rate": 0.00019427008492467346,
      "loss": 0.9785,
      "step": 5135
    },
    {
      "epoch": 0.5929856945085371,
      "grad_norm": 0.32232365012168884,
      "learning_rate": 0.00019424767001532598,
      "loss": 0.9658,
      "step": 5140
    },
    {
      "epoch": 0.5935625288417167,
      "grad_norm": 0.3060401380062103,
      "learning_rate": 0.00019422521264721962,
      "loss": 1.0234,
      "step": 5145
    },
    {
      "epoch": 0.5941393631748961,
      "grad_norm": 0.33208033442497253,
      "learning_rate": 0.0001942027128304715,
      "loss": 0.9906,
      "step": 5150
    },
    {
      "epoch": 0.5947161975080757,
      "grad_norm": 0.3264749348163605,
      "learning_rate": 0.0001941801705752178,
      "loss": 1.0163,
      "step": 5155
    },
    {
      "epoch": 0.5952930318412551,
      "grad_norm": 0.3408641219139099,
      "learning_rate": 0.00019415758589161385,
      "loss": 0.9299,
      "step": 5160
    },
    {
      "epoch": 0.5958698661744347,
      "grad_norm": 0.2970794141292572,
      "learning_rate": 0.00019413495878983414,
      "loss": 0.9351,
      "step": 5165
    },
    {
      "epoch": 0.5964467005076142,
      "grad_norm": 0.2991337180137634,
      "learning_rate": 0.00019411228928007215,
      "loss": 0.8861,
      "step": 5170
    },
    {
      "epoch": 0.5970235348407937,
      "grad_norm": 0.3135198652744293,
      "learning_rate": 0.0001940895773725406,
      "loss": 0.9857,
      "step": 5175
    },
    {
      "epoch": 0.5976003691739732,
      "grad_norm": 0.3125375807285309,
      "learning_rate": 0.00019406682307747123,
      "loss": 0.9865,
      "step": 5180
    },
    {
      "epoch": 0.5981772035071528,
      "grad_norm": 0.30986109375953674,
      "learning_rate": 0.0001940440264051149,
      "loss": 0.9213,
      "step": 5185
    },
    {
      "epoch": 0.5987540378403322,
      "grad_norm": 0.32293030619621277,
      "learning_rate": 0.00019402118736574155,
      "loss": 0.9697,
      "step": 5190
    },
    {
      "epoch": 0.5993308721735118,
      "grad_norm": 0.304883748292923,
      "learning_rate": 0.0001939983059696402,
      "loss": 0.9424,
      "step": 5195
    },
    {
      "epoch": 0.5999077065066912,
      "grad_norm": 0.30604103207588196,
      "learning_rate": 0.00019397538222711895,
      "loss": 0.9705,
      "step": 5200
    },
    {
      "epoch": 0.6004845408398708,
      "grad_norm": 0.33686572313308716,
      "learning_rate": 0.00019395241614850504,
      "loss": 0.9414,
      "step": 5205
    },
    {
      "epoch": 0.6010613751730502,
      "grad_norm": 0.30786630511283875,
      "learning_rate": 0.0001939294077441447,
      "loss": 0.9526,
      "step": 5210
    },
    {
      "epoch": 0.6016382095062298,
      "grad_norm": 0.31119078397750854,
      "learning_rate": 0.00019390635702440324,
      "loss": 1.0046,
      "step": 5215
    },
    {
      "epoch": 0.6022150438394093,
      "grad_norm": 0.32397225499153137,
      "learning_rate": 0.00019388326399966515,
      "loss": 1.0064,
      "step": 5220
    },
    {
      "epoch": 0.6027918781725888,
      "grad_norm": 0.31831094622612,
      "learning_rate": 0.00019386012868033374,
      "loss": 0.9344,
      "step": 5225
    },
    {
      "epoch": 0.6033687125057683,
      "grad_norm": 0.3024968206882477,
      "learning_rate": 0.00019383695107683165,
      "loss": 0.9749,
      "step": 5230
    },
    {
      "epoch": 0.6039455468389479,
      "grad_norm": 0.2912804186344147,
      "learning_rate": 0.00019381373119960033,
      "loss": 0.9439,
      "step": 5235
    },
    {
      "epoch": 0.6045223811721273,
      "grad_norm": 0.3093802332878113,
      "learning_rate": 0.00019379046905910045,
      "loss": 0.9043,
      "step": 5240
    },
    {
      "epoch": 0.6050992155053069,
      "grad_norm": 0.3029499650001526,
      "learning_rate": 0.00019376716466581163,
      "loss": 0.9259,
      "step": 5245
    },
    {
      "epoch": 0.6056760498384864,
      "grad_norm": 0.32635313272476196,
      "learning_rate": 0.00019374381803023252,
      "loss": 0.9789,
      "step": 5250
    },
    {
      "epoch": 0.6062528841716659,
      "grad_norm": 0.31280118227005005,
      "learning_rate": 0.00019372042916288083,
      "loss": 0.9825,
      "step": 5255
    },
    {
      "epoch": 0.6068297185048455,
      "grad_norm": 0.2918443977832794,
      "learning_rate": 0.00019369699807429336,
      "loss": 0.9931,
      "step": 5260
    },
    {
      "epoch": 0.6074065528380249,
      "grad_norm": 0.3114100992679596,
      "learning_rate": 0.00019367352477502576,
      "loss": 0.9962,
      "step": 5265
    },
    {
      "epoch": 0.6079833871712045,
      "grad_norm": 0.3649381697177887,
      "learning_rate": 0.00019365000927565285,
      "loss": 0.9618,
      "step": 5270
    },
    {
      "epoch": 0.6085602215043839,
      "grad_norm": 0.3101176917552948,
      "learning_rate": 0.00019362645158676843,
      "loss": 0.9584,
      "step": 5275
    },
    {
      "epoch": 0.6091370558375635,
      "grad_norm": 0.3260380029678345,
      "learning_rate": 0.0001936028517189852,
      "loss": 0.9658,
      "step": 5280
    },
    {
      "epoch": 0.609713890170743,
      "grad_norm": 0.3273615539073944,
      "learning_rate": 0.00019357920968293506,
      "loss": 0.9475,
      "step": 5285
    },
    {
      "epoch": 0.6102907245039225,
      "grad_norm": 0.334358811378479,
      "learning_rate": 0.00019355552548926873,
      "loss": 0.9985,
      "step": 5290
    },
    {
      "epoch": 0.610867558837102,
      "grad_norm": 0.3484663665294647,
      "learning_rate": 0.00019353179914865596,
      "loss": 0.9556,
      "step": 5295
    },
    {
      "epoch": 0.6114443931702815,
      "grad_norm": 0.3266102373600006,
      "learning_rate": 0.00019350803067178556,
      "loss": 0.9634,
      "step": 5300
    },
    {
      "epoch": 0.612021227503461,
      "grad_norm": 0.29789167642593384,
      "learning_rate": 0.00019348422006936527,
      "loss": 0.9699,
      "step": 5305
    },
    {
      "epoch": 0.6125980618366406,
      "grad_norm": 0.31655624508857727,
      "learning_rate": 0.00019346036735212177,
      "loss": 0.9982,
      "step": 5310
    },
    {
      "epoch": 0.61317489616982,
      "grad_norm": 0.3286411762237549,
      "learning_rate": 0.0001934364725308008,
      "loss": 0.9968,
      "step": 5315
    },
    {
      "epoch": 0.6137517305029996,
      "grad_norm": 0.2995690703392029,
      "learning_rate": 0.00019341253561616704,
      "loss": 0.9343,
      "step": 5320
    },
    {
      "epoch": 0.614328564836179,
      "grad_norm": 0.33955976366996765,
      "learning_rate": 0.00019338855661900405,
      "loss": 0.9588,
      "step": 5325
    },
    {
      "epoch": 0.6149053991693586,
      "grad_norm": 0.3300839960575104,
      "learning_rate": 0.00019336453555011447,
      "loss": 0.9648,
      "step": 5330
    },
    {
      "epoch": 0.6154822335025381,
      "grad_norm": 0.30221325159072876,
      "learning_rate": 0.0001933404724203198,
      "loss": 0.9968,
      "step": 5335
    },
    {
      "epoch": 0.6160590678357176,
      "grad_norm": 0.3104861080646515,
      "learning_rate": 0.00019331636724046058,
      "loss": 1.0049,
      "step": 5340
    },
    {
      "epoch": 0.6166359021688971,
      "grad_norm": 0.310660719871521,
      "learning_rate": 0.0001932922200213962,
      "loss": 0.9425,
      "step": 5345
    },
    {
      "epoch": 0.6172127365020766,
      "grad_norm": 0.2998371422290802,
      "learning_rate": 0.00019326803077400503,
      "loss": 0.9332,
      "step": 5350
    },
    {
      "epoch": 0.6177895708352561,
      "grad_norm": 0.3054348826408386,
      "learning_rate": 0.00019324379950918437,
      "loss": 0.9569,
      "step": 5355
    },
    {
      "epoch": 0.6183664051684357,
      "grad_norm": 0.31838247179985046,
      "learning_rate": 0.00019321952623785048,
      "loss": 0.957,
      "step": 5360
    },
    {
      "epoch": 0.6189432395016151,
      "grad_norm": 0.30139824748039246,
      "learning_rate": 0.00019319521097093846,
      "loss": 0.9397,
      "step": 5365
    },
    {
      "epoch": 0.6195200738347947,
      "grad_norm": 0.3231588900089264,
      "learning_rate": 0.00019317085371940246,
      "loss": 0.9888,
      "step": 5370
    },
    {
      "epoch": 0.6200969081679741,
      "grad_norm": 0.2997867465019226,
      "learning_rate": 0.00019314645449421543,
      "loss": 0.9553,
      "step": 5375
    },
    {
      "epoch": 0.6206737425011537,
      "grad_norm": 0.2987963855266571,
      "learning_rate": 0.00019312201330636927,
      "loss": 0.9653,
      "step": 5380
    },
    {
      "epoch": 0.6212505768343332,
      "grad_norm": 0.28030556440353394,
      "learning_rate": 0.00019309753016687477,
      "loss": 0.9596,
      "step": 5385
    },
    {
      "epoch": 0.6218274111675127,
      "grad_norm": 0.3068777620792389,
      "learning_rate": 0.00019307300508676165,
      "loss": 0.9994,
      "step": 5390
    },
    {
      "epoch": 0.6224042455006922,
      "grad_norm": 0.33228716254234314,
      "learning_rate": 0.00019304843807707852,
      "loss": 0.9922,
      "step": 5395
    },
    {
      "epoch": 0.6229810798338717,
      "grad_norm": 0.31027698516845703,
      "learning_rate": 0.00019302382914889284,
      "loss": 1.0011,
      "step": 5400
    },
    {
      "epoch": 0.6235579141670512,
      "grad_norm": 0.29779738187789917,
      "learning_rate": 0.00019299917831329099,
      "loss": 0.995,
      "step": 5405
    },
    {
      "epoch": 0.6241347485002308,
      "grad_norm": 0.3637996315956116,
      "learning_rate": 0.0001929744855813782,
      "loss": 0.9827,
      "step": 5410
    },
    {
      "epoch": 0.6247115828334102,
      "grad_norm": 0.31740111112594604,
      "learning_rate": 0.00019294975096427862,
      "loss": 0.9959,
      "step": 5415
    },
    {
      "epoch": 0.6252884171665898,
      "grad_norm": 0.34880760312080383,
      "learning_rate": 0.0001929249744731352,
      "loss": 0.9982,
      "step": 5420
    },
    {
      "epoch": 0.6258652514997692,
      "grad_norm": 0.3125605285167694,
      "learning_rate": 0.0001929001561191099,
      "loss": 0.9531,
      "step": 5425
    },
    {
      "epoch": 0.6264420858329488,
      "grad_norm": 0.34269124269485474,
      "learning_rate": 0.00019287529591338333,
      "loss": 1.0116,
      "step": 5430
    },
    {
      "epoch": 0.6270189201661283,
      "grad_norm": 0.3100661039352417,
      "learning_rate": 0.00019285039386715512,
      "loss": 0.9815,
      "step": 5435
    },
    {
      "epoch": 0.6275957544993078,
      "grad_norm": 0.35185715556144714,
      "learning_rate": 0.00019282544999164365,
      "loss": 0.9986,
      "step": 5440
    },
    {
      "epoch": 0.6281725888324873,
      "grad_norm": 0.29009369015693665,
      "learning_rate": 0.0001928004642980862,
      "loss": 0.9381,
      "step": 5445
    },
    {
      "epoch": 0.6287494231656668,
      "grad_norm": 0.3412320017814636,
      "learning_rate": 0.00019277543679773889,
      "loss": 0.945,
      "step": 5450
    },
    {
      "epoch": 0.6293262574988463,
      "grad_norm": 0.3375879228115082,
      "learning_rate": 0.00019275036750187664,
      "loss": 1.0051,
      "step": 5455
    },
    {
      "epoch": 0.6299030918320259,
      "grad_norm": 0.3794129490852356,
      "learning_rate": 0.00019272525642179323,
      "loss": 0.9496,
      "step": 5460
    },
    {
      "epoch": 0.6304799261652053,
      "grad_norm": 0.2998787462711334,
      "learning_rate": 0.00019270010356880124,
      "loss": 0.9871,
      "step": 5465
    },
    {
      "epoch": 0.6310567604983849,
      "grad_norm": 0.3201323449611664,
      "learning_rate": 0.00019267490895423208,
      "loss": 0.9909,
      "step": 5470
    },
    {
      "epoch": 0.6316335948315643,
      "grad_norm": 0.30740267038345337,
      "learning_rate": 0.00019264967258943595,
      "loss": 0.9674,
      "step": 5475
    },
    {
      "epoch": 0.6322104291647439,
      "grad_norm": 0.31996774673461914,
      "learning_rate": 0.00019262439448578195,
      "loss": 0.9257,
      "step": 5480
    },
    {
      "epoch": 0.6327872634979234,
      "grad_norm": 0.31498971581459045,
      "learning_rate": 0.00019259907465465784,
      "loss": 0.9565,
      "step": 5485
    },
    {
      "epoch": 0.6333640978311029,
      "grad_norm": 0.3249649405479431,
      "learning_rate": 0.0001925737131074703,
      "loss": 0.951,
      "step": 5490
    },
    {
      "epoch": 0.6339409321642824,
      "grad_norm": 0.29183363914489746,
      "learning_rate": 0.00019254830985564474,
      "loss": 0.9347,
      "step": 5495
    },
    {
      "epoch": 0.6345177664974619,
      "grad_norm": 0.3501313626766205,
      "learning_rate": 0.0001925228649106254,
      "loss": 0.98,
      "step": 5500
    },
    {
      "epoch": 0.6350946008306414,
      "grad_norm": 0.3065842390060425,
      "learning_rate": 0.00019249737828387522,
      "loss": 0.9954,
      "step": 5505
    },
    {
      "epoch": 0.635671435163821,
      "grad_norm": 0.3278423845767975,
      "learning_rate": 0.000192471849986876,
      "loss": 0.9612,
      "step": 5510
    },
    {
      "epoch": 0.6362482694970004,
      "grad_norm": 0.33244070410728455,
      "learning_rate": 0.0001924462800311283,
      "loss": 0.9869,
      "step": 5515
    },
    {
      "epoch": 0.63682510383018,
      "grad_norm": 0.3195953369140625,
      "learning_rate": 0.00019242066842815146,
      "loss": 0.9758,
      "step": 5520
    },
    {
      "epoch": 0.6374019381633594,
      "grad_norm": 0.3243964910507202,
      "learning_rate": 0.0001923950151894835,
      "loss": 0.9614,
      "step": 5525
    },
    {
      "epoch": 0.637978772496539,
      "grad_norm": 0.3167535662651062,
      "learning_rate": 0.0001923693203266813,
      "loss": 0.9606,
      "step": 5530
    },
    {
      "epoch": 0.6385556068297185,
      "grad_norm": 0.32592856884002686,
      "learning_rate": 0.00019234358385132038,
      "loss": 0.9483,
      "step": 5535
    },
    {
      "epoch": 0.639132441162898,
      "grad_norm": 0.31928128004074097,
      "learning_rate": 0.00019231780577499516,
      "loss": 0.9309,
      "step": 5540
    },
    {
      "epoch": 0.6397092754960775,
      "grad_norm": 0.3262840509414673,
      "learning_rate": 0.00019229198610931866,
      "loss": 0.9872,
      "step": 5545
    },
    {
      "epoch": 0.640286109829257,
      "grad_norm": 0.3317157030105591,
      "learning_rate": 0.00019226612486592271,
      "loss": 0.9277,
      "step": 5550
    },
    {
      "epoch": 0.6408629441624365,
      "grad_norm": 0.3596244156360626,
      "learning_rate": 0.00019224022205645785,
      "loss": 0.9683,
      "step": 5555
    },
    {
      "epoch": 0.6414397784956161,
      "grad_norm": 0.3185982406139374,
      "learning_rate": 0.00019221427769259333,
      "loss": 0.9785,
      "step": 5560
    },
    {
      "epoch": 0.6420166128287955,
      "grad_norm": 0.301542729139328,
      "learning_rate": 0.00019218829178601713,
      "loss": 0.9568,
      "step": 5565
    },
    {
      "epoch": 0.6425934471619751,
      "grad_norm": 0.32796069979667664,
      "learning_rate": 0.00019216226434843597,
      "loss": 1.0116,
      "step": 5570
    },
    {
      "epoch": 0.6431702814951545,
      "grad_norm": 0.2905179560184479,
      "learning_rate": 0.0001921361953915753,
      "loss": 0.9531,
      "step": 5575
    },
    {
      "epoch": 0.6437471158283341,
      "grad_norm": 0.33059805631637573,
      "learning_rate": 0.00019211008492717914,
      "loss": 0.9578,
      "step": 5580
    },
    {
      "epoch": 0.6443239501615136,
      "grad_norm": 0.3101516366004944,
      "learning_rate": 0.00019208393296701038,
      "loss": 1.0044,
      "step": 5585
    },
    {
      "epoch": 0.6449007844946931,
      "grad_norm": 0.3204366862773895,
      "learning_rate": 0.00019205773952285052,
      "loss": 0.9383,
      "step": 5590
    },
    {
      "epoch": 0.6454776188278727,
      "grad_norm": 0.308737188577652,
      "learning_rate": 0.0001920315046064997,
      "loss": 0.9672,
      "step": 5595
    },
    {
      "epoch": 0.6460544531610521,
      "grad_norm": 0.2993175983428955,
      "learning_rate": 0.0001920052282297769,
      "loss": 0.9724,
      "step": 5600
    },
    {
      "epoch": 0.6466312874942317,
      "grad_norm": 0.35242629051208496,
      "learning_rate": 0.00019197891040451963,
      "loss": 0.9366,
      "step": 5605
    },
    {
      "epoch": 0.6472081218274112,
      "grad_norm": 0.3320680260658264,
      "learning_rate": 0.00019195255114258408,
      "loss": 1.0048,
      "step": 5610
    },
    {
      "epoch": 0.6477849561605907,
      "grad_norm": 0.3301430940628052,
      "learning_rate": 0.00019192615045584522,
      "loss": 0.9553,
      "step": 5615
    },
    {
      "epoch": 0.6483617904937702,
      "grad_norm": 0.327357679605484,
      "learning_rate": 0.00019189970835619652,
      "loss": 0.963,
      "step": 5620
    },
    {
      "epoch": 0.6489386248269498,
      "grad_norm": 0.3190898597240448,
      "learning_rate": 0.00019187322485555031,
      "loss": 0.9617,
      "step": 5625
    },
    {
      "epoch": 0.6495154591601292,
      "grad_norm": 0.3255945146083832,
      "learning_rate": 0.00019184669996583737,
      "loss": 0.9493,
      "step": 5630
    },
    {
      "epoch": 0.6500922934933088,
      "grad_norm": 0.3097761273384094,
      "learning_rate": 0.00019182013369900726,
      "loss": 0.9099,
      "step": 5635
    },
    {
      "epoch": 0.6506691278264882,
      "grad_norm": 0.3031613528728485,
      "learning_rate": 0.00019179352606702813,
      "loss": 0.9767,
      "step": 5640
    },
    {
      "epoch": 0.6512459621596678,
      "grad_norm": 0.3292886018753052,
      "learning_rate": 0.00019176687708188675,
      "loss": 1.0219,
      "step": 5645
    },
    {
      "epoch": 0.6518227964928472,
      "grad_norm": 0.3024533689022064,
      "learning_rate": 0.00019174018675558854,
      "loss": 0.9109,
      "step": 5650
    },
    {
      "epoch": 0.6523996308260268,
      "grad_norm": 0.3066127896308899,
      "learning_rate": 0.00019171345510015758,
      "loss": 0.9726,
      "step": 5655
    },
    {
      "epoch": 0.6529764651592063,
      "grad_norm": 0.3431326150894165,
      "learning_rate": 0.0001916866821276365,
      "loss": 0.9434,
      "step": 5660
    },
    {
      "epoch": 0.6535532994923858,
      "grad_norm": 0.3115648627281189,
      "learning_rate": 0.00019165986785008658,
      "loss": 0.9453,
      "step": 5665
    },
    {
      "epoch": 0.6541301338255653,
      "grad_norm": 0.3325398862361908,
      "learning_rate": 0.0001916330122795877,
      "loss": 0.9581,
      "step": 5670
    },
    {
      "epoch": 0.6547069681587449,
      "grad_norm": 0.3112443685531616,
      "learning_rate": 0.00019160611542823837,
      "loss": 0.9718,
      "step": 5675
    },
    {
      "epoch": 0.6552838024919243,
      "grad_norm": 0.29900282621383667,
      "learning_rate": 0.00019157917730815567,
      "loss": 0.9952,
      "step": 5680
    },
    {
      "epoch": 0.6558606368251039,
      "grad_norm": 0.31644630432128906,
      "learning_rate": 0.00019155219793147522,
      "loss": 0.96,
      "step": 5685
    },
    {
      "epoch": 0.6564374711582833,
      "grad_norm": 0.3477376103401184,
      "learning_rate": 0.00019152517731035139,
      "loss": 0.9834,
      "step": 5690
    },
    {
      "epoch": 0.6570143054914629,
      "grad_norm": 0.30350586771965027,
      "learning_rate": 0.00019149811545695692,
      "loss": 1.0281,
      "step": 5695
    },
    {
      "epoch": 0.6575911398246423,
      "grad_norm": 0.2941493093967438,
      "learning_rate": 0.00019147101238348326,
      "loss": 0.9535,
      "step": 5700
    },
    {
      "epoch": 0.6581679741578219,
      "grad_norm": 0.32863160967826843,
      "learning_rate": 0.00019144386810214043,
      "loss": 1.0231,
      "step": 5705
    },
    {
      "epoch": 0.6587448084910014,
      "grad_norm": 0.32498711347579956,
      "learning_rate": 0.00019141668262515692,
      "loss": 0.9913,
      "step": 5710
    },
    {
      "epoch": 0.6593216428241809,
      "grad_norm": 0.31866273283958435,
      "learning_rate": 0.00019138945596477994,
      "loss": 1.0029,
      "step": 5715
    },
    {
      "epoch": 0.6598984771573604,
      "grad_norm": 0.3033033013343811,
      "learning_rate": 0.00019136218813327503,
      "loss": 0.98,
      "step": 5720
    },
    {
      "epoch": 0.66047531149054,
      "grad_norm": 0.3383612036705017,
      "learning_rate": 0.0001913348791429265,
      "loss": 1.02,
      "step": 5725
    },
    {
      "epoch": 0.6610521458237194,
      "grad_norm": 0.3032139539718628,
      "learning_rate": 0.00019130752900603702,
      "loss": 0.9907,
      "step": 5730
    },
    {
      "epoch": 0.661628980156899,
      "grad_norm": 0.3184412717819214,
      "learning_rate": 0.00019128013773492795,
      "loss": 0.9988,
      "step": 5735
    },
    {
      "epoch": 0.6622058144900784,
      "grad_norm": 0.4634701907634735,
      "learning_rate": 0.0001912527053419391,
      "loss": 0.9851,
      "step": 5740
    },
    {
      "epoch": 0.662782648823258,
      "grad_norm": 0.3319401443004608,
      "learning_rate": 0.00019122523183942879,
      "loss": 0.9319,
      "step": 5745
    },
    {
      "epoch": 0.6633594831564374,
      "grad_norm": 0.3245648443698883,
      "learning_rate": 0.00019119771723977386,
      "loss": 0.9687,
      "step": 5750
    },
    {
      "epoch": 0.663936317489617,
      "grad_norm": 0.3270566463470459,
      "learning_rate": 0.00019117016155536978,
      "loss": 0.9949,
      "step": 5755
    },
    {
      "epoch": 0.6645131518227965,
      "grad_norm": 0.32096368074417114,
      "learning_rate": 0.00019114256479863038,
      "loss": 0.9699,
      "step": 5760
    },
    {
      "epoch": 0.665089986155976,
      "grad_norm": 0.29931652545928955,
      "learning_rate": 0.00019111492698198804,
      "loss": 0.9539,
      "step": 5765
    },
    {
      "epoch": 0.6656668204891555,
      "grad_norm": 0.40578532218933105,
      "learning_rate": 0.00019108724811789366,
      "loss": 0.9947,
      "step": 5770
    },
    {
      "epoch": 0.666243654822335,
      "grad_norm": 0.3238760530948639,
      "learning_rate": 0.00019105952821881668,
      "loss": 0.9727,
      "step": 5775
    },
    {
      "epoch": 0.6668204891555145,
      "grad_norm": 0.30622220039367676,
      "learning_rate": 0.0001910317672972449,
      "loss": 0.9811,
      "step": 5780
    },
    {
      "epoch": 0.6673973234886941,
      "grad_norm": 0.29258066415786743,
      "learning_rate": 0.0001910039653656847,
      "loss": 0.996,
      "step": 5785
    },
    {
      "epoch": 0.6679741578218735,
      "grad_norm": 0.3499821126461029,
      "learning_rate": 0.00019097612243666086,
      "loss": 0.9957,
      "step": 5790
    },
    {
      "epoch": 0.6685509921550531,
      "grad_norm": 0.32826563715934753,
      "learning_rate": 0.00019094823852271674,
      "loss": 0.9775,
      "step": 5795
    },
    {
      "epoch": 0.6691278264882325,
      "grad_norm": 0.2916145324707031,
      "learning_rate": 0.00019092031363641406,
      "loss": 0.9277,
      "step": 5800
    },
    {
      "epoch": 0.6697046608214121,
      "grad_norm": 0.30102667212486267,
      "learning_rate": 0.00019089234779033306,
      "loss": 1.018,
      "step": 5805
    },
    {
      "epoch": 0.6702814951545916,
      "grad_norm": 0.3221781551837921,
      "learning_rate": 0.00019086434099707238,
      "loss": 0.9232,
      "step": 5810
    },
    {
      "epoch": 0.6708583294877711,
      "grad_norm": 0.3488676846027374,
      "learning_rate": 0.0001908362932692491,
      "loss": 0.9638,
      "step": 5815
    },
    {
      "epoch": 0.6714351638209506,
      "grad_norm": 0.3418358564376831,
      "learning_rate": 0.00019080820461949886,
      "loss": 0.9207,
      "step": 5820
    },
    {
      "epoch": 0.6720119981541302,
      "grad_norm": 0.32656848430633545,
      "learning_rate": 0.00019078007506047564,
      "loss": 0.9877,
      "step": 5825
    },
    {
      "epoch": 0.6725888324873096,
      "grad_norm": 0.32590511441230774,
      "learning_rate": 0.0001907519046048518,
      "loss": 1.0079,
      "step": 5830
    },
    {
      "epoch": 0.6731656668204892,
      "grad_norm": 0.32146522402763367,
      "learning_rate": 0.00019072369326531824,
      "loss": 0.9815,
      "step": 5835
    },
    {
      "epoch": 0.6737425011536686,
      "grad_norm": 0.34426286816596985,
      "learning_rate": 0.00019069544105458416,
      "loss": 0.9684,
      "step": 5840
    },
    {
      "epoch": 0.6743193354868482,
      "grad_norm": 0.3510322868824005,
      "learning_rate": 0.0001906671479853773,
      "loss": 0.9691,
      "step": 5845
    },
    {
      "epoch": 0.6748961698200276,
      "grad_norm": 0.31030935049057007,
      "learning_rate": 0.00019063881407044373,
      "loss": 1.0024,
      "step": 5850
    },
    {
      "epoch": 0.6754730041532072,
      "grad_norm": 0.3513205647468567,
      "learning_rate": 0.00019061043932254795,
      "loss": 0.9485,
      "step": 5855
    },
    {
      "epoch": 0.6760498384863867,
      "grad_norm": 0.323853462934494,
      "learning_rate": 0.00019058202375447277,
      "loss": 0.9441,
      "step": 5860
    },
    {
      "epoch": 0.6766266728195662,
      "grad_norm": 0.32843998074531555,
      "learning_rate": 0.00019055356737901952,
      "loss": 1.0128,
      "step": 5865
    },
    {
      "epoch": 0.6772035071527457,
      "grad_norm": 0.35758525133132935,
      "learning_rate": 0.00019052507020900783,
      "loss": 0.9361,
      "step": 5870
    },
    {
      "epoch": 0.6777803414859253,
      "grad_norm": 0.31614959239959717,
      "learning_rate": 0.00019049653225727573,
      "loss": 0.9649,
      "step": 5875
    },
    {
      "epoch": 0.6783571758191047,
      "grad_norm": 0.2904086410999298,
      "learning_rate": 0.00019046795353667965,
      "loss": 0.9506,
      "step": 5880
    },
    {
      "epoch": 0.6789340101522843,
      "grad_norm": 0.30251458287239075,
      "learning_rate": 0.00019043933406009432,
      "loss": 0.9243,
      "step": 5885
    },
    {
      "epoch": 0.6795108444854637,
      "grad_norm": 0.3204381763935089,
      "learning_rate": 0.0001904106738404129,
      "loss": 0.9446,
      "step": 5890
    },
    {
      "epoch": 0.6800876788186433,
      "grad_norm": 0.3202415704727173,
      "learning_rate": 0.00019038197289054684,
      "loss": 0.9668,
      "step": 5895
    },
    {
      "epoch": 0.6806645131518227,
      "grad_norm": 0.3108007609844208,
      "learning_rate": 0.000190353231223426,
      "loss": 0.9507,
      "step": 5900
    },
    {
      "epoch": 0.6812413474850023,
      "grad_norm": 0.31462720036506653,
      "learning_rate": 0.00019032444885199858,
      "loss": 0.9459,
      "step": 5905
    },
    {
      "epoch": 0.6818181818181818,
      "grad_norm": 0.3226790726184845,
      "learning_rate": 0.00019029562578923106,
      "loss": 0.9564,
      "step": 5910
    },
    {
      "epoch": 0.6823950161513613,
      "grad_norm": 0.35159701108932495,
      "learning_rate": 0.00019026676204810826,
      "loss": 1.0154,
      "step": 5915
    },
    {
      "epoch": 0.6829718504845408,
      "grad_norm": 0.3501424193382263,
      "learning_rate": 0.00019023785764163344,
      "loss": 0.9834,
      "step": 5920
    },
    {
      "epoch": 0.6835486848177204,
      "grad_norm": 0.34057801961898804,
      "learning_rate": 0.000190208912582828,
      "loss": 1.0144,
      "step": 5925
    },
    {
      "epoch": 0.6841255191508998,
      "grad_norm": 0.3468494415283203,
      "learning_rate": 0.0001901799268847318,
      "loss": 0.9348,
      "step": 5930
    },
    {
      "epoch": 0.6847023534840794,
      "grad_norm": 0.30606213212013245,
      "learning_rate": 0.00019015090056040293,
      "loss": 0.9893,
      "step": 5935
    },
    {
      "epoch": 0.6852791878172588,
      "grad_norm": 0.3168538808822632,
      "learning_rate": 0.0001901218336229178,
      "loss": 0.9701,
      "step": 5940
    },
    {
      "epoch": 0.6858560221504384,
      "grad_norm": 0.3141446113586426,
      "learning_rate": 0.00019009272608537113,
      "loss": 1.025,
      "step": 5945
    },
    {
      "epoch": 0.686432856483618,
      "grad_norm": 0.3107317388057709,
      "learning_rate": 0.00019006357796087596,
      "loss": 1.0144,
      "step": 5950
    },
    {
      "epoch": 0.6870096908167974,
      "grad_norm": 0.31770557165145874,
      "learning_rate": 0.0001900343892625635,
      "loss": 0.9909,
      "step": 5955
    },
    {
      "epoch": 0.687586525149977,
      "grad_norm": 0.3323291838169098,
      "learning_rate": 0.0001900051600035834,
      "loss": 1.0551,
      "step": 5960
    },
    {
      "epoch": 0.6881633594831564,
      "grad_norm": 0.3626686632633209,
      "learning_rate": 0.00018997589019710342,
      "loss": 0.9785,
      "step": 5965
    },
    {
      "epoch": 0.688740193816336,
      "grad_norm": 0.34098559617996216,
      "learning_rate": 0.00018994657985630972,
      "loss": 0.9226,
      "step": 5970
    },
    {
      "epoch": 0.6893170281495155,
      "grad_norm": 0.3267138600349426,
      "learning_rate": 0.00018991722899440664,
      "loss": 1.0036,
      "step": 5975
    },
    {
      "epoch": 0.689893862482695,
      "grad_norm": 0.33355411887168884,
      "learning_rate": 0.0001898878376246168,
      "loss": 0.9962,
      "step": 5980
    },
    {
      "epoch": 0.6904706968158745,
      "grad_norm": 0.3542887270450592,
      "learning_rate": 0.00018985840576018107,
      "loss": 0.9529,
      "step": 5985
    },
    {
      "epoch": 0.691047531149054,
      "grad_norm": 0.3203210234642029,
      "learning_rate": 0.0001898289334143586,
      "loss": 0.9599,
      "step": 5990
    },
    {
      "epoch": 0.6916243654822335,
      "grad_norm": 0.3354796767234802,
      "learning_rate": 0.00018979942060042668,
      "loss": 0.9762,
      "step": 5995
    },
    {
      "epoch": 0.6922011998154131,
      "grad_norm": 0.30847635865211487,
      "learning_rate": 0.00018976986733168093,
      "loss": 1.0016,
      "step": 6000
    },
    {
      "epoch": 0.6927780341485925,
      "grad_norm": 0.33731263875961304,
      "learning_rate": 0.00018974027362143514,
      "loss": 0.9719,
      "step": 6005
    },
    {
      "epoch": 0.6933548684817721,
      "grad_norm": 0.31261542439460754,
      "learning_rate": 0.00018971063948302133,
      "loss": 1.0101,
      "step": 6010
    },
    {
      "epoch": 0.6939317028149515,
      "grad_norm": 0.33092984557151794,
      "learning_rate": 0.00018968096492978976,
      "loss": 0.969,
      "step": 6015
    },
    {
      "epoch": 0.6945085371481311,
      "grad_norm": 0.30494657158851624,
      "learning_rate": 0.00018965124997510883,
      "loss": 0.9667,
      "step": 6020
    },
    {
      "epoch": 0.6950853714813106,
      "grad_norm": 0.3187716007232666,
      "learning_rate": 0.00018962149463236524,
      "loss": 0.9676,
      "step": 6025
    },
    {
      "epoch": 0.6956622058144901,
      "grad_norm": 0.3075034022331238,
      "learning_rate": 0.0001895916989149638,
      "loss": 1.0125,
      "step": 6030
    },
    {
      "epoch": 0.6962390401476696,
      "grad_norm": 0.3571907877922058,
      "learning_rate": 0.00018956186283632754,
      "loss": 0.9668,
      "step": 6035
    },
    {
      "epoch": 0.6968158744808491,
      "grad_norm": 0.3178498148918152,
      "learning_rate": 0.00018953198640989764,
      "loss": 0.9635,
      "step": 6040
    },
    {
      "epoch": 0.6973927088140286,
      "grad_norm": 0.309283584356308,
      "learning_rate": 0.00018950206964913355,
      "loss": 0.9705,
      "step": 6045
    },
    {
      "epoch": 0.6979695431472082,
      "grad_norm": 0.3307679295539856,
      "learning_rate": 0.0001894721125675128,
      "loss": 0.9601,
      "step": 6050
    },
    {
      "epoch": 0.6985463774803876,
      "grad_norm": 0.33936187624931335,
      "learning_rate": 0.00018944211517853113,
      "loss": 0.9583,
      "step": 6055
    },
    {
      "epoch": 0.6991232118135672,
      "grad_norm": 0.33675289154052734,
      "learning_rate": 0.00018941207749570237,
      "loss": 0.9807,
      "step": 6060
    },
    {
      "epoch": 0.6997000461467466,
      "grad_norm": 0.2894688546657562,
      "learning_rate": 0.00018938199953255863,
      "loss": 0.9624,
      "step": 6065
    },
    {
      "epoch": 0.7002768804799262,
      "grad_norm": 0.325270414352417,
      "learning_rate": 0.00018935188130265004,
      "loss": 0.9874,
      "step": 6070
    },
    {
      "epoch": 0.7008537148131057,
      "grad_norm": 0.32608747482299805,
      "learning_rate": 0.0001893217228195449,
      "loss": 1.0085,
      "step": 6075
    },
    {
      "epoch": 0.7014305491462852,
      "grad_norm": 0.3333377540111542,
      "learning_rate": 0.00018929152409682972,
      "loss": 0.927,
      "step": 6080
    },
    {
      "epoch": 0.7020073834794647,
      "grad_norm": 0.3285461366176605,
      "learning_rate": 0.00018926128514810907,
      "loss": 0.9696,
      "step": 6085
    },
    {
      "epoch": 0.7025842178126442,
      "grad_norm": 0.32103848457336426,
      "learning_rate": 0.00018923100598700561,
      "loss": 1.017,
      "step": 6090
    },
    {
      "epoch": 0.7031610521458237,
      "grad_norm": 0.3383493423461914,
      "learning_rate": 0.00018920068662716023,
      "loss": 0.9833,
      "step": 6095
    },
    {
      "epoch": 0.7037378864790033,
      "grad_norm": 0.36620640754699707,
      "learning_rate": 0.00018917032708223183,
      "loss": 0.9836,
      "step": 6100
    },
    {
      "epoch": 0.7043147208121827,
      "grad_norm": 0.33626264333724976,
      "learning_rate": 0.00018913992736589746,
      "loss": 0.9322,
      "step": 6105
    },
    {
      "epoch": 0.7048915551453623,
      "grad_norm": 0.3257678151130676,
      "learning_rate": 0.0001891094874918522,
      "loss": 1.0343,
      "step": 6110
    },
    {
      "epoch": 0.7054683894785417,
      "grad_norm": 0.3210621178150177,
      "learning_rate": 0.00018907900747380932,
      "loss": 0.9852,
      "step": 6115
    },
    {
      "epoch": 0.7060452238117213,
      "grad_norm": 0.3681231141090393,
      "learning_rate": 0.0001890484873255001,
      "loss": 0.9589,
      "step": 6120
    },
    {
      "epoch": 0.7066220581449008,
      "grad_norm": 0.3156203627586365,
      "learning_rate": 0.00018901792706067395,
      "loss": 0.9706,
      "step": 6125
    },
    {
      "epoch": 0.7071988924780803,
      "grad_norm": 0.3281550407409668,
      "learning_rate": 0.00018898732669309833,
      "loss": 0.9647,
      "step": 6130
    },
    {
      "epoch": 0.7077757268112598,
      "grad_norm": 0.3214692771434784,
      "learning_rate": 0.00018895668623655873,
      "loss": 0.9394,
      "step": 6135
    },
    {
      "epoch": 0.7083525611444393,
      "grad_norm": 0.3246460258960724,
      "learning_rate": 0.0001889260057048588,
      "loss": 1.0101,
      "step": 6140
    },
    {
      "epoch": 0.7089293954776188,
      "grad_norm": 0.3262513279914856,
      "learning_rate": 0.0001888952851118201,
      "loss": 0.9386,
      "step": 6145
    },
    {
      "epoch": 0.7095062298107984,
      "grad_norm": 0.32824644446372986,
      "learning_rate": 0.0001888645244712824,
      "loss": 1.0007,
      "step": 6150
    },
    {
      "epoch": 0.7100830641439778,
      "grad_norm": 0.33368590474128723,
      "learning_rate": 0.00018883372379710332,
      "loss": 0.9683,
      "step": 6155
    },
    {
      "epoch": 0.7106598984771574,
      "grad_norm": 0.3157143294811249,
      "learning_rate": 0.00018880288310315873,
      "loss": 0.9925,
      "step": 6160
    },
    {
      "epoch": 0.7112367328103368,
      "grad_norm": 0.3037188947200775,
      "learning_rate": 0.00018877200240334236,
      "loss": 0.972,
      "step": 6165
    },
    {
      "epoch": 0.7118135671435164,
      "grad_norm": 0.3481830358505249,
      "learning_rate": 0.00018874108171156606,
      "loss": 0.9672,
      "step": 6170
    },
    {
      "epoch": 0.7123904014766959,
      "grad_norm": 0.33353695273399353,
      "learning_rate": 0.00018871012104175968,
      "loss": 0.9719,
      "step": 6175
    },
    {
      "epoch": 0.7129672358098754,
      "grad_norm": 0.35782551765441895,
      "learning_rate": 0.00018867912040787096,
      "loss": 0.9503,
      "step": 6180
    },
    {
      "epoch": 0.7135440701430549,
      "grad_norm": 0.32563239336013794,
      "learning_rate": 0.00018864807982386586,
      "loss": 1.043,
      "step": 6185
    },
    {
      "epoch": 0.7141209044762344,
      "grad_norm": 0.32650572061538696,
      "learning_rate": 0.00018861699930372816,
      "loss": 0.9347,
      "step": 6190
    },
    {
      "epoch": 0.7146977388094139,
      "grad_norm": 0.3114981949329376,
      "learning_rate": 0.00018858587886145975,
      "loss": 0.9725,
      "step": 6195
    },
    {
      "epoch": 0.7152745731425935,
      "grad_norm": 0.3412109613418579,
      "learning_rate": 0.00018855471851108037,
      "loss": 0.967,
      "step": 6200
    },
    {
      "epoch": 0.7158514074757729,
      "grad_norm": 0.3184211850166321,
      "learning_rate": 0.0001885235182666279,
      "loss": 0.9883,
      "step": 6205
    },
    {
      "epoch": 0.7164282418089525,
      "grad_norm": 0.335102379322052,
      "learning_rate": 0.00018849227814215805,
      "loss": 0.9357,
      "step": 6210
    },
    {
      "epoch": 0.7170050761421319,
      "grad_norm": 0.33024275302886963,
      "learning_rate": 0.00018846099815174458,
      "loss": 0.962,
      "step": 6215
    },
    {
      "epoch": 0.7175819104753115,
      "grad_norm": 0.31068938970565796,
      "learning_rate": 0.00018842967830947916,
      "loss": 0.9599,
      "step": 6220
    },
    {
      "epoch": 0.718158744808491,
      "grad_norm": 0.3194611668586731,
      "learning_rate": 0.00018839831862947152,
      "loss": 0.9768,
      "step": 6225
    },
    {
      "epoch": 0.7187355791416705,
      "grad_norm": 0.34374377131462097,
      "learning_rate": 0.0001883669191258492,
      "loss": 0.9773,
      "step": 6230
    },
    {
      "epoch": 0.71931241347485,
      "grad_norm": 0.33975133299827576,
      "learning_rate": 0.00018833547981275773,
      "loss": 1.0063,
      "step": 6235
    },
    {
      "epoch": 0.7198892478080295,
      "grad_norm": 0.3632505238056183,
      "learning_rate": 0.00018830400070436057,
      "loss": 0.999,
      "step": 6240
    },
    {
      "epoch": 0.720466082141209,
      "grad_norm": 0.2948671281337738,
      "learning_rate": 0.00018827248181483915,
      "loss": 0.9911,
      "step": 6245
    },
    {
      "epoch": 0.7210429164743886,
      "grad_norm": 0.36003610491752625,
      "learning_rate": 0.0001882409231583928,
      "loss": 0.9918,
      "step": 6250
    },
    {
      "epoch": 0.721619750807568,
      "grad_norm": 0.33334627747535706,
      "learning_rate": 0.00018820932474923873,
      "loss": 0.9975,
      "step": 6255
    },
    {
      "epoch": 0.7221965851407476,
      "grad_norm": 0.4000126123428345,
      "learning_rate": 0.0001881776866016121,
      "loss": 0.9123,
      "step": 6260
    },
    {
      "epoch": 0.722773419473927,
      "grad_norm": 0.3385787606239319,
      "learning_rate": 0.00018814600872976594,
      "loss": 0.9299,
      "step": 6265
    },
    {
      "epoch": 0.7233502538071066,
      "grad_norm": 0.31627988815307617,
      "learning_rate": 0.00018811429114797123,
      "loss": 0.9416,
      "step": 6270
    },
    {
      "epoch": 0.7239270881402861,
      "grad_norm": 0.33831337094306946,
      "learning_rate": 0.00018808253387051678,
      "loss": 0.9513,
      "step": 6275
    },
    {
      "epoch": 0.7245039224734656,
      "grad_norm": 0.31706488132476807,
      "learning_rate": 0.00018805073691170927,
      "loss": 0.9834,
      "step": 6280
    },
    {
      "epoch": 0.7250807568066451,
      "grad_norm": 0.3332441449165344,
      "learning_rate": 0.00018801890028587333,
      "loss": 0.9409,
      "step": 6285
    },
    {
      "epoch": 0.7256575911398246,
      "grad_norm": 0.34476813673973083,
      "learning_rate": 0.00018798702400735145,
      "loss": 0.9399,
      "step": 6290
    },
    {
      "epoch": 0.7262344254730042,
      "grad_norm": 0.31431716680526733,
      "learning_rate": 0.0001879551080905039,
      "loss": 1.0019,
      "step": 6295
    },
    {
      "epoch": 0.7268112598061837,
      "grad_norm": 0.3677690625190735,
      "learning_rate": 0.0001879231525497089,
      "loss": 0.9862,
      "step": 6300
    },
    {
      "epoch": 0.7273880941393632,
      "grad_norm": 0.31107670068740845,
      "learning_rate": 0.00018789115739936243,
      "loss": 0.9535,
      "step": 6305
    },
    {
      "epoch": 0.7279649284725427,
      "grad_norm": 0.35047629475593567,
      "learning_rate": 0.00018785912265387845,
      "loss": 0.9733,
      "step": 6310
    },
    {
      "epoch": 0.7285417628057222,
      "grad_norm": 0.33065545558929443,
      "learning_rate": 0.0001878270483276886,
      "loss": 0.99,
      "step": 6315
    },
    {
      "epoch": 0.7291185971389017,
      "grad_norm": 0.3244021236896515,
      "learning_rate": 0.00018779493443524245,
      "loss": 0.9656,
      "step": 6320
    },
    {
      "epoch": 0.7296954314720813,
      "grad_norm": 0.36192965507507324,
      "learning_rate": 0.0001877627809910074,
      "loss": 1.0014,
      "step": 6325
    },
    {
      "epoch": 0.7302722658052607,
      "grad_norm": 0.3353778123855591,
      "learning_rate": 0.00018773058800946858,
      "loss": 0.9828,
      "step": 6330
    },
    {
      "epoch": 0.7308491001384403,
      "grad_norm": 0.32873064279556274,
      "learning_rate": 0.00018769835550512908,
      "loss": 1.0209,
      "step": 6335
    },
    {
      "epoch": 0.7314259344716197,
      "grad_norm": 0.3185397684574127,
      "learning_rate": 0.00018766608349250966,
      "loss": 0.996,
      "step": 6340
    },
    {
      "epoch": 0.7320027688047993,
      "grad_norm": 0.30948343873023987,
      "learning_rate": 0.00018763377198614887,
      "loss": 0.9423,
      "step": 6345
    },
    {
      "epoch": 0.7325796031379788,
      "grad_norm": 0.31592267751693726,
      "learning_rate": 0.0001876014210006032,
      "loss": 0.9255,
      "step": 6350
    },
    {
      "epoch": 0.7331564374711583,
      "grad_norm": 0.3489915430545807,
      "learning_rate": 0.00018756903055044675,
      "loss": 0.9889,
      "step": 6355
    },
    {
      "epoch": 0.7337332718043378,
      "grad_norm": 0.3357676863670349,
      "learning_rate": 0.00018753660065027152,
      "loss": 0.915,
      "step": 6360
    },
    {
      "epoch": 0.7343101061375173,
      "grad_norm": 0.30153629183769226,
      "learning_rate": 0.00018750413131468725,
      "loss": 1.016,
      "step": 6365
    },
    {
      "epoch": 0.7348869404706968,
      "grad_norm": 0.3336898386478424,
      "learning_rate": 0.00018747162255832142,
      "loss": 0.9868,
      "step": 6370
    },
    {
      "epoch": 0.7354637748038764,
      "grad_norm": 0.3211246430873871,
      "learning_rate": 0.00018743907439581933,
      "loss": 0.9637,
      "step": 6375
    },
    {
      "epoch": 0.7360406091370558,
      "grad_norm": 0.32898247241973877,
      "learning_rate": 0.00018740648684184395,
      "loss": 0.9667,
      "step": 6380
    },
    {
      "epoch": 0.7366174434702354,
      "grad_norm": 0.33533334732055664,
      "learning_rate": 0.00018737385991107603,
      "loss": 0.947,
      "step": 6385
    },
    {
      "epoch": 0.7371942778034148,
      "grad_norm": 0.33783718943595886,
      "learning_rate": 0.0001873411936182141,
      "loss": 0.9626,
      "step": 6390
    },
    {
      "epoch": 0.7377711121365944,
      "grad_norm": 0.33185064792633057,
      "learning_rate": 0.00018730848797797437,
      "loss": 0.9885,
      "step": 6395
    },
    {
      "epoch": 0.7383479464697739,
      "grad_norm": 0.35967978835105896,
      "learning_rate": 0.00018727574300509076,
      "loss": 0.9882,
      "step": 6400
    },
    {
      "epoch": 0.7389247808029534,
      "grad_norm": 0.3004412055015564,
      "learning_rate": 0.000187242958714315,
      "loss": 0.9386,
      "step": 6405
    },
    {
      "epoch": 0.7395016151361329,
      "grad_norm": 0.3231200575828552,
      "learning_rate": 0.00018721013512041647,
      "loss": 0.9473,
      "step": 6410
    },
    {
      "epoch": 0.7400784494693124,
      "grad_norm": 0.35318905115127563,
      "learning_rate": 0.00018717727223818223,
      "loss": 0.9524,
      "step": 6415
    },
    {
      "epoch": 0.7406552838024919,
      "grad_norm": 0.32204577326774597,
      "learning_rate": 0.00018714437008241709,
      "loss": 0.9413,
      "step": 6420
    },
    {
      "epoch": 0.7412321181356715,
      "grad_norm": 0.35092103481292725,
      "learning_rate": 0.00018711142866794354,
      "loss": 1.0402,
      "step": 6425
    },
    {
      "epoch": 0.7418089524688509,
      "grad_norm": 0.3684171736240387,
      "learning_rate": 0.00018707844800960177,
      "loss": 0.9495,
      "step": 6430
    },
    {
      "epoch": 0.7423857868020305,
      "grad_norm": 0.34715521335601807,
      "learning_rate": 0.00018704542812224956,
      "loss": 0.9848,
      "step": 6435
    },
    {
      "epoch": 0.7429626211352099,
      "grad_norm": 0.3401312828063965,
      "learning_rate": 0.0001870123690207625,
      "loss": 0.9969,
      "step": 6440
    },
    {
      "epoch": 0.7435394554683895,
      "grad_norm": 0.33639106154441833,
      "learning_rate": 0.00018697927072003378,
      "loss": 0.9768,
      "step": 6445
    },
    {
      "epoch": 0.744116289801569,
      "grad_norm": 0.3611484169960022,
      "learning_rate": 0.00018694613323497422,
      "loss": 0.9478,
      "step": 6450
    },
    {
      "epoch": 0.7446931241347485,
      "grad_norm": 0.3171774446964264,
      "learning_rate": 0.00018691295658051233,
      "loss": 1.0148,
      "step": 6455
    },
    {
      "epoch": 0.745269958467928,
      "grad_norm": 0.33016425371170044,
      "learning_rate": 0.00018687974077159428,
      "loss": 0.9979,
      "step": 6460
    },
    {
      "epoch": 0.7458467928011075,
      "grad_norm": 0.3236633241176605,
      "learning_rate": 0.0001868464858231838,
      "loss": 0.9243,
      "step": 6465
    },
    {
      "epoch": 0.746423627134287,
      "grad_norm": 0.3428235650062561,
      "learning_rate": 0.00018681319175026237,
      "loss": 0.9754,
      "step": 6470
    },
    {
      "epoch": 0.7470004614674666,
      "grad_norm": 0.33331015706062317,
      "learning_rate": 0.000186779858567829,
      "loss": 0.9638,
      "step": 6475
    },
    {
      "epoch": 0.747577295800646,
      "grad_norm": 0.32325270771980286,
      "learning_rate": 0.0001867464862909004,
      "loss": 0.9898,
      "step": 6480
    },
    {
      "epoch": 0.7481541301338256,
      "grad_norm": 0.3331642746925354,
      "learning_rate": 0.00018671307493451074,
      "loss": 0.9401,
      "step": 6485
    },
    {
      "epoch": 0.748730964467005,
      "grad_norm": 0.33681750297546387,
      "learning_rate": 0.000186679624513712,
      "loss": 0.9885,
      "step": 6490
    },
    {
      "epoch": 0.7493077988001846,
      "grad_norm": 0.3355301320552826,
      "learning_rate": 0.00018664613504357366,
      "loss": 0.9511,
      "step": 6495
    },
    {
      "epoch": 0.7498846331333641,
      "grad_norm": 0.33696606755256653,
      "learning_rate": 0.0001866126065391827,
      "loss": 0.951,
      "step": 6500
    },
    {
      "epoch": 0.7504614674665436,
      "grad_norm": 0.317131370306015,
      "learning_rate": 0.00018657903901564388,
      "loss": 0.9431,
      "step": 6505
    },
    {
      "epoch": 0.7510383017997231,
      "grad_norm": 0.31651514768600464,
      "learning_rate": 0.0001865454324880794,
      "loss": 0.9611,
      "step": 6510
    },
    {
      "epoch": 0.7516151361329027,
      "grad_norm": 0.3429376780986786,
      "learning_rate": 0.00018651178697162902,
      "loss": 0.9579,
      "step": 6515
    },
    {
      "epoch": 0.7521919704660821,
      "grad_norm": 0.31091445684432983,
      "learning_rate": 0.00018647810248145018,
      "loss": 1.0021,
      "step": 6520
    },
    {
      "epoch": 0.7527688047992617,
      "grad_norm": 0.38521134853363037,
      "learning_rate": 0.00018644437903271778,
      "loss": 0.9911,
      "step": 6525
    },
    {
      "epoch": 0.7533456391324411,
      "grad_norm": 0.33354058861732483,
      "learning_rate": 0.00018641061664062428,
      "loss": 0.9634,
      "step": 6530
    },
    {
      "epoch": 0.7539224734656207,
      "grad_norm": 0.3340102434158325,
      "learning_rate": 0.0001863768153203797,
      "loss": 0.9433,
      "step": 6535
    },
    {
      "epoch": 0.7544993077988001,
      "grad_norm": 0.3207364082336426,
      "learning_rate": 0.00018634297508721167,
      "loss": 0.9275,
      "step": 6540
    },
    {
      "epoch": 0.7550761421319797,
      "grad_norm": 0.3502042889595032,
      "learning_rate": 0.00018630909595636523,
      "loss": 1.0206,
      "step": 6545
    },
    {
      "epoch": 0.7556529764651592,
      "grad_norm": 0.31954869627952576,
      "learning_rate": 0.00018627517794310298,
      "loss": 0.9621,
      "step": 6550
    },
    {
      "epoch": 0.7562298107983387,
      "grad_norm": 0.33266472816467285,
      "learning_rate": 0.00018624122106270506,
      "loss": 0.9419,
      "step": 6555
    },
    {
      "epoch": 0.7568066451315182,
      "grad_norm": 0.337035596370697,
      "learning_rate": 0.0001862072253304691,
      "loss": 0.9389,
      "step": 6560
    },
    {
      "epoch": 0.7573834794646978,
      "grad_norm": 0.31855568289756775,
      "learning_rate": 0.00018617319076171028,
      "loss": 0.9541,
      "step": 6565
    },
    {
      "epoch": 0.7579603137978772,
      "grad_norm": 0.322386234998703,
      "learning_rate": 0.00018613911737176125,
      "loss": 0.978,
      "step": 6570
    },
    {
      "epoch": 0.7585371481310568,
      "grad_norm": 0.34818416833877563,
      "learning_rate": 0.00018610500517597206,
      "loss": 1.0214,
      "step": 6575
    },
    {
      "epoch": 0.7591139824642362,
      "grad_norm": 0.30302146077156067,
      "learning_rate": 0.0001860708541897104,
      "loss": 0.9401,
      "step": 6580
    },
    {
      "epoch": 0.7596908167974158,
      "grad_norm": 0.3100593388080597,
      "learning_rate": 0.0001860366644283613,
      "loss": 1.0366,
      "step": 6585
    },
    {
      "epoch": 0.7602676511305952,
      "grad_norm": 0.32050052285194397,
      "learning_rate": 0.0001860024359073274,
      "loss": 0.9593,
      "step": 6590
    },
    {
      "epoch": 0.7608444854637748,
      "grad_norm": 0.3930646479129791,
      "learning_rate": 0.00018596816864202862,
      "loss": 0.9761,
      "step": 6595
    },
    {
      "epoch": 0.7614213197969543,
      "grad_norm": 0.31919702887535095,
      "learning_rate": 0.00018593386264790243,
      "loss": 0.9626,
      "step": 6600
    },
    {
      "epoch": 0.7619981541301338,
      "grad_norm": 0.33068013191223145,
      "learning_rate": 0.0001858995179404038,
      "loss": 0.9705,
      "step": 6605
    },
    {
      "epoch": 0.7625749884633133,
      "grad_norm": 0.31021422147750854,
      "learning_rate": 0.00018586513453500508,
      "loss": 0.9527,
      "step": 6610
    },
    {
      "epoch": 0.7631518227964929,
      "grad_norm": 0.38333725929260254,
      "learning_rate": 0.00018583071244719607,
      "loss": 0.9773,
      "step": 6615
    },
    {
      "epoch": 0.7637286571296723,
      "grad_norm": 0.3238351047039032,
      "learning_rate": 0.00018579625169248395,
      "loss": 1.0035,
      "step": 6620
    },
    {
      "epoch": 0.7643054914628519,
      "grad_norm": 0.304609477519989,
      "learning_rate": 0.0001857617522863934,
      "loss": 1.0089,
      "step": 6625
    },
    {
      "epoch": 0.7648823257960313,
      "grad_norm": 0.32782503962516785,
      "learning_rate": 0.0001857272142444664,
      "loss": 0.9194,
      "step": 6630
    },
    {
      "epoch": 0.7654591601292109,
      "grad_norm": 0.3101833462715149,
      "learning_rate": 0.0001856926375822625,
      "loss": 0.9782,
      "step": 6635
    },
    {
      "epoch": 0.7660359944623903,
      "grad_norm": 0.3171398639678955,
      "learning_rate": 0.00018565802231535847,
      "loss": 0.9407,
      "step": 6640
    },
    {
      "epoch": 0.7666128287955699,
      "grad_norm": 0.3236709237098694,
      "learning_rate": 0.0001856233684593486,
      "loss": 1.0112,
      "step": 6645
    },
    {
      "epoch": 0.7671896631287495,
      "grad_norm": 0.36800485849380493,
      "learning_rate": 0.0001855886760298445,
      "loss": 0.9796,
      "step": 6650
    },
    {
      "epoch": 0.7677664974619289,
      "grad_norm": 0.34232667088508606,
      "learning_rate": 0.00018555394504247521,
      "loss": 0.9916,
      "step": 6655
    },
    {
      "epoch": 0.7683433317951085,
      "grad_norm": 0.33114129304885864,
      "learning_rate": 0.00018551917551288706,
      "loss": 0.9857,
      "step": 6660
    },
    {
      "epoch": 0.768920166128288,
      "grad_norm": 0.3472602367401123,
      "learning_rate": 0.00018548436745674383,
      "loss": 0.9574,
      "step": 6665
    },
    {
      "epoch": 0.7694970004614675,
      "grad_norm": 0.3226907551288605,
      "learning_rate": 0.00018544952088972658,
      "loss": 0.9783,
      "step": 6670
    },
    {
      "epoch": 0.770073834794647,
      "grad_norm": 0.34243959188461304,
      "learning_rate": 0.0001854146358275338,
      "loss": 0.9587,
      "step": 6675
    },
    {
      "epoch": 0.7706506691278265,
      "grad_norm": 0.32891300320625305,
      "learning_rate": 0.00018537971228588124,
      "loss": 0.9303,
      "step": 6680
    },
    {
      "epoch": 0.771227503461006,
      "grad_norm": 0.3494172692298889,
      "learning_rate": 0.00018534475028050205,
      "loss": 0.9454,
      "step": 6685
    },
    {
      "epoch": 0.7718043377941856,
      "grad_norm": 0.33006858825683594,
      "learning_rate": 0.00018530974982714667,
      "loss": 0.9697,
      "step": 6690
    },
    {
      "epoch": 0.772381172127365,
      "grad_norm": 0.33270421624183655,
      "learning_rate": 0.00018527471094158287,
      "loss": 0.9943,
      "step": 6695
    },
    {
      "epoch": 0.7729580064605446,
      "grad_norm": 0.34100016951560974,
      "learning_rate": 0.00018523963363959573,
      "loss": 0.9427,
      "step": 6700
    },
    {
      "epoch": 0.773534840793724,
      "grad_norm": 0.32470348477363586,
      "learning_rate": 0.0001852045179369877,
      "loss": 0.9585,
      "step": 6705
    },
    {
      "epoch": 0.7741116751269036,
      "grad_norm": 0.3840593993663788,
      "learning_rate": 0.00018516936384957834,
      "loss": 0.9694,
      "step": 6710
    },
    {
      "epoch": 0.774688509460083,
      "grad_norm": 0.32040587067604065,
      "learning_rate": 0.0001851341713932048,
      "loss": 1.0201,
      "step": 6715
    },
    {
      "epoch": 0.7752653437932626,
      "grad_norm": 0.3363640606403351,
      "learning_rate": 0.0001850989405837212,
      "loss": 0.9696,
      "step": 6720
    },
    {
      "epoch": 0.7758421781264421,
      "grad_norm": 0.3235708177089691,
      "learning_rate": 0.00018506367143699922,
      "loss": 0.917,
      "step": 6725
    },
    {
      "epoch": 0.7764190124596216,
      "grad_norm": 0.32720041275024414,
      "learning_rate": 0.0001850283639689276,
      "loss": 1.0065,
      "step": 6730
    },
    {
      "epoch": 0.7769958467928011,
      "grad_norm": 0.34144657850265503,
      "learning_rate": 0.0001849930181954124,
      "loss": 1.0124,
      "step": 6735
    },
    {
      "epoch": 0.7775726811259807,
      "grad_norm": 0.3346642851829529,
      "learning_rate": 0.00018495763413237706,
      "loss": 0.9115,
      "step": 6740
    },
    {
      "epoch": 0.7781495154591601,
      "grad_norm": 0.3458048105239868,
      "learning_rate": 0.00018492221179576207,
      "loss": 0.9607,
      "step": 6745
    },
    {
      "epoch": 0.7787263497923397,
      "grad_norm": 0.33692750334739685,
      "learning_rate": 0.00018488675120152532,
      "loss": 0.986,
      "step": 6750
    },
    {
      "epoch": 0.7793031841255191,
      "grad_norm": 0.3468713164329529,
      "learning_rate": 0.00018485125236564185,
      "loss": 0.9971,
      "step": 6755
    },
    {
      "epoch": 0.7798800184586987,
      "grad_norm": 0.34560665488243103,
      "learning_rate": 0.00018481571530410397,
      "loss": 0.9439,
      "step": 6760
    },
    {
      "epoch": 0.7804568527918782,
      "grad_norm": 0.3573435842990875,
      "learning_rate": 0.00018478014003292116,
      "loss": 1.0003,
      "step": 6765
    },
    {
      "epoch": 0.7810336871250577,
      "grad_norm": 0.3445967137813568,
      "learning_rate": 0.0001847445265681202,
      "loss": 0.949,
      "step": 6770
    },
    {
      "epoch": 0.7816105214582372,
      "grad_norm": 0.34904471039772034,
      "learning_rate": 0.00018470887492574503,
      "loss": 0.9932,
      "step": 6775
    },
    {
      "epoch": 0.7821873557914167,
      "grad_norm": 0.31675389409065247,
      "learning_rate": 0.0001846731851218567,
      "loss": 1.0151,
      "step": 6780
    },
    {
      "epoch": 0.7827641901245962,
      "grad_norm": 0.35233479738235474,
      "learning_rate": 0.00018463745717253364,
      "loss": 0.9555,
      "step": 6785
    },
    {
      "epoch": 0.7833410244577758,
      "grad_norm": 0.3339458703994751,
      "learning_rate": 0.0001846016910938713,
      "loss": 0.9464,
      "step": 6790
    },
    {
      "epoch": 0.7839178587909552,
      "grad_norm": 0.35707125067710876,
      "learning_rate": 0.00018456588690198236,
      "loss": 0.9831,
      "step": 6795
    },
    {
      "epoch": 0.7844946931241348,
      "grad_norm": 0.3830420672893524,
      "learning_rate": 0.00018453004461299672,
      "loss": 0.9704,
      "step": 6800
    },
    {
      "epoch": 0.7850715274573142,
      "grad_norm": 0.3077176809310913,
      "learning_rate": 0.00018449416424306137,
      "loss": 0.9407,
      "step": 6805
    },
    {
      "epoch": 0.7856483617904938,
      "grad_norm": 0.3609665632247925,
      "learning_rate": 0.0001844582458083405,
      "loss": 0.9694,
      "step": 6810
    },
    {
      "epoch": 0.7862251961236733,
      "grad_norm": 0.34184277057647705,
      "learning_rate": 0.00018442228932501545,
      "loss": 0.9996,
      "step": 6815
    },
    {
      "epoch": 0.7868020304568528,
      "grad_norm": 0.3383423686027527,
      "learning_rate": 0.00018438629480928466,
      "loss": 1.0248,
      "step": 6820
    },
    {
      "epoch": 0.7873788647900323,
      "grad_norm": 0.32342755794525146,
      "learning_rate": 0.0001843502622773637,
      "loss": 0.9698,
      "step": 6825
    },
    {
      "epoch": 0.7879556991232118,
      "grad_norm": 0.3433450758457184,
      "learning_rate": 0.00018431419174548539,
      "loss": 0.9495,
      "step": 6830
    },
    {
      "epoch": 0.7885325334563913,
      "grad_norm": 0.32221582531929016,
      "learning_rate": 0.0001842780832298995,
      "loss": 0.9998,
      "step": 6835
    },
    {
      "epoch": 0.7891093677895709,
      "grad_norm": 0.3204023838043213,
      "learning_rate": 0.00018424193674687297,
      "loss": 0.9729,
      "step": 6840
    },
    {
      "epoch": 0.7896862021227503,
      "grad_norm": 0.3542555868625641,
      "learning_rate": 0.00018420575231268993,
      "loss": 0.9883,
      "step": 6845
    },
    {
      "epoch": 0.7902630364559299,
      "grad_norm": 0.3774206340312958,
      "learning_rate": 0.0001841695299436515,
      "loss": 0.9701,
      "step": 6850
    },
    {
      "epoch": 0.7908398707891093,
      "grad_norm": 0.337774395942688,
      "learning_rate": 0.00018413326965607593,
      "loss": 1.0354,
      "step": 6855
    },
    {
      "epoch": 0.7914167051222889,
      "grad_norm": 0.35492873191833496,
      "learning_rate": 0.00018409697146629854,
      "loss": 0.9896,
      "step": 6860
    },
    {
      "epoch": 0.7919935394554684,
      "grad_norm": 0.3379327952861786,
      "learning_rate": 0.00018406063539067174,
      "loss": 0.9961,
      "step": 6865
    },
    {
      "epoch": 0.7925703737886479,
      "grad_norm": 0.3227647840976715,
      "learning_rate": 0.00018402426144556504,
      "loss": 1.0273,
      "step": 6870
    },
    {
      "epoch": 0.7931472081218274,
      "grad_norm": 0.3234160244464874,
      "learning_rate": 0.00018398784964736493,
      "loss": 0.896,
      "step": 6875
    },
    {
      "epoch": 0.7937240424550069,
      "grad_norm": 0.32284024357795715,
      "learning_rate": 0.00018395140001247498,
      "loss": 0.9938,
      "step": 6880
    },
    {
      "epoch": 0.7943008767881864,
      "grad_norm": 0.34763550758361816,
      "learning_rate": 0.0001839149125573159,
      "loss": 0.9291,
      "step": 6885
    },
    {
      "epoch": 0.794877711121366,
      "grad_norm": 0.363441526889801,
      "learning_rate": 0.00018387838729832528,
      "loss": 0.9537,
      "step": 6890
    },
    {
      "epoch": 0.7954545454545454,
      "grad_norm": 0.3477483093738556,
      "learning_rate": 0.00018384182425195786,
      "loss": 1.0244,
      "step": 6895
    },
    {
      "epoch": 0.796031379787725,
      "grad_norm": 0.398753821849823,
      "learning_rate": 0.00018380522343468532,
      "loss": 0.9409,
      "step": 6900
    },
    {
      "epoch": 0.7966082141209044,
      "grad_norm": 0.3567346930503845,
      "learning_rate": 0.00018376858486299647,
      "loss": 0.9713,
      "step": 6905
    },
    {
      "epoch": 0.797185048454084,
      "grad_norm": 0.3397413492202759,
      "learning_rate": 0.000183731908553397,
      "loss": 0.9822,
      "step": 6910
    },
    {
      "epoch": 0.7977618827872635,
      "grad_norm": 0.3155917823314667,
      "learning_rate": 0.00018369519452240973,
      "loss": 0.9795,
      "step": 6915
    },
    {
      "epoch": 0.798338717120443,
      "grad_norm": 0.37791383266448975,
      "learning_rate": 0.00018365844278657432,
      "loss": 0.9761,
      "step": 6920
    },
    {
      "epoch": 0.7989155514536225,
      "grad_norm": 0.35001733899116516,
      "learning_rate": 0.00018362165336244753,
      "loss": 1.0093,
      "step": 6925
    },
    {
      "epoch": 0.799492385786802,
      "grad_norm": 0.3731771409511566,
      "learning_rate": 0.00018358482626660303,
      "loss": 0.9313,
      "step": 6930
    },
    {
      "epoch": 0.8000692201199815,
      "grad_norm": 0.36500659584999084,
      "learning_rate": 0.00018354796151563157,
      "loss": 0.951,
      "step": 6935
    },
    {
      "epoch": 0.8006460544531611,
      "grad_norm": 0.3376425802707672,
      "learning_rate": 0.00018351105912614078,
      "loss": 0.9301,
      "step": 6940
    },
    {
      "epoch": 0.8012228887863405,
      "grad_norm": 0.4149805009365082,
      "learning_rate": 0.0001834741191147552,
      "loss": 0.9878,
      "step": 6945
    },
    {
      "epoch": 0.8017997231195201,
      "grad_norm": 0.32159149646759033,
      "learning_rate": 0.00018343714149811642,
      "loss": 0.9877,
      "step": 6950
    },
    {
      "epoch": 0.8023765574526995,
      "grad_norm": 0.3310393691062927,
      "learning_rate": 0.00018340012629288293,
      "loss": 0.9769,
      "step": 6955
    },
    {
      "epoch": 0.8029533917858791,
      "grad_norm": 0.36446118354797363,
      "learning_rate": 0.00018336307351573018,
      "loss": 1.0259,
      "step": 6960
    },
    {
      "epoch": 0.8035302261190586,
      "grad_norm": 0.325137734413147,
      "learning_rate": 0.0001833259831833504,
      "loss": 1.0434,
      "step": 6965
    },
    {
      "epoch": 0.8041070604522381,
      "grad_norm": 0.37154528498649597,
      "learning_rate": 0.00018328885531245298,
      "loss": 0.9711,
      "step": 6970
    },
    {
      "epoch": 0.8046838947854176,
      "grad_norm": 0.30865800380706787,
      "learning_rate": 0.00018325168991976408,
      "loss": 0.9248,
      "step": 6975
    },
    {
      "epoch": 0.8052607291185971,
      "grad_norm": 0.33374345302581787,
      "learning_rate": 0.00018321448702202675,
      "loss": 0.9578,
      "step": 6980
    },
    {
      "epoch": 0.8058375634517766,
      "grad_norm": 0.3469264805316925,
      "learning_rate": 0.00018317724663600098,
      "loss": 1.0174,
      "step": 6985
    },
    {
      "epoch": 0.8064143977849562,
      "grad_norm": 0.32863757014274597,
      "learning_rate": 0.00018313996877846361,
      "loss": 0.9553,
      "step": 6990
    },
    {
      "epoch": 0.8069912321181357,
      "grad_norm": 0.34554439783096313,
      "learning_rate": 0.00018310265346620843,
      "loss": 0.9881,
      "step": 6995
    },
    {
      "epoch": 0.8075680664513152,
      "grad_norm": 0.3216921389102936,
      "learning_rate": 0.00018306530071604603,
      "loss": 0.9685,
      "step": 7000
    },
    {
      "epoch": 0.8081449007844947,
      "grad_norm": 0.315304160118103,
      "learning_rate": 0.00018302791054480394,
      "loss": 0.9431,
      "step": 7005
    },
    {
      "epoch": 0.8087217351176742,
      "grad_norm": 0.34708473086357117,
      "learning_rate": 0.00018299048296932643,
      "loss": 0.9449,
      "step": 7010
    },
    {
      "epoch": 0.8092985694508538,
      "grad_norm": 0.32933709025382996,
      "learning_rate": 0.00018295301800647475,
      "loss": 0.9696,
      "step": 7015
    },
    {
      "epoch": 0.8098754037840332,
      "grad_norm": 0.3329271078109741,
      "learning_rate": 0.00018291551567312694,
      "loss": 0.9897,
      "step": 7020
    },
    {
      "epoch": 0.8104522381172128,
      "grad_norm": 0.35639676451683044,
      "learning_rate": 0.00018287797598617785,
      "loss": 0.9221,
      "step": 7025
    },
    {
      "epoch": 0.8110290724503922,
      "grad_norm": 0.3705461919307709,
      "learning_rate": 0.00018284039896253923,
      "loss": 0.9838,
      "step": 7030
    },
    {
      "epoch": 0.8116059067835718,
      "grad_norm": 0.3558962345123291,
      "learning_rate": 0.00018280278461913952,
      "loss": 0.935,
      "step": 7035
    },
    {
      "epoch": 0.8121827411167513,
      "grad_norm": 0.3236217200756073,
      "learning_rate": 0.00018276513297292414,
      "loss": 0.9597,
      "step": 7040
    },
    {
      "epoch": 0.8127595754499308,
      "grad_norm": 0.32449623942375183,
      "learning_rate": 0.00018272744404085512,
      "loss": 1.018,
      "step": 7045
    },
    {
      "epoch": 0.8133364097831103,
      "grad_norm": 0.3508262038230896,
      "learning_rate": 0.00018268971783991152,
      "loss": 1.024,
      "step": 7050
    },
    {
      "epoch": 0.8139132441162898,
      "grad_norm": 0.31073689460754395,
      "learning_rate": 0.00018265195438708904,
      "loss": 0.9202,
      "step": 7055
    },
    {
      "epoch": 0.8144900784494693,
      "grad_norm": 0.3384758532047272,
      "learning_rate": 0.00018261415369940013,
      "loss": 1.0022,
      "step": 7060
    },
    {
      "epoch": 0.8150669127826489,
      "grad_norm": 0.3505370318889618,
      "learning_rate": 0.00018257631579387412,
      "loss": 0.9807,
      "step": 7065
    },
    {
      "epoch": 0.8156437471158283,
      "grad_norm": 0.344837486743927,
      "learning_rate": 0.00018253844068755702,
      "loss": 0.9415,
      "step": 7070
    },
    {
      "epoch": 0.8162205814490079,
      "grad_norm": 0.32771411538124084,
      "learning_rate": 0.00018250052839751172,
      "loss": 0.9546,
      "step": 7075
    },
    {
      "epoch": 0.8167974157821873,
      "grad_norm": 0.3505322337150574,
      "learning_rate": 0.0001824625789408177,
      "loss": 0.9879,
      "step": 7080
    },
    {
      "epoch": 0.8173742501153669,
      "grad_norm": 0.3172287940979004,
      "learning_rate": 0.00018242459233457127,
      "loss": 0.9891,
      "step": 7085
    },
    {
      "epoch": 0.8179510844485464,
      "grad_norm": 0.3298950493335724,
      "learning_rate": 0.00018238656859588553,
      "loss": 0.9842,
      "step": 7090
    },
    {
      "epoch": 0.8185279187817259,
      "grad_norm": 0.3447635769844055,
      "learning_rate": 0.00018234850774189018,
      "loss": 0.9829,
      "step": 7095
    },
    {
      "epoch": 0.8191047531149054,
      "grad_norm": 0.35230880975723267,
      "learning_rate": 0.00018231040978973178,
      "loss": 0.9781,
      "step": 7100
    },
    {
      "epoch": 0.819681587448085,
      "grad_norm": 0.3099910318851471,
      "learning_rate": 0.00018227227475657346,
      "loss": 1.0082,
      "step": 7105
    },
    {
      "epoch": 0.8202584217812644,
      "grad_norm": 0.3409811854362488,
      "learning_rate": 0.00018223410265959516,
      "loss": 1.0083,
      "step": 7110
    },
    {
      "epoch": 0.820835256114444,
      "grad_norm": 0.3492036759853363,
      "learning_rate": 0.0001821958935159935,
      "loss": 1.0077,
      "step": 7115
    },
    {
      "epoch": 0.8214120904476234,
      "grad_norm": 0.3637070655822754,
      "learning_rate": 0.00018215764734298172,
      "loss": 1.0113,
      "step": 7120
    },
    {
      "epoch": 0.821988924780803,
      "grad_norm": 0.37920454144477844,
      "learning_rate": 0.00018211936415778984,
      "loss": 0.9417,
      "step": 7125
    },
    {
      "epoch": 0.8225657591139824,
      "grad_norm": 0.32240965962409973,
      "learning_rate": 0.00018208104397766453,
      "loss": 0.9896,
      "step": 7130
    },
    {
      "epoch": 0.823142593447162,
      "grad_norm": 0.3595122694969177,
      "learning_rate": 0.00018204268681986903,
      "loss": 0.9406,
      "step": 7135
    },
    {
      "epoch": 0.8237194277803415,
      "grad_norm": 0.3721562623977661,
      "learning_rate": 0.0001820042927016834,
      "loss": 0.981,
      "step": 7140
    },
    {
      "epoch": 0.824296262113521,
      "grad_norm": 0.44455429911613464,
      "learning_rate": 0.0001819658616404042,
      "loss": 0.9788,
      "step": 7145
    },
    {
      "epoch": 0.8248730964467005,
      "grad_norm": 0.33321380615234375,
      "learning_rate": 0.00018192739365334473,
      "loss": 1.009,
      "step": 7150
    },
    {
      "epoch": 0.82544993077988,
      "grad_norm": 0.3545369803905487,
      "learning_rate": 0.0001818888887578349,
      "loss": 0.9786,
      "step": 7155
    },
    {
      "epoch": 0.8260267651130595,
      "grad_norm": 0.3792615532875061,
      "learning_rate": 0.0001818503469712212,
      "loss": 1.0339,
      "step": 7160
    },
    {
      "epoch": 0.8266035994462391,
      "grad_norm": 0.3251761794090271,
      "learning_rate": 0.00018181176831086684,
      "loss": 0.9957,
      "step": 7165
    },
    {
      "epoch": 0.8271804337794185,
      "grad_norm": 0.3693098723888397,
      "learning_rate": 0.00018177315279415153,
      "loss": 1.0073,
      "step": 7170
    },
    {
      "epoch": 0.8277572681125981,
      "grad_norm": 0.3383295238018036,
      "learning_rate": 0.00018173450043847163,
      "loss": 0.979,
      "step": 7175
    },
    {
      "epoch": 0.8283341024457775,
      "grad_norm": 0.32615846395492554,
      "learning_rate": 0.00018169581126124015,
      "loss": 0.9472,
      "step": 7180
    },
    {
      "epoch": 0.8289109367789571,
      "grad_norm": 0.3272216022014618,
      "learning_rate": 0.00018165708527988664,
      "loss": 1.0187,
      "step": 7185
    },
    {
      "epoch": 0.8294877711121366,
      "grad_norm": 0.3196505606174469,
      "learning_rate": 0.00018161832251185715,
      "loss": 1.0547,
      "step": 7190
    },
    {
      "epoch": 0.8300646054453161,
      "grad_norm": 0.33881649374961853,
      "learning_rate": 0.00018157952297461448,
      "loss": 0.9226,
      "step": 7195
    },
    {
      "epoch": 0.8306414397784956,
      "grad_norm": 0.3978832960128784,
      "learning_rate": 0.00018154068668563782,
      "loss": 0.99,
      "step": 7200
    },
    {
      "epoch": 0.8312182741116751,
      "grad_norm": 0.3536497950553894,
      "learning_rate": 0.00018150181366242304,
      "loss": 0.9566,
      "step": 7205
    },
    {
      "epoch": 0.8317951084448546,
      "grad_norm": 0.34969255328178406,
      "learning_rate": 0.00018146290392248254,
      "loss": 0.9805,
      "step": 7210
    },
    {
      "epoch": 0.8323719427780342,
      "grad_norm": 0.3390229046344757,
      "learning_rate": 0.00018142395748334513,
      "loss": 0.972,
      "step": 7215
    },
    {
      "epoch": 0.8329487771112136,
      "grad_norm": 0.34188541769981384,
      "learning_rate": 0.00018138497436255636,
      "loss": 0.9357,
      "step": 7220
    },
    {
      "epoch": 0.8335256114443932,
      "grad_norm": 0.3517703711986542,
      "learning_rate": 0.00018134595457767815,
      "loss": 0.954,
      "step": 7225
    },
    {
      "epoch": 0.8341024457775726,
      "grad_norm": 0.361482173204422,
      "learning_rate": 0.000181306898146289,
      "loss": 0.9047,
      "step": 7230
    },
    {
      "epoch": 0.8346792801107522,
      "grad_norm": 0.3323913812637329,
      "learning_rate": 0.00018126780508598392,
      "loss": 0.9574,
      "step": 7235
    },
    {
      "epoch": 0.8352561144439317,
      "grad_norm": 0.34358808398246765,
      "learning_rate": 0.0001812286754143744,
      "loss": 0.9792,
      "step": 7240
    },
    {
      "epoch": 0.8358329487771112,
      "grad_norm": 0.3494786024093628,
      "learning_rate": 0.00018118950914908843,
      "loss": 0.9597,
      "step": 7245
    },
    {
      "epoch": 0.8364097831102907,
      "grad_norm": 0.35309937596321106,
      "learning_rate": 0.0001811503063077705,
      "loss": 0.9857,
      "step": 7250
    },
    {
      "epoch": 0.8369866174434702,
      "grad_norm": 0.3490305542945862,
      "learning_rate": 0.00018111106690808155,
      "loss": 0.9398,
      "step": 7255
    },
    {
      "epoch": 0.8375634517766497,
      "grad_norm": 0.36818861961364746,
      "learning_rate": 0.00018107179096769901,
      "loss": 0.9171,
      "step": 7260
    },
    {
      "epoch": 0.8381402861098293,
      "grad_norm": 0.33169808983802795,
      "learning_rate": 0.0001810324785043168,
      "loss": 0.9641,
      "step": 7265
    },
    {
      "epoch": 0.8387171204430087,
      "grad_norm": 0.3376959264278412,
      "learning_rate": 0.0001809931295356452,
      "loss": 0.9399,
      "step": 7270
    },
    {
      "epoch": 0.8392939547761883,
      "grad_norm": 0.348026841878891,
      "learning_rate": 0.00018095374407941104,
      "loss": 0.9203,
      "step": 7275
    },
    {
      "epoch": 0.8398707891093677,
      "grad_norm": 0.3790396749973297,
      "learning_rate": 0.00018091432215335752,
      "loss": 0.9878,
      "step": 7280
    },
    {
      "epoch": 0.8404476234425473,
      "grad_norm": 0.35788261890411377,
      "learning_rate": 0.00018087486377524434,
      "loss": 0.9829,
      "step": 7285
    },
    {
      "epoch": 0.8410244577757268,
      "grad_norm": 0.35284462571144104,
      "learning_rate": 0.0001808353689628475,
      "loss": 0.9356,
      "step": 7290
    },
    {
      "epoch": 0.8416012921089063,
      "grad_norm": 0.3505180776119232,
      "learning_rate": 0.00018079583773395957,
      "loss": 0.9599,
      "step": 7295
    },
    {
      "epoch": 0.8421781264420858,
      "grad_norm": 0.35011520981788635,
      "learning_rate": 0.00018075627010638942,
      "loss": 1.0106,
      "step": 7300
    },
    {
      "epoch": 0.8427549607752653,
      "grad_norm": 0.3106728792190552,
      "learning_rate": 0.0001807166660979623,
      "loss": 1.0036,
      "step": 7305
    },
    {
      "epoch": 0.8433317951084448,
      "grad_norm": 0.3722745180130005,
      "learning_rate": 0.00018067702572651997,
      "loss": 0.9706,
      "step": 7310
    },
    {
      "epoch": 0.8439086294416244,
      "grad_norm": 0.36281153559684753,
      "learning_rate": 0.00018063734900992045,
      "loss": 0.956,
      "step": 7315
    },
    {
      "epoch": 0.8444854637748038,
      "grad_norm": 0.32843315601348877,
      "learning_rate": 0.00018059763596603814,
      "loss": 0.9172,
      "step": 7320
    },
    {
      "epoch": 0.8450622981079834,
      "grad_norm": 0.32707735896110535,
      "learning_rate": 0.00018055788661276392,
      "loss": 0.975,
      "step": 7325
    },
    {
      "epoch": 0.8456391324411628,
      "grad_norm": 0.36803141236305237,
      "learning_rate": 0.0001805181009680049,
      "loss": 0.9455,
      "step": 7330
    },
    {
      "epoch": 0.8462159667743424,
      "grad_norm": 0.3357301950454712,
      "learning_rate": 0.0001804782790496846,
      "loss": 0.9609,
      "step": 7335
    },
    {
      "epoch": 0.846792801107522,
      "grad_norm": 0.3335096538066864,
      "learning_rate": 0.00018043842087574286,
      "loss": 0.9276,
      "step": 7340
    },
    {
      "epoch": 0.8473696354407014,
      "grad_norm": 0.3681318759918213,
      "learning_rate": 0.00018039852646413592,
      "loss": 1.0033,
      "step": 7345
    },
    {
      "epoch": 0.847946469773881,
      "grad_norm": 0.3279605507850647,
      "learning_rate": 0.00018035859583283626,
      "loss": 0.9662,
      "step": 7350
    },
    {
      "epoch": 0.8485233041070604,
      "grad_norm": 0.38645607233047485,
      "learning_rate": 0.00018031862899983264,
      "loss": 0.9416,
      "step": 7355
    },
    {
      "epoch": 0.84910013844024,
      "grad_norm": 0.3565409183502197,
      "learning_rate": 0.0001802786259831303,
      "loss": 0.9579,
      "step": 7360
    },
    {
      "epoch": 0.8496769727734195,
      "grad_norm": 0.3465147614479065,
      "learning_rate": 0.00018023858680075061,
      "loss": 0.9472,
      "step": 7365
    },
    {
      "epoch": 0.850253807106599,
      "grad_norm": 0.32295745611190796,
      "learning_rate": 0.00018019851147073134,
      "loss": 0.9665,
      "step": 7370
    },
    {
      "epoch": 0.8508306414397785,
      "grad_norm": 0.3312920331954956,
      "learning_rate": 0.0001801584000111265,
      "loss": 0.9478,
      "step": 7375
    },
    {
      "epoch": 0.8514074757729581,
      "grad_norm": 0.35227471590042114,
      "learning_rate": 0.00018011825244000632,
      "loss": 0.9475,
      "step": 7380
    },
    {
      "epoch": 0.8519843101061375,
      "grad_norm": 0.3861836791038513,
      "learning_rate": 0.00018007806877545744,
      "loss": 0.9884,
      "step": 7385
    },
    {
      "epoch": 0.8525611444393171,
      "grad_norm": 0.3391641080379486,
      "learning_rate": 0.00018003784903558264,
      "loss": 0.9773,
      "step": 7390
    },
    {
      "epoch": 0.8531379787724965,
      "grad_norm": 0.37857285141944885,
      "learning_rate": 0.00017999759323850098,
      "loss": 1.0079,
      "step": 7395
    },
    {
      "epoch": 0.8537148131056761,
      "grad_norm": 0.3548443019390106,
      "learning_rate": 0.0001799573014023478,
      "loss": 0.9685,
      "step": 7400
    },
    {
      "epoch": 0.8542916474388556,
      "grad_norm": 0.3362700045108795,
      "learning_rate": 0.00017991697354527463,
      "loss": 0.9403,
      "step": 7405
    },
    {
      "epoch": 0.8548684817720351,
      "grad_norm": 0.41143998503685,
      "learning_rate": 0.0001798766096854493,
      "loss": 0.9746,
      "step": 7410
    },
    {
      "epoch": 0.8554453161052146,
      "grad_norm": 0.33060017228126526,
      "learning_rate": 0.00017983620984105572,
      "loss": 1.013,
      "step": 7415
    },
    {
      "epoch": 0.8560221504383941,
      "grad_norm": 0.35252559185028076,
      "learning_rate": 0.00017979577403029416,
      "loss": 0.9738,
      "step": 7420
    },
    {
      "epoch": 0.8565989847715736,
      "grad_norm": 0.3686401844024658,
      "learning_rate": 0.00017975530227138105,
      "loss": 0.9608,
      "step": 7425
    },
    {
      "epoch": 0.8571758191047532,
      "grad_norm": 0.34325218200683594,
      "learning_rate": 0.00017971479458254894,
      "loss": 0.9885,
      "step": 7430
    },
    {
      "epoch": 0.8577526534379326,
      "grad_norm": 0.33591338992118835,
      "learning_rate": 0.00017967425098204664,
      "loss": 0.9853,
      "step": 7435
    },
    {
      "epoch": 0.8583294877711122,
      "grad_norm": 0.3713604509830475,
      "learning_rate": 0.00017963367148813913,
      "loss": 0.991,
      "step": 7440
    },
    {
      "epoch": 0.8589063221042916,
      "grad_norm": 0.34418532252311707,
      "learning_rate": 0.00017959305611910752,
      "loss": 0.9596,
      "step": 7445
    },
    {
      "epoch": 0.8594831564374712,
      "grad_norm": 0.32015010714530945,
      "learning_rate": 0.00017955240489324917,
      "loss": 0.9974,
      "step": 7450
    },
    {
      "epoch": 0.8600599907706507,
      "grad_norm": 0.3452713191509247,
      "learning_rate": 0.0001795117178288775,
      "loss": 0.9906,
      "step": 7455
    },
    {
      "epoch": 0.8606368251038302,
      "grad_norm": 0.33043158054351807,
      "learning_rate": 0.00017947099494432212,
      "loss": 0.9454,
      "step": 7460
    },
    {
      "epoch": 0.8612136594370097,
      "grad_norm": 0.34975555539131165,
      "learning_rate": 0.00017943023625792878,
      "loss": 0.9766,
      "step": 7465
    },
    {
      "epoch": 0.8617904937701892,
      "grad_norm": 0.37061572074890137,
      "learning_rate": 0.00017938944178805933,
      "loss": 1.0097,
      "step": 7470
    },
    {
      "epoch": 0.8623673281033687,
      "grad_norm": 0.349991112947464,
      "learning_rate": 0.00017934861155309174,
      "loss": 0.9862,
      "step": 7475
    },
    {
      "epoch": 0.8629441624365483,
      "grad_norm": 0.3499099016189575,
      "learning_rate": 0.0001793077455714202,
      "loss": 0.9513,
      "step": 7480
    },
    {
      "epoch": 0.8635209967697277,
      "grad_norm": 0.34608426690101624,
      "learning_rate": 0.00017926684386145478,
      "loss": 0.9782,
      "step": 7485
    },
    {
      "epoch": 0.8640978311029073,
      "grad_norm": 0.4855821132659912,
      "learning_rate": 0.00017922590644162188,
      "loss": 0.9442,
      "step": 7490
    },
    {
      "epoch": 0.8646746654360867,
      "grad_norm": 0.32568657398223877,
      "learning_rate": 0.00017918493333036383,
      "loss": 0.9528,
      "step": 7495
    },
    {
      "epoch": 0.8652514997692663,
      "grad_norm": 0.37212124466896057,
      "learning_rate": 0.00017914392454613913,
      "loss": 1.0034,
      "step": 7500
    },
    {
      "epoch": 0.8658283341024458,
      "grad_norm": 0.3470038175582886,
      "learning_rate": 0.00017910288010742235,
      "loss": 0.9732,
      "step": 7505
    },
    {
      "epoch": 0.8664051684356253,
      "grad_norm": 0.3674761950969696,
      "learning_rate": 0.00017906180003270396,
      "loss": 0.9784,
      "step": 7510
    },
    {
      "epoch": 0.8669820027688048,
      "grad_norm": 0.3694908916950226,
      "learning_rate": 0.00017902068434049077,
      "loss": 0.9666,
      "step": 7515
    },
    {
      "epoch": 0.8675588371019843,
      "grad_norm": 0.35502108931541443,
      "learning_rate": 0.00017897953304930542,
      "loss": 1.0092,
      "step": 7520
    },
    {
      "epoch": 0.8681356714351638,
      "grad_norm": 0.3409530222415924,
      "learning_rate": 0.0001789383461776866,
      "loss": 0.9093,
      "step": 7525
    },
    {
      "epoch": 0.8687125057683434,
      "grad_norm": 0.38244783878326416,
      "learning_rate": 0.00017889712374418912,
      "loss": 1.0119,
      "step": 7530
    },
    {
      "epoch": 0.8692893401015228,
      "grad_norm": 0.3554026782512665,
      "learning_rate": 0.0001788558657673838,
      "loss": 0.9517,
      "step": 7535
    },
    {
      "epoch": 0.8698661744347024,
      "grad_norm": 0.3271549940109253,
      "learning_rate": 0.00017881457226585735,
      "loss": 0.952,
      "step": 7540
    },
    {
      "epoch": 0.8704430087678818,
      "grad_norm": 0.3484821021556854,
      "learning_rate": 0.00017877324325821264,
      "loss": 1.0257,
      "step": 7545
    },
    {
      "epoch": 0.8710198431010614,
      "grad_norm": 0.3790343105792999,
      "learning_rate": 0.00017873187876306848,
      "loss": 0.9899,
      "step": 7550
    },
    {
      "epoch": 0.8715966774342409,
      "grad_norm": 0.3237937390804291,
      "learning_rate": 0.00017869047879905958,
      "loss": 0.977,
      "step": 7555
    },
    {
      "epoch": 0.8721735117674204,
      "grad_norm": 0.2987980246543884,
      "learning_rate": 0.00017864904338483676,
      "loss": 0.93,
      "step": 7560
    },
    {
      "epoch": 0.8727503461005999,
      "grad_norm": 0.3538360893726349,
      "learning_rate": 0.00017860757253906675,
      "loss": 1.0224,
      "step": 7565
    },
    {
      "epoch": 0.8733271804337794,
      "grad_norm": 0.349717915058136,
      "learning_rate": 0.00017856606628043227,
      "loss": 0.9598,
      "step": 7570
    },
    {
      "epoch": 0.8739040147669589,
      "grad_norm": 0.34832173585891724,
      "learning_rate": 0.00017852452462763192,
      "loss": 0.9837,
      "step": 7575
    },
    {
      "epoch": 0.8744808491001385,
      "grad_norm": 0.396838515996933,
      "learning_rate": 0.00017848294759938033,
      "loss": 1.0234,
      "step": 7580
    },
    {
      "epoch": 0.8750576834333179,
      "grad_norm": 0.3279632329940796,
      "learning_rate": 0.000178441335214408,
      "loss": 0.9785,
      "step": 7585
    },
    {
      "epoch": 0.8756345177664975,
      "grad_norm": 0.3444277048110962,
      "learning_rate": 0.00017839968749146142,
      "loss": 0.969,
      "step": 7590
    },
    {
      "epoch": 0.8762113520996769,
      "grad_norm": 0.3291734755039215,
      "learning_rate": 0.00017835800444930298,
      "loss": 0.9432,
      "step": 7595
    },
    {
      "epoch": 0.8767881864328565,
      "grad_norm": 0.3443477749824524,
      "learning_rate": 0.00017831628610671092,
      "loss": 0.9852,
      "step": 7600
    },
    {
      "epoch": 0.877365020766036,
      "grad_norm": 0.3432978093624115,
      "learning_rate": 0.0001782745324824795,
      "loss": 0.9759,
      "step": 7605
    },
    {
      "epoch": 0.8779418550992155,
      "grad_norm": 0.36630696058273315,
      "learning_rate": 0.00017823274359541876,
      "loss": 1.0005,
      "step": 7610
    },
    {
      "epoch": 0.878518689432395,
      "grad_norm": 0.35049259662628174,
      "learning_rate": 0.0001781909194643547,
      "loss": 0.9894,
      "step": 7615
    },
    {
      "epoch": 0.8790955237655745,
      "grad_norm": 0.3564181923866272,
      "learning_rate": 0.00017814906010812912,
      "loss": 1.0125,
      "step": 7620
    },
    {
      "epoch": 0.879672358098754,
      "grad_norm": 0.3521508574485779,
      "learning_rate": 0.00017810716554559982,
      "loss": 0.9931,
      "step": 7625
    },
    {
      "epoch": 0.8802491924319336,
      "grad_norm": 0.37031182646751404,
      "learning_rate": 0.00017806523579564037,
      "loss": 0.985,
      "step": 7630
    },
    {
      "epoch": 0.880826026765113,
      "grad_norm": 0.3313467800617218,
      "learning_rate": 0.00017802327087714016,
      "loss": 0.9245,
      "step": 7635
    },
    {
      "epoch": 0.8814028610982926,
      "grad_norm": 0.32318732142448425,
      "learning_rate": 0.0001779812708090045,
      "loss": 0.9655,
      "step": 7640
    },
    {
      "epoch": 0.881979695431472,
      "grad_norm": 0.3676154315471649,
      "learning_rate": 0.0001779392356101545,
      "loss": 0.9997,
      "step": 7645
    },
    {
      "epoch": 0.8825565297646516,
      "grad_norm": 0.3498569130897522,
      "learning_rate": 0.00017789716529952704,
      "loss": 1.0039,
      "step": 7650
    },
    {
      "epoch": 0.883133364097831,
      "grad_norm": 0.3374147117137909,
      "learning_rate": 0.00017785505989607495,
      "loss": 0.982,
      "step": 7655
    },
    {
      "epoch": 0.8837101984310106,
      "grad_norm": 0.31364133954048157,
      "learning_rate": 0.0001778129194187668,
      "loss": 1.0058,
      "step": 7660
    },
    {
      "epoch": 0.8842870327641901,
      "grad_norm": 0.33285027742385864,
      "learning_rate": 0.00017777074388658693,
      "loss": 0.9447,
      "step": 7665
    },
    {
      "epoch": 0.8848638670973696,
      "grad_norm": 0.31633734703063965,
      "learning_rate": 0.00017772853331853548,
      "loss": 0.9616,
      "step": 7670
    },
    {
      "epoch": 0.8854407014305491,
      "grad_norm": 0.3172147572040558,
      "learning_rate": 0.0001776862877336284,
      "loss": 0.9363,
      "step": 7675
    },
    {
      "epoch": 0.8860175357637287,
      "grad_norm": 0.3388344347476959,
      "learning_rate": 0.00017764400715089744,
      "loss": 0.9531,
      "step": 7680
    },
    {
      "epoch": 0.8865943700969081,
      "grad_norm": 0.3313317596912384,
      "learning_rate": 0.00017760169158939005,
      "loss": 1.0407,
      "step": 7685
    },
    {
      "epoch": 0.8871712044300877,
      "grad_norm": 0.35782188177108765,
      "learning_rate": 0.00017755934106816951,
      "loss": 0.9826,
      "step": 7690
    },
    {
      "epoch": 0.8877480387632672,
      "grad_norm": 0.36099013686180115,
      "learning_rate": 0.0001775169556063148,
      "loss": 0.981,
      "step": 7695
    },
    {
      "epoch": 0.8883248730964467,
      "grad_norm": 0.335542768239975,
      "learning_rate": 0.00017747453522292065,
      "loss": 0.9161,
      "step": 7700
    },
    {
      "epoch": 0.8889017074296263,
      "grad_norm": 0.31969940662384033,
      "learning_rate": 0.00017743207993709746,
      "loss": 0.9504,
      "step": 7705
    },
    {
      "epoch": 0.8894785417628057,
      "grad_norm": 0.364268034696579,
      "learning_rate": 0.00017738958976797157,
      "loss": 1.0168,
      "step": 7710
    },
    {
      "epoch": 0.8900553760959853,
      "grad_norm": 0.3400898873806,
      "learning_rate": 0.0001773470647346847,
      "loss": 0.9774,
      "step": 7715
    },
    {
      "epoch": 0.8906322104291647,
      "grad_norm": 0.33001792430877686,
      "learning_rate": 0.0001773045048563946,
      "loss": 1.0044,
      "step": 7720
    },
    {
      "epoch": 0.8912090447623443,
      "grad_norm": 0.33356598019599915,
      "learning_rate": 0.00017726191015227452,
      "loss": 0.927,
      "step": 7725
    },
    {
      "epoch": 0.8917858790955238,
      "grad_norm": 0.33303821086883545,
      "learning_rate": 0.00017721928064151347,
      "loss": 0.9675,
      "step": 7730
    },
    {
      "epoch": 0.8923627134287033,
      "grad_norm": 0.3559563159942627,
      "learning_rate": 0.00017717661634331612,
      "loss": 1.0159,
      "step": 7735
    },
    {
      "epoch": 0.8929395477618828,
      "grad_norm": 0.4187619686126709,
      "learning_rate": 0.00017713391727690284,
      "loss": 1.0143,
      "step": 7740
    },
    {
      "epoch": 0.8935163820950623,
      "grad_norm": 0.3573208749294281,
      "learning_rate": 0.00017709118346150964,
      "loss": 0.9379,
      "step": 7745
    },
    {
      "epoch": 0.8940932164282418,
      "grad_norm": 0.3350745141506195,
      "learning_rate": 0.00017704841491638816,
      "loss": 0.9326,
      "step": 7750
    },
    {
      "epoch": 0.8946700507614214,
      "grad_norm": 0.35027506947517395,
      "learning_rate": 0.0001770056116608057,
      "loss": 0.989,
      "step": 7755
    },
    {
      "epoch": 0.8952468850946008,
      "grad_norm": 0.4128049910068512,
      "learning_rate": 0.00017696277371404527,
      "loss": 0.9921,
      "step": 7760
    },
    {
      "epoch": 0.8958237194277804,
      "grad_norm": 0.32600775361061096,
      "learning_rate": 0.00017691990109540542,
      "loss": 0.9384,
      "step": 7765
    },
    {
      "epoch": 0.8964005537609598,
      "grad_norm": 0.3520258367061615,
      "learning_rate": 0.0001768769938242003,
      "loss": 0.9502,
      "step": 7770
    },
    {
      "epoch": 0.8969773880941394,
      "grad_norm": 0.34424781799316406,
      "learning_rate": 0.00017683405191975981,
      "loss": 0.9873,
      "step": 7775
    },
    {
      "epoch": 0.8975542224273189,
      "grad_norm": 0.4074617028236389,
      "learning_rate": 0.0001767910754014293,
      "loss": 0.975,
      "step": 7780
    },
    {
      "epoch": 0.8981310567604984,
      "grad_norm": 0.351262629032135,
      "learning_rate": 0.0001767480642885698,
      "loss": 0.9896,
      "step": 7785
    },
    {
      "epoch": 0.8987078910936779,
      "grad_norm": 0.41109660267829895,
      "learning_rate": 0.00017670501860055787,
      "loss": 0.9817,
      "step": 7790
    },
    {
      "epoch": 0.8992847254268574,
      "grad_norm": 0.3734431564807892,
      "learning_rate": 0.00017666193835678571,
      "loss": 0.9658,
      "step": 7795
    },
    {
      "epoch": 0.8998615597600369,
      "grad_norm": 0.3481912612915039,
      "learning_rate": 0.00017661882357666105,
      "loss": 0.9937,
      "step": 7800
    },
    {
      "epoch": 0.9004383940932165,
      "grad_norm": 0.3160158395767212,
      "learning_rate": 0.00017657567427960716,
      "loss": 1.0098,
      "step": 7805
    },
    {
      "epoch": 0.9010152284263959,
      "grad_norm": 0.35561150312423706,
      "learning_rate": 0.00017653249048506288,
      "loss": 0.9887,
      "step": 7810
    },
    {
      "epoch": 0.9015920627595755,
      "grad_norm": 0.35436055064201355,
      "learning_rate": 0.00017648927221248264,
      "loss": 1.0033,
      "step": 7815
    },
    {
      "epoch": 0.9021688970927549,
      "grad_norm": 0.3714834153652191,
      "learning_rate": 0.0001764460194813363,
      "loss": 1.0272,
      "step": 7820
    },
    {
      "epoch": 0.9027457314259345,
      "grad_norm": 0.35957440733909607,
      "learning_rate": 0.00017640273231110933,
      "loss": 0.9775,
      "step": 7825
    },
    {
      "epoch": 0.903322565759114,
      "grad_norm": 0.36524468660354614,
      "learning_rate": 0.00017635941072130268,
      "loss": 0.9737,
      "step": 7830
    },
    {
      "epoch": 0.9038994000922935,
      "grad_norm": 0.34316587448120117,
      "learning_rate": 0.00017631605473143283,
      "loss": 0.9265,
      "step": 7835
    },
    {
      "epoch": 0.904476234425473,
      "grad_norm": 0.3439452648162842,
      "learning_rate": 0.00017627266436103168,
      "loss": 0.9628,
      "step": 7840
    },
    {
      "epoch": 0.9050530687586525,
      "grad_norm": 0.3037916421890259,
      "learning_rate": 0.00017622923962964672,
      "loss": 0.9687,
      "step": 7845
    },
    {
      "epoch": 0.905629903091832,
      "grad_norm": 0.3490997850894928,
      "learning_rate": 0.0001761857805568409,
      "loss": 0.9824,
      "step": 7850
    },
    {
      "epoch": 0.9062067374250116,
      "grad_norm": 0.35158243775367737,
      "learning_rate": 0.00017614228716219255,
      "loss": 0.9667,
      "step": 7855
    },
    {
      "epoch": 0.906783571758191,
      "grad_norm": 0.38031184673309326,
      "learning_rate": 0.0001760987594652956,
      "loss": 1.0161,
      "step": 7860
    },
    {
      "epoch": 0.9073604060913706,
      "grad_norm": 0.34765803813934326,
      "learning_rate": 0.0001760551974857593,
      "loss": 0.9449,
      "step": 7865
    },
    {
      "epoch": 0.90793724042455,
      "grad_norm": 0.36684057116508484,
      "learning_rate": 0.00017601160124320844,
      "loss": 0.9568,
      "step": 7870
    },
    {
      "epoch": 0.9085140747577296,
      "grad_norm": 0.3498166501522064,
      "learning_rate": 0.00017596797075728322,
      "loss": 0.9692,
      "step": 7875
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.32973629236221313,
      "learning_rate": 0.00017592430604763924,
      "loss": 0.9436,
      "step": 7880
    },
    {
      "epoch": 0.9096677434240886,
      "grad_norm": 0.3311896026134491,
      "learning_rate": 0.0001758806071339475,
      "loss": 0.9726,
      "step": 7885
    },
    {
      "epoch": 0.9102445777572681,
      "grad_norm": 0.3403119742870331,
      "learning_rate": 0.00017583687403589454,
      "loss": 0.9592,
      "step": 7890
    },
    {
      "epoch": 0.9108214120904476,
      "grad_norm": 0.3610020577907562,
      "learning_rate": 0.00017579310677318214,
      "loss": 0.9674,
      "step": 7895
    },
    {
      "epoch": 0.9113982464236271,
      "grad_norm": 0.8834409117698669,
      "learning_rate": 0.00017574930536552757,
      "loss": 0.977,
      "step": 7900
    },
    {
      "epoch": 0.9119750807568067,
      "grad_norm": 0.3714768588542938,
      "learning_rate": 0.0001757054698326634,
      "loss": 1.0151,
      "step": 7905
    },
    {
      "epoch": 0.9125519150899861,
      "grad_norm": 0.3592800796031952,
      "learning_rate": 0.00017566160019433767,
      "loss": 0.9825,
      "step": 7910
    },
    {
      "epoch": 0.9131287494231657,
      "grad_norm": 0.334396630525589,
      "learning_rate": 0.0001756176964703137,
      "loss": 0.9823,
      "step": 7915
    },
    {
      "epoch": 0.9137055837563451,
      "grad_norm": 0.5015032291412354,
      "learning_rate": 0.00017557375868037026,
      "loss": 0.9493,
      "step": 7920
    },
    {
      "epoch": 0.9142824180895247,
      "grad_norm": 0.34532681107521057,
      "learning_rate": 0.00017552978684430134,
      "loss": 0.9458,
      "step": 7925
    },
    {
      "epoch": 0.9148592524227042,
      "grad_norm": 0.3358142077922821,
      "learning_rate": 0.00017548578098191636,
      "loss": 0.9446,
      "step": 7930
    },
    {
      "epoch": 0.9154360867558837,
      "grad_norm": 0.334096759557724,
      "learning_rate": 0.0001754417411130401,
      "loss": 0.9478,
      "step": 7935
    },
    {
      "epoch": 0.9160129210890632,
      "grad_norm": 0.38140803575515747,
      "learning_rate": 0.00017539766725751252,
      "loss": 0.9654,
      "step": 7940
    },
    {
      "epoch": 0.9165897554222427,
      "grad_norm": 0.3713916540145874,
      "learning_rate": 0.00017535355943518906,
      "loss": 1.002,
      "step": 7945
    },
    {
      "epoch": 0.9171665897554222,
      "grad_norm": 0.34454530477523804,
      "learning_rate": 0.0001753094176659403,
      "loss": 1.0134,
      "step": 7950
    },
    {
      "epoch": 0.9177434240886018,
      "grad_norm": 0.32925593852996826,
      "learning_rate": 0.0001752652419696523,
      "loss": 1.013,
      "step": 7955
    },
    {
      "epoch": 0.9183202584217812,
      "grad_norm": 0.3872643709182739,
      "learning_rate": 0.0001752210323662262,
      "loss": 1.0731,
      "step": 7960
    },
    {
      "epoch": 0.9188970927549608,
      "grad_norm": 0.3960958421230316,
      "learning_rate": 0.0001751767888755785,
      "loss": 1.0367,
      "step": 7965
    },
    {
      "epoch": 0.9194739270881402,
      "grad_norm": 0.4250330626964569,
      "learning_rate": 0.00017513251151764109,
      "loss": 0.993,
      "step": 7970
    },
    {
      "epoch": 0.9200507614213198,
      "grad_norm": 0.3722861409187317,
      "learning_rate": 0.0001750882003123609,
      "loss": 1.0253,
      "step": 7975
    },
    {
      "epoch": 0.9206275957544993,
      "grad_norm": 0.34457963705062866,
      "learning_rate": 0.00017504385527970028,
      "loss": 0.9638,
      "step": 7980
    },
    {
      "epoch": 0.9212044300876788,
      "grad_norm": 0.42666009068489075,
      "learning_rate": 0.00017499947643963672,
      "loss": 1.0463,
      "step": 7985
    },
    {
      "epoch": 0.9217812644208583,
      "grad_norm": 0.34574225544929504,
      "learning_rate": 0.00017495506381216296,
      "loss": 1.034,
      "step": 7990
    },
    {
      "epoch": 0.9223580987540378,
      "grad_norm": 0.37822163105010986,
      "learning_rate": 0.00017491061741728702,
      "loss": 0.9528,
      "step": 7995
    },
    {
      "epoch": 0.9229349330872173,
      "grad_norm": 0.36716076731681824,
      "learning_rate": 0.00017486613727503206,
      "loss": 0.986,
      "step": 8000
    },
    {
      "epoch": 0.9235117674203969,
      "grad_norm": 0.32711061835289,
      "learning_rate": 0.00017482162340543646,
      "loss": 0.9237,
      "step": 8005
    },
    {
      "epoch": 0.9240886017535763,
      "grad_norm": 0.39083343744277954,
      "learning_rate": 0.00017477707582855384,
      "loss": 0.9882,
      "step": 8010
    },
    {
      "epoch": 0.9246654360867559,
      "grad_norm": 0.3748285472393036,
      "learning_rate": 0.00017473249456445293,
      "loss": 1.025,
      "step": 8015
    },
    {
      "epoch": 0.9252422704199353,
      "grad_norm": 0.3572528064250946,
      "learning_rate": 0.0001746878796332177,
      "loss": 0.9893,
      "step": 8020
    },
    {
      "epoch": 0.9258191047531149,
      "grad_norm": 0.33131203055381775,
      "learning_rate": 0.00017464323105494727,
      "loss": 0.9768,
      "step": 8025
    },
    {
      "epoch": 0.9263959390862944,
      "grad_norm": 0.3413887917995453,
      "learning_rate": 0.0001745985488497559,
      "loss": 0.9692,
      "step": 8030
    },
    {
      "epoch": 0.9269727734194739,
      "grad_norm": 0.33206212520599365,
      "learning_rate": 0.000174553833037773,
      "loss": 0.9642,
      "step": 8035
    },
    {
      "epoch": 0.9275496077526535,
      "grad_norm": 0.3740399181842804,
      "learning_rate": 0.00017450908363914316,
      "loss": 0.9518,
      "step": 8040
    },
    {
      "epoch": 0.928126442085833,
      "grad_norm": 0.32293087244033813,
      "learning_rate": 0.00017446430067402603,
      "loss": 0.9629,
      "step": 8045
    },
    {
      "epoch": 0.9287032764190125,
      "grad_norm": 0.3599385619163513,
      "learning_rate": 0.00017441948416259645,
      "loss": 0.9665,
      "step": 8050
    },
    {
      "epoch": 0.929280110752192,
      "grad_norm": 0.36823639273643494,
      "learning_rate": 0.00017437463412504437,
      "loss": 0.942,
      "step": 8055
    },
    {
      "epoch": 0.9298569450853715,
      "grad_norm": 0.33894339203834534,
      "learning_rate": 0.00017432975058157473,
      "loss": 0.9273,
      "step": 8060
    },
    {
      "epoch": 0.930433779418551,
      "grad_norm": 0.3650658428668976,
      "learning_rate": 0.0001742848335524078,
      "loss": 1.0035,
      "step": 8065
    },
    {
      "epoch": 0.9310106137517306,
      "grad_norm": 0.31818750500679016,
      "learning_rate": 0.00017423988305777864,
      "loss": 0.972,
      "step": 8070
    },
    {
      "epoch": 0.93158744808491,
      "grad_norm": 0.35458680987358093,
      "learning_rate": 0.0001741948991179376,
      "loss": 0.9694,
      "step": 8075
    },
    {
      "epoch": 0.9321642824180896,
      "grad_norm": 0.37025484442710876,
      "learning_rate": 0.00017414988175315006,
      "loss": 0.9765,
      "step": 8080
    },
    {
      "epoch": 0.932741116751269,
      "grad_norm": 0.3461233377456665,
      "learning_rate": 0.0001741048309836964,
      "loss": 0.9824,
      "step": 8085
    },
    {
      "epoch": 0.9333179510844486,
      "grad_norm": 0.3459435701370239,
      "learning_rate": 0.00017405974682987204,
      "loss": 0.9828,
      "step": 8090
    },
    {
      "epoch": 0.933894785417628,
      "grad_norm": 0.6978673338890076,
      "learning_rate": 0.00017401462931198756,
      "loss": 0.9872,
      "step": 8095
    },
    {
      "epoch": 0.9344716197508076,
      "grad_norm": 0.3274736702442169,
      "learning_rate": 0.00017396947845036844,
      "loss": 0.9817,
      "step": 8100
    },
    {
      "epoch": 0.9350484540839871,
      "grad_norm": 0.357372522354126,
      "learning_rate": 0.00017392429426535527,
      "loss": 0.9568,
      "step": 8105
    },
    {
      "epoch": 0.9356252884171666,
      "grad_norm": 0.34130859375,
      "learning_rate": 0.00017387907677730353,
      "loss": 0.998,
      "step": 8110
    },
    {
      "epoch": 0.9362021227503461,
      "grad_norm": 0.3448001742362976,
      "learning_rate": 0.00017383382600658388,
      "loss": 1.0056,
      "step": 8115
    },
    {
      "epoch": 0.9367789570835257,
      "grad_norm": 0.33317503333091736,
      "learning_rate": 0.00017378854197358181,
      "loss": 0.9587,
      "step": 8120
    },
    {
      "epoch": 0.9373557914167051,
      "grad_norm": 0.328156054019928,
      "learning_rate": 0.0001737432246986979,
      "loss": 0.973,
      "step": 8125
    },
    {
      "epoch": 0.9379326257498847,
      "grad_norm": 0.3665638267993927,
      "learning_rate": 0.0001736978742023477,
      "loss": 0.9792,
      "step": 8130
    },
    {
      "epoch": 0.9385094600830641,
      "grad_norm": 0.3370935022830963,
      "learning_rate": 0.00017365249050496165,
      "loss": 0.9701,
      "step": 8135
    },
    {
      "epoch": 0.9390862944162437,
      "grad_norm": 0.3585437536239624,
      "learning_rate": 0.00017360707362698517,
      "loss": 0.9784,
      "step": 8140
    },
    {
      "epoch": 0.9396631287494231,
      "grad_norm": 0.3112260103225708,
      "learning_rate": 0.00017356162358887875,
      "loss": 0.9374,
      "step": 8145
    },
    {
      "epoch": 0.9402399630826027,
      "grad_norm": 0.3765519857406616,
      "learning_rate": 0.00017351614041111763,
      "loss": 0.987,
      "step": 8150
    },
    {
      "epoch": 0.9408167974157822,
      "grad_norm": 0.34944432973861694,
      "learning_rate": 0.00017347062411419208,
      "loss": 0.9721,
      "step": 8155
    },
    {
      "epoch": 0.9413936317489617,
      "grad_norm": 0.31840527057647705,
      "learning_rate": 0.00017342507471860733,
      "loss": 0.9476,
      "step": 8160
    },
    {
      "epoch": 0.9419704660821412,
      "grad_norm": 0.38349196314811707,
      "learning_rate": 0.00017337949224488343,
      "loss": 0.9886,
      "step": 8165
    },
    {
      "epoch": 0.9425473004153208,
      "grad_norm": 0.34952884912490845,
      "learning_rate": 0.00017333387671355542,
      "loss": 0.9522,
      "step": 8170
    },
    {
      "epoch": 0.9431241347485002,
      "grad_norm": 0.35600200295448303,
      "learning_rate": 0.0001732882281451731,
      "loss": 0.9515,
      "step": 8175
    },
    {
      "epoch": 0.9437009690816798,
      "grad_norm": 0.351744681596756,
      "learning_rate": 0.00017324254656030132,
      "loss": 1.053,
      "step": 8180
    },
    {
      "epoch": 0.9442778034148592,
      "grad_norm": 0.3579868674278259,
      "learning_rate": 0.00017319683197951967,
      "loss": 0.9598,
      "step": 8185
    },
    {
      "epoch": 0.9448546377480388,
      "grad_norm": 0.363534539937973,
      "learning_rate": 0.0001731510844234227,
      "loss": 0.9699,
      "step": 8190
    },
    {
      "epoch": 0.9454314720812182,
      "grad_norm": 0.3423145115375519,
      "learning_rate": 0.00017310530391261976,
      "loss": 0.9543,
      "step": 8195
    },
    {
      "epoch": 0.9460083064143978,
      "grad_norm": 0.35032740235328674,
      "learning_rate": 0.00017305949046773504,
      "loss": 0.9589,
      "step": 8200
    },
    {
      "epoch": 0.9465851407475773,
      "grad_norm": 0.32708555459976196,
      "learning_rate": 0.0001730136441094076,
      "loss": 0.9298,
      "step": 8205
    },
    {
      "epoch": 0.9471619750807568,
      "grad_norm": 0.36810487508773804,
      "learning_rate": 0.0001729677648582913,
      "loss": 1.0214,
      "step": 8210
    },
    {
      "epoch": 0.9477388094139363,
      "grad_norm": 0.36619478464126587,
      "learning_rate": 0.00017292185273505486,
      "loss": 1.0026,
      "step": 8215
    },
    {
      "epoch": 0.9483156437471159,
      "grad_norm": 0.34805989265441895,
      "learning_rate": 0.00017287590776038177,
      "loss": 0.9813,
      "step": 8220
    },
    {
      "epoch": 0.9488924780802953,
      "grad_norm": 0.3577708303928375,
      "learning_rate": 0.0001728299299549703,
      "loss": 0.9486,
      "step": 8225
    },
    {
      "epoch": 0.9494693124134749,
      "grad_norm": 0.34766557812690735,
      "learning_rate": 0.00017278391933953362,
      "loss": 0.9412,
      "step": 8230
    },
    {
      "epoch": 0.9500461467466543,
      "grad_norm": 0.4266161620616913,
      "learning_rate": 0.0001727378759347995,
      "loss": 1.0086,
      "step": 8235
    },
    {
      "epoch": 0.9506229810798339,
      "grad_norm": 0.37093380093574524,
      "learning_rate": 0.00017269179976151067,
      "loss": 0.9979,
      "step": 8240
    },
    {
      "epoch": 0.9511998154130133,
      "grad_norm": 0.3407989740371704,
      "learning_rate": 0.00017264569084042447,
      "loss": 0.9537,
      "step": 8245
    },
    {
      "epoch": 0.9517766497461929,
      "grad_norm": 0.34715965390205383,
      "learning_rate": 0.0001725995491923131,
      "loss": 0.9931,
      "step": 8250
    },
    {
      "epoch": 0.9523534840793724,
      "grad_norm": 0.35018789768218994,
      "learning_rate": 0.00017255337483796344,
      "loss": 0.9385,
      "step": 8255
    },
    {
      "epoch": 0.9529303184125519,
      "grad_norm": 0.3394466042518616,
      "learning_rate": 0.00017250716779817715,
      "loss": 0.9758,
      "step": 8260
    },
    {
      "epoch": 0.9535071527457314,
      "grad_norm": 0.3476242125034332,
      "learning_rate": 0.00017246092809377058,
      "loss": 0.9571,
      "step": 8265
    },
    {
      "epoch": 0.954083987078911,
      "grad_norm": 0.34088173508644104,
      "learning_rate": 0.00017241465574557475,
      "loss": 0.8954,
      "step": 8270
    },
    {
      "epoch": 0.9546608214120904,
      "grad_norm": 0.3062012493610382,
      "learning_rate": 0.00017236835077443557,
      "loss": 0.919,
      "step": 8275
    },
    {
      "epoch": 0.95523765574527,
      "grad_norm": 0.33732926845550537,
      "learning_rate": 0.0001723220132012134,
      "loss": 0.9417,
      "step": 8280
    },
    {
      "epoch": 0.9558144900784494,
      "grad_norm": 0.32906705141067505,
      "learning_rate": 0.00017227564304678346,
      "loss": 0.9216,
      "step": 8285
    },
    {
      "epoch": 0.956391324411629,
      "grad_norm": 0.3598426282405853,
      "learning_rate": 0.0001722292403320356,
      "loss": 0.9826,
      "step": 8290
    },
    {
      "epoch": 0.9569681587448085,
      "grad_norm": 0.3878871202468872,
      "learning_rate": 0.00017218280507787435,
      "loss": 0.971,
      "step": 8295
    },
    {
      "epoch": 0.957544993077988,
      "grad_norm": 0.3313681185245514,
      "learning_rate": 0.0001721363373052188,
      "loss": 0.9708,
      "step": 8300
    },
    {
      "epoch": 0.9581218274111675,
      "grad_norm": 0.36274102330207825,
      "learning_rate": 0.00017208983703500286,
      "loss": 0.9782,
      "step": 8305
    },
    {
      "epoch": 0.958698661744347,
      "grad_norm": 0.36623549461364746,
      "learning_rate": 0.00017204330428817496,
      "loss": 0.9838,
      "step": 8310
    },
    {
      "epoch": 0.9592754960775265,
      "grad_norm": 0.36566877365112305,
      "learning_rate": 0.00017199673908569819,
      "loss": 0.9703,
      "step": 8315
    },
    {
      "epoch": 0.9598523304107061,
      "grad_norm": 0.4171159863471985,
      "learning_rate": 0.00017195014144855025,
      "loss": 0.9786,
      "step": 8320
    },
    {
      "epoch": 0.9604291647438855,
      "grad_norm": 0.37554582953453064,
      "learning_rate": 0.00017190351139772348,
      "loss": 0.9437,
      "step": 8325
    },
    {
      "epoch": 0.9610059990770651,
      "grad_norm": 0.34158971905708313,
      "learning_rate": 0.00017185684895422483,
      "loss": 0.9366,
      "step": 8330
    },
    {
      "epoch": 0.9615828334102445,
      "grad_norm": 0.35574156045913696,
      "learning_rate": 0.0001718101541390758,
      "loss": 0.981,
      "step": 8335
    },
    {
      "epoch": 0.9621596677434241,
      "grad_norm": 0.3361159563064575,
      "learning_rate": 0.00017176342697331246,
      "loss": 0.9917,
      "step": 8340
    },
    {
      "epoch": 0.9627365020766036,
      "grad_norm": 0.3361218571662903,
      "learning_rate": 0.00017171666747798557,
      "loss": 0.956,
      "step": 8345
    },
    {
      "epoch": 0.9633133364097831,
      "grad_norm": 0.3416488468647003,
      "learning_rate": 0.00017166987567416033,
      "loss": 0.9817,
      "step": 8350
    },
    {
      "epoch": 0.9638901707429626,
      "grad_norm": 0.35277891159057617,
      "learning_rate": 0.00017162305158291655,
      "loss": 0.939,
      "step": 8355
    },
    {
      "epoch": 0.9644670050761421,
      "grad_norm": 0.3547937572002411,
      "learning_rate": 0.00017157619522534853,
      "loss": 0.9504,
      "step": 8360
    },
    {
      "epoch": 0.9650438394093216,
      "grad_norm": 0.4167003333568573,
      "learning_rate": 0.00017152930662256522,
      "loss": 0.9831,
      "step": 8365
    },
    {
      "epoch": 0.9656206737425012,
      "grad_norm": 0.33474335074424744,
      "learning_rate": 0.00017148238579568995,
      "loss": 0.9962,
      "step": 8370
    },
    {
      "epoch": 0.9661975080756806,
      "grad_norm": 0.3461887538433075,
      "learning_rate": 0.00017143543276586072,
      "loss": 0.9769,
      "step": 8375
    },
    {
      "epoch": 0.9667743424088602,
      "grad_norm": 0.33460527658462524,
      "learning_rate": 0.00017138844755422992,
      "loss": 0.9541,
      "step": 8380
    },
    {
      "epoch": 0.9673511767420396,
      "grad_norm": 0.32638758420944214,
      "learning_rate": 0.00017134143018196447,
      "loss": 0.967,
      "step": 8385
    },
    {
      "epoch": 0.9679280110752192,
      "grad_norm": 0.3325026333332062,
      "learning_rate": 0.0001712943806702458,
      "loss": 0.9852,
      "step": 8390
    },
    {
      "epoch": 0.9685048454083988,
      "grad_norm": 0.3283535838127136,
      "learning_rate": 0.0001712472990402698,
      "loss": 0.9792,
      "step": 8395
    },
    {
      "epoch": 0.9690816797415782,
      "grad_norm": 0.34361085295677185,
      "learning_rate": 0.00017120018531324689,
      "loss": 0.9412,
      "step": 8400
    },
    {
      "epoch": 0.9696585140747578,
      "grad_norm": 0.32706576585769653,
      "learning_rate": 0.00017115303951040182,
      "loss": 0.9493,
      "step": 8405
    },
    {
      "epoch": 0.9702353484079372,
      "grad_norm": 0.34150487184524536,
      "learning_rate": 0.00017110586165297392,
      "loss": 0.9744,
      "step": 8410
    },
    {
      "epoch": 0.9708121827411168,
      "grad_norm": 0.3405235707759857,
      "learning_rate": 0.00017105865176221684,
      "loss": 0.9312,
      "step": 8415
    },
    {
      "epoch": 0.9713890170742963,
      "grad_norm": 0.3444620668888092,
      "learning_rate": 0.0001710114098593988,
      "loss": 1.0102,
      "step": 8420
    },
    {
      "epoch": 0.9719658514074758,
      "grad_norm": 0.3453844487667084,
      "learning_rate": 0.00017096413596580238,
      "loss": 0.9673,
      "step": 8425
    },
    {
      "epoch": 0.9725426857406553,
      "grad_norm": 0.3417566418647766,
      "learning_rate": 0.00017091683010272447,
      "loss": 0.9831,
      "step": 8430
    },
    {
      "epoch": 0.9731195200738348,
      "grad_norm": 0.3529415428638458,
      "learning_rate": 0.00017086949229147652,
      "loss": 0.9837,
      "step": 8435
    },
    {
      "epoch": 0.9736963544070143,
      "grad_norm": 0.3694683909416199,
      "learning_rate": 0.00017082212255338432,
      "loss": 0.9564,
      "step": 8440
    },
    {
      "epoch": 0.9742731887401939,
      "grad_norm": 0.3446985185146332,
      "learning_rate": 0.00017077472090978798,
      "loss": 0.9441,
      "step": 8445
    },
    {
      "epoch": 0.9748500230733733,
      "grad_norm": 0.3844882845878601,
      "learning_rate": 0.000170727287382042,
      "loss": 0.932,
      "step": 8450
    },
    {
      "epoch": 0.9754268574065529,
      "grad_norm": 0.3843259811401367,
      "learning_rate": 0.00017067982199151543,
      "loss": 0.9793,
      "step": 8455
    },
    {
      "epoch": 0.9760036917397323,
      "grad_norm": 0.39412060379981995,
      "learning_rate": 0.00017063232475959133,
      "loss": 1.0562,
      "step": 8460
    },
    {
      "epoch": 0.9765805260729119,
      "grad_norm": 0.3523117005825043,
      "learning_rate": 0.00017058479570766745,
      "loss": 0.938,
      "step": 8465
    },
    {
      "epoch": 0.9771573604060914,
      "grad_norm": 0.3870525658130646,
      "learning_rate": 0.00017053723485715563,
      "loss": 0.9556,
      "step": 8470
    },
    {
      "epoch": 0.9777341947392709,
      "grad_norm": 0.36875876784324646,
      "learning_rate": 0.00017048964222948217,
      "loss": 0.9364,
      "step": 8475
    },
    {
      "epoch": 0.9783110290724504,
      "grad_norm": 0.3694005310535431,
      "learning_rate": 0.00017044201784608762,
      "loss": 0.9481,
      "step": 8480
    },
    {
      "epoch": 0.9788878634056299,
      "grad_norm": 0.46674659848213196,
      "learning_rate": 0.00017039436172842684,
      "loss": 0.9839,
      "step": 8485
    },
    {
      "epoch": 0.9794646977388094,
      "grad_norm": 0.35668930411338806,
      "learning_rate": 0.00017034667389796904,
      "loss": 0.9526,
      "step": 8490
    },
    {
      "epoch": 0.980041532071989,
      "grad_norm": 0.3454805314540863,
      "learning_rate": 0.0001702989543761977,
      "loss": 0.9748,
      "step": 8495
    },
    {
      "epoch": 0.9806183664051684,
      "grad_norm": 0.3518441617488861,
      "learning_rate": 0.00017025120318461047,
      "loss": 0.9725,
      "step": 8500
    },
    {
      "epoch": 0.981195200738348,
      "grad_norm": 0.3471713066101074,
      "learning_rate": 0.00017020342034471944,
      "loss": 0.965,
      "step": 8505
    },
    {
      "epoch": 0.9817720350715274,
      "grad_norm": 0.3555985391139984,
      "learning_rate": 0.00017015560587805081,
      "loss": 0.9563,
      "step": 8510
    },
    {
      "epoch": 0.982348869404707,
      "grad_norm": 0.3868695795536041,
      "learning_rate": 0.00017010775980614518,
      "loss": 0.9863,
      "step": 8515
    },
    {
      "epoch": 0.9829257037378865,
      "grad_norm": 0.3373103737831116,
      "learning_rate": 0.00017005988215055718,
      "loss": 0.9657,
      "step": 8520
    },
    {
      "epoch": 0.983502538071066,
      "grad_norm": 0.34423285722732544,
      "learning_rate": 0.00017001197293285589,
      "loss": 0.9394,
      "step": 8525
    },
    {
      "epoch": 0.9840793724042455,
      "grad_norm": 0.3576836585998535,
      "learning_rate": 0.00016996403217462442,
      "loss": 0.9968,
      "step": 8530
    },
    {
      "epoch": 0.984656206737425,
      "grad_norm": 0.34142541885375977,
      "learning_rate": 0.00016991605989746025,
      "loss": 0.9703,
      "step": 8535
    },
    {
      "epoch": 0.9852330410706045,
      "grad_norm": 0.32961562275886536,
      "learning_rate": 0.00016986805612297494,
      "loss": 0.9336,
      "step": 8540
    },
    {
      "epoch": 0.9858098754037841,
      "grad_norm": 0.36415210366249084,
      "learning_rate": 0.00016982002087279432,
      "loss": 0.9715,
      "step": 8545
    },
    {
      "epoch": 0.9863867097369635,
      "grad_norm": 0.3524576425552368,
      "learning_rate": 0.00016977195416855828,
      "loss": 0.9801,
      "step": 8550
    },
    {
      "epoch": 0.9869635440701431,
      "grad_norm": 0.33668380975723267,
      "learning_rate": 0.00016972385603192106,
      "loss": 0.9813,
      "step": 8555
    },
    {
      "epoch": 0.9875403784033225,
      "grad_norm": 0.32700344920158386,
      "learning_rate": 0.00016967572648455097,
      "loss": 0.9726,
      "step": 8560
    },
    {
      "epoch": 0.9881172127365021,
      "grad_norm": 0.3518313765525818,
      "learning_rate": 0.00016962756554813037,
      "loss": 1.0142,
      "step": 8565
    },
    {
      "epoch": 0.9886940470696816,
      "grad_norm": 0.35196545720100403,
      "learning_rate": 0.00016957937324435594,
      "loss": 1.0086,
      "step": 8570
    },
    {
      "epoch": 0.9892708814028611,
      "grad_norm": 0.38598722219467163,
      "learning_rate": 0.00016953114959493835,
      "loss": 0.9976,
      "step": 8575
    },
    {
      "epoch": 0.9898477157360406,
      "grad_norm": 0.3629986345767975,
      "learning_rate": 0.0001694828946216025,
      "loss": 0.9404,
      "step": 8580
    },
    {
      "epoch": 0.9904245500692201,
      "grad_norm": 0.36070430278778076,
      "learning_rate": 0.00016943460834608728,
      "loss": 1.0135,
      "step": 8585
    },
    {
      "epoch": 0.9910013844023996,
      "grad_norm": 0.3393903374671936,
      "learning_rate": 0.0001693862907901458,
      "loss": 0.9574,
      "step": 8590
    },
    {
      "epoch": 0.9915782187355792,
      "grad_norm": 0.35163483023643494,
      "learning_rate": 0.00016933794197554524,
      "loss": 0.9809,
      "step": 8595
    },
    {
      "epoch": 0.9921550530687586,
      "grad_norm": 0.36458709836006165,
      "learning_rate": 0.00016928956192406678,
      "loss": 0.9738,
      "step": 8600
    },
    {
      "epoch": 0.9927318874019382,
      "grad_norm": 0.3441014587879181,
      "learning_rate": 0.00016924115065750575,
      "loss": 0.9831,
      "step": 8605
    },
    {
      "epoch": 0.9933087217351176,
      "grad_norm": 0.3553078770637512,
      "learning_rate": 0.00016919270819767152,
      "loss": 0.953,
      "step": 8610
    },
    {
      "epoch": 0.9938855560682972,
      "grad_norm": 0.36507415771484375,
      "learning_rate": 0.00016914423456638753,
      "loss": 0.9955,
      "step": 8615
    },
    {
      "epoch": 0.9944623904014767,
      "grad_norm": 0.3221859335899353,
      "learning_rate": 0.0001690957297854912,
      "loss": 0.975,
      "step": 8620
    },
    {
      "epoch": 0.9950392247346562,
      "grad_norm": 0.39070311188697815,
      "learning_rate": 0.00016904719387683407,
      "loss": 0.9846,
      "step": 8625
    },
    {
      "epoch": 0.9956160590678357,
      "grad_norm": 0.3547630310058594,
      "learning_rate": 0.00016899862686228163,
      "loss": 0.9936,
      "step": 8630
    },
    {
      "epoch": 0.9961928934010152,
      "grad_norm": 0.36742347478866577,
      "learning_rate": 0.00016895002876371343,
      "loss": 0.9796,
      "step": 8635
    },
    {
      "epoch": 0.9967697277341947,
      "grad_norm": 0.3844885230064392,
      "learning_rate": 0.00016890139960302304,
      "loss": 0.9951,
      "step": 8640
    },
    {
      "epoch": 0.9973465620673743,
      "grad_norm": 0.3780565559864044,
      "learning_rate": 0.00016885273940211795,
      "loss": 0.9467,
      "step": 8645
    },
    {
      "epoch": 0.9979233964005537,
      "grad_norm": 0.35448718070983887,
      "learning_rate": 0.0001688040481829197,
      "loss": 0.9981,
      "step": 8650
    },
    {
      "epoch": 0.9985002307337333,
      "grad_norm": 0.3584054708480835,
      "learning_rate": 0.00016875532596736373,
      "loss": 0.963,
      "step": 8655
    },
    {
      "epoch": 0.9990770650669127,
      "grad_norm": 0.343289315700531,
      "learning_rate": 0.00016870657277739953,
      "loss": 0.9238,
      "step": 8660
    },
    {
      "epoch": 0.9996538994000923,
      "grad_norm": 0.3527401089668274,
      "learning_rate": 0.00016865778863499054,
      "loss": 0.9112,
      "step": 8665
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.9769963622093201,
      "eval_runtime": 961.8297,
      "eval_samples_per_second": 15.959,
      "eval_steps_per_second": 0.998,
      "step": 8668
    },
    {
      "epoch": 1.0002307337332719,
      "grad_norm": 0.3392125070095062,
      "learning_rate": 0.00016860897356211403,
      "loss": 0.9856,
      "step": 8670
    },
    {
      "epoch": 1.0008075680664512,
      "grad_norm": 0.36300501227378845,
      "learning_rate": 0.00016856012758076133,
      "loss": 0.9442,
      "step": 8675
    },
    {
      "epoch": 1.0013844023996308,
      "grad_norm": 0.3717747926712036,
      "learning_rate": 0.0001685112507129377,
      "loss": 0.9853,
      "step": 8680
    },
    {
      "epoch": 1.0019612367328103,
      "grad_norm": 0.39347657561302185,
      "learning_rate": 0.00016846234298066218,
      "loss": 0.9624,
      "step": 8685
    },
    {
      "epoch": 1.00253807106599,
      "grad_norm": 0.3865641951560974,
      "learning_rate": 0.00016841340440596785,
      "loss": 0.941,
      "step": 8690
    },
    {
      "epoch": 1.0031149053991693,
      "grad_norm": 0.3533646762371063,
      "learning_rate": 0.00016836443501090163,
      "loss": 0.89,
      "step": 8695
    },
    {
      "epoch": 1.0036917397323488,
      "grad_norm": 0.3816198408603668,
      "learning_rate": 0.0001683154348175243,
      "loss": 0.9631,
      "step": 8700
    },
    {
      "epoch": 1.0042685740655284,
      "grad_norm": 0.3821849822998047,
      "learning_rate": 0.00016826640384791052,
      "loss": 0.9817,
      "step": 8705
    },
    {
      "epoch": 1.004845408398708,
      "grad_norm": 0.36628416180610657,
      "learning_rate": 0.00016821734212414894,
      "loss": 0.8998,
      "step": 8710
    },
    {
      "epoch": 1.0054222427318873,
      "grad_norm": 0.38809841871261597,
      "learning_rate": 0.00016816824966834183,
      "loss": 0.9391,
      "step": 8715
    },
    {
      "epoch": 1.0059990770650669,
      "grad_norm": 0.4090449810028076,
      "learning_rate": 0.00016811912650260556,
      "loss": 0.9141,
      "step": 8720
    },
    {
      "epoch": 1.0065759113982464,
      "grad_norm": 0.35133248567581177,
      "learning_rate": 0.0001680699726490701,
      "loss": 0.9621,
      "step": 8725
    },
    {
      "epoch": 1.007152745731426,
      "grad_norm": 0.3541509807109833,
      "learning_rate": 0.00016802078812987948,
      "loss": 0.9135,
      "step": 8730
    },
    {
      "epoch": 1.0077295800646053,
      "grad_norm": 0.34470343589782715,
      "learning_rate": 0.0001679715729671913,
      "loss": 0.9444,
      "step": 8735
    },
    {
      "epoch": 1.008306414397785,
      "grad_norm": 0.3753330409526825,
      "learning_rate": 0.00016792232718317718,
      "loss": 0.9491,
      "step": 8740
    },
    {
      "epoch": 1.0088832487309645,
      "grad_norm": 0.3640606701374054,
      "learning_rate": 0.0001678730508000224,
      "loss": 0.9451,
      "step": 8745
    },
    {
      "epoch": 1.009460083064144,
      "grad_norm": 0.35499805212020874,
      "learning_rate": 0.00016782374383992604,
      "loss": 0.9287,
      "step": 8750
    },
    {
      "epoch": 1.0100369173973236,
      "grad_norm": 0.3767320513725281,
      "learning_rate": 0.000167774406325101,
      "loss": 0.9379,
      "step": 8755
    },
    {
      "epoch": 1.010613751730503,
      "grad_norm": 0.39077675342559814,
      "learning_rate": 0.00016772503827777396,
      "loss": 0.9044,
      "step": 8760
    },
    {
      "epoch": 1.0111905860636825,
      "grad_norm": 0.3857879936695099,
      "learning_rate": 0.0001676756397201853,
      "loss": 0.9548,
      "step": 8765
    },
    {
      "epoch": 1.011767420396862,
      "grad_norm": 0.37087294459342957,
      "learning_rate": 0.00016762621067458917,
      "loss": 0.9255,
      "step": 8770
    },
    {
      "epoch": 1.0123442547300416,
      "grad_norm": 0.3710249662399292,
      "learning_rate": 0.00016757675116325343,
      "loss": 0.9263,
      "step": 8775
    },
    {
      "epoch": 1.012921089063221,
      "grad_norm": 0.37042540311813354,
      "learning_rate": 0.00016752726120845973,
      "loss": 0.9117,
      "step": 8780
    },
    {
      "epoch": 1.0134979233964005,
      "grad_norm": 0.37328779697418213,
      "learning_rate": 0.00016747774083250333,
      "loss": 0.9152,
      "step": 8785
    },
    {
      "epoch": 1.0140747577295801,
      "grad_norm": 0.35958462953567505,
      "learning_rate": 0.0001674281900576933,
      "loss": 0.9049,
      "step": 8790
    },
    {
      "epoch": 1.0146515920627597,
      "grad_norm": 0.38205039501190186,
      "learning_rate": 0.00016737860890635235,
      "loss": 0.928,
      "step": 8795
    },
    {
      "epoch": 1.015228426395939,
      "grad_norm": 0.3680003881454468,
      "learning_rate": 0.0001673289974008169,
      "loss": 0.9349,
      "step": 8800
    },
    {
      "epoch": 1.0158052607291186,
      "grad_norm": 0.3842025697231293,
      "learning_rate": 0.00016727935556343698,
      "loss": 0.9005,
      "step": 8805
    },
    {
      "epoch": 1.0163820950622982,
      "grad_norm": 0.37604400515556335,
      "learning_rate": 0.00016722968341657642,
      "loss": 0.9511,
      "step": 8810
    },
    {
      "epoch": 1.0169589293954777,
      "grad_norm": 0.36115026473999023,
      "learning_rate": 0.00016717998098261254,
      "loss": 0.9738,
      "step": 8815
    },
    {
      "epoch": 1.017535763728657,
      "grad_norm": 0.3732301890850067,
      "learning_rate": 0.0001671302482839364,
      "loss": 0.9471,
      "step": 8820
    },
    {
      "epoch": 1.0181125980618366,
      "grad_norm": 0.362886518239975,
      "learning_rate": 0.0001670804853429527,
      "loss": 0.9463,
      "step": 8825
    },
    {
      "epoch": 1.0186894323950162,
      "grad_norm": 0.38515588641166687,
      "learning_rate": 0.00016703069218207972,
      "loss": 0.9291,
      "step": 8830
    },
    {
      "epoch": 1.0192662667281958,
      "grad_norm": 0.3255932331085205,
      "learning_rate": 0.00016698086882374939,
      "loss": 0.957,
      "step": 8835
    },
    {
      "epoch": 1.019843101061375,
      "grad_norm": 0.37959185242652893,
      "learning_rate": 0.00016693101529040725,
      "loss": 0.9248,
      "step": 8840
    },
    {
      "epoch": 1.0204199353945547,
      "grad_norm": 0.3593595623970032,
      "learning_rate": 0.00016688113160451238,
      "loss": 0.9432,
      "step": 8845
    },
    {
      "epoch": 1.0209967697277342,
      "grad_norm": 0.3396543264389038,
      "learning_rate": 0.00016683121778853746,
      "loss": 0.8758,
      "step": 8850
    },
    {
      "epoch": 1.0215736040609138,
      "grad_norm": 0.355819433927536,
      "learning_rate": 0.00016678127386496883,
      "loss": 0.9419,
      "step": 8855
    },
    {
      "epoch": 1.0221504383940931,
      "grad_norm": 0.38481202721595764,
      "learning_rate": 0.00016673129985630625,
      "loss": 0.9459,
      "step": 8860
    },
    {
      "epoch": 1.0227272727272727,
      "grad_norm": 0.3595040440559387,
      "learning_rate": 0.00016668129578506315,
      "loss": 0.9144,
      "step": 8865
    },
    {
      "epoch": 1.0233041070604523,
      "grad_norm": 0.401304692029953,
      "learning_rate": 0.00016663126167376646,
      "loss": 0.9194,
      "step": 8870
    },
    {
      "epoch": 1.0238809413936318,
      "grad_norm": 0.32624685764312744,
      "learning_rate": 0.0001665811975449566,
      "loss": 0.931,
      "step": 8875
    },
    {
      "epoch": 1.0244577757268112,
      "grad_norm": 0.39976969361305237,
      "learning_rate": 0.00016653110342118764,
      "loss": 0.9067,
      "step": 8880
    },
    {
      "epoch": 1.0250346100599907,
      "grad_norm": 0.34697604179382324,
      "learning_rate": 0.00016648097932502704,
      "loss": 0.9473,
      "step": 8885
    },
    {
      "epoch": 1.0256114443931703,
      "grad_norm": 0.32559990882873535,
      "learning_rate": 0.0001664308252790558,
      "loss": 0.8861,
      "step": 8890
    },
    {
      "epoch": 1.0261882787263499,
      "grad_norm": 0.3570786416530609,
      "learning_rate": 0.0001663806413058684,
      "loss": 0.9822,
      "step": 8895
    },
    {
      "epoch": 1.0267651130595292,
      "grad_norm": 0.3664936125278473,
      "learning_rate": 0.00016633042742807285,
      "loss": 0.8971,
      "step": 8900
    },
    {
      "epoch": 1.0273419473927088,
      "grad_norm": 0.34375905990600586,
      "learning_rate": 0.00016628018366829055,
      "loss": 0.9683,
      "step": 8905
    },
    {
      "epoch": 1.0279187817258884,
      "grad_norm": 0.3472381830215454,
      "learning_rate": 0.00016622991004915645,
      "loss": 0.9341,
      "step": 8910
    },
    {
      "epoch": 1.028495616059068,
      "grad_norm": 0.4032275676727295,
      "learning_rate": 0.00016617960659331892,
      "loss": 0.9457,
      "step": 8915
    },
    {
      "epoch": 1.0290724503922473,
      "grad_norm": 0.3763919174671173,
      "learning_rate": 0.00016612927332343975,
      "loss": 0.9638,
      "step": 8920
    },
    {
      "epoch": 1.0296492847254268,
      "grad_norm": 0.3896368145942688,
      "learning_rate": 0.00016607891026219418,
      "loss": 0.9588,
      "step": 8925
    },
    {
      "epoch": 1.0302261190586064,
      "grad_norm": 0.3642440736293793,
      "learning_rate": 0.00016602851743227083,
      "loss": 0.9543,
      "step": 8930
    },
    {
      "epoch": 1.030802953391786,
      "grad_norm": 0.3432563245296478,
      "learning_rate": 0.0001659780948563719,
      "loss": 0.9543,
      "step": 8935
    },
    {
      "epoch": 1.0313797877249653,
      "grad_norm": 0.3308102786540985,
      "learning_rate": 0.00016592764255721264,
      "loss": 0.9284,
      "step": 8940
    },
    {
      "epoch": 1.0319566220581449,
      "grad_norm": 0.39647376537323,
      "learning_rate": 0.0001658771605575221,
      "loss": 0.8985,
      "step": 8945
    },
    {
      "epoch": 1.0325334563913244,
      "grad_norm": 0.35071223974227905,
      "learning_rate": 0.00016582664888004244,
      "loss": 0.9836,
      "step": 8950
    },
    {
      "epoch": 1.033110290724504,
      "grad_norm": 0.4009931981563568,
      "learning_rate": 0.00016577610754752925,
      "loss": 0.9932,
      "step": 8955
    },
    {
      "epoch": 1.0336871250576833,
      "grad_norm": 0.37220340967178345,
      "learning_rate": 0.00016572553658275157,
      "loss": 0.9339,
      "step": 8960
    },
    {
      "epoch": 1.034263959390863,
      "grad_norm": 0.5049521923065186,
      "learning_rate": 0.00016567493600849165,
      "loss": 0.9203,
      "step": 8965
    },
    {
      "epoch": 1.0348407937240425,
      "grad_norm": 0.3833288550376892,
      "learning_rate": 0.00016562430584754516,
      "loss": 0.9392,
      "step": 8970
    },
    {
      "epoch": 1.035417628057222,
      "grad_norm": 0.3854983448982239,
      "learning_rate": 0.00016557364612272113,
      "loss": 0.967,
      "step": 8975
    },
    {
      "epoch": 1.0359944623904014,
      "grad_norm": 0.38779276609420776,
      "learning_rate": 0.0001655229568568418,
      "loss": 0.9716,
      "step": 8980
    },
    {
      "epoch": 1.036571296723581,
      "grad_norm": 0.38969096541404724,
      "learning_rate": 0.00016547223807274287,
      "loss": 0.9235,
      "step": 8985
    },
    {
      "epoch": 1.0371481310567605,
      "grad_norm": 0.3601762354373932,
      "learning_rate": 0.00016542148979327315,
      "loss": 0.9388,
      "step": 8990
    },
    {
      "epoch": 1.03772496538994,
      "grad_norm": 0.37787869572639465,
      "learning_rate": 0.00016537071204129487,
      "loss": 0.9323,
      "step": 8995
    },
    {
      "epoch": 1.0383017997231194,
      "grad_norm": 0.3725028336048126,
      "learning_rate": 0.00016531990483968357,
      "loss": 0.911,
      "step": 9000
    },
    {
      "epoch": 1.038878634056299,
      "grad_norm": 0.3681508004665375,
      "learning_rate": 0.00016526906821132792,
      "loss": 0.9863,
      "step": 9005
    },
    {
      "epoch": 1.0394554683894786,
      "grad_norm": 0.3951893150806427,
      "learning_rate": 0.00016521820217912998,
      "loss": 0.9698,
      "step": 9010
    },
    {
      "epoch": 1.0400323027226581,
      "grad_norm": 0.40850746631622314,
      "learning_rate": 0.00016516730676600493,
      "loss": 0.9386,
      "step": 9015
    },
    {
      "epoch": 1.0406091370558375,
      "grad_norm": 0.3597157895565033,
      "learning_rate": 0.0001651163819948813,
      "loss": 0.895,
      "step": 9020
    },
    {
      "epoch": 1.041185971389017,
      "grad_norm": 0.35712316632270813,
      "learning_rate": 0.00016506542788870076,
      "loss": 0.9504,
      "step": 9025
    },
    {
      "epoch": 1.0417628057221966,
      "grad_norm": 0.3754498064517975,
      "learning_rate": 0.00016501444447041824,
      "loss": 0.8897,
      "step": 9030
    },
    {
      "epoch": 1.0423396400553762,
      "grad_norm": 0.3659985065460205,
      "learning_rate": 0.00016496343176300196,
      "loss": 0.8833,
      "step": 9035
    },
    {
      "epoch": 1.0429164743885555,
      "grad_norm": 0.4013921022415161,
      "learning_rate": 0.00016491238978943312,
      "loss": 0.9698,
      "step": 9040
    },
    {
      "epoch": 1.043493308721735,
      "grad_norm": 0.3634096682071686,
      "learning_rate": 0.00016486131857270628,
      "loss": 0.9354,
      "step": 9045
    },
    {
      "epoch": 1.0440701430549146,
      "grad_norm": 0.4177205562591553,
      "learning_rate": 0.00016481021813582913,
      "loss": 1.0056,
      "step": 9050
    },
    {
      "epoch": 1.0446469773880942,
      "grad_norm": 0.3462320864200592,
      "learning_rate": 0.00016475908850182251,
      "loss": 0.9092,
      "step": 9055
    },
    {
      "epoch": 1.0452238117212735,
      "grad_norm": 0.3686048984527588,
      "learning_rate": 0.00016470792969372039,
      "loss": 0.9341,
      "step": 9060
    },
    {
      "epoch": 1.045800646054453,
      "grad_norm": 0.368020623922348,
      "learning_rate": 0.00016465674173456998,
      "loss": 0.9805,
      "step": 9065
    },
    {
      "epoch": 1.0463774803876327,
      "grad_norm": 0.36095044016838074,
      "learning_rate": 0.0001646055246474315,
      "loss": 0.9008,
      "step": 9070
    },
    {
      "epoch": 1.0469543147208122,
      "grad_norm": 0.4059806168079376,
      "learning_rate": 0.00016455427845537835,
      "loss": 0.9404,
      "step": 9075
    },
    {
      "epoch": 1.0475311490539916,
      "grad_norm": 0.38529303669929504,
      "learning_rate": 0.00016450300318149707,
      "loss": 0.925,
      "step": 9080
    },
    {
      "epoch": 1.0481079833871711,
      "grad_norm": 0.34358593821525574,
      "learning_rate": 0.00016445169884888726,
      "loss": 0.9386,
      "step": 9085
    },
    {
      "epoch": 1.0486848177203507,
      "grad_norm": 0.3995891213417053,
      "learning_rate": 0.0001644003654806616,
      "loss": 0.9915,
      "step": 9090
    },
    {
      "epoch": 1.0492616520535303,
      "grad_norm": 0.37649253010749817,
      "learning_rate": 0.00016434900309994589,
      "loss": 0.9543,
      "step": 9095
    },
    {
      "epoch": 1.0498384863867098,
      "grad_norm": 0.3805069029331207,
      "learning_rate": 0.00016429761172987898,
      "loss": 0.9396,
      "step": 9100
    },
    {
      "epoch": 1.0504153207198892,
      "grad_norm": 0.35094547271728516,
      "learning_rate": 0.00016424619139361282,
      "loss": 0.9467,
      "step": 9105
    },
    {
      "epoch": 1.0509921550530688,
      "grad_norm": 0.375531941652298,
      "learning_rate": 0.00016419474211431227,
      "loss": 0.9519,
      "step": 9110
    },
    {
      "epoch": 1.0515689893862483,
      "grad_norm": 0.38208749890327454,
      "learning_rate": 0.0001641432639151554,
      "loss": 0.9717,
      "step": 9115
    },
    {
      "epoch": 1.0521458237194279,
      "grad_norm": 0.40280410647392273,
      "learning_rate": 0.00016409175681933328,
      "loss": 0.9059,
      "step": 9120
    },
    {
      "epoch": 1.0527226580526072,
      "grad_norm": 0.3839852511882782,
      "learning_rate": 0.0001640402208500499,
      "loss": 0.9399,
      "step": 9125
    },
    {
      "epoch": 1.0532994923857868,
      "grad_norm": 0.3738575875759125,
      "learning_rate": 0.00016398865603052228,
      "loss": 0.9179,
      "step": 9130
    },
    {
      "epoch": 1.0538763267189664,
      "grad_norm": 0.36099398136138916,
      "learning_rate": 0.00016393706238398056,
      "loss": 0.9475,
      "step": 9135
    },
    {
      "epoch": 1.054453161052146,
      "grad_norm": 0.37569257616996765,
      "learning_rate": 0.00016388543993366774,
      "loss": 0.9202,
      "step": 9140
    },
    {
      "epoch": 1.0550299953853253,
      "grad_norm": 0.3682805001735687,
      "learning_rate": 0.0001638337887028398,
      "loss": 0.8981,
      "step": 9145
    },
    {
      "epoch": 1.0556068297185048,
      "grad_norm": 0.3938318192958832,
      "learning_rate": 0.00016378210871476577,
      "loss": 0.9511,
      "step": 9150
    },
    {
      "epoch": 1.0561836640516844,
      "grad_norm": 0.4259416460990906,
      "learning_rate": 0.00016373039999272756,
      "loss": 0.8954,
      "step": 9155
    },
    {
      "epoch": 1.056760498384864,
      "grad_norm": 0.4090641140937805,
      "learning_rate": 0.00016367866256002003,
      "loss": 0.955,
      "step": 9160
    },
    {
      "epoch": 1.0573373327180433,
      "grad_norm": 0.3798772394657135,
      "learning_rate": 0.00016362689643995105,
      "loss": 0.9026,
      "step": 9165
    },
    {
      "epoch": 1.0579141670512229,
      "grad_norm": 0.42819643020629883,
      "learning_rate": 0.0001635751016558413,
      "loss": 0.9789,
      "step": 9170
    },
    {
      "epoch": 1.0584910013844024,
      "grad_norm": 0.4076824486255646,
      "learning_rate": 0.00016352327823102448,
      "loss": 0.9453,
      "step": 9175
    },
    {
      "epoch": 1.059067835717582,
      "grad_norm": 0.42359670996665955,
      "learning_rate": 0.00016347142618884712,
      "loss": 0.8879,
      "step": 9180
    },
    {
      "epoch": 1.0596446700507614,
      "grad_norm": 0.347460001707077,
      "learning_rate": 0.00016341954555266865,
      "loss": 0.9195,
      "step": 9185
    },
    {
      "epoch": 1.060221504383941,
      "grad_norm": 0.3660471737384796,
      "learning_rate": 0.00016336763634586143,
      "loss": 0.9269,
      "step": 9190
    },
    {
      "epoch": 1.0607983387171205,
      "grad_norm": 0.40367934107780457,
      "learning_rate": 0.00016331569859181062,
      "loss": 0.937,
      "step": 9195
    },
    {
      "epoch": 1.0613751730503,
      "grad_norm": 0.3569383919239044,
      "learning_rate": 0.00016326373231391434,
      "loss": 0.9234,
      "step": 9200
    },
    {
      "epoch": 1.0619520073834794,
      "grad_norm": 0.3617344796657562,
      "learning_rate": 0.00016321173753558343,
      "loss": 0.9292,
      "step": 9205
    },
    {
      "epoch": 1.062528841716659,
      "grad_norm": 0.34451478719711304,
      "learning_rate": 0.00016315971428024168,
      "loss": 0.8996,
      "step": 9210
    },
    {
      "epoch": 1.0631056760498385,
      "grad_norm": 0.3708736300468445,
      "learning_rate": 0.00016310766257132567,
      "loss": 0.9464,
      "step": 9215
    },
    {
      "epoch": 1.063682510383018,
      "grad_norm": 0.3594589829444885,
      "learning_rate": 0.00016305558243228475,
      "loss": 0.9344,
      "step": 9220
    },
    {
      "epoch": 1.0642593447161974,
      "grad_norm": 0.4037278890609741,
      "learning_rate": 0.0001630034738865812,
      "loss": 0.9109,
      "step": 9225
    },
    {
      "epoch": 1.064836179049377,
      "grad_norm": 0.3730737864971161,
      "learning_rate": 0.00016295133695768996,
      "loss": 0.9734,
      "step": 9230
    },
    {
      "epoch": 1.0654130133825566,
      "grad_norm": 0.4240753650665283,
      "learning_rate": 0.00016289917166909884,
      "loss": 0.9375,
      "step": 9235
    },
    {
      "epoch": 1.0659898477157361,
      "grad_norm": 0.3585973381996155,
      "learning_rate": 0.00016284697804430843,
      "loss": 0.948,
      "step": 9240
    },
    {
      "epoch": 1.0665666820489155,
      "grad_norm": 0.38361573219299316,
      "learning_rate": 0.00016279475610683203,
      "loss": 0.9547,
      "step": 9245
    },
    {
      "epoch": 1.067143516382095,
      "grad_norm": 0.4068223834037781,
      "learning_rate": 0.00016274250588019568,
      "loss": 0.9829,
      "step": 9250
    },
    {
      "epoch": 1.0677203507152746,
      "grad_norm": 0.37644630670547485,
      "learning_rate": 0.00016269022738793832,
      "loss": 0.9747,
      "step": 9255
    },
    {
      "epoch": 1.0682971850484542,
      "grad_norm": 0.35857805609703064,
      "learning_rate": 0.00016263792065361135,
      "loss": 0.9538,
      "step": 9260
    },
    {
      "epoch": 1.0688740193816335,
      "grad_norm": 0.37298527359962463,
      "learning_rate": 0.00016258558570077925,
      "loss": 0.9559,
      "step": 9265
    },
    {
      "epoch": 1.069450853714813,
      "grad_norm": 0.35640233755111694,
      "learning_rate": 0.00016253322255301887,
      "loss": 0.9729,
      "step": 9270
    },
    {
      "epoch": 1.0700276880479926,
      "grad_norm": 0.3905540108680725,
      "learning_rate": 0.00016248083123392,
      "loss": 0.9356,
      "step": 9275
    },
    {
      "epoch": 1.0706045223811722,
      "grad_norm": 0.3624536395072937,
      "learning_rate": 0.00016242841176708497,
      "loss": 0.9457,
      "step": 9280
    },
    {
      "epoch": 1.0711813567143516,
      "grad_norm": 0.3997156620025635,
      "learning_rate": 0.0001623759641761289,
      "loss": 0.919,
      "step": 9285
    },
    {
      "epoch": 1.0717581910475311,
      "grad_norm": 0.34290432929992676,
      "learning_rate": 0.00016232348848467946,
      "loss": 0.9155,
      "step": 9290
    },
    {
      "epoch": 1.0723350253807107,
      "grad_norm": 0.3913106322288513,
      "learning_rate": 0.00016227098471637713,
      "loss": 0.9814,
      "step": 9295
    },
    {
      "epoch": 1.0729118597138902,
      "grad_norm": 0.3945711553096771,
      "learning_rate": 0.00016221845289487492,
      "loss": 0.9145,
      "step": 9300
    },
    {
      "epoch": 1.0734886940470696,
      "grad_norm": 0.3874426484107971,
      "learning_rate": 0.0001621658930438385,
      "loss": 0.9411,
      "step": 9305
    },
    {
      "epoch": 1.0740655283802492,
      "grad_norm": 0.37332093715667725,
      "learning_rate": 0.00016211330518694624,
      "loss": 0.9382,
      "step": 9310
    },
    {
      "epoch": 1.0746423627134287,
      "grad_norm": 0.3567892014980316,
      "learning_rate": 0.00016206068934788905,
      "loss": 0.9323,
      "step": 9315
    },
    {
      "epoch": 1.0752191970466083,
      "grad_norm": 0.3612172305583954,
      "learning_rate": 0.00016200804555037047,
      "loss": 0.9779,
      "step": 9320
    },
    {
      "epoch": 1.0757960313797876,
      "grad_norm": 0.4569231867790222,
      "learning_rate": 0.0001619553738181066,
      "loss": 0.957,
      "step": 9325
    },
    {
      "epoch": 1.0763728657129672,
      "grad_norm": 0.3518175184726715,
      "learning_rate": 0.0001619026741748262,
      "loss": 0.8803,
      "step": 9330
    },
    {
      "epoch": 1.0769497000461468,
      "grad_norm": 0.37040144205093384,
      "learning_rate": 0.00016184994664427053,
      "loss": 0.9358,
      "step": 9335
    },
    {
      "epoch": 1.0775265343793263,
      "grad_norm": 0.3865705132484436,
      "learning_rate": 0.00016179719125019345,
      "loss": 0.9072,
      "step": 9340
    },
    {
      "epoch": 1.0781033687125057,
      "grad_norm": 0.37067902088165283,
      "learning_rate": 0.00016174440801636138,
      "loss": 0.9472,
      "step": 9345
    },
    {
      "epoch": 1.0786802030456852,
      "grad_norm": 0.3600660264492035,
      "learning_rate": 0.0001616915969665533,
      "loss": 0.9108,
      "step": 9350
    },
    {
      "epoch": 1.0792570373788648,
      "grad_norm": 0.3454911410808563,
      "learning_rate": 0.00016163875812456063,
      "loss": 0.9037,
      "step": 9355
    },
    {
      "epoch": 1.0798338717120444,
      "grad_norm": 0.3558720648288727,
      "learning_rate": 0.0001615858915141874,
      "loss": 0.8982,
      "step": 9360
    },
    {
      "epoch": 1.0804107060452237,
      "grad_norm": 0.35229113698005676,
      "learning_rate": 0.00016153299715925012,
      "loss": 0.9239,
      "step": 9365
    },
    {
      "epoch": 1.0809875403784033,
      "grad_norm": 0.3825600743293762,
      "learning_rate": 0.00016148007508357784,
      "loss": 0.9469,
      "step": 9370
    },
    {
      "epoch": 1.0815643747115828,
      "grad_norm": 0.44566377997398376,
      "learning_rate": 0.00016142712531101196,
      "loss": 0.9333,
      "step": 9375
    },
    {
      "epoch": 1.0821412090447624,
      "grad_norm": 0.38880455493927,
      "learning_rate": 0.00016137414786540654,
      "loss": 0.9118,
      "step": 9380
    },
    {
      "epoch": 1.0827180433779418,
      "grad_norm": 0.39327144622802734,
      "learning_rate": 0.00016132114277062797,
      "loss": 0.9402,
      "step": 9385
    },
    {
      "epoch": 1.0832948777111213,
      "grad_norm": 0.36946901679039,
      "learning_rate": 0.0001612681100505552,
      "loss": 0.8793,
      "step": 9390
    },
    {
      "epoch": 1.0838717120443009,
      "grad_norm": 0.3460526764392853,
      "learning_rate": 0.00016121504972907956,
      "loss": 0.8921,
      "step": 9395
    },
    {
      "epoch": 1.0844485463774804,
      "grad_norm": 0.4143986999988556,
      "learning_rate": 0.0001611619618301048,
      "loss": 0.9805,
      "step": 9400
    },
    {
      "epoch": 1.0850253807106598,
      "grad_norm": 0.38757944107055664,
      "learning_rate": 0.00016110884637754713,
      "loss": 0.9422,
      "step": 9405
    },
    {
      "epoch": 1.0856022150438394,
      "grad_norm": 0.37840455770492554,
      "learning_rate": 0.00016105570339533518,
      "loss": 0.9387,
      "step": 9410
    },
    {
      "epoch": 1.086179049377019,
      "grad_norm": 0.38999855518341064,
      "learning_rate": 0.00016100253290740995,
      "loss": 0.948,
      "step": 9415
    },
    {
      "epoch": 1.0867558837101985,
      "grad_norm": 0.38250136375427246,
      "learning_rate": 0.00016094933493772487,
      "loss": 0.934,
      "step": 9420
    },
    {
      "epoch": 1.087332718043378,
      "grad_norm": 0.36484387516975403,
      "learning_rate": 0.0001608961095102457,
      "loss": 0.8776,
      "step": 9425
    },
    {
      "epoch": 1.0879095523765574,
      "grad_norm": 0.4226197600364685,
      "learning_rate": 0.00016084285664895066,
      "loss": 0.8962,
      "step": 9430
    },
    {
      "epoch": 1.088486386709737,
      "grad_norm": 0.41604354977607727,
      "learning_rate": 0.00016078957637783017,
      "loss": 0.9563,
      "step": 9435
    },
    {
      "epoch": 1.0890632210429165,
      "grad_norm": 0.37396174669265747,
      "learning_rate": 0.00016073626872088718,
      "loss": 0.9355,
      "step": 9440
    },
    {
      "epoch": 1.0896400553760959,
      "grad_norm": 0.37299230694770813,
      "learning_rate": 0.00016068293370213684,
      "loss": 0.9513,
      "step": 9445
    },
    {
      "epoch": 1.0902168897092754,
      "grad_norm": 0.38365793228149414,
      "learning_rate": 0.00016062957134560675,
      "loss": 0.94,
      "step": 9450
    },
    {
      "epoch": 1.090793724042455,
      "grad_norm": 0.3528333306312561,
      "learning_rate": 0.00016057618167533667,
      "loss": 0.9808,
      "step": 9455
    },
    {
      "epoch": 1.0913705583756346,
      "grad_norm": 0.38088691234588623,
      "learning_rate": 0.00016052276471537877,
      "loss": 0.9308,
      "step": 9460
    },
    {
      "epoch": 1.0919473927088141,
      "grad_norm": 0.377972275018692,
      "learning_rate": 0.0001604693204897975,
      "loss": 0.8924,
      "step": 9465
    },
    {
      "epoch": 1.0925242270419935,
      "grad_norm": 0.323946475982666,
      "learning_rate": 0.00016041584902266968,
      "loss": 0.9062,
      "step": 9470
    },
    {
      "epoch": 1.093101061375173,
      "grad_norm": 0.4198917746543884,
      "learning_rate": 0.00016036235033808417,
      "loss": 0.9348,
      "step": 9475
    },
    {
      "epoch": 1.0936778957083526,
      "grad_norm": 0.36399000883102417,
      "learning_rate": 0.00016030882446014234,
      "loss": 0.9395,
      "step": 9480
    },
    {
      "epoch": 1.0942547300415322,
      "grad_norm": 0.37311363220214844,
      "learning_rate": 0.0001602552714129576,
      "loss": 0.9531,
      "step": 9485
    },
    {
      "epoch": 1.0948315643747115,
      "grad_norm": 0.3694175183773041,
      "learning_rate": 0.00016020169122065578,
      "loss": 0.9254,
      "step": 9490
    },
    {
      "epoch": 1.095408398707891,
      "grad_norm": 0.4136304557323456,
      "learning_rate": 0.00016014808390737485,
      "loss": 0.9588,
      "step": 9495
    },
    {
      "epoch": 1.0959852330410707,
      "grad_norm": 0.3715604841709137,
      "learning_rate": 0.000160094449497265,
      "loss": 0.9446,
      "step": 9500
    },
    {
      "epoch": 1.0965620673742502,
      "grad_norm": 0.3993631601333618,
      "learning_rate": 0.0001600407880144886,
      "loss": 0.9291,
      "step": 9505
    },
    {
      "epoch": 1.0971389017074296,
      "grad_norm": 0.37997785210609436,
      "learning_rate": 0.00015998709948322027,
      "loss": 0.9908,
      "step": 9510
    },
    {
      "epoch": 1.0977157360406091,
      "grad_norm": 0.3883385956287384,
      "learning_rate": 0.00015993338392764685,
      "loss": 0.901,
      "step": 9515
    },
    {
      "epoch": 1.0982925703737887,
      "grad_norm": 0.3730206787586212,
      "learning_rate": 0.00015987964137196726,
      "loss": 0.8803,
      "step": 9520
    },
    {
      "epoch": 1.0988694047069683,
      "grad_norm": 0.38496091961860657,
      "learning_rate": 0.00015982587184039263,
      "loss": 0.8802,
      "step": 9525
    },
    {
      "epoch": 1.0994462390401476,
      "grad_norm": 0.3969172239303589,
      "learning_rate": 0.00015977207535714625,
      "loss": 0.9055,
      "step": 9530
    },
    {
      "epoch": 1.1000230733733272,
      "grad_norm": 0.44119396805763245,
      "learning_rate": 0.0001597182519464635,
      "loss": 1.0069,
      "step": 9535
    },
    {
      "epoch": 1.1005999077065067,
      "grad_norm": 0.3768390119075775,
      "learning_rate": 0.00015966440163259202,
      "loss": 0.9637,
      "step": 9540
    },
    {
      "epoch": 1.1011767420396863,
      "grad_norm": 0.39733919501304626,
      "learning_rate": 0.00015961052443979137,
      "loss": 0.9896,
      "step": 9545
    },
    {
      "epoch": 1.1017535763728656,
      "grad_norm": 0.3685975968837738,
      "learning_rate": 0.0001595566203923334,
      "loss": 0.9021,
      "step": 9550
    },
    {
      "epoch": 1.1023304107060452,
      "grad_norm": 0.4275849759578705,
      "learning_rate": 0.00015950268951450198,
      "loss": 0.897,
      "step": 9555
    },
    {
      "epoch": 1.1029072450392248,
      "grad_norm": 0.401770681142807,
      "learning_rate": 0.00015944873183059303,
      "loss": 0.9091,
      "step": 9560
    },
    {
      "epoch": 1.1034840793724043,
      "grad_norm": 0.38329482078552246,
      "learning_rate": 0.00015939474736491468,
      "loss": 0.9396,
      "step": 9565
    },
    {
      "epoch": 1.1040609137055837,
      "grad_norm": 0.34465184807777405,
      "learning_rate": 0.00015934073614178696,
      "loss": 0.9683,
      "step": 9570
    },
    {
      "epoch": 1.1046377480387632,
      "grad_norm": 0.40164217352867126,
      "learning_rate": 0.00015928669818554206,
      "loss": 0.9354,
      "step": 9575
    },
    {
      "epoch": 1.1052145823719428,
      "grad_norm": 0.39106857776641846,
      "learning_rate": 0.0001592326335205242,
      "loss": 0.909,
      "step": 9580
    },
    {
      "epoch": 1.1057914167051224,
      "grad_norm": 0.36791619658470154,
      "learning_rate": 0.00015917854217108954,
      "loss": 0.8927,
      "step": 9585
    },
    {
      "epoch": 1.1063682510383017,
      "grad_norm": 0.3477643132209778,
      "learning_rate": 0.00015912442416160644,
      "loss": 0.8746,
      "step": 9590
    },
    {
      "epoch": 1.1069450853714813,
      "grad_norm": 0.36076053977012634,
      "learning_rate": 0.0001590702795164551,
      "loss": 0.9222,
      "step": 9595
    },
    {
      "epoch": 1.1075219197046609,
      "grad_norm": 0.4136887490749359,
      "learning_rate": 0.00015901610826002787,
      "loss": 0.9537,
      "step": 9600
    },
    {
      "epoch": 1.1080987540378404,
      "grad_norm": 0.3848995566368103,
      "learning_rate": 0.0001589619104167289,
      "loss": 0.9322,
      "step": 9605
    },
    {
      "epoch": 1.1086755883710198,
      "grad_norm": 0.3714704215526581,
      "learning_rate": 0.00015890768601097447,
      "loss": 0.9499,
      "step": 9610
    },
    {
      "epoch": 1.1092524227041993,
      "grad_norm": 0.3850856125354767,
      "learning_rate": 0.0001588534350671928,
      "loss": 0.9638,
      "step": 9615
    },
    {
      "epoch": 1.109829257037379,
      "grad_norm": 0.3717416226863861,
      "learning_rate": 0.00015879915760982406,
      "loss": 0.943,
      "step": 9620
    },
    {
      "epoch": 1.1104060913705585,
      "grad_norm": 0.36091673374176025,
      "learning_rate": 0.0001587448536633203,
      "loss": 0.9536,
      "step": 9625
    },
    {
      "epoch": 1.1109829257037378,
      "grad_norm": 0.343685507774353,
      "learning_rate": 0.00015869052325214554,
      "loss": 0.9391,
      "step": 9630
    },
    {
      "epoch": 1.1115597600369174,
      "grad_norm": 0.40542444586753845,
      "learning_rate": 0.00015863616640077578,
      "loss": 0.8862,
      "step": 9635
    },
    {
      "epoch": 1.112136594370097,
      "grad_norm": 0.3820521831512451,
      "learning_rate": 0.00015858178313369893,
      "loss": 0.9461,
      "step": 9640
    },
    {
      "epoch": 1.1127134287032765,
      "grad_norm": 0.3812884986400604,
      "learning_rate": 0.00015852737347541465,
      "loss": 0.9391,
      "step": 9645
    },
    {
      "epoch": 1.1132902630364558,
      "grad_norm": 0.3547990918159485,
      "learning_rate": 0.00015847293745043466,
      "loss": 0.9091,
      "step": 9650
    },
    {
      "epoch": 1.1138670973696354,
      "grad_norm": 0.3752082884311676,
      "learning_rate": 0.0001584184750832825,
      "loss": 0.9802,
      "step": 9655
    },
    {
      "epoch": 1.114443931702815,
      "grad_norm": 0.4098625183105469,
      "learning_rate": 0.00015836398639849355,
      "loss": 0.8935,
      "step": 9660
    },
    {
      "epoch": 1.1150207660359945,
      "grad_norm": 0.40601786971092224,
      "learning_rate": 0.0001583094714206151,
      "loss": 0.9065,
      "step": 9665
    },
    {
      "epoch": 1.1155976003691739,
      "grad_norm": 0.3601096272468567,
      "learning_rate": 0.0001582549301742062,
      "loss": 0.942,
      "step": 9670
    },
    {
      "epoch": 1.1161744347023534,
      "grad_norm": 0.37831351161003113,
      "learning_rate": 0.00015820036268383785,
      "loss": 0.949,
      "step": 9675
    },
    {
      "epoch": 1.116751269035533,
      "grad_norm": 0.3862978219985962,
      "learning_rate": 0.00015814576897409273,
      "loss": 0.9181,
      "step": 9680
    },
    {
      "epoch": 1.1173281033687126,
      "grad_norm": 0.3954308331012726,
      "learning_rate": 0.00015809114906956552,
      "loss": 0.9699,
      "step": 9685
    },
    {
      "epoch": 1.117904937701892,
      "grad_norm": 0.39789891242980957,
      "learning_rate": 0.00015803650299486252,
      "loss": 0.9297,
      "step": 9690
    },
    {
      "epoch": 1.1184817720350715,
      "grad_norm": 0.3621467053890228,
      "learning_rate": 0.00015798183077460188,
      "loss": 0.9135,
      "step": 9695
    },
    {
      "epoch": 1.119058606368251,
      "grad_norm": 0.3689694404602051,
      "learning_rate": 0.0001579271324334136,
      "loss": 0.9608,
      "step": 9700
    },
    {
      "epoch": 1.1196354407014306,
      "grad_norm": 0.3975661098957062,
      "learning_rate": 0.00015787240799593937,
      "loss": 0.9225,
      "step": 9705
    },
    {
      "epoch": 1.12021227503461,
      "grad_norm": 0.3825543224811554,
      "learning_rate": 0.00015781765748683262,
      "loss": 0.9709,
      "step": 9710
    },
    {
      "epoch": 1.1207891093677895,
      "grad_norm": 0.434542715549469,
      "learning_rate": 0.0001577628809307586,
      "loss": 0.9354,
      "step": 9715
    },
    {
      "epoch": 1.121365943700969,
      "grad_norm": 0.36350882053375244,
      "learning_rate": 0.00015770807835239424,
      "loss": 0.9486,
      "step": 9720
    },
    {
      "epoch": 1.1219427780341487,
      "grad_norm": 0.3788936138153076,
      "learning_rate": 0.00015765324977642822,
      "loss": 1.0066,
      "step": 9725
    },
    {
      "epoch": 1.122519612367328,
      "grad_norm": 0.3581905663013458,
      "learning_rate": 0.00015759839522756092,
      "loss": 0.9538,
      "step": 9730
    },
    {
      "epoch": 1.1230964467005076,
      "grad_norm": 0.41105157136917114,
      "learning_rate": 0.00015754351473050435,
      "loss": 0.9166,
      "step": 9735
    },
    {
      "epoch": 1.1236732810336871,
      "grad_norm": 0.3867192566394806,
      "learning_rate": 0.0001574886083099824,
      "loss": 0.931,
      "step": 9740
    },
    {
      "epoch": 1.1242501153668667,
      "grad_norm": 0.3927655518054962,
      "learning_rate": 0.00015743367599073044,
      "loss": 0.9501,
      "step": 9745
    },
    {
      "epoch": 1.1248269497000463,
      "grad_norm": 0.3890318274497986,
      "learning_rate": 0.0001573787177974956,
      "loss": 0.9712,
      "step": 9750
    },
    {
      "epoch": 1.1254037840332256,
      "grad_norm": 0.3845767080783844,
      "learning_rate": 0.0001573237337550367,
      "loss": 0.9395,
      "step": 9755
    },
    {
      "epoch": 1.1259806183664052,
      "grad_norm": 0.38565099239349365,
      "learning_rate": 0.00015726872388812407,
      "loss": 0.9331,
      "step": 9760
    },
    {
      "epoch": 1.1265574526995847,
      "grad_norm": 0.3627215027809143,
      "learning_rate": 0.00015721368822153986,
      "loss": 0.9511,
      "step": 9765
    },
    {
      "epoch": 1.127134287032764,
      "grad_norm": 0.35866937041282654,
      "learning_rate": 0.00015715862678007767,
      "loss": 0.9542,
      "step": 9770
    },
    {
      "epoch": 1.1277111213659436,
      "grad_norm": 0.36874574422836304,
      "learning_rate": 0.0001571035395885428,
      "loss": 0.8729,
      "step": 9775
    },
    {
      "epoch": 1.1282879556991232,
      "grad_norm": 0.4252176880836487,
      "learning_rate": 0.0001570484266717522,
      "loss": 0.9313,
      "step": 9780
    },
    {
      "epoch": 1.1288647900323028,
      "grad_norm": 0.36453625559806824,
      "learning_rate": 0.00015699328805453424,
      "loss": 0.8858,
      "step": 9785
    },
    {
      "epoch": 1.1294416243654823,
      "grad_norm": 0.4107474982738495,
      "learning_rate": 0.00015693812376172902,
      "loss": 0.9887,
      "step": 9790
    },
    {
      "epoch": 1.1300184586986617,
      "grad_norm": 0.4069174528121948,
      "learning_rate": 0.00015688293381818823,
      "loss": 0.9478,
      "step": 9795
    },
    {
      "epoch": 1.1305952930318413,
      "grad_norm": 0.37103304266929626,
      "learning_rate": 0.00015682771824877494,
      "loss": 0.9144,
      "step": 9800
    },
    {
      "epoch": 1.1311721273650208,
      "grad_norm": 0.40417689085006714,
      "learning_rate": 0.00015677247707836397,
      "loss": 0.9464,
      "step": 9805
    },
    {
      "epoch": 1.1317489616982002,
      "grad_norm": 0.3834182918071747,
      "learning_rate": 0.0001567172103318415,
      "loss": 0.9902,
      "step": 9810
    },
    {
      "epoch": 1.1323257960313797,
      "grad_norm": 0.37694182991981506,
      "learning_rate": 0.00015666191803410536,
      "loss": 0.9339,
      "step": 9815
    },
    {
      "epoch": 1.1329026303645593,
      "grad_norm": 0.3973928987979889,
      "learning_rate": 0.00015660660021006478,
      "loss": 0.9301,
      "step": 9820
    },
    {
      "epoch": 1.1334794646977389,
      "grad_norm": 0.41115155816078186,
      "learning_rate": 0.00015655125688464062,
      "loss": 0.9196,
      "step": 9825
    },
    {
      "epoch": 1.1340562990309184,
      "grad_norm": 0.3878481686115265,
      "learning_rate": 0.00015649588808276505,
      "loss": 0.9395,
      "step": 9830
    },
    {
      "epoch": 1.1346331333640978,
      "grad_norm": 0.362389475107193,
      "learning_rate": 0.00015644049382938191,
      "loss": 0.9005,
      "step": 9835
    },
    {
      "epoch": 1.1352099676972773,
      "grad_norm": 0.40141263604164124,
      "learning_rate": 0.00015638507414944642,
      "loss": 0.9412,
      "step": 9840
    },
    {
      "epoch": 1.135786802030457,
      "grad_norm": 0.3529787063598633,
      "learning_rate": 0.00015632962906792522,
      "loss": 0.9619,
      "step": 9845
    },
    {
      "epoch": 1.1363636363636362,
      "grad_norm": 0.37504446506500244,
      "learning_rate": 0.00015627415860979641,
      "loss": 0.96,
      "step": 9850
    },
    {
      "epoch": 1.1369404706968158,
      "grad_norm": 0.39885926246643066,
      "learning_rate": 0.0001562186628000496,
      "loss": 0.9156,
      "step": 9855
    },
    {
      "epoch": 1.1375173050299954,
      "grad_norm": 0.37924274802207947,
      "learning_rate": 0.0001561631416636857,
      "loss": 0.958,
      "step": 9860
    },
    {
      "epoch": 1.138094139363175,
      "grad_norm": 0.37374287843704224,
      "learning_rate": 0.00015610759522571713,
      "loss": 0.8828,
      "step": 9865
    },
    {
      "epoch": 1.1386709736963545,
      "grad_norm": 0.37404969334602356,
      "learning_rate": 0.00015605202351116765,
      "loss": 0.9512,
      "step": 9870
    },
    {
      "epoch": 1.1392478080295338,
      "grad_norm": 0.3823772966861725,
      "learning_rate": 0.00015599642654507244,
      "loss": 0.9525,
      "step": 9875
    },
    {
      "epoch": 1.1398246423627134,
      "grad_norm": 0.45090019702911377,
      "learning_rate": 0.00015594080435247802,
      "loss": 0.9666,
      "step": 9880
    },
    {
      "epoch": 1.140401476695893,
      "grad_norm": 0.37332072854042053,
      "learning_rate": 0.00015588515695844234,
      "loss": 0.9481,
      "step": 9885
    },
    {
      "epoch": 1.1409783110290725,
      "grad_norm": 0.36044663190841675,
      "learning_rate": 0.0001558294843880346,
      "loss": 0.9188,
      "step": 9890
    },
    {
      "epoch": 1.1415551453622519,
      "grad_norm": 0.3901468515396118,
      "learning_rate": 0.00015577378666633545,
      "loss": 0.996,
      "step": 9895
    },
    {
      "epoch": 1.1421319796954315,
      "grad_norm": 0.3967418074607849,
      "learning_rate": 0.00015571806381843676,
      "loss": 0.9345,
      "step": 9900
    },
    {
      "epoch": 1.142708814028611,
      "grad_norm": 0.3700670301914215,
      "learning_rate": 0.00015566231586944186,
      "loss": 0.9379,
      "step": 9905
    },
    {
      "epoch": 1.1432856483617906,
      "grad_norm": 0.37614816427230835,
      "learning_rate": 0.00015560654284446526,
      "loss": 0.9585,
      "step": 9910
    },
    {
      "epoch": 1.14386248269497,
      "grad_norm": 0.38715970516204834,
      "learning_rate": 0.00015555074476863282,
      "loss": 0.9538,
      "step": 9915
    },
    {
      "epoch": 1.1444393170281495,
      "grad_norm": 0.38108375668525696,
      "learning_rate": 0.0001554949216670817,
      "loss": 0.9557,
      "step": 9920
    },
    {
      "epoch": 1.145016151361329,
      "grad_norm": 0.39917248487472534,
      "learning_rate": 0.00015543907356496033,
      "loss": 0.9416,
      "step": 9925
    },
    {
      "epoch": 1.1455929856945086,
      "grad_norm": 0.41090819239616394,
      "learning_rate": 0.00015538320048742835,
      "loss": 0.9457,
      "step": 9930
    },
    {
      "epoch": 1.146169820027688,
      "grad_norm": 0.38558125495910645,
      "learning_rate": 0.00015532730245965668,
      "loss": 0.9564,
      "step": 9935
    },
    {
      "epoch": 1.1467466543608675,
      "grad_norm": 0.4513038694858551,
      "learning_rate": 0.00015527137950682756,
      "loss": 0.9503,
      "step": 9940
    },
    {
      "epoch": 1.147323488694047,
      "grad_norm": 0.3840969502925873,
      "learning_rate": 0.00015521543165413428,
      "loss": 0.9037,
      "step": 9945
    },
    {
      "epoch": 1.1479003230272267,
      "grad_norm": 0.3525768220424652,
      "learning_rate": 0.00015515945892678157,
      "loss": 0.9935,
      "step": 9950
    },
    {
      "epoch": 1.148477157360406,
      "grad_norm": 0.37335145473480225,
      "learning_rate": 0.0001551034613499852,
      "loss": 0.9736,
      "step": 9955
    },
    {
      "epoch": 1.1490539916935856,
      "grad_norm": 0.4112907350063324,
      "learning_rate": 0.00015504743894897218,
      "loss": 0.9434,
      "step": 9960
    },
    {
      "epoch": 1.1496308260267651,
      "grad_norm": 0.3955402374267578,
      "learning_rate": 0.00015499139174898071,
      "loss": 0.9893,
      "step": 9965
    },
    {
      "epoch": 1.1502076603599447,
      "grad_norm": 0.39400553703308105,
      "learning_rate": 0.0001549353197752602,
      "loss": 0.9892,
      "step": 9970
    },
    {
      "epoch": 1.150784494693124,
      "grad_norm": 0.38637709617614746,
      "learning_rate": 0.00015487922305307118,
      "loss": 0.9282,
      "step": 9975
    },
    {
      "epoch": 1.1513613290263036,
      "grad_norm": 0.44860902428627014,
      "learning_rate": 0.00015482310160768527,
      "loss": 1.0229,
      "step": 9980
    },
    {
      "epoch": 1.1519381633594832,
      "grad_norm": 0.378139466047287,
      "learning_rate": 0.00015476695546438535,
      "loss": 0.9421,
      "step": 9985
    },
    {
      "epoch": 1.1525149976926627,
      "grad_norm": 0.4301804304122925,
      "learning_rate": 0.0001547107846484653,
      "loss": 0.9599,
      "step": 9990
    },
    {
      "epoch": 1.153091832025842,
      "grad_norm": 0.3928966522216797,
      "learning_rate": 0.0001546545891852303,
      "loss": 0.9652,
      "step": 9995
    },
    {
      "epoch": 1.1536686663590217,
      "grad_norm": 0.35950881242752075,
      "learning_rate": 0.0001545983690999964,
      "loss": 0.8964,
      "step": 10000
    },
    {
      "epoch": 1.1542455006922012,
      "grad_norm": 0.3532848656177521,
      "learning_rate": 0.00015454212441809095,
      "loss": 0.9069,
      "step": 10005
    },
    {
      "epoch": 1.1548223350253808,
      "grad_norm": 0.4111880958080292,
      "learning_rate": 0.0001544858551648522,
      "loss": 0.94,
      "step": 10010
    },
    {
      "epoch": 1.1553991693585601,
      "grad_norm": 0.4143196642398834,
      "learning_rate": 0.0001544295613656296,
      "loss": 0.9598,
      "step": 10015
    },
    {
      "epoch": 1.1559760036917397,
      "grad_norm": 0.4328601062297821,
      "learning_rate": 0.00015437324304578363,
      "loss": 0.9171,
      "step": 10020
    },
    {
      "epoch": 1.1565528380249193,
      "grad_norm": 0.4185430407524109,
      "learning_rate": 0.0001543169002306858,
      "loss": 0.8857,
      "step": 10025
    },
    {
      "epoch": 1.1571296723580988,
      "grad_norm": 0.34174492955207825,
      "learning_rate": 0.00015426053294571865,
      "loss": 0.9565,
      "step": 10030
    },
    {
      "epoch": 1.1577065066912782,
      "grad_norm": 0.4023081660270691,
      "learning_rate": 0.00015420414121627575,
      "loss": 0.9048,
      "step": 10035
    },
    {
      "epoch": 1.1582833410244577,
      "grad_norm": 0.3603830635547638,
      "learning_rate": 0.00015414772506776165,
      "loss": 0.9337,
      "step": 10040
    },
    {
      "epoch": 1.1588601753576373,
      "grad_norm": 0.35782790184020996,
      "learning_rate": 0.000154091284525592,
      "loss": 0.906,
      "step": 10045
    },
    {
      "epoch": 1.1594370096908169,
      "grad_norm": 0.4056355953216553,
      "learning_rate": 0.00015403481961519334,
      "loss": 0.8862,
      "step": 10050
    },
    {
      "epoch": 1.1600138440239962,
      "grad_norm": 0.3880009949207306,
      "learning_rate": 0.00015397833036200322,
      "loss": 0.9643,
      "step": 10055
    },
    {
      "epoch": 1.1605906783571758,
      "grad_norm": 0.37115952372550964,
      "learning_rate": 0.00015392181679147013,
      "loss": 0.9906,
      "step": 10060
    },
    {
      "epoch": 1.1611675126903553,
      "grad_norm": 0.40016767382621765,
      "learning_rate": 0.00015386527892905365,
      "loss": 0.928,
      "step": 10065
    },
    {
      "epoch": 1.161744347023535,
      "grad_norm": 0.38123106956481934,
      "learning_rate": 0.00015380871680022406,
      "loss": 0.9618,
      "step": 10070
    },
    {
      "epoch": 1.1623211813567145,
      "grad_norm": 0.36609750986099243,
      "learning_rate": 0.00015375213043046276,
      "loss": 0.9228,
      "step": 10075
    },
    {
      "epoch": 1.1628980156898938,
      "grad_norm": 0.39698731899261475,
      "learning_rate": 0.0001536955198452621,
      "loss": 0.9658,
      "step": 10080
    },
    {
      "epoch": 1.1634748500230734,
      "grad_norm": 0.37321779131889343,
      "learning_rate": 0.00015363888507012515,
      "loss": 0.9488,
      "step": 10085
    },
    {
      "epoch": 1.164051684356253,
      "grad_norm": 0.3812940716743469,
      "learning_rate": 0.00015358222613056602,
      "loss": 0.9386,
      "step": 10090
    },
    {
      "epoch": 1.1646285186894323,
      "grad_norm": 0.42194968461990356,
      "learning_rate": 0.0001535255430521097,
      "loss": 0.9154,
      "step": 10095
    },
    {
      "epoch": 1.1652053530226119,
      "grad_norm": 0.419939786195755,
      "learning_rate": 0.00015346883586029198,
      "loss": 0.9235,
      "step": 10100
    },
    {
      "epoch": 1.1657821873557914,
      "grad_norm": 0.35055387020111084,
      "learning_rate": 0.00015341210458065963,
      "loss": 0.9365,
      "step": 10105
    },
    {
      "epoch": 1.166359021688971,
      "grad_norm": 0.3702836334705353,
      "learning_rate": 0.00015335534923877013,
      "loss": 0.9896,
      "step": 10110
    },
    {
      "epoch": 1.1669358560221506,
      "grad_norm": 0.3982597291469574,
      "learning_rate": 0.00015329856986019192,
      "loss": 0.9258,
      "step": 10115
    },
    {
      "epoch": 1.16751269035533,
      "grad_norm": 0.41476061940193176,
      "learning_rate": 0.00015324176647050415,
      "loss": 0.9549,
      "step": 10120
    },
    {
      "epoch": 1.1680895246885095,
      "grad_norm": 0.37612512707710266,
      "learning_rate": 0.000153184939095297,
      "loss": 0.9374,
      "step": 10125
    },
    {
      "epoch": 1.168666359021689,
      "grad_norm": 0.3923216462135315,
      "learning_rate": 0.00015312808776017113,
      "loss": 0.9184,
      "step": 10130
    },
    {
      "epoch": 1.1692431933548684,
      "grad_norm": 0.35655319690704346,
      "learning_rate": 0.00015307121249073831,
      "loss": 0.9052,
      "step": 10135
    },
    {
      "epoch": 1.169820027688048,
      "grad_norm": 0.3752961754798889,
      "learning_rate": 0.00015301431331262095,
      "loss": 0.9356,
      "step": 10140
    },
    {
      "epoch": 1.1703968620212275,
      "grad_norm": 0.3986845314502716,
      "learning_rate": 0.0001529573902514522,
      "loss": 0.9992,
      "step": 10145
    },
    {
      "epoch": 1.170973696354407,
      "grad_norm": 0.39377009868621826,
      "learning_rate": 0.00015290044333287597,
      "loss": 0.9546,
      "step": 10150
    },
    {
      "epoch": 1.1715505306875866,
      "grad_norm": 0.3905738294124603,
      "learning_rate": 0.00015284347258254704,
      "loss": 0.9224,
      "step": 10155
    },
    {
      "epoch": 1.172127365020766,
      "grad_norm": 0.38576817512512207,
      "learning_rate": 0.00015278647802613083,
      "loss": 0.8831,
      "step": 10160
    },
    {
      "epoch": 1.1727041993539455,
      "grad_norm": 0.36881786584854126,
      "learning_rate": 0.00015272945968930346,
      "loss": 0.9233,
      "step": 10165
    },
    {
      "epoch": 1.173281033687125,
      "grad_norm": 0.3943594694137573,
      "learning_rate": 0.0001526724175977518,
      "loss": 0.8819,
      "step": 10170
    },
    {
      "epoch": 1.1738578680203045,
      "grad_norm": 0.3838745355606079,
      "learning_rate": 0.0001526153517771735,
      "loss": 0.9679,
      "step": 10175
    },
    {
      "epoch": 1.174434702353484,
      "grad_norm": 0.38031336665153503,
      "learning_rate": 0.00015255826225327675,
      "loss": 0.9789,
      "step": 10180
    },
    {
      "epoch": 1.1750115366866636,
      "grad_norm": 0.4234495759010315,
      "learning_rate": 0.0001525011490517805,
      "loss": 0.9495,
      "step": 10185
    },
    {
      "epoch": 1.1755883710198431,
      "grad_norm": 0.3952755033969879,
      "learning_rate": 0.00015244401219841438,
      "loss": 0.9164,
      "step": 10190
    },
    {
      "epoch": 1.1761652053530227,
      "grad_norm": 0.38901087641716003,
      "learning_rate": 0.00015238685171891863,
      "loss": 0.9748,
      "step": 10195
    },
    {
      "epoch": 1.176742039686202,
      "grad_norm": 0.39485955238342285,
      "learning_rate": 0.00015232966763904416,
      "loss": 0.9365,
      "step": 10200
    },
    {
      "epoch": 1.1773188740193816,
      "grad_norm": 0.36091843247413635,
      "learning_rate": 0.00015227245998455254,
      "loss": 0.9711,
      "step": 10205
    },
    {
      "epoch": 1.1778957083525612,
      "grad_norm": 0.3655792772769928,
      "learning_rate": 0.00015221522878121593,
      "loss": 0.9597,
      "step": 10210
    },
    {
      "epoch": 1.1784725426857405,
      "grad_norm": 0.37455499172210693,
      "learning_rate": 0.00015215797405481704,
      "loss": 0.9553,
      "step": 10215
    },
    {
      "epoch": 1.17904937701892,
      "grad_norm": 0.41447341442108154,
      "learning_rate": 0.00015210069583114928,
      "loss": 0.9201,
      "step": 10220
    },
    {
      "epoch": 1.1796262113520997,
      "grad_norm": 0.40085557103157043,
      "learning_rate": 0.0001520433941360166,
      "loss": 0.9807,
      "step": 10225
    },
    {
      "epoch": 1.1802030456852792,
      "grad_norm": 0.405367374420166,
      "learning_rate": 0.00015198606899523352,
      "loss": 0.9543,
      "step": 10230
    },
    {
      "epoch": 1.1807798800184588,
      "grad_norm": 0.3848768472671509,
      "learning_rate": 0.00015192872043462514,
      "loss": 0.917,
      "step": 10235
    },
    {
      "epoch": 1.1813567143516381,
      "grad_norm": 0.3763655126094818,
      "learning_rate": 0.0001518713484800271,
      "loss": 0.944,
      "step": 10240
    },
    {
      "epoch": 1.1819335486848177,
      "grad_norm": 0.3986711800098419,
      "learning_rate": 0.00015181395315728554,
      "loss": 0.8734,
      "step": 10245
    },
    {
      "epoch": 1.1825103830179973,
      "grad_norm": 0.3781437873840332,
      "learning_rate": 0.00015175653449225716,
      "loss": 0.9581,
      "step": 10250
    },
    {
      "epoch": 1.1830872173511768,
      "grad_norm": 0.40199145674705505,
      "learning_rate": 0.00015169909251080922,
      "loss": 0.9467,
      "step": 10255
    },
    {
      "epoch": 1.1836640516843562,
      "grad_norm": 0.3898562788963318,
      "learning_rate": 0.00015164162723881947,
      "loss": 0.9474,
      "step": 10260
    },
    {
      "epoch": 1.1842408860175357,
      "grad_norm": 0.3571324348449707,
      "learning_rate": 0.00015158413870217606,
      "loss": 0.9509,
      "step": 10265
    },
    {
      "epoch": 1.1848177203507153,
      "grad_norm": 0.4020764231681824,
      "learning_rate": 0.00015152662692677774,
      "loss": 0.9347,
      "step": 10270
    },
    {
      "epoch": 1.1853945546838949,
      "grad_norm": 0.3949398398399353,
      "learning_rate": 0.00015146909193853363,
      "loss": 0.9403,
      "step": 10275
    },
    {
      "epoch": 1.1859713890170742,
      "grad_norm": 0.37582725286483765,
      "learning_rate": 0.0001514115337633634,
      "loss": 0.9477,
      "step": 10280
    },
    {
      "epoch": 1.1865482233502538,
      "grad_norm": 0.3682326674461365,
      "learning_rate": 0.0001513539524271971,
      "loss": 0.9516,
      "step": 10285
    },
    {
      "epoch": 1.1871250576834333,
      "grad_norm": 0.3528582453727722,
      "learning_rate": 0.0001512963479559752,
      "loss": 0.9185,
      "step": 10290
    },
    {
      "epoch": 1.187701892016613,
      "grad_norm": 0.43728992342948914,
      "learning_rate": 0.0001512387203756487,
      "loss": 0.9085,
      "step": 10295
    },
    {
      "epoch": 1.1882787263497923,
      "grad_norm": 0.3973561227321625,
      "learning_rate": 0.00015118106971217883,
      "loss": 0.9589,
      "step": 10300
    },
    {
      "epoch": 1.1888555606829718,
      "grad_norm": 0.3903520405292511,
      "learning_rate": 0.00015112339599153746,
      "loss": 0.9408,
      "step": 10305
    },
    {
      "epoch": 1.1894323950161514,
      "grad_norm": 0.47012007236480713,
      "learning_rate": 0.00015106569923970664,
      "loss": 1.0075,
      "step": 10310
    },
    {
      "epoch": 1.190009229349331,
      "grad_norm": 0.41596439480781555,
      "learning_rate": 0.00015100797948267882,
      "loss": 0.9521,
      "step": 10315
    },
    {
      "epoch": 1.1905860636825103,
      "grad_norm": 0.3753730058670044,
      "learning_rate": 0.00015095023674645698,
      "loss": 0.9424,
      "step": 10320
    },
    {
      "epoch": 1.1911628980156899,
      "grad_norm": 0.37456628680229187,
      "learning_rate": 0.00015089247105705425,
      "loss": 0.9179,
      "step": 10325
    },
    {
      "epoch": 1.1917397323488694,
      "grad_norm": 0.422911137342453,
      "learning_rate": 0.0001508346824404942,
      "loss": 0.9415,
      "step": 10330
    },
    {
      "epoch": 1.192316566682049,
      "grad_norm": 0.3563242256641388,
      "learning_rate": 0.00015077687092281074,
      "loss": 0.9618,
      "step": 10335
    },
    {
      "epoch": 1.1928934010152283,
      "grad_norm": 0.38106414675712585,
      "learning_rate": 0.000150719036530048,
      "loss": 0.9488,
      "step": 10340
    },
    {
      "epoch": 1.193470235348408,
      "grad_norm": 0.39531293511390686,
      "learning_rate": 0.00015066117928826063,
      "loss": 0.9512,
      "step": 10345
    },
    {
      "epoch": 1.1940470696815875,
      "grad_norm": 1.4611742496490479,
      "learning_rate": 0.00015060329922351326,
      "loss": 0.9554,
      "step": 10350
    },
    {
      "epoch": 1.194623904014767,
      "grad_norm": 0.39308950304985046,
      "learning_rate": 0.0001505453963618811,
      "loss": 0.9498,
      "step": 10355
    },
    {
      "epoch": 1.1952007383479464,
      "grad_norm": 0.43254947662353516,
      "learning_rate": 0.00015048747072944944,
      "loss": 0.9345,
      "step": 10360
    },
    {
      "epoch": 1.195777572681126,
      "grad_norm": 0.38925978541374207,
      "learning_rate": 0.0001504295223523139,
      "loss": 0.9409,
      "step": 10365
    },
    {
      "epoch": 1.1963544070143055,
      "grad_norm": 0.3758449852466583,
      "learning_rate": 0.00015037155125658037,
      "loss": 0.925,
      "step": 10370
    },
    {
      "epoch": 1.196931241347485,
      "grad_norm": 0.4111412465572357,
      "learning_rate": 0.00015031355746836485,
      "loss": 0.9333,
      "step": 10375
    },
    {
      "epoch": 1.1975080756806644,
      "grad_norm": 0.4146568179130554,
      "learning_rate": 0.00015025554101379379,
      "loss": 0.9382,
      "step": 10380
    },
    {
      "epoch": 1.198084910013844,
      "grad_norm": 0.38229039311408997,
      "learning_rate": 0.00015019750191900362,
      "loss": 0.8596,
      "step": 10385
    },
    {
      "epoch": 1.1986617443470236,
      "grad_norm": 0.3730371296405792,
      "learning_rate": 0.00015013944021014105,
      "loss": 0.9667,
      "step": 10390
    },
    {
      "epoch": 1.1992385786802031,
      "grad_norm": 0.4008365273475647,
      "learning_rate": 0.0001500813559133631,
      "loss": 0.9299,
      "step": 10395
    },
    {
      "epoch": 1.1998154130133827,
      "grad_norm": 0.4008704721927643,
      "learning_rate": 0.00015002324905483673,
      "loss": 0.9336,
      "step": 10400
    },
    {
      "epoch": 1.200392247346562,
      "grad_norm": 0.46172234416007996,
      "learning_rate": 0.00014996511966073925,
      "loss": 0.925,
      "step": 10405
    },
    {
      "epoch": 1.2009690816797416,
      "grad_norm": 0.3420531451702118,
      "learning_rate": 0.00014990696775725812,
      "loss": 0.9524,
      "step": 10410
    },
    {
      "epoch": 1.2015459160129212,
      "grad_norm": 0.4005972445011139,
      "learning_rate": 0.0001498487933705908,
      "loss": 0.9926,
      "step": 10415
    },
    {
      "epoch": 1.2021227503461005,
      "grad_norm": 0.38163304328918457,
      "learning_rate": 0.00014979059652694501,
      "loss": 0.9485,
      "step": 10420
    },
    {
      "epoch": 1.20269958467928,
      "grad_norm": 0.37857162952423096,
      "learning_rate": 0.0001497323772525385,
      "loss": 0.9081,
      "step": 10425
    },
    {
      "epoch": 1.2032764190124596,
      "grad_norm": 0.41365325450897217,
      "learning_rate": 0.00014967413557359923,
      "loss": 0.9242,
      "step": 10430
    },
    {
      "epoch": 1.2038532533456392,
      "grad_norm": 0.4273558557033539,
      "learning_rate": 0.00014961587151636515,
      "loss": 0.9069,
      "step": 10435
    },
    {
      "epoch": 1.2044300876788188,
      "grad_norm": 0.402115136384964,
      "learning_rate": 0.00014955758510708434,
      "loss": 0.9709,
      "step": 10440
    },
    {
      "epoch": 1.205006922011998,
      "grad_norm": 0.4027498960494995,
      "learning_rate": 0.00014949927637201494,
      "loss": 0.9186,
      "step": 10445
    },
    {
      "epoch": 1.2055837563451777,
      "grad_norm": 0.39657580852508545,
      "learning_rate": 0.00014944094533742513,
      "loss": 0.9311,
      "step": 10450
    },
    {
      "epoch": 1.2061605906783572,
      "grad_norm": 0.3923443555831909,
      "learning_rate": 0.00014938259202959317,
      "loss": 0.901,
      "step": 10455
    },
    {
      "epoch": 1.2067374250115366,
      "grad_norm": 0.3821752965450287,
      "learning_rate": 0.00014932421647480737,
      "loss": 0.9169,
      "step": 10460
    },
    {
      "epoch": 1.2073142593447161,
      "grad_norm": 0.3927006721496582,
      "learning_rate": 0.00014926581869936597,
      "loss": 0.9571,
      "step": 10465
    },
    {
      "epoch": 1.2078910936778957,
      "grad_norm": 0.4481658935546875,
      "learning_rate": 0.00014920739872957732,
      "loss": 0.9038,
      "step": 10470
    },
    {
      "epoch": 1.2084679280110753,
      "grad_norm": 0.4267770051956177,
      "learning_rate": 0.00014914895659175973,
      "loss": 0.9711,
      "step": 10475
    },
    {
      "epoch": 1.2090447623442548,
      "grad_norm": 0.36944663524627686,
      "learning_rate": 0.0001490904923122415,
      "loss": 0.9154,
      "step": 10480
    },
    {
      "epoch": 1.2096215966774342,
      "grad_norm": 0.36720767617225647,
      "learning_rate": 0.00014903200591736087,
      "loss": 0.9095,
      "step": 10485
    },
    {
      "epoch": 1.2101984310106138,
      "grad_norm": 0.3741086721420288,
      "learning_rate": 0.00014897349743346613,
      "loss": 0.9757,
      "step": 10490
    },
    {
      "epoch": 1.2107752653437933,
      "grad_norm": 0.35875627398490906,
      "learning_rate": 0.00014891496688691539,
      "loss": 0.9721,
      "step": 10495
    },
    {
      "epoch": 1.2113520996769727,
      "grad_norm": 0.4409966468811035,
      "learning_rate": 0.00014885641430407686,
      "loss": 0.953,
      "step": 10500
    },
    {
      "epoch": 1.2119289340101522,
      "grad_norm": 0.40664994716644287,
      "learning_rate": 0.0001487978397113285,
      "loss": 0.9727,
      "step": 10505
    },
    {
      "epoch": 1.2125057683433318,
      "grad_norm": 0.4651268422603607,
      "learning_rate": 0.0001487392431350584,
      "loss": 0.9417,
      "step": 10510
    },
    {
      "epoch": 1.2130826026765114,
      "grad_norm": 0.38706329464912415,
      "learning_rate": 0.0001486806246016643,
      "loss": 0.9368,
      "step": 10515
    },
    {
      "epoch": 1.213659437009691,
      "grad_norm": 0.40690895915031433,
      "learning_rate": 0.00014862198413755401,
      "loss": 0.9325,
      "step": 10520
    },
    {
      "epoch": 1.2142362713428703,
      "grad_norm": 0.3677927851676941,
      "learning_rate": 0.00014856332176914526,
      "loss": 0.9543,
      "step": 10525
    },
    {
      "epoch": 1.2148131056760498,
      "grad_norm": 0.3821386396884918,
      "learning_rate": 0.00014850463752286543,
      "loss": 0.9702,
      "step": 10530
    },
    {
      "epoch": 1.2153899400092294,
      "grad_norm": 0.4116564691066742,
      "learning_rate": 0.00014844593142515196,
      "loss": 0.9497,
      "step": 10535
    },
    {
      "epoch": 1.2159667743424087,
      "grad_norm": 0.4448542892932892,
      "learning_rate": 0.00014838720350245205,
      "loss": 0.9941,
      "step": 10540
    },
    {
      "epoch": 1.2165436086755883,
      "grad_norm": 0.37002575397491455,
      "learning_rate": 0.00014832845378122276,
      "loss": 0.9204,
      "step": 10545
    },
    {
      "epoch": 1.2171204430087679,
      "grad_norm": 0.394660085439682,
      "learning_rate": 0.0001482696822879309,
      "loss": 0.9252,
      "step": 10550
    },
    {
      "epoch": 1.2176972773419474,
      "grad_norm": 0.38183853030204773,
      "learning_rate": 0.00014821088904905315,
      "loss": 0.9943,
      "step": 10555
    },
    {
      "epoch": 1.218274111675127,
      "grad_norm": 0.36868584156036377,
      "learning_rate": 0.00014815207409107608,
      "loss": 0.9442,
      "step": 10560
    },
    {
      "epoch": 1.2188509460083063,
      "grad_norm": 0.39112672209739685,
      "learning_rate": 0.0001480932374404958,
      "loss": 0.9472,
      "step": 10565
    },
    {
      "epoch": 1.219427780341486,
      "grad_norm": 0.3754420578479767,
      "learning_rate": 0.00014803437912381845,
      "loss": 0.9284,
      "step": 10570
    },
    {
      "epoch": 1.2200046146746655,
      "grad_norm": 0.43286576867103577,
      "learning_rate": 0.00014797549916755975,
      "loss": 0.893,
      "step": 10575
    },
    {
      "epoch": 1.220581449007845,
      "grad_norm": 0.40418797731399536,
      "learning_rate": 0.00014791659759824527,
      "loss": 0.9163,
      "step": 10580
    },
    {
      "epoch": 1.2211582833410244,
      "grad_norm": 0.35531437397003174,
      "learning_rate": 0.00014785767444241025,
      "loss": 0.895,
      "step": 10585
    },
    {
      "epoch": 1.221735117674204,
      "grad_norm": 0.46868211030960083,
      "learning_rate": 0.0001477987297265997,
      "loss": 0.9587,
      "step": 10590
    },
    {
      "epoch": 1.2223119520073835,
      "grad_norm": 0.4548766314983368,
      "learning_rate": 0.00014773976347736835,
      "loss": 0.9508,
      "step": 10595
    },
    {
      "epoch": 1.222888786340563,
      "grad_norm": 0.3882719576358795,
      "learning_rate": 0.00014768077572128058,
      "loss": 0.9317,
      "step": 10600
    },
    {
      "epoch": 1.2234656206737424,
      "grad_norm": 0.3923512101173401,
      "learning_rate": 0.0001476217664849105,
      "loss": 0.9394,
      "step": 10605
    },
    {
      "epoch": 1.224042455006922,
      "grad_norm": 0.37607160210609436,
      "learning_rate": 0.00014756273579484187,
      "loss": 0.9321,
      "step": 10610
    },
    {
      "epoch": 1.2246192893401016,
      "grad_norm": 0.3971920311450958,
      "learning_rate": 0.0001475036836776682,
      "loss": 0.9607,
      "step": 10615
    },
    {
      "epoch": 1.2251961236732811,
      "grad_norm": 0.3635753095149994,
      "learning_rate": 0.00014744461015999248,
      "loss": 0.9038,
      "step": 10620
    },
    {
      "epoch": 1.2257729580064605,
      "grad_norm": 0.37379416823387146,
      "learning_rate": 0.00014738551526842755,
      "loss": 0.9371,
      "step": 10625
    },
    {
      "epoch": 1.22634979233964,
      "grad_norm": 0.4032329022884369,
      "learning_rate": 0.00014732639902959567,
      "loss": 0.9379,
      "step": 10630
    },
    {
      "epoch": 1.2269266266728196,
      "grad_norm": 0.37104472517967224,
      "learning_rate": 0.00014726726147012889,
      "loss": 0.9528,
      "step": 10635
    },
    {
      "epoch": 1.2275034610059992,
      "grad_norm": 0.3992524743080139,
      "learning_rate": 0.0001472081026166688,
      "loss": 0.9536,
      "step": 10640
    },
    {
      "epoch": 1.2280802953391785,
      "grad_norm": 0.3816789388656616,
      "learning_rate": 0.0001471489224958665,
      "loss": 0.9038,
      "step": 10645
    },
    {
      "epoch": 1.228657129672358,
      "grad_norm": 0.4063278138637543,
      "learning_rate": 0.00014708972113438285,
      "loss": 0.9336,
      "step": 10650
    },
    {
      "epoch": 1.2292339640055376,
      "grad_norm": 0.3935660421848297,
      "learning_rate": 0.00014703049855888808,
      "loss": 0.92,
      "step": 10655
    },
    {
      "epoch": 1.2298107983387172,
      "grad_norm": 0.3983980119228363,
      "learning_rate": 0.0001469712547960622,
      "loss": 0.9693,
      "step": 10660
    },
    {
      "epoch": 1.2303876326718965,
      "grad_norm": 0.433312326669693,
      "learning_rate": 0.00014691198987259454,
      "loss": 0.9097,
      "step": 10665
    },
    {
      "epoch": 1.2309644670050761,
      "grad_norm": 0.4001083970069885,
      "learning_rate": 0.00014685270381518408,
      "loss": 0.8826,
      "step": 10670
    },
    {
      "epoch": 1.2315413013382557,
      "grad_norm": 0.41054749488830566,
      "learning_rate": 0.00014679339665053933,
      "loss": 0.8979,
      "step": 10675
    },
    {
      "epoch": 1.2321181356714352,
      "grad_norm": 0.3685263991355896,
      "learning_rate": 0.00014673406840537824,
      "loss": 0.9549,
      "step": 10680
    },
    {
      "epoch": 1.2326949700046146,
      "grad_norm": 0.44796431064605713,
      "learning_rate": 0.0001466747191064284,
      "loss": 0.9824,
      "step": 10685
    },
    {
      "epoch": 1.2332718043377942,
      "grad_norm": 0.3935278356075287,
      "learning_rate": 0.00014661534878042664,
      "loss": 0.9385,
      "step": 10690
    },
    {
      "epoch": 1.2338486386709737,
      "grad_norm": 0.4125763773918152,
      "learning_rate": 0.00014655595745411955,
      "loss": 0.9568,
      "step": 10695
    },
    {
      "epoch": 1.2344254730041533,
      "grad_norm": 0.39233893156051636,
      "learning_rate": 0.0001464965451542629,
      "loss": 0.9506,
      "step": 10700
    },
    {
      "epoch": 1.2350023073373326,
      "grad_norm": 0.40938007831573486,
      "learning_rate": 0.00014643711190762216,
      "loss": 0.9222,
      "step": 10705
    },
    {
      "epoch": 1.2355791416705122,
      "grad_norm": 0.3984127342700958,
      "learning_rate": 0.00014637765774097206,
      "loss": 0.907,
      "step": 10710
    },
    {
      "epoch": 1.2361559760036918,
      "grad_norm": 0.4171662926673889,
      "learning_rate": 0.00014631818268109688,
      "loss": 0.9628,
      "step": 10715
    },
    {
      "epoch": 1.2367328103368713,
      "grad_norm": 0.4212305545806885,
      "learning_rate": 0.00014625868675479018,
      "loss": 0.9216,
      "step": 10720
    },
    {
      "epoch": 1.2373096446700507,
      "grad_norm": 0.39830756187438965,
      "learning_rate": 0.000146199169988855,
      "loss": 0.9357,
      "step": 10725
    },
    {
      "epoch": 1.2378864790032302,
      "grad_norm": 0.4301896393299103,
      "learning_rate": 0.00014613963241010382,
      "loss": 0.8656,
      "step": 10730
    },
    {
      "epoch": 1.2384633133364098,
      "grad_norm": 0.3532908856868744,
      "learning_rate": 0.00014608007404535837,
      "loss": 0.9407,
      "step": 10735
    },
    {
      "epoch": 1.2390401476695894,
      "grad_norm": 0.4185565412044525,
      "learning_rate": 0.00014602049492144984,
      "loss": 0.9255,
      "step": 10740
    },
    {
      "epoch": 1.2396169820027687,
      "grad_norm": 0.36691245436668396,
      "learning_rate": 0.00014596089506521874,
      "loss": 0.931,
      "step": 10745
    },
    {
      "epoch": 1.2401938163359483,
      "grad_norm": 0.40366676449775696,
      "learning_rate": 0.00014590127450351493,
      "loss": 0.9576,
      "step": 10750
    },
    {
      "epoch": 1.2407706506691278,
      "grad_norm": 0.4176740050315857,
      "learning_rate": 0.00014584163326319754,
      "loss": 0.9434,
      "step": 10755
    },
    {
      "epoch": 1.2413474850023074,
      "grad_norm": 0.37950599193573,
      "learning_rate": 0.0001457819713711351,
      "loss": 0.8734,
      "step": 10760
    },
    {
      "epoch": 1.241924319335487,
      "grad_norm": 0.3910213112831116,
      "learning_rate": 0.00014572228885420543,
      "loss": 0.9363,
      "step": 10765
    },
    {
      "epoch": 1.2425011536686663,
      "grad_norm": 0.42837202548980713,
      "learning_rate": 0.00014566258573929557,
      "loss": 0.9179,
      "step": 10770
    },
    {
      "epoch": 1.2430779880018459,
      "grad_norm": 0.37683621048927307,
      "learning_rate": 0.00014560286205330197,
      "loss": 0.9571,
      "step": 10775
    },
    {
      "epoch": 1.2436548223350254,
      "grad_norm": 0.35878995060920715,
      "learning_rate": 0.00014554311782313014,
      "loss": 0.9394,
      "step": 10780
    },
    {
      "epoch": 1.2442316566682048,
      "grad_norm": 0.3822707533836365,
      "learning_rate": 0.0001454833530756951,
      "loss": 0.9223,
      "step": 10785
    },
    {
      "epoch": 1.2448084910013844,
      "grad_norm": 0.4296896457672119,
      "learning_rate": 0.00014542356783792094,
      "loss": 0.9011,
      "step": 10790
    },
    {
      "epoch": 1.245385325334564,
      "grad_norm": 0.3938808739185333,
      "learning_rate": 0.00014536376213674098,
      "loss": 0.9506,
      "step": 10795
    },
    {
      "epoch": 1.2459621596677435,
      "grad_norm": 0.36194875836372375,
      "learning_rate": 0.0001453039359990979,
      "loss": 0.946,
      "step": 10800
    },
    {
      "epoch": 1.246538994000923,
      "grad_norm": 0.3902686536312103,
      "learning_rate": 0.00014524408945194338,
      "loss": 0.9213,
      "step": 10805
    },
    {
      "epoch": 1.2471158283341024,
      "grad_norm": 0.4296351671218872,
      "learning_rate": 0.00014518422252223845,
      "loss": 0.9559,
      "step": 10810
    },
    {
      "epoch": 1.247692662667282,
      "grad_norm": 0.3877114951610565,
      "learning_rate": 0.00014512433523695332,
      "loss": 0.9368,
      "step": 10815
    },
    {
      "epoch": 1.2482694970004615,
      "grad_norm": 0.3983345329761505,
      "learning_rate": 0.00014506442762306728,
      "loss": 0.9569,
      "step": 10820
    },
    {
      "epoch": 1.2488463313336409,
      "grad_norm": 0.37539294362068176,
      "learning_rate": 0.0001450044997075689,
      "loss": 0.9275,
      "step": 10825
    },
    {
      "epoch": 1.2494231656668204,
      "grad_norm": 0.40253835916519165,
      "learning_rate": 0.0001449445515174557,
      "loss": 0.9245,
      "step": 10830
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.4149536192417145,
      "learning_rate": 0.00014488458307973455,
      "loss": 0.9281,
      "step": 10835
    },
    {
      "epoch": 1.2505768343331796,
      "grad_norm": 0.36260801553726196,
      "learning_rate": 0.0001448245944214213,
      "loss": 0.9594,
      "step": 10840
    },
    {
      "epoch": 1.2511536686663591,
      "grad_norm": 0.3983232378959656,
      "learning_rate": 0.000144764585569541,
      "loss": 0.9249,
      "step": 10845
    },
    {
      "epoch": 1.2517305029995385,
      "grad_norm": 0.3661304712295532,
      "learning_rate": 0.00014470455655112772,
      "loss": 0.9267,
      "step": 10850
    },
    {
      "epoch": 1.252307337332718,
      "grad_norm": 0.4051198959350586,
      "learning_rate": 0.0001446445073932247,
      "loss": 0.9467,
      "step": 10855
    },
    {
      "epoch": 1.2528841716658976,
      "grad_norm": 0.38413557410240173,
      "learning_rate": 0.00014458443812288415,
      "loss": 0.9106,
      "step": 10860
    },
    {
      "epoch": 1.253461005999077,
      "grad_norm": 0.3864401876926422,
      "learning_rate": 0.00014452434876716737,
      "loss": 0.9145,
      "step": 10865
    },
    {
      "epoch": 1.2540378403322565,
      "grad_norm": 0.3908703625202179,
      "learning_rate": 0.0001444642393531448,
      "loss": 0.9148,
      "step": 10870
    },
    {
      "epoch": 1.254614674665436,
      "grad_norm": 0.3725275695323944,
      "learning_rate": 0.00014440410990789582,
      "loss": 0.9505,
      "step": 10875
    },
    {
      "epoch": 1.2551915089986156,
      "grad_norm": 0.4420537054538727,
      "learning_rate": 0.00014434396045850885,
      "loss": 0.9221,
      "step": 10880
    },
    {
      "epoch": 1.2557683433317952,
      "grad_norm": 0.392156183719635,
      "learning_rate": 0.00014428379103208135,
      "loss": 0.8979,
      "step": 10885
    },
    {
      "epoch": 1.2563451776649746,
      "grad_norm": 0.40596142411231995,
      "learning_rate": 0.00014422360165571976,
      "loss": 0.9549,
      "step": 10890
    },
    {
      "epoch": 1.2569220119981541,
      "grad_norm": 0.38918864727020264,
      "learning_rate": 0.00014416339235653948,
      "loss": 0.9563,
      "step": 10895
    },
    {
      "epoch": 1.2574988463313337,
      "grad_norm": 0.37681928277015686,
      "learning_rate": 0.00014410316316166498,
      "loss": 0.925,
      "step": 10900
    },
    {
      "epoch": 1.258075680664513,
      "grad_norm": 0.3767823278903961,
      "learning_rate": 0.0001440429140982296,
      "loss": 1.014,
      "step": 10905
    },
    {
      "epoch": 1.2586525149976926,
      "grad_norm": 0.4258878231048584,
      "learning_rate": 0.00014398264519337566,
      "loss": 0.9529,
      "step": 10910
    },
    {
      "epoch": 1.2592293493308722,
      "grad_norm": 0.4223175346851349,
      "learning_rate": 0.00014392235647425438,
      "loss": 0.9176,
      "step": 10915
    },
    {
      "epoch": 1.2598061836640517,
      "grad_norm": 0.4392840564250946,
      "learning_rate": 0.000143862047968026,
      "loss": 0.9255,
      "step": 10920
    },
    {
      "epoch": 1.2603830179972313,
      "grad_norm": 0.4153304994106293,
      "learning_rate": 0.0001438017197018596,
      "loss": 0.8901,
      "step": 10925
    },
    {
      "epoch": 1.2609598523304106,
      "grad_norm": 0.4319252073764801,
      "learning_rate": 0.00014374137170293318,
      "loss": 0.9407,
      "step": 10930
    },
    {
      "epoch": 1.2615366866635902,
      "grad_norm": 0.3682355582714081,
      "learning_rate": 0.00014368100399843366,
      "loss": 0.9164,
      "step": 10935
    },
    {
      "epoch": 1.2621135209967698,
      "grad_norm": 0.3747365474700928,
      "learning_rate": 0.00014362061661555675,
      "loss": 0.9524,
      "step": 10940
    },
    {
      "epoch": 1.262690355329949,
      "grad_norm": 0.35737344622612,
      "learning_rate": 0.00014356020958150714,
      "loss": 0.9653,
      "step": 10945
    },
    {
      "epoch": 1.2632671896631287,
      "grad_norm": 0.3945746421813965,
      "learning_rate": 0.00014349978292349825,
      "loss": 0.9456,
      "step": 10950
    },
    {
      "epoch": 1.2638440239963082,
      "grad_norm": 0.40850600600242615,
      "learning_rate": 0.00014343933666875245,
      "loss": 0.9359,
      "step": 10955
    },
    {
      "epoch": 1.2644208583294878,
      "grad_norm": 0.42189982533454895,
      "learning_rate": 0.00014337887084450094,
      "loss": 0.9438,
      "step": 10960
    },
    {
      "epoch": 1.2649976926626674,
      "grad_norm": 0.3748544454574585,
      "learning_rate": 0.0001433183854779836,
      "loss": 0.9477,
      "step": 10965
    },
    {
      "epoch": 1.2655745269958467,
      "grad_norm": 0.4351139962673187,
      "learning_rate": 0.0001432578805964493,
      "loss": 0.9571,
      "step": 10970
    },
    {
      "epoch": 1.2661513613290263,
      "grad_norm": 0.36444708704948425,
      "learning_rate": 0.0001431973562271555,
      "loss": 0.9544,
      "step": 10975
    },
    {
      "epoch": 1.2667281956622058,
      "grad_norm": 0.34223493933677673,
      "learning_rate": 0.00014313681239736865,
      "loss": 0.9266,
      "step": 10980
    },
    {
      "epoch": 1.2673050299953852,
      "grad_norm": 0.36005809903144836,
      "learning_rate": 0.00014307624913436378,
      "loss": 0.9378,
      "step": 10985
    },
    {
      "epoch": 1.2678818643285648,
      "grad_norm": 0.4061218500137329,
      "learning_rate": 0.00014301566646542484,
      "loss": 0.9565,
      "step": 10990
    },
    {
      "epoch": 1.2684586986617443,
      "grad_norm": 0.40021246671676636,
      "learning_rate": 0.00014295506441784435,
      "loss": 0.9401,
      "step": 10995
    },
    {
      "epoch": 1.2690355329949239,
      "grad_norm": 0.36139312386512756,
      "learning_rate": 0.0001428944430189237,
      "loss": 0.9554,
      "step": 11000
    },
    {
      "epoch": 1.2696123673281035,
      "grad_norm": 0.3915020525455475,
      "learning_rate": 0.00014283380229597296,
      "loss": 0.931,
      "step": 11005
    },
    {
      "epoch": 1.270189201661283,
      "grad_norm": 0.3968125581741333,
      "learning_rate": 0.00014277314227631086,
      "loss": 0.8924,
      "step": 11010
    },
    {
      "epoch": 1.2707660359944624,
      "grad_norm": 0.3936527669429779,
      "learning_rate": 0.00014271246298726493,
      "loss": 0.9559,
      "step": 11015
    },
    {
      "epoch": 1.271342870327642,
      "grad_norm": 0.37673893570899963,
      "learning_rate": 0.00014265176445617118,
      "loss": 0.9228,
      "step": 11020
    },
    {
      "epoch": 1.2719197046608215,
      "grad_norm": 0.4325665533542633,
      "learning_rate": 0.00014259104671037452,
      "loss": 0.9025,
      "step": 11025
    },
    {
      "epoch": 1.2724965389940008,
      "grad_norm": 0.3954697847366333,
      "learning_rate": 0.0001425303097772284,
      "loss": 0.9346,
      "step": 11030
    },
    {
      "epoch": 1.2730733733271804,
      "grad_norm": 0.3768031597137451,
      "learning_rate": 0.00014246955368409488,
      "loss": 0.8911,
      "step": 11035
    },
    {
      "epoch": 1.27365020766036,
      "grad_norm": 0.3749455511569977,
      "learning_rate": 0.00014240877845834472,
      "loss": 0.9224,
      "step": 11040
    },
    {
      "epoch": 1.2742270419935395,
      "grad_norm": 0.41520991921424866,
      "learning_rate": 0.0001423479841273573,
      "loss": 0.964,
      "step": 11045
    },
    {
      "epoch": 1.274803876326719,
      "grad_norm": 0.3900809586048126,
      "learning_rate": 0.00014228717071852057,
      "loss": 0.9803,
      "step": 11050
    },
    {
      "epoch": 1.2753807106598984,
      "grad_norm": 0.370792955160141,
      "learning_rate": 0.00014222633825923108,
      "loss": 0.9121,
      "step": 11055
    },
    {
      "epoch": 1.275957544993078,
      "grad_norm": 0.3843114674091339,
      "learning_rate": 0.000142165486776894,
      "loss": 0.9405,
      "step": 11060
    },
    {
      "epoch": 1.2765343793262576,
      "grad_norm": 0.4016331732273102,
      "learning_rate": 0.00014210461629892302,
      "loss": 0.9385,
      "step": 11065
    },
    {
      "epoch": 1.277111213659437,
      "grad_norm": 0.3488540053367615,
      "learning_rate": 0.00014204372685274039,
      "loss": 0.9676,
      "step": 11070
    },
    {
      "epoch": 1.2776880479926165,
      "grad_norm": 0.3962653875350952,
      "learning_rate": 0.00014198281846577695,
      "loss": 0.93,
      "step": 11075
    },
    {
      "epoch": 1.278264882325796,
      "grad_norm": 0.36145344376564026,
      "learning_rate": 0.00014192189116547202,
      "loss": 0.9463,
      "step": 11080
    },
    {
      "epoch": 1.2788417166589756,
      "grad_norm": 0.40102216601371765,
      "learning_rate": 0.00014186094497927352,
      "loss": 0.8905,
      "step": 11085
    },
    {
      "epoch": 1.2794185509921552,
      "grad_norm": 0.40777820348739624,
      "learning_rate": 0.00014179997993463776,
      "loss": 0.9455,
      "step": 11090
    },
    {
      "epoch": 1.2799953853253345,
      "grad_norm": 0.36451178789138794,
      "learning_rate": 0.00014173899605902967,
      "loss": 0.8928,
      "step": 11095
    },
    {
      "epoch": 1.280572219658514,
      "grad_norm": 0.3938768804073334,
      "learning_rate": 0.00014167799337992258,
      "loss": 0.9016,
      "step": 11100
    },
    {
      "epoch": 1.2811490539916937,
      "grad_norm": 0.3848060071468353,
      "learning_rate": 0.0001416169719247983,
      "loss": 0.9278,
      "step": 11105
    },
    {
      "epoch": 1.281725888324873,
      "grad_norm": 0.38790416717529297,
      "learning_rate": 0.00014155593172114714,
      "loss": 0.918,
      "step": 11110
    },
    {
      "epoch": 1.2823027226580526,
      "grad_norm": 0.4134730100631714,
      "learning_rate": 0.00014149487279646781,
      "loss": 0.9552,
      "step": 11115
    },
    {
      "epoch": 1.2828795569912321,
      "grad_norm": 0.397656112909317,
      "learning_rate": 0.0001414337951782675,
      "loss": 0.9243,
      "step": 11120
    },
    {
      "epoch": 1.2834563913244117,
      "grad_norm": 0.39988553524017334,
      "learning_rate": 0.00014137269889406175,
      "loss": 0.951,
      "step": 11125
    },
    {
      "epoch": 1.2840332256575913,
      "grad_norm": 0.4649205505847931,
      "learning_rate": 0.00014131158397137462,
      "loss": 0.9284,
      "step": 11130
    },
    {
      "epoch": 1.2846100599907706,
      "grad_norm": 0.39595118165016174,
      "learning_rate": 0.00014125045043773845,
      "loss": 0.9351,
      "step": 11135
    },
    {
      "epoch": 1.2851868943239502,
      "grad_norm": 0.3864995837211609,
      "learning_rate": 0.00014118929832069405,
      "loss": 0.8899,
      "step": 11140
    },
    {
      "epoch": 1.2857637286571297,
      "grad_norm": 0.44582268595695496,
      "learning_rate": 0.00014112812764779053,
      "loss": 0.9924,
      "step": 11145
    },
    {
      "epoch": 1.286340562990309,
      "grad_norm": 0.38030120730400085,
      "learning_rate": 0.00014106693844658544,
      "loss": 0.9403,
      "step": 11150
    },
    {
      "epoch": 1.2869173973234886,
      "grad_norm": 0.35102468729019165,
      "learning_rate": 0.00014100573074464457,
      "loss": 0.9364,
      "step": 11155
    },
    {
      "epoch": 1.2874942316566682,
      "grad_norm": 0.43488121032714844,
      "learning_rate": 0.00014094450456954218,
      "loss": 0.9572,
      "step": 11160
    },
    {
      "epoch": 1.2880710659898478,
      "grad_norm": 0.3914535939693451,
      "learning_rate": 0.00014088325994886076,
      "loss": 0.8916,
      "step": 11165
    },
    {
      "epoch": 1.2886479003230273,
      "grad_norm": 0.39461666345596313,
      "learning_rate": 0.0001408219969101911,
      "loss": 0.9366,
      "step": 11170
    },
    {
      "epoch": 1.2892247346562067,
      "grad_norm": 0.37475186586380005,
      "learning_rate": 0.00014076071548113238,
      "loss": 0.9289,
      "step": 11175
    },
    {
      "epoch": 1.2898015689893862,
      "grad_norm": 0.3968532383441925,
      "learning_rate": 0.00014069941568929192,
      "loss": 0.9489,
      "step": 11180
    },
    {
      "epoch": 1.2903784033225658,
      "grad_norm": 0.378654807806015,
      "learning_rate": 0.00014063809756228546,
      "loss": 0.9033,
      "step": 11185
    },
    {
      "epoch": 1.2909552376557452,
      "grad_norm": 0.37253937125205994,
      "learning_rate": 0.0001405767611277369,
      "loss": 0.8878,
      "step": 11190
    },
    {
      "epoch": 1.2915320719889247,
      "grad_norm": 0.37073519825935364,
      "learning_rate": 0.00014051540641327846,
      "loss": 0.9219,
      "step": 11195
    },
    {
      "epoch": 1.2921089063221043,
      "grad_norm": 0.47666656970977783,
      "learning_rate": 0.00014045403344655052,
      "loss": 0.9549,
      "step": 11200
    },
    {
      "epoch": 1.2926857406552839,
      "grad_norm": 0.4085308909416199,
      "learning_rate": 0.00014039264225520175,
      "loss": 0.9306,
      "step": 11205
    },
    {
      "epoch": 1.2932625749884634,
      "grad_norm": 0.3675244450569153,
      "learning_rate": 0.00014033123286688902,
      "loss": 0.8491,
      "step": 11210
    },
    {
      "epoch": 1.2938394093216428,
      "grad_norm": 0.38852930068969727,
      "learning_rate": 0.0001402698053092773,
      "loss": 0.9174,
      "step": 11215
    },
    {
      "epoch": 1.2944162436548223,
      "grad_norm": 0.3479853570461273,
      "learning_rate": 0.0001402083596100399,
      "loss": 0.8958,
      "step": 11220
    },
    {
      "epoch": 1.294993077988002,
      "grad_norm": 0.3830512762069702,
      "learning_rate": 0.00014014689579685817,
      "loss": 0.9252,
      "step": 11225
    },
    {
      "epoch": 1.2955699123211812,
      "grad_norm": 0.3972710967063904,
      "learning_rate": 0.00014008541389742173,
      "loss": 0.9099,
      "step": 11230
    },
    {
      "epoch": 1.2961467466543608,
      "grad_norm": 0.41176527738571167,
      "learning_rate": 0.00014002391393942826,
      "loss": 0.9279,
      "step": 11235
    },
    {
      "epoch": 1.2967235809875404,
      "grad_norm": 0.3941210210323334,
      "learning_rate": 0.0001399623959505836,
      "loss": 0.9368,
      "step": 11240
    },
    {
      "epoch": 1.29730041532072,
      "grad_norm": 0.39330047369003296,
      "learning_rate": 0.00013990085995860182,
      "loss": 0.9322,
      "step": 11245
    },
    {
      "epoch": 1.2978772496538995,
      "grad_norm": 0.3845579922199249,
      "learning_rate": 0.00013983930599120487,
      "loss": 0.94,
      "step": 11250
    },
    {
      "epoch": 1.2984540839870788,
      "grad_norm": 0.4267570972442627,
      "learning_rate": 0.00013977773407612305,
      "loss": 0.917,
      "step": 11255
    },
    {
      "epoch": 1.2990309183202584,
      "grad_norm": 0.43357259035110474,
      "learning_rate": 0.0001397161442410945,
      "loss": 0.9466,
      "step": 11260
    },
    {
      "epoch": 1.299607752653438,
      "grad_norm": 0.35696274042129517,
      "learning_rate": 0.0001396545365138657,
      "loss": 0.8862,
      "step": 11265
    },
    {
      "epoch": 1.3001845869866173,
      "grad_norm": 0.4099060297012329,
      "learning_rate": 0.00013959291092219096,
      "loss": 0.9313,
      "step": 11270
    },
    {
      "epoch": 1.3007614213197969,
      "grad_norm": 0.40767526626586914,
      "learning_rate": 0.00013953126749383272,
      "loss": 0.9765,
      "step": 11275
    },
    {
      "epoch": 1.3013382556529764,
      "grad_norm": 0.4065137803554535,
      "learning_rate": 0.00013946960625656153,
      "loss": 0.9726,
      "step": 11280
    },
    {
      "epoch": 1.301915089986156,
      "grad_norm": 0.3992060720920563,
      "learning_rate": 0.00013940792723815586,
      "loss": 0.8959,
      "step": 11285
    },
    {
      "epoch": 1.3024919243193356,
      "grad_norm": 0.3845786452293396,
      "learning_rate": 0.00013934623046640222,
      "loss": 0.9131,
      "step": 11290
    },
    {
      "epoch": 1.303068758652515,
      "grad_norm": 0.4595773220062256,
      "learning_rate": 0.00013928451596909516,
      "loss": 0.9231,
      "step": 11295
    },
    {
      "epoch": 1.3036455929856945,
      "grad_norm": 0.4528485834598541,
      "learning_rate": 0.00013922278377403714,
      "loss": 0.922,
      "step": 11300
    },
    {
      "epoch": 1.304222427318874,
      "grad_norm": 0.4147089421749115,
      "learning_rate": 0.00013916103390903864,
      "loss": 1.0079,
      "step": 11305
    },
    {
      "epoch": 1.3047992616520534,
      "grad_norm": 0.39060330390930176,
      "learning_rate": 0.00013909926640191813,
      "loss": 0.9211,
      "step": 11310
    },
    {
      "epoch": 1.305376095985233,
      "grad_norm": 0.428657203912735,
      "learning_rate": 0.00013903748128050197,
      "loss": 0.9599,
      "step": 11315
    },
    {
      "epoch": 1.3059529303184125,
      "grad_norm": 0.4070096015930176,
      "learning_rate": 0.00013897567857262447,
      "loss": 0.9656,
      "step": 11320
    },
    {
      "epoch": 1.306529764651592,
      "grad_norm": 0.4431822896003723,
      "learning_rate": 0.0001389138583061279,
      "loss": 0.9206,
      "step": 11325
    },
    {
      "epoch": 1.3071065989847717,
      "grad_norm": 0.3884516656398773,
      "learning_rate": 0.00013885202050886237,
      "loss": 0.948,
      "step": 11330
    },
    {
      "epoch": 1.307683433317951,
      "grad_norm": 0.3520592749118805,
      "learning_rate": 0.00013879016520868594,
      "loss": 0.9439,
      "step": 11335
    },
    {
      "epoch": 1.3082602676511306,
      "grad_norm": 0.38204270601272583,
      "learning_rate": 0.00013872829243346453,
      "loss": 0.9851,
      "step": 11340
    },
    {
      "epoch": 1.3088371019843101,
      "grad_norm": 0.3736591339111328,
      "learning_rate": 0.000138666402211072,
      "loss": 0.9526,
      "step": 11345
    },
    {
      "epoch": 1.3094139363174895,
      "grad_norm": 0.3967388868331909,
      "learning_rate": 0.00013860449456939,
      "loss": 0.934,
      "step": 11350
    },
    {
      "epoch": 1.309990770650669,
      "grad_norm": 0.38638293743133545,
      "learning_rate": 0.00013854256953630797,
      "loss": 0.9615,
      "step": 11355
    },
    {
      "epoch": 1.3105676049838486,
      "grad_norm": 0.37511980533599854,
      "learning_rate": 0.0001384806271397233,
      "loss": 0.9241,
      "step": 11360
    },
    {
      "epoch": 1.3111444393170282,
      "grad_norm": 0.4248884320259094,
      "learning_rate": 0.00013841866740754125,
      "loss": 0.9263,
      "step": 11365
    },
    {
      "epoch": 1.3117212736502077,
      "grad_norm": 0.3675033450126648,
      "learning_rate": 0.00013835669036767466,
      "loss": 0.9261,
      "step": 11370
    },
    {
      "epoch": 1.3122981079833873,
      "grad_norm": 0.4006101191043854,
      "learning_rate": 0.00013829469604804438,
      "loss": 0.9089,
      "step": 11375
    },
    {
      "epoch": 1.3128749423165667,
      "grad_norm": 0.40753471851348877,
      "learning_rate": 0.00013823268447657897,
      "loss": 0.9243,
      "step": 11380
    },
    {
      "epoch": 1.3134517766497462,
      "grad_norm": 0.38653838634490967,
      "learning_rate": 0.00013817065568121477,
      "loss": 0.9265,
      "step": 11385
    },
    {
      "epoch": 1.3140286109829258,
      "grad_norm": 0.41960474848747253,
      "learning_rate": 0.00013810860968989586,
      "loss": 0.9732,
      "step": 11390
    },
    {
      "epoch": 1.3146054453161051,
      "grad_norm": 0.3811507821083069,
      "learning_rate": 0.00013804654653057404,
      "loss": 0.9718,
      "step": 11395
    },
    {
      "epoch": 1.3151822796492847,
      "grad_norm": 0.3971553444862366,
      "learning_rate": 0.00013798446623120893,
      "loss": 0.9336,
      "step": 11400
    },
    {
      "epoch": 1.3157591139824643,
      "grad_norm": 0.4036201536655426,
      "learning_rate": 0.00013792236881976784,
      "loss": 0.9603,
      "step": 11405
    },
    {
      "epoch": 1.3163359483156438,
      "grad_norm": 0.382589727640152,
      "learning_rate": 0.00013786025432422573,
      "loss": 0.9482,
      "step": 11410
    },
    {
      "epoch": 1.3169127826488234,
      "grad_norm": 0.4491170644760132,
      "learning_rate": 0.00013779812277256537,
      "loss": 0.9335,
      "step": 11415
    },
    {
      "epoch": 1.3174896169820027,
      "grad_norm": 0.3773971498012543,
      "learning_rate": 0.00013773597419277703,
      "loss": 0.9342,
      "step": 11420
    },
    {
      "epoch": 1.3180664513151823,
      "grad_norm": 0.37888625264167786,
      "learning_rate": 0.0001376738086128589,
      "loss": 0.8975,
      "step": 11425
    },
    {
      "epoch": 1.3186432856483619,
      "grad_norm": 0.38423123955726624,
      "learning_rate": 0.0001376116260608166,
      "loss": 0.905,
      "step": 11430
    },
    {
      "epoch": 1.3192201199815412,
      "grad_norm": 0.3804115355014801,
      "learning_rate": 0.0001375494265646635,
      "loss": 0.934,
      "step": 11435
    },
    {
      "epoch": 1.3197969543147208,
      "grad_norm": 0.41067662835121155,
      "learning_rate": 0.00013748721015242066,
      "loss": 0.9475,
      "step": 11440
    },
    {
      "epoch": 1.3203737886479003,
      "grad_norm": 0.4139721095561981,
      "learning_rate": 0.0001374249768521166,
      "loss": 0.928,
      "step": 11445
    },
    {
      "epoch": 1.32095062298108,
      "grad_norm": 0.38291290402412415,
      "learning_rate": 0.0001373627266917876,
      "loss": 0.9601,
      "step": 11450
    },
    {
      "epoch": 1.3215274573142595,
      "grad_norm": 0.37222400307655334,
      "learning_rate": 0.00013730045969947752,
      "loss": 0.943,
      "step": 11455
    },
    {
      "epoch": 1.3221042916474388,
      "grad_norm": 0.38442909717559814,
      "learning_rate": 0.0001372381759032377,
      "loss": 0.9419,
      "step": 11460
    },
    {
      "epoch": 1.3226811259806184,
      "grad_norm": 0.3971159756183624,
      "learning_rate": 0.00013717587533112707,
      "loss": 0.9123,
      "step": 11465
    },
    {
      "epoch": 1.323257960313798,
      "grad_norm": 0.4297926127910614,
      "learning_rate": 0.00013711355801121226,
      "loss": 0.9524,
      "step": 11470
    },
    {
      "epoch": 1.3238347946469773,
      "grad_norm": 0.34397372603416443,
      "learning_rate": 0.00013705122397156727,
      "loss": 0.8974,
      "step": 11475
    },
    {
      "epoch": 1.3244116289801569,
      "grad_norm": 0.4033229649066925,
      "learning_rate": 0.00013698887324027373,
      "loss": 0.9497,
      "step": 11480
    },
    {
      "epoch": 1.3249884633133364,
      "grad_norm": 0.4018646776676178,
      "learning_rate": 0.0001369265058454208,
      "loss": 0.968,
      "step": 11485
    },
    {
      "epoch": 1.325565297646516,
      "grad_norm": 0.40979576110839844,
      "learning_rate": 0.00013686412181510504,
      "loss": 0.8928,
      "step": 11490
    },
    {
      "epoch": 1.3261421319796955,
      "grad_norm": 0.4052782952785492,
      "learning_rate": 0.00013680172117743066,
      "loss": 0.945,
      "step": 11495
    },
    {
      "epoch": 1.326718966312875,
      "grad_norm": 0.42259207367897034,
      "learning_rate": 0.0001367393039605092,
      "loss": 0.9269,
      "step": 11500
    },
    {
      "epoch": 1.3272958006460545,
      "grad_norm": 0.37201040983200073,
      "learning_rate": 0.0001366768701924598,
      "loss": 0.9516,
      "step": 11505
    },
    {
      "epoch": 1.327872634979234,
      "grad_norm": 0.38049280643463135,
      "learning_rate": 0.00013661441990140894,
      "loss": 0.9897,
      "step": 11510
    },
    {
      "epoch": 1.3284494693124134,
      "grad_norm": 0.3636796176433563,
      "learning_rate": 0.00013655195311549059,
      "loss": 1.0134,
      "step": 11515
    },
    {
      "epoch": 1.329026303645593,
      "grad_norm": 0.40373724699020386,
      "learning_rate": 0.0001364894698628462,
      "loss": 0.9467,
      "step": 11520
    },
    {
      "epoch": 1.3296031379787725,
      "grad_norm": 0.43449637293815613,
      "learning_rate": 0.0001364269701716246,
      "loss": 0.9938,
      "step": 11525
    },
    {
      "epoch": 1.330179972311952,
      "grad_norm": 0.47336867451667786,
      "learning_rate": 0.00013636445406998198,
      "loss": 0.9202,
      "step": 11530
    },
    {
      "epoch": 1.3307568066451316,
      "grad_norm": 0.3779871165752411,
      "learning_rate": 0.00013630192158608202,
      "loss": 0.9789,
      "step": 11535
    },
    {
      "epoch": 1.331333640978311,
      "grad_norm": 0.3905300498008728,
      "learning_rate": 0.00013623937274809568,
      "loss": 0.8662,
      "step": 11540
    },
    {
      "epoch": 1.3319104753114905,
      "grad_norm": 0.4032379388809204,
      "learning_rate": 0.00013617680758420134,
      "loss": 0.9047,
      "step": 11545
    },
    {
      "epoch": 1.33248730964467,
      "grad_norm": 0.3876950740814209,
      "learning_rate": 0.00013611422612258477,
      "loss": 0.9225,
      "step": 11550
    },
    {
      "epoch": 1.3330641439778494,
      "grad_norm": 0.3889716565608978,
      "learning_rate": 0.000136051628391439,
      "loss": 0.9556,
      "step": 11555
    },
    {
      "epoch": 1.333640978311029,
      "grad_norm": 0.38099825382232666,
      "learning_rate": 0.0001359890144189644,
      "loss": 0.9339,
      "step": 11560
    },
    {
      "epoch": 1.3342178126442086,
      "grad_norm": 0.44365194439888,
      "learning_rate": 0.00013592638423336875,
      "loss": 0.8879,
      "step": 11565
    },
    {
      "epoch": 1.3347946469773881,
      "grad_norm": 0.39158475399017334,
      "learning_rate": 0.00013586373786286706,
      "loss": 1.0019,
      "step": 11570
    },
    {
      "epoch": 1.3353714813105677,
      "grad_norm": 0.3753363788127899,
      "learning_rate": 0.00013580107533568163,
      "loss": 0.9233,
      "step": 11575
    },
    {
      "epoch": 1.335948315643747,
      "grad_norm": 0.4002339541912079,
      "learning_rate": 0.00013573839668004202,
      "loss": 0.9503,
      "step": 11580
    },
    {
      "epoch": 1.3365251499769266,
      "grad_norm": 0.38539615273475647,
      "learning_rate": 0.00013567570192418512,
      "loss": 0.9241,
      "step": 11585
    },
    {
      "epoch": 1.3371019843101062,
      "grad_norm": 0.40463823080062866,
      "learning_rate": 0.00013561299109635507,
      "loss": 0.9342,
      "step": 11590
    },
    {
      "epoch": 1.3376788186432855,
      "grad_norm": 0.39062389731407166,
      "learning_rate": 0.00013555026422480313,
      "loss": 0.922,
      "step": 11595
    },
    {
      "epoch": 1.338255652976465,
      "grad_norm": 0.3820863366127014,
      "learning_rate": 0.00013548752133778796,
      "loss": 0.9366,
      "step": 11600
    },
    {
      "epoch": 1.3388324873096447,
      "grad_norm": 0.3850560784339905,
      "learning_rate": 0.0001354247624635753,
      "loss": 0.9345,
      "step": 11605
    },
    {
      "epoch": 1.3394093216428242,
      "grad_norm": 0.40325823426246643,
      "learning_rate": 0.00013536198763043823,
      "loss": 0.8968,
      "step": 11610
    },
    {
      "epoch": 1.3399861559760038,
      "grad_norm": 0.4057595431804657,
      "learning_rate": 0.00013529919686665679,
      "loss": 0.9466,
      "step": 11615
    },
    {
      "epoch": 1.3405629903091831,
      "grad_norm": 0.3949381411075592,
      "learning_rate": 0.0001352363902005185,
      "loss": 0.9278,
      "step": 11620
    },
    {
      "epoch": 1.3411398246423627,
      "grad_norm": 0.4039568603038788,
      "learning_rate": 0.00013517356766031777,
      "loss": 0.9706,
      "step": 11625
    },
    {
      "epoch": 1.3417166589755423,
      "grad_norm": 0.3839099407196045,
      "learning_rate": 0.00013511072927435632,
      "loss": 0.9001,
      "step": 11630
    },
    {
      "epoch": 1.3422934933087216,
      "grad_norm": 0.41112446784973145,
      "learning_rate": 0.00013504787507094296,
      "loss": 0.9224,
      "step": 11635
    },
    {
      "epoch": 1.3428703276419012,
      "grad_norm": 0.40775299072265625,
      "learning_rate": 0.00013498500507839363,
      "loss": 0.894,
      "step": 11640
    },
    {
      "epoch": 1.3434471619750807,
      "grad_norm": 0.39060235023498535,
      "learning_rate": 0.0001349221193250314,
      "loss": 0.9165,
      "step": 11645
    },
    {
      "epoch": 1.3440239963082603,
      "grad_norm": 0.370009183883667,
      "learning_rate": 0.0001348592178391864,
      "loss": 0.9315,
      "step": 11650
    },
    {
      "epoch": 1.3446008306414399,
      "grad_norm": 0.5149419903755188,
      "learning_rate": 0.00013479630064919593,
      "loss": 0.9746,
      "step": 11655
    },
    {
      "epoch": 1.3451776649746192,
      "grad_norm": 0.3872247338294983,
      "learning_rate": 0.0001347333677834042,
      "loss": 0.9233,
      "step": 11660
    },
    {
      "epoch": 1.3457544993077988,
      "grad_norm": 0.4263676404953003,
      "learning_rate": 0.0001346704192701627,
      "loss": 0.9739,
      "step": 11665
    },
    {
      "epoch": 1.3463313336409783,
      "grad_norm": 0.4168122112751007,
      "learning_rate": 0.00013460745513782976,
      "loss": 0.946,
      "step": 11670
    },
    {
      "epoch": 1.3469081679741577,
      "grad_norm": 0.48353296518325806,
      "learning_rate": 0.0001345444754147709,
      "loss": 0.9424,
      "step": 11675
    },
    {
      "epoch": 1.3474850023073373,
      "grad_norm": 0.38045409321784973,
      "learning_rate": 0.00013448148012935865,
      "loss": 0.9149,
      "step": 11680
    },
    {
      "epoch": 1.3480618366405168,
      "grad_norm": 0.4222165644168854,
      "learning_rate": 0.0001344184693099724,
      "loss": 0.9751,
      "step": 11685
    },
    {
      "epoch": 1.3486386709736964,
      "grad_norm": 0.374525785446167,
      "learning_rate": 0.00013435544298499874,
      "loss": 0.9155,
      "step": 11690
    },
    {
      "epoch": 1.349215505306876,
      "grad_norm": 0.41011539101600647,
      "learning_rate": 0.0001342924011828311,
      "loss": 0.9135,
      "step": 11695
    },
    {
      "epoch": 1.3497923396400553,
      "grad_norm": 0.40688735246658325,
      "learning_rate": 0.00013422934393186994,
      "loss": 0.9194,
      "step": 11700
    },
    {
      "epoch": 1.3503691739732349,
      "grad_norm": 0.38346344232559204,
      "learning_rate": 0.0001341662712605227,
      "loss": 0.9104,
      "step": 11705
    },
    {
      "epoch": 1.3509460083064144,
      "grad_norm": 0.39127588272094727,
      "learning_rate": 0.00013410318319720372,
      "loss": 0.8961,
      "step": 11710
    },
    {
      "epoch": 1.351522842639594,
      "grad_norm": 0.36714041233062744,
      "learning_rate": 0.0001340400797703343,
      "loss": 0.8935,
      "step": 11715
    },
    {
      "epoch": 1.3520996769727733,
      "grad_norm": 0.4213907718658447,
      "learning_rate": 0.00013397696100834265,
      "loss": 0.9102,
      "step": 11720
    },
    {
      "epoch": 1.352676511305953,
      "grad_norm": 0.3632507622241974,
      "learning_rate": 0.00013391382693966395,
      "loss": 0.896,
      "step": 11725
    },
    {
      "epoch": 1.3532533456391325,
      "grad_norm": 0.39109885692596436,
      "learning_rate": 0.00013385067759274014,
      "loss": 0.9276,
      "step": 11730
    },
    {
      "epoch": 1.353830179972312,
      "grad_norm": 0.3651511073112488,
      "learning_rate": 0.00013378751299602016,
      "loss": 0.9459,
      "step": 11735
    },
    {
      "epoch": 1.3544070143054916,
      "grad_norm": 0.43356800079345703,
      "learning_rate": 0.00013372433317795977,
      "loss": 0.9392,
      "step": 11740
    },
    {
      "epoch": 1.354983848638671,
      "grad_norm": 0.39868101477622986,
      "learning_rate": 0.00013366113816702164,
      "loss": 0.9124,
      "step": 11745
    },
    {
      "epoch": 1.3555606829718505,
      "grad_norm": 0.4322625398635864,
      "learning_rate": 0.0001335979279916752,
      "loss": 0.9534,
      "step": 11750
    },
    {
      "epoch": 1.35613751730503,
      "grad_norm": 0.4016193449497223,
      "learning_rate": 0.0001335347026803968,
      "loss": 0.9087,
      "step": 11755
    },
    {
      "epoch": 1.3567143516382094,
      "grad_norm": 0.399722158908844,
      "learning_rate": 0.0001334714622616695,
      "loss": 0.9338,
      "step": 11760
    },
    {
      "epoch": 1.357291185971389,
      "grad_norm": 0.3979092538356781,
      "learning_rate": 0.0001334082067639833,
      "loss": 0.905,
      "step": 11765
    },
    {
      "epoch": 1.3578680203045685,
      "grad_norm": 0.3598368465900421,
      "learning_rate": 0.0001333449362158349,
      "loss": 0.8926,
      "step": 11770
    },
    {
      "epoch": 1.3584448546377481,
      "grad_norm": 0.39615514874458313,
      "learning_rate": 0.0001332816506457278,
      "loss": 0.9034,
      "step": 11775
    },
    {
      "epoch": 1.3590216889709277,
      "grad_norm": 0.41682448983192444,
      "learning_rate": 0.0001332183500821723,
      "loss": 0.9388,
      "step": 11780
    },
    {
      "epoch": 1.359598523304107,
      "grad_norm": 0.3829791843891144,
      "learning_rate": 0.00013315503455368536,
      "loss": 0.9496,
      "step": 11785
    },
    {
      "epoch": 1.3601753576372866,
      "grad_norm": 0.3748948872089386,
      "learning_rate": 0.0001330917040887908,
      "loss": 0.9622,
      "step": 11790
    },
    {
      "epoch": 1.3607521919704662,
      "grad_norm": 0.3581671416759491,
      "learning_rate": 0.00013302835871601914,
      "loss": 0.9801,
      "step": 11795
    },
    {
      "epoch": 1.3613290263036455,
      "grad_norm": 0.37090566754341125,
      "learning_rate": 0.00013296499846390756,
      "loss": 0.9408,
      "step": 11800
    },
    {
      "epoch": 1.361905860636825,
      "grad_norm": 0.4028850197792053,
      "learning_rate": 0.00013290162336099996,
      "loss": 0.9306,
      "step": 11805
    },
    {
      "epoch": 1.3624826949700046,
      "grad_norm": 0.38192489743232727,
      "learning_rate": 0.000132838233435847,
      "loss": 0.9256,
      "step": 11810
    },
    {
      "epoch": 1.3630595293031842,
      "grad_norm": 0.45534375309944153,
      "learning_rate": 0.00013277482871700588,
      "loss": 0.936,
      "step": 11815
    },
    {
      "epoch": 1.3636363636363638,
      "grad_norm": 0.44784387946128845,
      "learning_rate": 0.00013271140923304064,
      "loss": 0.9184,
      "step": 11820
    },
    {
      "epoch": 1.364213197969543,
      "grad_norm": 0.4402177035808563,
      "learning_rate": 0.00013264797501252184,
      "loss": 0.9465,
      "step": 11825
    },
    {
      "epoch": 1.3647900323027227,
      "grad_norm": 0.4333953261375427,
      "learning_rate": 0.00013258452608402673,
      "loss": 0.9135,
      "step": 11830
    },
    {
      "epoch": 1.3653668666359022,
      "grad_norm": 0.4207151234149933,
      "learning_rate": 0.00013252106247613914,
      "loss": 0.9334,
      "step": 11835
    },
    {
      "epoch": 1.3659437009690816,
      "grad_norm": 0.36296144127845764,
      "learning_rate": 0.0001324575842174496,
      "loss": 0.9816,
      "step": 11840
    },
    {
      "epoch": 1.3665205353022611,
      "grad_norm": 0.3740653395652771,
      "learning_rate": 0.00013239409133655516,
      "loss": 0.9217,
      "step": 11845
    },
    {
      "epoch": 1.3670973696354407,
      "grad_norm": 0.4140547513961792,
      "learning_rate": 0.00013233058386205948,
      "loss": 0.975,
      "step": 11850
    },
    {
      "epoch": 1.3676742039686203,
      "grad_norm": 0.3837771713733673,
      "learning_rate": 0.00013226706182257284,
      "loss": 0.9681,
      "step": 11855
    },
    {
      "epoch": 1.3682510383017998,
      "grad_norm": 0.38130462169647217,
      "learning_rate": 0.000132203525246712,
      "loss": 0.9067,
      "step": 11860
    },
    {
      "epoch": 1.3688278726349792,
      "grad_norm": 0.3723084628582001,
      "learning_rate": 0.00013213997416310034,
      "loss": 0.8401,
      "step": 11865
    },
    {
      "epoch": 1.3694047069681587,
      "grad_norm": 0.366595059633255,
      "learning_rate": 0.00013207640860036775,
      "loss": 0.9539,
      "step": 11870
    },
    {
      "epoch": 1.3699815413013383,
      "grad_norm": 0.39958158135414124,
      "learning_rate": 0.0001320128285871506,
      "loss": 0.9879,
      "step": 11875
    },
    {
      "epoch": 1.3705583756345177,
      "grad_norm": 0.3819301426410675,
      "learning_rate": 0.00013194923415209183,
      "loss": 0.9204,
      "step": 11880
    },
    {
      "epoch": 1.3711352099676972,
      "grad_norm": 0.34480926394462585,
      "learning_rate": 0.00013188562532384087,
      "loss": 0.8921,
      "step": 11885
    },
    {
      "epoch": 1.3717120443008768,
      "grad_norm": 0.3757553696632385,
      "learning_rate": 0.0001318220021310536,
      "loss": 0.974,
      "step": 11890
    },
    {
      "epoch": 1.3722888786340564,
      "grad_norm": 0.36825573444366455,
      "learning_rate": 0.00013175836460239243,
      "loss": 0.8998,
      "step": 11895
    },
    {
      "epoch": 1.372865712967236,
      "grad_norm": 0.38179588317871094,
      "learning_rate": 0.00013169471276652613,
      "loss": 0.9382,
      "step": 11900
    },
    {
      "epoch": 1.3734425473004153,
      "grad_norm": 0.40566423535346985,
      "learning_rate": 0.00013163104665213008,
      "loss": 0.9505,
      "step": 11905
    },
    {
      "epoch": 1.3740193816335948,
      "grad_norm": 0.4374394714832306,
      "learning_rate": 0.00013156736628788584,
      "loss": 0.9523,
      "step": 11910
    },
    {
      "epoch": 1.3745962159667744,
      "grad_norm": 0.39277151226997375,
      "learning_rate": 0.00013150367170248169,
      "loss": 0.9238,
      "step": 11915
    },
    {
      "epoch": 1.3751730502999537,
      "grad_norm": 0.39160189032554626,
      "learning_rate": 0.00013143996292461202,
      "loss": 0.8842,
      "step": 11920
    },
    {
      "epoch": 1.3757498846331333,
      "grad_norm": 0.36961859464645386,
      "learning_rate": 0.00013137623998297785,
      "loss": 0.9399,
      "step": 11925
    },
    {
      "epoch": 1.3763267189663129,
      "grad_norm": 0.3880990147590637,
      "learning_rate": 0.0001313125029062865,
      "loss": 0.8803,
      "step": 11930
    },
    {
      "epoch": 1.3769035532994924,
      "grad_norm": 0.41162627935409546,
      "learning_rate": 0.00013124875172325159,
      "loss": 0.968,
      "step": 11935
    },
    {
      "epoch": 1.377480387632672,
      "grad_norm": 0.376601904630661,
      "learning_rate": 0.00013118498646259323,
      "loss": 0.9859,
      "step": 11940
    },
    {
      "epoch": 1.3780572219658513,
      "grad_norm": 0.4360121488571167,
      "learning_rate": 0.0001311212071530377,
      "loss": 0.9316,
      "step": 11945
    },
    {
      "epoch": 1.378634056299031,
      "grad_norm": 0.4116380214691162,
      "learning_rate": 0.00013105741382331775,
      "loss": 0.9582,
      "step": 11950
    },
    {
      "epoch": 1.3792108906322105,
      "grad_norm": 0.39656034111976624,
      "learning_rate": 0.0001309936065021724,
      "loss": 0.9395,
      "step": 11955
    },
    {
      "epoch": 1.3797877249653898,
      "grad_norm": 0.3742821514606476,
      "learning_rate": 0.00013092978521834695,
      "loss": 0.9787,
      "step": 11960
    },
    {
      "epoch": 1.3803645592985694,
      "grad_norm": 0.3952317535877228,
      "learning_rate": 0.00013086595000059306,
      "loss": 0.937,
      "step": 11965
    },
    {
      "epoch": 1.380941393631749,
      "grad_norm": 0.43107977509498596,
      "learning_rate": 0.0001308021008776686,
      "loss": 0.9629,
      "step": 11970
    },
    {
      "epoch": 1.3815182279649285,
      "grad_norm": 0.3768942952156067,
      "learning_rate": 0.00013073823787833767,
      "loss": 0.9404,
      "step": 11975
    },
    {
      "epoch": 1.382095062298108,
      "grad_norm": 0.44299280643463135,
      "learning_rate": 0.00013067436103137074,
      "loss": 0.9767,
      "step": 11980
    },
    {
      "epoch": 1.3826718966312874,
      "grad_norm": 0.4110683798789978,
      "learning_rate": 0.00013061047036554444,
      "loss": 0.9309,
      "step": 11985
    },
    {
      "epoch": 1.383248730964467,
      "grad_norm": 0.3969920873641968,
      "learning_rate": 0.00013054656590964165,
      "loss": 0.9472,
      "step": 11990
    },
    {
      "epoch": 1.3838255652976466,
      "grad_norm": 0.37778428196907043,
      "learning_rate": 0.00013048264769245142,
      "loss": 0.8792,
      "step": 11995
    },
    {
      "epoch": 1.384402399630826,
      "grad_norm": 0.39364323019981384,
      "learning_rate": 0.00013041871574276905,
      "loss": 0.9477,
      "step": 12000
    },
    {
      "epoch": 1.3849792339640055,
      "grad_norm": 0.4027245044708252,
      "learning_rate": 0.00013035477008939598,
      "loss": 0.9367,
      "step": 12005
    },
    {
      "epoch": 1.385556068297185,
      "grad_norm": 0.38217535614967346,
      "learning_rate": 0.00013029081076113992,
      "loss": 0.8929,
      "step": 12010
    },
    {
      "epoch": 1.3861329026303646,
      "grad_norm": 0.47565191984176636,
      "learning_rate": 0.00013022683778681458,
      "loss": 0.9033,
      "step": 12015
    },
    {
      "epoch": 1.3867097369635442,
      "grad_norm": 0.610550582408905,
      "learning_rate": 0.00013016285119524002,
      "loss": 0.9522,
      "step": 12020
    },
    {
      "epoch": 1.3872865712967235,
      "grad_norm": 0.3981565237045288,
      "learning_rate": 0.00013009885101524223,
      "loss": 0.9176,
      "step": 12025
    },
    {
      "epoch": 1.387863405629903,
      "grad_norm": 0.39330512285232544,
      "learning_rate": 0.00013003483727565344,
      "loss": 0.9062,
      "step": 12030
    },
    {
      "epoch": 1.3884402399630826,
      "grad_norm": 0.3685097098350525,
      "learning_rate": 0.00012997081000531196,
      "loss": 0.9752,
      "step": 12035
    },
    {
      "epoch": 1.389017074296262,
      "grad_norm": 0.4128875732421875,
      "learning_rate": 0.00012990676923306223,
      "loss": 0.9488,
      "step": 12040
    },
    {
      "epoch": 1.3895939086294415,
      "grad_norm": 0.3621658980846405,
      "learning_rate": 0.00012984271498775473,
      "loss": 0.9536,
      "step": 12045
    },
    {
      "epoch": 1.390170742962621,
      "grad_norm": 0.3921271860599518,
      "learning_rate": 0.000129778647298246,
      "loss": 0.9134,
      "step": 12050
    },
    {
      "epoch": 1.3907475772958007,
      "grad_norm": 0.3891122341156006,
      "learning_rate": 0.0001297145661933987,
      "loss": 0.9275,
      "step": 12055
    },
    {
      "epoch": 1.3913244116289802,
      "grad_norm": 0.3682466149330139,
      "learning_rate": 0.00012965047170208145,
      "loss": 0.9552,
      "step": 12060
    },
    {
      "epoch": 1.3919012459621598,
      "grad_norm": 0.49361538887023926,
      "learning_rate": 0.00012958636385316895,
      "loss": 0.9195,
      "step": 12065
    },
    {
      "epoch": 1.3924780802953391,
      "grad_norm": 0.3890072703361511,
      "learning_rate": 0.00012952224267554193,
      "loss": 0.9439,
      "step": 12070
    },
    {
      "epoch": 1.3930549146285187,
      "grad_norm": 0.4101308286190033,
      "learning_rate": 0.00012945810819808715,
      "loss": 0.8946,
      "step": 12075
    },
    {
      "epoch": 1.3936317489616983,
      "grad_norm": 0.41521626710891724,
      "learning_rate": 0.0001293939604496972,
      "loss": 0.9408,
      "step": 12080
    },
    {
      "epoch": 1.3942085832948776,
      "grad_norm": 0.4195525050163269,
      "learning_rate": 0.00012932979945927083,
      "loss": 0.9213,
      "step": 12085
    },
    {
      "epoch": 1.3947854176280572,
      "grad_norm": 0.41010963916778564,
      "learning_rate": 0.00012926562525571273,
      "loss": 0.989,
      "step": 12090
    },
    {
      "epoch": 1.3953622519612368,
      "grad_norm": 0.37773555517196655,
      "learning_rate": 0.00012920143786793344,
      "loss": 0.9573,
      "step": 12095
    },
    {
      "epoch": 1.3959390862944163,
      "grad_norm": 0.3921565115451813,
      "learning_rate": 0.00012913723732484953,
      "loss": 0.919,
      "step": 12100
    },
    {
      "epoch": 1.3965159206275959,
      "grad_norm": 0.39858192205429077,
      "learning_rate": 0.00012907302365538348,
      "loss": 0.9566,
      "step": 12105
    },
    {
      "epoch": 1.3970927549607752,
      "grad_norm": 0.4024829566478729,
      "learning_rate": 0.00012900879688846365,
      "loss": 0.9249,
      "step": 12110
    },
    {
      "epoch": 1.3976695892939548,
      "grad_norm": 0.41164299845695496,
      "learning_rate": 0.00012894455705302432,
      "loss": 0.8791,
      "step": 12115
    },
    {
      "epoch": 1.3982464236271344,
      "grad_norm": 0.3956606388092041,
      "learning_rate": 0.0001288803041780057,
      "loss": 0.9589,
      "step": 12120
    },
    {
      "epoch": 1.3988232579603137,
      "grad_norm": 0.3662050664424896,
      "learning_rate": 0.0001288160382923538,
      "loss": 0.956,
      "step": 12125
    },
    {
      "epoch": 1.3994000922934933,
      "grad_norm": 0.3646704852581024,
      "learning_rate": 0.00012875175942502054,
      "loss": 0.9914,
      "step": 12130
    },
    {
      "epoch": 1.3999769266266728,
      "grad_norm": 0.3993260860443115,
      "learning_rate": 0.0001286874676049637,
      "loss": 0.9225,
      "step": 12135
    },
    {
      "epoch": 1.4005537609598524,
      "grad_norm": 0.38320133090019226,
      "learning_rate": 0.00012862316286114676,
      "loss": 0.9126,
      "step": 12140
    },
    {
      "epoch": 1.401130595293032,
      "grad_norm": 0.4064159095287323,
      "learning_rate": 0.00012855884522253928,
      "loss": 0.9135,
      "step": 12145
    },
    {
      "epoch": 1.4017074296262113,
      "grad_norm": 0.42876338958740234,
      "learning_rate": 0.00012849451471811643,
      "loss": 0.9421,
      "step": 12150
    },
    {
      "epoch": 1.4022842639593909,
      "grad_norm": 0.3715895414352417,
      "learning_rate": 0.0001284301713768592,
      "loss": 0.9415,
      "step": 12155
    },
    {
      "epoch": 1.4028610982925704,
      "grad_norm": 0.37385094165802,
      "learning_rate": 0.00012836581522775438,
      "loss": 0.9066,
      "step": 12160
    },
    {
      "epoch": 1.4034379326257498,
      "grad_norm": 0.3633922338485718,
      "learning_rate": 0.00012830144629979456,
      "loss": 0.9539,
      "step": 12165
    },
    {
      "epoch": 1.4040147669589293,
      "grad_norm": 0.38846006989479065,
      "learning_rate": 0.0001282370646219781,
      "loss": 0.9294,
      "step": 12170
    },
    {
      "epoch": 1.404591601292109,
      "grad_norm": 0.37591782212257385,
      "learning_rate": 0.00012817267022330903,
      "loss": 0.8834,
      "step": 12175
    },
    {
      "epoch": 1.4051684356252885,
      "grad_norm": 0.417021244764328,
      "learning_rate": 0.00012810826313279717,
      "loss": 0.9032,
      "step": 12180
    },
    {
      "epoch": 1.405745269958468,
      "grad_norm": 0.42905882000923157,
      "learning_rate": 0.00012804384337945803,
      "loss": 0.9289,
      "step": 12185
    },
    {
      "epoch": 1.4063221042916474,
      "grad_norm": 0.4415873885154724,
      "learning_rate": 0.00012797941099231284,
      "loss": 0.9051,
      "step": 12190
    },
    {
      "epoch": 1.406898938624827,
      "grad_norm": 0.3640197217464447,
      "learning_rate": 0.00012791496600038854,
      "loss": 0.9079,
      "step": 12195
    },
    {
      "epoch": 1.4074757729580065,
      "grad_norm": 0.3957792818546295,
      "learning_rate": 0.00012785050843271763,
      "loss": 0.9206,
      "step": 12200
    },
    {
      "epoch": 1.4080526072911859,
      "grad_norm": 0.3771734833717346,
      "learning_rate": 0.0001277860383183385,
      "loss": 0.9383,
      "step": 12205
    },
    {
      "epoch": 1.4086294416243654,
      "grad_norm": 0.3889096975326538,
      "learning_rate": 0.00012772155568629499,
      "loss": 0.9629,
      "step": 12210
    },
    {
      "epoch": 1.409206275957545,
      "grad_norm": 0.4572511911392212,
      "learning_rate": 0.00012765706056563667,
      "loss": 0.9915,
      "step": 12215
    },
    {
      "epoch": 1.4097831102907246,
      "grad_norm": 0.38007521629333496,
      "learning_rate": 0.00012759255298541868,
      "loss": 0.8918,
      "step": 12220
    },
    {
      "epoch": 1.4103599446239041,
      "grad_norm": 0.3732183873653412,
      "learning_rate": 0.00012752803297470187,
      "loss": 0.9185,
      "step": 12225
    },
    {
      "epoch": 1.4109367789570835,
      "grad_norm": 0.3802737593650818,
      "learning_rate": 0.00012746350056255259,
      "loss": 0.9226,
      "step": 12230
    },
    {
      "epoch": 1.411513613290263,
      "grad_norm": 0.3844064176082611,
      "learning_rate": 0.00012739895577804284,
      "loss": 0.8875,
      "step": 12235
    },
    {
      "epoch": 1.4120904476234426,
      "grad_norm": 0.39902496337890625,
      "learning_rate": 0.00012733439865025012,
      "loss": 0.9545,
      "step": 12240
    },
    {
      "epoch": 1.412667281956622,
      "grad_norm": 0.4251570999622345,
      "learning_rate": 0.00012726982920825762,
      "loss": 0.9587,
      "step": 12245
    },
    {
      "epoch": 1.4132441162898015,
      "grad_norm": 0.45643094182014465,
      "learning_rate": 0.00012720524748115395,
      "loss": 0.9062,
      "step": 12250
    },
    {
      "epoch": 1.413820950622981,
      "grad_norm": 0.4236997663974762,
      "learning_rate": 0.0001271406534980333,
      "loss": 0.9377,
      "step": 12255
    },
    {
      "epoch": 1.4143977849561606,
      "grad_norm": 0.3734123408794403,
      "learning_rate": 0.00012707604728799543,
      "loss": 0.9715,
      "step": 12260
    },
    {
      "epoch": 1.4149746192893402,
      "grad_norm": 0.381076455116272,
      "learning_rate": 0.0001270114288801455,
      "loss": 0.9119,
      "step": 12265
    },
    {
      "epoch": 1.4155514536225196,
      "grad_norm": 0.3949384391307831,
      "learning_rate": 0.0001269467983035943,
      "loss": 0.9251,
      "step": 12270
    },
    {
      "epoch": 1.4161282879556991,
      "grad_norm": 0.43294233083724976,
      "learning_rate": 0.00012688215558745794,
      "loss": 0.9293,
      "step": 12275
    },
    {
      "epoch": 1.4167051222888787,
      "grad_norm": 0.3953537940979004,
      "learning_rate": 0.00012681750076085817,
      "loss": 0.9745,
      "step": 12280
    },
    {
      "epoch": 1.417281956622058,
      "grad_norm": 0.4244721829891205,
      "learning_rate": 0.00012675283385292212,
      "loss": 0.9527,
      "step": 12285
    },
    {
      "epoch": 1.4178587909552376,
      "grad_norm": 0.4531431198120117,
      "learning_rate": 0.00012668815489278227,
      "loss": 0.9598,
      "step": 12290
    },
    {
      "epoch": 1.4184356252884172,
      "grad_norm": 0.5036474466323853,
      "learning_rate": 0.0001266234639095767,
      "loss": 0.9285,
      "step": 12295
    },
    {
      "epoch": 1.4190124596215967,
      "grad_norm": 0.371463418006897,
      "learning_rate": 0.00012655876093244878,
      "loss": 0.9574,
      "step": 12300
    },
    {
      "epoch": 1.4195892939547763,
      "grad_norm": 0.3933194577693939,
      "learning_rate": 0.00012649404599054736,
      "loss": 0.972,
      "step": 12305
    },
    {
      "epoch": 1.4201661282879556,
      "grad_norm": 0.3995529115200043,
      "learning_rate": 0.00012642931911302662,
      "loss": 0.9281,
      "step": 12310
    },
    {
      "epoch": 1.4207429626211352,
      "grad_norm": 0.37661126255989075,
      "learning_rate": 0.00012636458032904617,
      "loss": 0.9105,
      "step": 12315
    },
    {
      "epoch": 1.4213197969543148,
      "grad_norm": 0.4267100393772125,
      "learning_rate": 0.00012629982966777095,
      "loss": 0.9245,
      "step": 12320
    },
    {
      "epoch": 1.421896631287494,
      "grad_norm": 0.4244813621044159,
      "learning_rate": 0.00012623506715837122,
      "loss": 0.929,
      "step": 12325
    },
    {
      "epoch": 1.4224734656206737,
      "grad_norm": 0.38589537143707275,
      "learning_rate": 0.00012617029283002265,
      "loss": 0.8674,
      "step": 12330
    },
    {
      "epoch": 1.4230502999538532,
      "grad_norm": 0.3990231156349182,
      "learning_rate": 0.0001261055067119062,
      "loss": 0.9488,
      "step": 12335
    },
    {
      "epoch": 1.4236271342870328,
      "grad_norm": 0.39353740215301514,
      "learning_rate": 0.00012604070883320817,
      "loss": 0.9148,
      "step": 12340
    },
    {
      "epoch": 1.4242039686202124,
      "grad_norm": 0.4017605781555176,
      "learning_rate": 0.00012597589922312008,
      "loss": 0.9591,
      "step": 12345
    },
    {
      "epoch": 1.4247808029533917,
      "grad_norm": 0.412386417388916,
      "learning_rate": 0.0001259110779108388,
      "loss": 0.9367,
      "step": 12350
    },
    {
      "epoch": 1.4253576372865713,
      "grad_norm": 0.3855336308479309,
      "learning_rate": 0.0001258462449255665,
      "loss": 0.9365,
      "step": 12355
    },
    {
      "epoch": 1.4259344716197508,
      "grad_norm": 0.4509463310241699,
      "learning_rate": 0.00012578140029651053,
      "loss": 0.8986,
      "step": 12360
    },
    {
      "epoch": 1.4265113059529302,
      "grad_norm": 0.39673352241516113,
      "learning_rate": 0.0001257165440528835,
      "loss": 0.8817,
      "step": 12365
    },
    {
      "epoch": 1.4270881402861098,
      "grad_norm": 0.4036000072956085,
      "learning_rate": 0.0001256516762239033,
      "loss": 0.9114,
      "step": 12370
    },
    {
      "epoch": 1.4276649746192893,
      "grad_norm": 0.383042573928833,
      "learning_rate": 0.00012558679683879301,
      "loss": 0.9879,
      "step": 12375
    },
    {
      "epoch": 1.4282418089524689,
      "grad_norm": 0.36675453186035156,
      "learning_rate": 0.00012552190592678096,
      "loss": 0.9855,
      "step": 12380
    },
    {
      "epoch": 1.4288186432856484,
      "grad_norm": 0.40489956736564636,
      "learning_rate": 0.00012545700351710055,
      "loss": 0.8809,
      "step": 12385
    },
    {
      "epoch": 1.4293954776188278,
      "grad_norm": 0.4169880449771881,
      "learning_rate": 0.0001253920896389905,
      "loss": 0.9405,
      "step": 12390
    },
    {
      "epoch": 1.4299723119520074,
      "grad_norm": 0.4987186789512634,
      "learning_rate": 0.00012532716432169463,
      "loss": 0.8999,
      "step": 12395
    },
    {
      "epoch": 1.430549146285187,
      "grad_norm": 0.37798649072647095,
      "learning_rate": 0.0001252622275944619,
      "loss": 0.9265,
      "step": 12400
    },
    {
      "epoch": 1.4311259806183663,
      "grad_norm": 0.3762564957141876,
      "learning_rate": 0.00012519727948654642,
      "loss": 0.9024,
      "step": 12405
    },
    {
      "epoch": 1.4317028149515458,
      "grad_norm": 0.39836904406547546,
      "learning_rate": 0.00012513232002720753,
      "loss": 0.9163,
      "step": 12410
    },
    {
      "epoch": 1.4322796492847254,
      "grad_norm": 0.3596324920654297,
      "learning_rate": 0.00012506734924570947,
      "loss": 0.9298,
      "step": 12415
    },
    {
      "epoch": 1.432856483617905,
      "grad_norm": 0.452575147151947,
      "learning_rate": 0.00012500236717132178,
      "loss": 0.9485,
      "step": 12420
    },
    {
      "epoch": 1.4334333179510845,
      "grad_norm": 0.41744694113731384,
      "learning_rate": 0.000124937373833319,
      "loss": 0.9301,
      "step": 12425
    },
    {
      "epoch": 1.434010152284264,
      "grad_norm": 0.4188467264175415,
      "learning_rate": 0.00012487236926098075,
      "loss": 0.9236,
      "step": 12430
    },
    {
      "epoch": 1.4345869866174434,
      "grad_norm": 0.4042269289493561,
      "learning_rate": 0.0001248073534835917,
      "loss": 0.9311,
      "step": 12435
    },
    {
      "epoch": 1.435163820950623,
      "grad_norm": 0.47039151191711426,
      "learning_rate": 0.00012474232653044163,
      "loss": 0.8941,
      "step": 12440
    },
    {
      "epoch": 1.4357406552838026,
      "grad_norm": 0.37996938824653625,
      "learning_rate": 0.00012467728843082527,
      "loss": 0.9221,
      "step": 12445
    },
    {
      "epoch": 1.436317489616982,
      "grad_norm": 0.4154973328113556,
      "learning_rate": 0.0001246122392140424,
      "loss": 0.9594,
      "step": 12450
    },
    {
      "epoch": 1.4368943239501615,
      "grad_norm": 0.4697798192501068,
      "learning_rate": 0.00012454717890939787,
      "loss": 0.9577,
      "step": 12455
    },
    {
      "epoch": 1.437471158283341,
      "grad_norm": 0.3927803039550781,
      "learning_rate": 0.0001244821075462014,
      "loss": 0.9436,
      "step": 12460
    },
    {
      "epoch": 1.4380479926165206,
      "grad_norm": 0.4236595034599304,
      "learning_rate": 0.00012441702515376786,
      "loss": 0.9482,
      "step": 12465
    },
    {
      "epoch": 1.4386248269497002,
      "grad_norm": 0.4353571832180023,
      "learning_rate": 0.00012435193176141689,
      "loss": 0.9142,
      "step": 12470
    },
    {
      "epoch": 1.4392016612828795,
      "grad_norm": 0.4359499216079712,
      "learning_rate": 0.00012428682739847329,
      "loss": 0.8943,
      "step": 12475
    },
    {
      "epoch": 1.439778495616059,
      "grad_norm": 0.424065500497818,
      "learning_rate": 0.0001242217120942666,
      "loss": 0.9089,
      "step": 12480
    },
    {
      "epoch": 1.4403553299492386,
      "grad_norm": 0.4124845564365387,
      "learning_rate": 0.00012415658587813146,
      "loss": 0.9146,
      "step": 12485
    },
    {
      "epoch": 1.440932164282418,
      "grad_norm": 0.34853968024253845,
      "learning_rate": 0.00012409144877940738,
      "loss": 0.8773,
      "step": 12490
    },
    {
      "epoch": 1.4415089986155976,
      "grad_norm": 0.38272228837013245,
      "learning_rate": 0.00012402630082743868,
      "loss": 1.0033,
      "step": 12495
    },
    {
      "epoch": 1.4420858329487771,
      "grad_norm": 0.4090026319026947,
      "learning_rate": 0.0001239611420515747,
      "loss": 0.96,
      "step": 12500
    },
    {
      "epoch": 1.4426626672819567,
      "grad_norm": 0.4200041890144348,
      "learning_rate": 0.00012389597248116952,
      "loss": 0.9338,
      "step": 12505
    },
    {
      "epoch": 1.4432395016151363,
      "grad_norm": 0.4104383587837219,
      "learning_rate": 0.00012383079214558227,
      "loss": 0.931,
      "step": 12510
    },
    {
      "epoch": 1.4438163359483156,
      "grad_norm": 0.45613524317741394,
      "learning_rate": 0.0001237656010741767,
      "loss": 0.9265,
      "step": 12515
    },
    {
      "epoch": 1.4443931702814952,
      "grad_norm": 0.41018715500831604,
      "learning_rate": 0.0001237003992963216,
      "loss": 0.9479,
      "step": 12520
    },
    {
      "epoch": 1.4449700046146747,
      "grad_norm": 0.41325533390045166,
      "learning_rate": 0.00012363518684139043,
      "loss": 0.8916,
      "step": 12525
    },
    {
      "epoch": 1.445546838947854,
      "grad_norm": 0.37731215357780457,
      "learning_rate": 0.0001235699637387616,
      "loss": 0.9599,
      "step": 12530
    },
    {
      "epoch": 1.4461236732810336,
      "grad_norm": 0.38812848925590515,
      "learning_rate": 0.0001235047300178182,
      "loss": 0.9288,
      "step": 12535
    },
    {
      "epoch": 1.4467005076142132,
      "grad_norm": 0.46964332461357117,
      "learning_rate": 0.00012343948570794815,
      "loss": 0.9503,
      "step": 12540
    },
    {
      "epoch": 1.4472773419473928,
      "grad_norm": 0.39741063117980957,
      "learning_rate": 0.00012337423083854415,
      "loss": 0.9676,
      "step": 12545
    },
    {
      "epoch": 1.4478541762805723,
      "grad_norm": 0.40735316276550293,
      "learning_rate": 0.00012330896543900362,
      "loss": 0.9923,
      "step": 12550
    },
    {
      "epoch": 1.4484310106137517,
      "grad_norm": 0.3687584698200226,
      "learning_rate": 0.00012324368953872883,
      "loss": 0.9076,
      "step": 12555
    },
    {
      "epoch": 1.4490078449469312,
      "grad_norm": 0.39105790853500366,
      "learning_rate": 0.0001231784031671266,
      "loss": 0.9131,
      "step": 12560
    },
    {
      "epoch": 1.4495846792801108,
      "grad_norm": 0.4149412214756012,
      "learning_rate": 0.00012311310635360856,
      "loss": 0.9179,
      "step": 12565
    },
    {
      "epoch": 1.4501615136132902,
      "grad_norm": 0.374827116727829,
      "learning_rate": 0.00012304779912759118,
      "loss": 0.9475,
      "step": 12570
    },
    {
      "epoch": 1.4507383479464697,
      "grad_norm": 0.44704556465148926,
      "learning_rate": 0.00012298248151849537,
      "loss": 0.9788,
      "step": 12575
    },
    {
      "epoch": 1.4513151822796493,
      "grad_norm": 0.39649584889411926,
      "learning_rate": 0.00012291715355574692,
      "loss": 0.9261,
      "step": 12580
    },
    {
      "epoch": 1.4518920166128289,
      "grad_norm": 0.3798285722732544,
      "learning_rate": 0.00012285181526877615,
      "loss": 0.9477,
      "step": 12585
    },
    {
      "epoch": 1.4524688509460084,
      "grad_norm": 0.39250555634498596,
      "learning_rate": 0.00012278646668701813,
      "loss": 0.9529,
      "step": 12590
    },
    {
      "epoch": 1.4530456852791878,
      "grad_norm": 0.3975488543510437,
      "learning_rate": 0.00012272110783991243,
      "loss": 0.9347,
      "step": 12595
    },
    {
      "epoch": 1.4536225196123673,
      "grad_norm": 0.3645585775375366,
      "learning_rate": 0.00012265573875690344,
      "loss": 0.9507,
      "step": 12600
    },
    {
      "epoch": 1.454199353945547,
      "grad_norm": 0.37658560276031494,
      "learning_rate": 0.00012259035946744003,
      "loss": 0.9392,
      "step": 12605
    },
    {
      "epoch": 1.4547761882787262,
      "grad_norm": 0.44661521911621094,
      "learning_rate": 0.0001225249700009757,
      "loss": 0.9495,
      "step": 12610
    },
    {
      "epoch": 1.4553530226119058,
      "grad_norm": 0.4087098240852356,
      "learning_rate": 0.0001224595703869685,
      "loss": 0.9337,
      "step": 12615
    },
    {
      "epoch": 1.4559298569450854,
      "grad_norm": 0.4374881088733673,
      "learning_rate": 0.00012239416065488112,
      "loss": 0.9702,
      "step": 12620
    },
    {
      "epoch": 1.456506691278265,
      "grad_norm": 0.42702898383140564,
      "learning_rate": 0.00012232874083418078,
      "loss": 0.9305,
      "step": 12625
    },
    {
      "epoch": 1.4570835256114445,
      "grad_norm": 0.399262011051178,
      "learning_rate": 0.0001222633109543392,
      "loss": 0.9267,
      "step": 12630
    },
    {
      "epoch": 1.4576603599446238,
      "grad_norm": 0.38637447357177734,
      "learning_rate": 0.00012219787104483264,
      "loss": 0.9453,
      "step": 12635
    },
    {
      "epoch": 1.4582371942778034,
      "grad_norm": 0.40154480934143066,
      "learning_rate": 0.00012213242113514198,
      "loss": 0.9293,
      "step": 12640
    },
    {
      "epoch": 1.458814028610983,
      "grad_norm": 0.4145256280899048,
      "learning_rate": 0.00012206696125475249,
      "loss": 0.9248,
      "step": 12645
    },
    {
      "epoch": 1.4593908629441623,
      "grad_norm": 0.39756283164024353,
      "learning_rate": 0.00012200149143315403,
      "loss": 0.951,
      "step": 12650
    },
    {
      "epoch": 1.4599676972773419,
      "grad_norm": 0.42253583669662476,
      "learning_rate": 0.0001219360116998408,
      "loss": 1.0005,
      "step": 12655
    },
    {
      "epoch": 1.4605445316105214,
      "grad_norm": 0.4099411964416504,
      "learning_rate": 0.00012187052208431158,
      "loss": 0.9329,
      "step": 12660
    },
    {
      "epoch": 1.461121365943701,
      "grad_norm": 0.4196203351020813,
      "learning_rate": 0.00012180502261606958,
      "loss": 0.9457,
      "step": 12665
    },
    {
      "epoch": 1.4616982002768806,
      "grad_norm": 0.372347891330719,
      "learning_rate": 0.00012173951332462245,
      "loss": 0.9179,
      "step": 12670
    },
    {
      "epoch": 1.46227503461006,
      "grad_norm": 0.4399539530277252,
      "learning_rate": 0.0001216739942394822,
      "loss": 0.9634,
      "step": 12675
    },
    {
      "epoch": 1.4628518689432395,
      "grad_norm": 0.37646159529685974,
      "learning_rate": 0.00012160846539016535,
      "loss": 0.9668,
      "step": 12680
    },
    {
      "epoch": 1.463428703276419,
      "grad_norm": 0.4296335279941559,
      "learning_rate": 0.0001215429268061928,
      "loss": 0.9561,
      "step": 12685
    },
    {
      "epoch": 1.4640055376095984,
      "grad_norm": 0.40583279728889465,
      "learning_rate": 0.00012147737851708973,
      "loss": 0.9384,
      "step": 12690
    },
    {
      "epoch": 1.464582371942778,
      "grad_norm": 0.399314284324646,
      "learning_rate": 0.00012141182055238585,
      "loss": 0.9043,
      "step": 12695
    },
    {
      "epoch": 1.4651592062759575,
      "grad_norm": 0.44123348593711853,
      "learning_rate": 0.00012134625294161508,
      "loss": 0.9361,
      "step": 12700
    },
    {
      "epoch": 1.465736040609137,
      "grad_norm": 0.411359578371048,
      "learning_rate": 0.00012128067571431583,
      "loss": 0.9424,
      "step": 12705
    },
    {
      "epoch": 1.4663128749423167,
      "grad_norm": 0.4213956296443939,
      "learning_rate": 0.0001212150889000307,
      "loss": 0.9272,
      "step": 12710
    },
    {
      "epoch": 1.466889709275496,
      "grad_norm": 0.40593644976615906,
      "learning_rate": 0.00012114949252830674,
      "loss": 0.9378,
      "step": 12715
    },
    {
      "epoch": 1.4674665436086756,
      "grad_norm": 0.37980008125305176,
      "learning_rate": 0.00012108388662869519,
      "loss": 0.9346,
      "step": 12720
    },
    {
      "epoch": 1.4680433779418551,
      "grad_norm": 0.42362499237060547,
      "learning_rate": 0.00012101827123075167,
      "loss": 0.968,
      "step": 12725
    },
    {
      "epoch": 1.4686202122750345,
      "grad_norm": 0.4329698085784912,
      "learning_rate": 0.00012095264636403603,
      "loss": 0.954,
      "step": 12730
    },
    {
      "epoch": 1.469197046608214,
      "grad_norm": 0.4111020267009735,
      "learning_rate": 0.0001208870120581124,
      "loss": 0.9294,
      "step": 12735
    },
    {
      "epoch": 1.4697738809413936,
      "grad_norm": 0.4156142473220825,
      "learning_rate": 0.00012082136834254918,
      "loss": 0.909,
      "step": 12740
    },
    {
      "epoch": 1.4703507152745732,
      "grad_norm": 0.4147590696811676,
      "learning_rate": 0.00012075571524691895,
      "loss": 0.9334,
      "step": 12745
    },
    {
      "epoch": 1.4709275496077527,
      "grad_norm": 0.3994022607803345,
      "learning_rate": 0.00012069005280079862,
      "loss": 0.9461,
      "step": 12750
    },
    {
      "epoch": 1.471504383940932,
      "grad_norm": 0.36893755197525024,
      "learning_rate": 0.00012062438103376918,
      "loss": 0.9126,
      "step": 12755
    },
    {
      "epoch": 1.4720812182741116,
      "grad_norm": 0.38947948813438416,
      "learning_rate": 0.00012055869997541593,
      "loss": 0.8755,
      "step": 12760
    },
    {
      "epoch": 1.4726580526072912,
      "grad_norm": 0.36021241545677185,
      "learning_rate": 0.00012049300965532832,
      "loss": 0.9173,
      "step": 12765
    },
    {
      "epoch": 1.4732348869404708,
      "grad_norm": 0.39924636483192444,
      "learning_rate": 0.00012042731010309995,
      "loss": 0.9659,
      "step": 12770
    },
    {
      "epoch": 1.4738117212736501,
      "grad_norm": 0.3978961110115051,
      "learning_rate": 0.00012036160134832862,
      "loss": 0.9637,
      "step": 12775
    },
    {
      "epoch": 1.4743885556068297,
      "grad_norm": 0.4412531852722168,
      "learning_rate": 0.00012029588342061621,
      "loss": 0.9188,
      "step": 12780
    },
    {
      "epoch": 1.4749653899400093,
      "grad_norm": 0.39058932662010193,
      "learning_rate": 0.00012023015634956882,
      "loss": 0.9557,
      "step": 12785
    },
    {
      "epoch": 1.4755422242731888,
      "grad_norm": 0.3821706473827362,
      "learning_rate": 0.00012016442016479656,
      "loss": 0.936,
      "step": 12790
    },
    {
      "epoch": 1.4761190586063684,
      "grad_norm": 0.3862445652484894,
      "learning_rate": 0.00012009867489591377,
      "loss": 0.8954,
      "step": 12795
    },
    {
      "epoch": 1.4766958929395477,
      "grad_norm": 0.3745687007904053,
      "learning_rate": 0.00012003292057253883,
      "loss": 0.9194,
      "step": 12800
    },
    {
      "epoch": 1.4772727272727273,
      "grad_norm": 0.42594972252845764,
      "learning_rate": 0.00011996715722429413,
      "loss": 0.972,
      "step": 12805
    },
    {
      "epoch": 1.4778495616059069,
      "grad_norm": 0.40421348810195923,
      "learning_rate": 0.00011990138488080622,
      "loss": 0.9433,
      "step": 12810
    },
    {
      "epoch": 1.4784263959390862,
      "grad_norm": 0.3907431662082672,
      "learning_rate": 0.00011983560357170568,
      "loss": 0.9475,
      "step": 12815
    },
    {
      "epoch": 1.4790032302722658,
      "grad_norm": 0.42429277300834656,
      "learning_rate": 0.00011976981332662711,
      "loss": 0.9317,
      "step": 12820
    },
    {
      "epoch": 1.4795800646054453,
      "grad_norm": 0.5049063563346863,
      "learning_rate": 0.00011970401417520913,
      "loss": 0.9408,
      "step": 12825
    },
    {
      "epoch": 1.480156898938625,
      "grad_norm": 0.43257758021354675,
      "learning_rate": 0.0001196382061470944,
      "loss": 0.961,
      "step": 12830
    },
    {
      "epoch": 1.4807337332718045,
      "grad_norm": 0.3847612738609314,
      "learning_rate": 0.00011957238927192955,
      "loss": 0.895,
      "step": 12835
    },
    {
      "epoch": 1.4813105676049838,
      "grad_norm": 0.39073553681373596,
      "learning_rate": 0.00011950656357936525,
      "loss": 0.9157,
      "step": 12840
    },
    {
      "epoch": 1.4818874019381634,
      "grad_norm": 0.4299897849559784,
      "learning_rate": 0.00011944072909905604,
      "loss": 0.9503,
      "step": 12845
    },
    {
      "epoch": 1.482464236271343,
      "grad_norm": 0.41576239466667175,
      "learning_rate": 0.00011937488586066054,
      "loss": 0.9498,
      "step": 12850
    },
    {
      "epoch": 1.4830410706045223,
      "grad_norm": 0.4170142412185669,
      "learning_rate": 0.00011930903389384123,
      "loss": 0.9183,
      "step": 12855
    },
    {
      "epoch": 1.4836179049377018,
      "grad_norm": 0.39141911268234253,
      "learning_rate": 0.00011924317322826452,
      "loss": 0.898,
      "step": 12860
    },
    {
      "epoch": 1.4841947392708814,
      "grad_norm": 0.38259202241897583,
      "learning_rate": 0.00011917730389360085,
      "loss": 0.8972,
      "step": 12865
    },
    {
      "epoch": 1.484771573604061,
      "grad_norm": 0.3913986086845398,
      "learning_rate": 0.00011911142591952437,
      "loss": 0.9063,
      "step": 12870
    },
    {
      "epoch": 1.4853484079372405,
      "grad_norm": 0.3920874297618866,
      "learning_rate": 0.00011904553933571336,
      "loss": 0.907,
      "step": 12875
    },
    {
      "epoch": 1.4859252422704199,
      "grad_norm": 0.40604132413864136,
      "learning_rate": 0.00011897964417184975,
      "loss": 0.8798,
      "step": 12880
    },
    {
      "epoch": 1.4865020766035995,
      "grad_norm": 0.40422797203063965,
      "learning_rate": 0.0001189137404576195,
      "loss": 0.8955,
      "step": 12885
    },
    {
      "epoch": 1.487078910936779,
      "grad_norm": 0.41283461451530457,
      "learning_rate": 0.00011884782822271235,
      "loss": 0.9368,
      "step": 12890
    },
    {
      "epoch": 1.4876557452699584,
      "grad_norm": 0.4172614812850952,
      "learning_rate": 0.00011878190749682187,
      "loss": 0.9014,
      "step": 12895
    },
    {
      "epoch": 1.488232579603138,
      "grad_norm": 0.38606515526771545,
      "learning_rate": 0.00011871597830964551,
      "loss": 0.9087,
      "step": 12900
    },
    {
      "epoch": 1.4888094139363175,
      "grad_norm": 0.43429484963417053,
      "learning_rate": 0.00011865004069088446,
      "loss": 0.9084,
      "step": 12905
    },
    {
      "epoch": 1.489386248269497,
      "grad_norm": 0.41635558009147644,
      "learning_rate": 0.00011858409467024376,
      "loss": 0.9449,
      "step": 12910
    },
    {
      "epoch": 1.4899630826026766,
      "grad_norm": 0.40536168217658997,
      "learning_rate": 0.00011851814027743223,
      "loss": 0.9013,
      "step": 12915
    },
    {
      "epoch": 1.490539916935856,
      "grad_norm": 0.3647763729095459,
      "learning_rate": 0.00011845217754216245,
      "loss": 0.9269,
      "step": 12920
    },
    {
      "epoch": 1.4911167512690355,
      "grad_norm": 0.3644658923149109,
      "learning_rate": 0.00011838620649415076,
      "loss": 0.9173,
      "step": 12925
    },
    {
      "epoch": 1.491693585602215,
      "grad_norm": 0.39510855078697205,
      "learning_rate": 0.00011832022716311722,
      "loss": 0.9111,
      "step": 12930
    },
    {
      "epoch": 1.4922704199353944,
      "grad_norm": 0.40662682056427,
      "learning_rate": 0.0001182542395787857,
      "loss": 0.9532,
      "step": 12935
    },
    {
      "epoch": 1.492847254268574,
      "grad_norm": 0.38650259375572205,
      "learning_rate": 0.00011818824377088366,
      "loss": 0.9268,
      "step": 12940
    },
    {
      "epoch": 1.4934240886017536,
      "grad_norm": 0.3982434868812561,
      "learning_rate": 0.00011812223976914243,
      "loss": 0.9307,
      "step": 12945
    },
    {
      "epoch": 1.4940009229349331,
      "grad_norm": 0.4159766733646393,
      "learning_rate": 0.00011805622760329687,
      "loss": 0.9542,
      "step": 12950
    },
    {
      "epoch": 1.4945777572681127,
      "grad_norm": 0.3663424551486969,
      "learning_rate": 0.00011799020730308563,
      "loss": 0.8649,
      "step": 12955
    },
    {
      "epoch": 1.495154591601292,
      "grad_norm": 0.38212850689888,
      "learning_rate": 0.00011792417889825094,
      "loss": 0.8616,
      "step": 12960
    },
    {
      "epoch": 1.4957314259344716,
      "grad_norm": 0.42534342408180237,
      "learning_rate": 0.00011785814241853876,
      "loss": 0.9143,
      "step": 12965
    },
    {
      "epoch": 1.4963082602676512,
      "grad_norm": 0.35233616828918457,
      "learning_rate": 0.00011779209789369867,
      "loss": 0.9374,
      "step": 12970
    },
    {
      "epoch": 1.4968850946008305,
      "grad_norm": 0.3654002249240875,
      "learning_rate": 0.00011772604535348382,
      "loss": 0.89,
      "step": 12975
    },
    {
      "epoch": 1.49746192893401,
      "grad_norm": 0.3802456259727478,
      "learning_rate": 0.00011765998482765104,
      "loss": 0.9309,
      "step": 12980
    },
    {
      "epoch": 1.4980387632671897,
      "grad_norm": 0.42161834239959717,
      "learning_rate": 0.00011759391634596067,
      "loss": 0.9935,
      "step": 12985
    },
    {
      "epoch": 1.4986155976003692,
      "grad_norm": 0.3783545196056366,
      "learning_rate": 0.00011752783993817675,
      "loss": 0.9277,
      "step": 12990
    },
    {
      "epoch": 1.4991924319335488,
      "grad_norm": 0.38438281416893005,
      "learning_rate": 0.00011746175563406681,
      "loss": 0.9102,
      "step": 12995
    },
    {
      "epoch": 1.4997692662667281,
      "grad_norm": 0.39648398756980896,
      "learning_rate": 0.00011739566346340194,
      "loss": 0.9303,
      "step": 13000
    },
    {
      "epoch": 1.5003461005999077,
      "grad_norm": 0.36507081985473633,
      "learning_rate": 0.00011732956345595682,
      "loss": 0.9049,
      "step": 13005
    },
    {
      "epoch": 1.5009229349330873,
      "grad_norm": 0.3909667730331421,
      "learning_rate": 0.0001172634556415096,
      "loss": 0.9377,
      "step": 13010
    },
    {
      "epoch": 1.5014997692662666,
      "grad_norm": 0.40388140082359314,
      "learning_rate": 0.00011719734004984201,
      "loss": 0.9625,
      "step": 13015
    },
    {
      "epoch": 1.5020766035994462,
      "grad_norm": 0.41368624567985535,
      "learning_rate": 0.00011713121671073924,
      "loss": 0.903,
      "step": 13020
    },
    {
      "epoch": 1.5026534379326257,
      "grad_norm": 0.3890875577926636,
      "learning_rate": 0.00011706508565399,
      "loss": 0.9366,
      "step": 13025
    },
    {
      "epoch": 1.5032302722658053,
      "grad_norm": 0.3867320120334625,
      "learning_rate": 0.0001169989469093864,
      "loss": 0.9226,
      "step": 13030
    },
    {
      "epoch": 1.5038071065989849,
      "grad_norm": 0.4172716438770294,
      "learning_rate": 0.00011693280050672417,
      "loss": 0.9627,
      "step": 13035
    },
    {
      "epoch": 1.5043839409321644,
      "grad_norm": 0.38736414909362793,
      "learning_rate": 0.0001168666464758023,
      "loss": 0.9435,
      "step": 13040
    },
    {
      "epoch": 1.5049607752653438,
      "grad_norm": 0.37807396054267883,
      "learning_rate": 0.00011680048484642334,
      "loss": 0.9486,
      "step": 13045
    },
    {
      "epoch": 1.5055376095985233,
      "grad_norm": 0.3694530427455902,
      "learning_rate": 0.00011673431564839327,
      "loss": 0.9804,
      "step": 13050
    },
    {
      "epoch": 1.5061144439317027,
      "grad_norm": 0.41525396704673767,
      "learning_rate": 0.0001166681389115214,
      "loss": 0.9222,
      "step": 13055
    },
    {
      "epoch": 1.5066912782648822,
      "grad_norm": 0.4183506369590759,
      "learning_rate": 0.00011660195466562051,
      "loss": 0.9199,
      "step": 13060
    },
    {
      "epoch": 1.5072681125980618,
      "grad_norm": 0.3661426901817322,
      "learning_rate": 0.0001165357629405067,
      "loss": 0.9377,
      "step": 13065
    },
    {
      "epoch": 1.5078449469312414,
      "grad_norm": 0.3974156379699707,
      "learning_rate": 0.00011646956376599951,
      "loss": 0.8701,
      "step": 13070
    },
    {
      "epoch": 1.508421781264421,
      "grad_norm": 0.3940170109272003,
      "learning_rate": 0.00011640335717192172,
      "loss": 0.9155,
      "step": 13075
    },
    {
      "epoch": 1.5089986155976005,
      "grad_norm": 0.40755462646484375,
      "learning_rate": 0.00011633714318809962,
      "loss": 0.9542,
      "step": 13080
    },
    {
      "epoch": 1.5095754499307799,
      "grad_norm": 0.4307992160320282,
      "learning_rate": 0.0001162709218443627,
      "loss": 0.9364,
      "step": 13085
    },
    {
      "epoch": 1.5101522842639594,
      "grad_norm": 0.4064287841320038,
      "learning_rate": 0.0001162046931705438,
      "loss": 0.9736,
      "step": 13090
    },
    {
      "epoch": 1.5107291185971388,
      "grad_norm": 0.41723504662513733,
      "learning_rate": 0.00011613845719647909,
      "loss": 0.9962,
      "step": 13095
    },
    {
      "epoch": 1.5113059529303183,
      "grad_norm": 0.37900570034980774,
      "learning_rate": 0.00011607221395200796,
      "loss": 0.907,
      "step": 13100
    },
    {
      "epoch": 1.511882787263498,
      "grad_norm": 0.43227189779281616,
      "learning_rate": 0.00011600596346697317,
      "loss": 0.9348,
      "step": 13105
    },
    {
      "epoch": 1.5124596215966775,
      "grad_norm": 0.40124958753585815,
      "learning_rate": 0.00011593970577122067,
      "loss": 0.9054,
      "step": 13110
    },
    {
      "epoch": 1.513036455929857,
      "grad_norm": 0.38149794936180115,
      "learning_rate": 0.00011587344089459966,
      "loss": 0.9172,
      "step": 13115
    },
    {
      "epoch": 1.5136132902630366,
      "grad_norm": 0.49807044863700867,
      "learning_rate": 0.00011580716886696263,
      "loss": 0.9209,
      "step": 13120
    },
    {
      "epoch": 1.514190124596216,
      "grad_norm": 0.4255905747413635,
      "learning_rate": 0.00011574088971816523,
      "loss": 0.9201,
      "step": 13125
    },
    {
      "epoch": 1.5147669589293955,
      "grad_norm": 0.34574073553085327,
      "learning_rate": 0.00011567460347806638,
      "loss": 0.9229,
      "step": 13130
    },
    {
      "epoch": 1.5153437932625748,
      "grad_norm": 0.43729299306869507,
      "learning_rate": 0.00011560831017652813,
      "loss": 0.9621,
      "step": 13135
    },
    {
      "epoch": 1.5159206275957544,
      "grad_norm": 0.40615105628967285,
      "learning_rate": 0.00011554200984341577,
      "loss": 0.9251,
      "step": 13140
    },
    {
      "epoch": 1.516497461928934,
      "grad_norm": 0.37189602851867676,
      "learning_rate": 0.0001154757025085977,
      "loss": 0.9241,
      "step": 13145
    },
    {
      "epoch": 1.5170742962621135,
      "grad_norm": 0.37347811460494995,
      "learning_rate": 0.00011540938820194553,
      "loss": 0.9336,
      "step": 13150
    },
    {
      "epoch": 1.517651130595293,
      "grad_norm": 0.4073767066001892,
      "learning_rate": 0.00011534306695333395,
      "loss": 0.9168,
      "step": 13155
    },
    {
      "epoch": 1.5182279649284727,
      "grad_norm": 0.42767712473869324,
      "learning_rate": 0.0001152767387926408,
      "loss": 0.8952,
      "step": 13160
    },
    {
      "epoch": 1.518804799261652,
      "grad_norm": 0.4040094316005707,
      "learning_rate": 0.00011521040374974714,
      "loss": 0.9037,
      "step": 13165
    },
    {
      "epoch": 1.5193816335948316,
      "grad_norm": 0.36918869614601135,
      "learning_rate": 0.00011514406185453692,
      "loss": 0.9313,
      "step": 13170
    },
    {
      "epoch": 1.519958467928011,
      "grad_norm": 0.36346372961997986,
      "learning_rate": 0.00011507771313689739,
      "loss": 0.8932,
      "step": 13175
    },
    {
      "epoch": 1.5205353022611905,
      "grad_norm": 0.36180615425109863,
      "learning_rate": 0.00011501135762671869,
      "loss": 0.9271,
      "step": 13180
    },
    {
      "epoch": 1.52111213659437,
      "grad_norm": 0.4362223446369171,
      "learning_rate": 0.00011494499535389418,
      "loss": 0.9458,
      "step": 13185
    },
    {
      "epoch": 1.5216889709275496,
      "grad_norm": 0.39161691069602966,
      "learning_rate": 0.00011487862634832014,
      "loss": 0.971,
      "step": 13190
    },
    {
      "epoch": 1.5222658052607292,
      "grad_norm": 0.40720003843307495,
      "learning_rate": 0.00011481225063989597,
      "loss": 0.9547,
      "step": 13195
    },
    {
      "epoch": 1.5228426395939088,
      "grad_norm": 0.3815619945526123,
      "learning_rate": 0.00011474586825852405,
      "loss": 0.9162,
      "step": 13200
    },
    {
      "epoch": 1.523419473927088,
      "grad_norm": 0.38358306884765625,
      "learning_rate": 0.00011467947923410973,
      "loss": 0.8822,
      "step": 13205
    },
    {
      "epoch": 1.5239963082602677,
      "grad_norm": 0.37997207045555115,
      "learning_rate": 0.00011461308359656149,
      "loss": 0.9794,
      "step": 13210
    },
    {
      "epoch": 1.524573142593447,
      "grad_norm": 0.3825436532497406,
      "learning_rate": 0.00011454668137579059,
      "loss": 0.9013,
      "step": 13215
    },
    {
      "epoch": 1.5251499769266266,
      "grad_norm": 0.38891690969467163,
      "learning_rate": 0.00011448027260171142,
      "loss": 0.9753,
      "step": 13220
    },
    {
      "epoch": 1.5257268112598061,
      "grad_norm": 0.3794013559818268,
      "learning_rate": 0.00011441385730424123,
      "loss": 0.8855,
      "step": 13225
    },
    {
      "epoch": 1.5263036455929857,
      "grad_norm": 0.42887309193611145,
      "learning_rate": 0.00011434743551330028,
      "loss": 0.896,
      "step": 13230
    },
    {
      "epoch": 1.5268804799261653,
      "grad_norm": 0.391722172498703,
      "learning_rate": 0.00011428100725881167,
      "loss": 0.9659,
      "step": 13235
    },
    {
      "epoch": 1.5274573142593448,
      "grad_norm": 0.4026354253292084,
      "learning_rate": 0.00011421457257070148,
      "loss": 0.9392,
      "step": 13240
    },
    {
      "epoch": 1.5280341485925242,
      "grad_norm": 0.4350236654281616,
      "learning_rate": 0.00011414813147889868,
      "loss": 0.9289,
      "step": 13245
    },
    {
      "epoch": 1.5286109829257037,
      "grad_norm": 0.4506089687347412,
      "learning_rate": 0.0001140816840133351,
      "loss": 0.9446,
      "step": 13250
    },
    {
      "epoch": 1.529187817258883,
      "grad_norm": 0.4440595805644989,
      "learning_rate": 0.00011401523020394546,
      "loss": 0.9314,
      "step": 13255
    },
    {
      "epoch": 1.5297646515920627,
      "grad_norm": 0.3944181501865387,
      "learning_rate": 0.00011394877008066731,
      "loss": 0.9291,
      "step": 13260
    },
    {
      "epoch": 1.5303414859252422,
      "grad_norm": 0.4218878746032715,
      "learning_rate": 0.00011388230367344111,
      "loss": 0.8998,
      "step": 13265
    },
    {
      "epoch": 1.5309183202584218,
      "grad_norm": 0.4063720405101776,
      "learning_rate": 0.00011381583101221003,
      "loss": 0.961,
      "step": 13270
    },
    {
      "epoch": 1.5314951545916013,
      "grad_norm": 0.38921263813972473,
      "learning_rate": 0.00011374935212692018,
      "loss": 0.9483,
      "step": 13275
    },
    {
      "epoch": 1.532071988924781,
      "grad_norm": 0.41635677218437195,
      "learning_rate": 0.00011368286704752042,
      "loss": 0.9531,
      "step": 13280
    },
    {
      "epoch": 1.5326488232579605,
      "grad_norm": 0.4070545732975006,
      "learning_rate": 0.00011361637580396243,
      "loss": 0.9497,
      "step": 13285
    },
    {
      "epoch": 1.5332256575911398,
      "grad_norm": 0.39755600690841675,
      "learning_rate": 0.00011354987842620061,
      "loss": 0.9894,
      "step": 13290
    },
    {
      "epoch": 1.5338024919243194,
      "grad_norm": 0.41410693526268005,
      "learning_rate": 0.00011348337494419219,
      "loss": 0.888,
      "step": 13295
    },
    {
      "epoch": 1.5343793262574987,
      "grad_norm": 0.4335348904132843,
      "learning_rate": 0.00011341686538789708,
      "loss": 0.922,
      "step": 13300
    },
    {
      "epoch": 1.5349561605906783,
      "grad_norm": 0.44754913449287415,
      "learning_rate": 0.000113350349787278,
      "loss": 0.9532,
      "step": 13305
    },
    {
      "epoch": 1.5355329949238579,
      "grad_norm": 0.4027005136013031,
      "learning_rate": 0.00011328382817230034,
      "loss": 0.9525,
      "step": 13310
    },
    {
      "epoch": 1.5361098292570374,
      "grad_norm": 0.364678293466568,
      "learning_rate": 0.00011321730057293225,
      "loss": 0.9321,
      "step": 13315
    },
    {
      "epoch": 1.536686663590217,
      "grad_norm": 0.41008302569389343,
      "learning_rate": 0.00011315076701914449,
      "loss": 0.927,
      "step": 13320
    },
    {
      "epoch": 1.5372634979233966,
      "grad_norm": 0.3624386489391327,
      "learning_rate": 0.00011308422754091057,
      "loss": 0.8554,
      "step": 13325
    },
    {
      "epoch": 1.537840332256576,
      "grad_norm": 0.3806852400302887,
      "learning_rate": 0.0001130176821682067,
      "loss": 0.8788,
      "step": 13330
    },
    {
      "epoch": 1.5384171665897555,
      "grad_norm": 0.37646257877349854,
      "learning_rate": 0.00011295113093101162,
      "loss": 0.9034,
      "step": 13335
    },
    {
      "epoch": 1.5389940009229348,
      "grad_norm": 0.41849663853645325,
      "learning_rate": 0.00011288457385930686,
      "loss": 0.9242,
      "step": 13340
    },
    {
      "epoch": 1.5395708352561144,
      "grad_norm": 0.36853688955307007,
      "learning_rate": 0.00011281801098307647,
      "loss": 0.8691,
      "step": 13345
    },
    {
      "epoch": 1.540147669589294,
      "grad_norm": 0.3795246481895447,
      "learning_rate": 0.0001127514423323072,
      "loss": 0.9911,
      "step": 13350
    },
    {
      "epoch": 1.5407245039224735,
      "grad_norm": 0.4382937550544739,
      "learning_rate": 0.00011268486793698832,
      "loss": 0.9828,
      "step": 13355
    },
    {
      "epoch": 1.541301338255653,
      "grad_norm": 0.3911653161048889,
      "learning_rate": 0.00011261828782711173,
      "loss": 0.9234,
      "step": 13360
    },
    {
      "epoch": 1.5418781725888326,
      "grad_norm": 0.37677720189094543,
      "learning_rate": 0.00011255170203267186,
      "loss": 0.9197,
      "step": 13365
    },
    {
      "epoch": 1.542455006922012,
      "grad_norm": 0.38520315289497375,
      "learning_rate": 0.00011248511058366586,
      "loss": 0.8861,
      "step": 13370
    },
    {
      "epoch": 1.5430318412551915,
      "grad_norm": 0.4070914387702942,
      "learning_rate": 0.00011241851351009318,
      "loss": 0.9964,
      "step": 13375
    },
    {
      "epoch": 1.543608675588371,
      "grad_norm": 0.411170095205307,
      "learning_rate": 0.000112351910841956,
      "loss": 0.9454,
      "step": 13380
    },
    {
      "epoch": 1.5441855099215505,
      "grad_norm": 0.3714311718940735,
      "learning_rate": 0.00011228530260925894,
      "loss": 0.8964,
      "step": 13385
    },
    {
      "epoch": 1.54476234425473,
      "grad_norm": 0.3878600597381592,
      "learning_rate": 0.00011221868884200912,
      "loss": 0.9266,
      "step": 13390
    },
    {
      "epoch": 1.5453391785879096,
      "grad_norm": 0.43673795461654663,
      "learning_rate": 0.00011215206957021618,
      "loss": 0.9352,
      "step": 13395
    },
    {
      "epoch": 1.5459160129210892,
      "grad_norm": 0.40795305371284485,
      "learning_rate": 0.00011208544482389223,
      "loss": 0.9074,
      "step": 13400
    },
    {
      "epoch": 1.5464928472542687,
      "grad_norm": 0.40639349818229675,
      "learning_rate": 0.00011201881463305188,
      "loss": 0.9287,
      "step": 13405
    },
    {
      "epoch": 1.547069681587448,
      "grad_norm": 0.36830776929855347,
      "learning_rate": 0.00011195217902771212,
      "loss": 0.9482,
      "step": 13410
    },
    {
      "epoch": 1.5476465159206276,
      "grad_norm": 0.38134291768074036,
      "learning_rate": 0.00011188553803789244,
      "loss": 0.9183,
      "step": 13415
    },
    {
      "epoch": 1.548223350253807,
      "grad_norm": 0.37543559074401855,
      "learning_rate": 0.00011181889169361473,
      "loss": 0.9702,
      "step": 13420
    },
    {
      "epoch": 1.5488001845869865,
      "grad_norm": 0.3869517743587494,
      "learning_rate": 0.0001117522400249033,
      "loss": 0.8916,
      "step": 13425
    },
    {
      "epoch": 1.549377018920166,
      "grad_norm": 0.3994382619857788,
      "learning_rate": 0.00011168558306178483,
      "loss": 0.9326,
      "step": 13430
    },
    {
      "epoch": 1.5499538532533457,
      "grad_norm": 0.4107452929019928,
      "learning_rate": 0.00011161892083428846,
      "loss": 0.8895,
      "step": 13435
    },
    {
      "epoch": 1.5505306875865252,
      "grad_norm": 0.4027215838432312,
      "learning_rate": 0.00011155225337244562,
      "loss": 0.9403,
      "step": 13440
    },
    {
      "epoch": 1.5511075219197048,
      "grad_norm": 0.42710939049720764,
      "learning_rate": 0.00011148558070629011,
      "loss": 0.9366,
      "step": 13445
    },
    {
      "epoch": 1.5516843562528841,
      "grad_norm": 0.3737245500087738,
      "learning_rate": 0.00011141890286585819,
      "loss": 0.9509,
      "step": 13450
    },
    {
      "epoch": 1.5522611905860637,
      "grad_norm": 0.38691097497940063,
      "learning_rate": 0.00011135221988118825,
      "loss": 0.9097,
      "step": 13455
    },
    {
      "epoch": 1.552838024919243,
      "grad_norm": 0.42986229062080383,
      "learning_rate": 0.00011128553178232117,
      "loss": 0.9406,
      "step": 13460
    },
    {
      "epoch": 1.5534148592524226,
      "grad_norm": 0.37808936834335327,
      "learning_rate": 0.00011121883859930002,
      "loss": 0.8939,
      "step": 13465
    },
    {
      "epoch": 1.5539916935856022,
      "grad_norm": 0.41650715470314026,
      "learning_rate": 0.00011115214036217026,
      "loss": 0.9611,
      "step": 13470
    },
    {
      "epoch": 1.5545685279187818,
      "grad_norm": 0.40282508730888367,
      "learning_rate": 0.00011108543710097954,
      "loss": 0.9266,
      "step": 13475
    },
    {
      "epoch": 1.5551453622519613,
      "grad_norm": 0.39564791321754456,
      "learning_rate": 0.00011101872884577784,
      "loss": 0.9209,
      "step": 13480
    },
    {
      "epoch": 1.5557221965851409,
      "grad_norm": 0.48436239361763,
      "learning_rate": 0.0001109520156266173,
      "loss": 0.9363,
      "step": 13485
    },
    {
      "epoch": 1.5562990309183202,
      "grad_norm": 0.4382035434246063,
      "learning_rate": 0.0001108852974735524,
      "loss": 0.9322,
      "step": 13490
    },
    {
      "epoch": 1.5568758652514998,
      "grad_norm": 0.34939664602279663,
      "learning_rate": 0.00011081857441663983,
      "loss": 0.9466,
      "step": 13495
    },
    {
      "epoch": 1.5574526995846791,
      "grad_norm": 0.37167319655418396,
      "learning_rate": 0.00011075184648593838,
      "loss": 0.9381,
      "step": 13500
    },
    {
      "epoch": 1.5580295339178587,
      "grad_norm": 0.39715757966041565,
      "learning_rate": 0.00011068511371150918,
      "loss": 0.9075,
      "step": 13505
    },
    {
      "epoch": 1.5586063682510383,
      "grad_norm": 0.38853919506073,
      "learning_rate": 0.00011061837612341542,
      "loss": 0.9422,
      "step": 13510
    },
    {
      "epoch": 1.5591832025842178,
      "grad_norm": 0.37417513132095337,
      "learning_rate": 0.00011055163375172257,
      "loss": 0.9533,
      "step": 13515
    },
    {
      "epoch": 1.5597600369173974,
      "grad_norm": 0.40994784235954285,
      "learning_rate": 0.00011048488662649814,
      "loss": 0.8954,
      "step": 13520
    },
    {
      "epoch": 1.560336871250577,
      "grad_norm": 0.38082724809646606,
      "learning_rate": 0.00011041813477781186,
      "loss": 0.9343,
      "step": 13525
    },
    {
      "epoch": 1.5609137055837563,
      "grad_norm": 0.41218113899230957,
      "learning_rate": 0.00011035137823573561,
      "loss": 0.9772,
      "step": 13530
    },
    {
      "epoch": 1.5614905399169359,
      "grad_norm": 0.43310537934303284,
      "learning_rate": 0.0001102846170303433,
      "loss": 0.958,
      "step": 13535
    },
    {
      "epoch": 1.5620673742501152,
      "grad_norm": 0.4469921588897705,
      "learning_rate": 0.00011021785119171098,
      "loss": 0.9022,
      "step": 13540
    },
    {
      "epoch": 1.5626442085832948,
      "grad_norm": 0.3993799388408661,
      "learning_rate": 0.0001101510807499168,
      "loss": 0.8787,
      "step": 13545
    },
    {
      "epoch": 1.5632210429164743,
      "grad_norm": 0.3866843283176422,
      "learning_rate": 0.00011008430573504099,
      "loss": 0.9063,
      "step": 13550
    },
    {
      "epoch": 1.563797877249654,
      "grad_norm": 0.37747833132743835,
      "learning_rate": 0.00011001752617716579,
      "loss": 0.9077,
      "step": 13555
    },
    {
      "epoch": 1.5643747115828335,
      "grad_norm": 0.3823941648006439,
      "learning_rate": 0.00010995074210637557,
      "loss": 0.963,
      "step": 13560
    },
    {
      "epoch": 1.564951545916013,
      "grad_norm": 0.3883691132068634,
      "learning_rate": 0.00010988395355275663,
      "loss": 0.9022,
      "step": 13565
    },
    {
      "epoch": 1.5655283802491924,
      "grad_norm": 0.4316290020942688,
      "learning_rate": 0.00010981716054639735,
      "loss": 0.9569,
      "step": 13570
    },
    {
      "epoch": 1.566105214582372,
      "grad_norm": 0.38802143931388855,
      "learning_rate": 0.00010975036311738818,
      "loss": 0.9098,
      "step": 13575
    },
    {
      "epoch": 1.5666820489155513,
      "grad_norm": 0.36971673369407654,
      "learning_rate": 0.00010968356129582139,
      "loss": 0.9798,
      "step": 13580
    },
    {
      "epoch": 1.5672588832487309,
      "grad_norm": 0.38266512751579285,
      "learning_rate": 0.00010961675511179142,
      "loss": 0.937,
      "step": 13585
    },
    {
      "epoch": 1.5678357175819104,
      "grad_norm": 0.3889155685901642,
      "learning_rate": 0.00010954994459539452,
      "loss": 0.9518,
      "step": 13590
    },
    {
      "epoch": 1.56841255191509,
      "grad_norm": 0.3645268380641937,
      "learning_rate": 0.00010948312977672899,
      "loss": 0.9222,
      "step": 13595
    },
    {
      "epoch": 1.5689893862482696,
      "grad_norm": 0.41634300351142883,
      "learning_rate": 0.00010941631068589502,
      "loss": 0.8753,
      "step": 13600
    },
    {
      "epoch": 1.5695662205814491,
      "grad_norm": 0.3983922600746155,
      "learning_rate": 0.00010934948735299475,
      "loss": 0.9589,
      "step": 13605
    },
    {
      "epoch": 1.5701430549146285,
      "grad_norm": 0.3916638493537903,
      "learning_rate": 0.00010928265980813223,
      "loss": 0.9154,
      "step": 13610
    },
    {
      "epoch": 1.570719889247808,
      "grad_norm": 0.41058340668678284,
      "learning_rate": 0.0001092158280814134,
      "loss": 0.9635,
      "step": 13615
    },
    {
      "epoch": 1.5712967235809874,
      "grad_norm": 0.4408254027366638,
      "learning_rate": 0.00010914899220294607,
      "loss": 0.8695,
      "step": 13620
    },
    {
      "epoch": 1.571873557914167,
      "grad_norm": 0.39679133892059326,
      "learning_rate": 0.00010908215220283993,
      "loss": 0.9355,
      "step": 13625
    },
    {
      "epoch": 1.5724503922473465,
      "grad_norm": 0.38094478845596313,
      "learning_rate": 0.00010901530811120655,
      "loss": 0.9367,
      "step": 13630
    },
    {
      "epoch": 1.573027226580526,
      "grad_norm": 0.4355633556842804,
      "learning_rate": 0.00010894845995815928,
      "loss": 0.9188,
      "step": 13635
    },
    {
      "epoch": 1.5736040609137056,
      "grad_norm": 0.4333462715148926,
      "learning_rate": 0.00010888160777381342,
      "loss": 0.9727,
      "step": 13640
    },
    {
      "epoch": 1.5741808952468852,
      "grad_norm": 0.4014212489128113,
      "learning_rate": 0.00010881475158828592,
      "loss": 0.9598,
      "step": 13645
    },
    {
      "epoch": 1.5747577295800648,
      "grad_norm": 0.3798421025276184,
      "learning_rate": 0.00010874789143169568,
      "loss": 0.9769,
      "step": 13650
    },
    {
      "epoch": 1.5753345639132441,
      "grad_norm": 0.42204970121383667,
      "learning_rate": 0.00010868102733416332,
      "loss": 0.9029,
      "step": 13655
    },
    {
      "epoch": 1.5759113982464237,
      "grad_norm": 0.42524123191833496,
      "learning_rate": 0.00010861415932581123,
      "loss": 0.9278,
      "step": 13660
    },
    {
      "epoch": 1.576488232579603,
      "grad_norm": 0.43853500485420227,
      "learning_rate": 0.00010854728743676362,
      "loss": 0.944,
      "step": 13665
    },
    {
      "epoch": 1.5770650669127826,
      "grad_norm": 0.41227036714553833,
      "learning_rate": 0.00010848041169714635,
      "loss": 0.8973,
      "step": 13670
    },
    {
      "epoch": 1.5776419012459622,
      "grad_norm": 0.37476420402526855,
      "learning_rate": 0.00010841353213708711,
      "loss": 0.9499,
      "step": 13675
    },
    {
      "epoch": 1.5782187355791417,
      "grad_norm": 0.39963239431381226,
      "learning_rate": 0.00010834664878671525,
      "loss": 0.9036,
      "step": 13680
    },
    {
      "epoch": 1.5787955699123213,
      "grad_norm": 0.37388959527015686,
      "learning_rate": 0.00010827976167616185,
      "loss": 0.9154,
      "step": 13685
    },
    {
      "epoch": 1.5793724042455008,
      "grad_norm": 0.39449089765548706,
      "learning_rate": 0.00010821287083555971,
      "loss": 0.9183,
      "step": 13690
    },
    {
      "epoch": 1.5799492385786802,
      "grad_norm": 0.42294618487358093,
      "learning_rate": 0.00010814597629504324,
      "loss": 0.9375,
      "step": 13695
    },
    {
      "epoch": 1.5805260729118598,
      "grad_norm": 0.40645015239715576,
      "learning_rate": 0.00010807907808474862,
      "loss": 0.9377,
      "step": 13700
    },
    {
      "epoch": 1.581102907245039,
      "grad_norm": 0.3905043601989746,
      "learning_rate": 0.00010801217623481356,
      "loss": 0.9371,
      "step": 13705
    },
    {
      "epoch": 1.5816797415782187,
      "grad_norm": 0.46238189935684204,
      "learning_rate": 0.00010794527077537755,
      "loss": 0.9441,
      "step": 13710
    },
    {
      "epoch": 1.5822565759113982,
      "grad_norm": 0.44243723154067993,
      "learning_rate": 0.00010787836173658155,
      "loss": 0.9134,
      "step": 13715
    },
    {
      "epoch": 1.5828334102445778,
      "grad_norm": 0.4234520196914673,
      "learning_rate": 0.00010781144914856826,
      "loss": 0.968,
      "step": 13720
    },
    {
      "epoch": 1.5834102445777574,
      "grad_norm": 0.42427393794059753,
      "learning_rate": 0.00010774453304148192,
      "loss": 0.8884,
      "step": 13725
    },
    {
      "epoch": 1.583987078910937,
      "grad_norm": 0.40130579471588135,
      "learning_rate": 0.00010767761344546831,
      "loss": 0.9292,
      "step": 13730
    },
    {
      "epoch": 1.5845639132441163,
      "grad_norm": 0.3994501233100891,
      "learning_rate": 0.00010761069039067498,
      "loss": 1.0001,
      "step": 13735
    },
    {
      "epoch": 1.5851407475772958,
      "grad_norm": 0.40696844458580017,
      "learning_rate": 0.00010754376390725074,
      "loss": 0.9145,
      "step": 13740
    },
    {
      "epoch": 1.5857175819104752,
      "grad_norm": 0.42791715264320374,
      "learning_rate": 0.00010747683402534621,
      "loss": 0.9214,
      "step": 13745
    },
    {
      "epoch": 1.5862944162436547,
      "grad_norm": 0.36512282490730286,
      "learning_rate": 0.00010740990077511337,
      "loss": 0.9127,
      "step": 13750
    },
    {
      "epoch": 1.5868712505768343,
      "grad_norm": 0.4037910997867584,
      "learning_rate": 0.00010734296418670582,
      "loss": 0.9214,
      "step": 13755
    },
    {
      "epoch": 1.5874480849100139,
      "grad_norm": 0.41456806659698486,
      "learning_rate": 0.00010727602429027859,
      "loss": 0.9446,
      "step": 13760
    },
    {
      "epoch": 1.5880249192431934,
      "grad_norm": 0.3830060660839081,
      "learning_rate": 0.00010720908111598824,
      "loss": 0.9261,
      "step": 13765
    },
    {
      "epoch": 1.588601753576373,
      "grad_norm": 0.4202171862125397,
      "learning_rate": 0.00010714213469399283,
      "loss": 0.954,
      "step": 13770
    },
    {
      "epoch": 1.5891785879095524,
      "grad_norm": 0.4009062647819519,
      "learning_rate": 0.00010707518505445182,
      "loss": 0.9539,
      "step": 13775
    },
    {
      "epoch": 1.589755422242732,
      "grad_norm": 0.4225723147392273,
      "learning_rate": 0.00010700823222752618,
      "loss": 0.9219,
      "step": 13780
    },
    {
      "epoch": 1.5903322565759113,
      "grad_norm": 0.392956405878067,
      "learning_rate": 0.00010694127624337826,
      "loss": 0.9375,
      "step": 13785
    },
    {
      "epoch": 1.5909090909090908,
      "grad_norm": 0.3861681818962097,
      "learning_rate": 0.00010687431713217186,
      "loss": 0.9202,
      "step": 13790
    },
    {
      "epoch": 1.5914859252422704,
      "grad_norm": 0.39695194363594055,
      "learning_rate": 0.00010680735492407225,
      "loss": 0.8907,
      "step": 13795
    },
    {
      "epoch": 1.59206275957545,
      "grad_norm": 0.4258616864681244,
      "learning_rate": 0.00010674038964924597,
      "loss": 0.8806,
      "step": 13800
    },
    {
      "epoch": 1.5926395939086295,
      "grad_norm": 0.4228580594062805,
      "learning_rate": 0.00010667342133786102,
      "loss": 0.9581,
      "step": 13805
    },
    {
      "epoch": 1.593216428241809,
      "grad_norm": 0.37822139263153076,
      "learning_rate": 0.00010660645002008678,
      "loss": 0.8637,
      "step": 13810
    },
    {
      "epoch": 1.5937932625749884,
      "grad_norm": 0.3744790852069855,
      "learning_rate": 0.00010653947572609393,
      "loss": 0.8927,
      "step": 13815
    },
    {
      "epoch": 1.594370096908168,
      "grad_norm": 0.4195287525653839,
      "learning_rate": 0.00010647249848605454,
      "loss": 0.9231,
      "step": 13820
    },
    {
      "epoch": 1.5949469312413473,
      "grad_norm": 0.41662541031837463,
      "learning_rate": 0.00010640551833014196,
      "loss": 0.9706,
      "step": 13825
    },
    {
      "epoch": 1.595523765574527,
      "grad_norm": 0.4328131377696991,
      "learning_rate": 0.0001063385352885309,
      "loss": 1.004,
      "step": 13830
    },
    {
      "epoch": 1.5961005999077065,
      "grad_norm": 0.3658946752548218,
      "learning_rate": 0.00010627154939139737,
      "loss": 0.9176,
      "step": 13835
    },
    {
      "epoch": 1.596677434240886,
      "grad_norm": 0.4352788031101227,
      "learning_rate": 0.00010620456066891862,
      "loss": 0.9649,
      "step": 13840
    },
    {
      "epoch": 1.5972542685740656,
      "grad_norm": 0.39662760496139526,
      "learning_rate": 0.00010613756915127319,
      "loss": 0.968,
      "step": 13845
    },
    {
      "epoch": 1.5978311029072452,
      "grad_norm": 0.3719061613082886,
      "learning_rate": 0.00010607057486864091,
      "loss": 0.935,
      "step": 13850
    },
    {
      "epoch": 1.5984079372404245,
      "grad_norm": 0.4000123143196106,
      "learning_rate": 0.00010600357785120285,
      "loss": 0.925,
      "step": 13855
    },
    {
      "epoch": 1.598984771573604,
      "grad_norm": 0.3936002254486084,
      "learning_rate": 0.00010593657812914129,
      "loss": 0.9394,
      "step": 13860
    },
    {
      "epoch": 1.5995616059067834,
      "grad_norm": 0.42291298508644104,
      "learning_rate": 0.00010586957573263968,
      "loss": 0.9529,
      "step": 13865
    },
    {
      "epoch": 1.600138440239963,
      "grad_norm": 0.39400723576545715,
      "learning_rate": 0.00010580257069188279,
      "loss": 0.9482,
      "step": 13870
    },
    {
      "epoch": 1.6007152745731426,
      "grad_norm": 0.4413566291332245,
      "learning_rate": 0.00010573556303705652,
      "loss": 0.9673,
      "step": 13875
    },
    {
      "epoch": 1.6012921089063221,
      "grad_norm": 0.39717692136764526,
      "learning_rate": 0.00010566855279834793,
      "loss": 0.9588,
      "step": 13880
    },
    {
      "epoch": 1.6018689432395017,
      "grad_norm": 0.38071373105049133,
      "learning_rate": 0.00010560154000594524,
      "loss": 0.9404,
      "step": 13885
    },
    {
      "epoch": 1.6024457775726813,
      "grad_norm": 0.38415026664733887,
      "learning_rate": 0.00010553452469003789,
      "loss": 0.9299,
      "step": 13890
    },
    {
      "epoch": 1.6030226119058606,
      "grad_norm": 0.3782908022403717,
      "learning_rate": 0.00010546750688081638,
      "loss": 0.9524,
      "step": 13895
    },
    {
      "epoch": 1.6035994462390402,
      "grad_norm": 0.4405461549758911,
      "learning_rate": 0.00010540048660847239,
      "loss": 0.9121,
      "step": 13900
    },
    {
      "epoch": 1.6041762805722195,
      "grad_norm": 0.37163928151130676,
      "learning_rate": 0.00010533346390319867,
      "loss": 0.9111,
      "step": 13905
    },
    {
      "epoch": 1.604753114905399,
      "grad_norm": 0.4153117537498474,
      "learning_rate": 0.00010526643879518905,
      "loss": 0.9026,
      "step": 13910
    },
    {
      "epoch": 1.6053299492385786,
      "grad_norm": 0.39725857973098755,
      "learning_rate": 0.00010519941131463852,
      "loss": 0.9864,
      "step": 13915
    },
    {
      "epoch": 1.6059067835717582,
      "grad_norm": 0.37876853346824646,
      "learning_rate": 0.00010513238149174304,
      "loss": 0.9289,
      "step": 13920
    },
    {
      "epoch": 1.6064836179049378,
      "grad_norm": 0.4003385901451111,
      "learning_rate": 0.00010506534935669974,
      "loss": 0.9353,
      "step": 13925
    },
    {
      "epoch": 1.6070604522381173,
      "grad_norm": 0.4417904317378998,
      "learning_rate": 0.00010499831493970669,
      "loss": 0.9107,
      "step": 13930
    },
    {
      "epoch": 1.6076372865712967,
      "grad_norm": 0.3675759434700012,
      "learning_rate": 0.00010493127827096298,
      "loss": 0.9558,
      "step": 13935
    },
    {
      "epoch": 1.6082141209044762,
      "grad_norm": 0.44555363059043884,
      "learning_rate": 0.00010486423938066887,
      "loss": 0.9315,
      "step": 13940
    },
    {
      "epoch": 1.6087909552376556,
      "grad_norm": 0.36408329010009766,
      "learning_rate": 0.00010479719829902539,
      "loss": 0.9076,
      "step": 13945
    },
    {
      "epoch": 1.6093677895708351,
      "grad_norm": 0.39997726678848267,
      "learning_rate": 0.00010473015505623477,
      "loss": 0.9268,
      "step": 13950
    },
    {
      "epoch": 1.6099446239040147,
      "grad_norm": 0.3789185583591461,
      "learning_rate": 0.00010466310968250009,
      "loss": 0.9367,
      "step": 13955
    },
    {
      "epoch": 1.6105214582371943,
      "grad_norm": 0.41184380650520325,
      "learning_rate": 0.0001045960622080254,
      "loss": 0.9795,
      "step": 13960
    },
    {
      "epoch": 1.6110982925703738,
      "grad_norm": 0.4009269177913666,
      "learning_rate": 0.00010452901266301574,
      "loss": 0.9496,
      "step": 13965
    },
    {
      "epoch": 1.6116751269035534,
      "grad_norm": 0.3926098942756653,
      "learning_rate": 0.00010446196107767705,
      "loss": 0.9602,
      "step": 13970
    },
    {
      "epoch": 1.6122519612367328,
      "grad_norm": 0.3812084197998047,
      "learning_rate": 0.00010439490748221621,
      "loss": 0.9553,
      "step": 13975
    },
    {
      "epoch": 1.6128287955699123,
      "grad_norm": 0.3911786675453186,
      "learning_rate": 0.000104327851906841,
      "loss": 0.885,
      "step": 13980
    },
    {
      "epoch": 1.6134056299030919,
      "grad_norm": 0.38076895475387573,
      "learning_rate": 0.00010426079438176009,
      "loss": 0.9734,
      "step": 13985
    },
    {
      "epoch": 1.6139824642362712,
      "grad_norm": 0.4474939703941345,
      "learning_rate": 0.00010419373493718298,
      "loss": 0.9702,
      "step": 13990
    },
    {
      "epoch": 1.6145592985694508,
      "grad_norm": 0.3928569257259369,
      "learning_rate": 0.00010412667360332013,
      "loss": 0.897,
      "step": 13995
    },
    {
      "epoch": 1.6151361329026304,
      "grad_norm": 0.392778605222702,
      "learning_rate": 0.00010405961041038279,
      "loss": 0.9189,
      "step": 14000
    },
    {
      "epoch": 1.61571296723581,
      "grad_norm": 0.39536142349243164,
      "learning_rate": 0.00010399254538858303,
      "loss": 0.9164,
      "step": 14005
    },
    {
      "epoch": 1.6162898015689895,
      "grad_norm": 0.42655783891677856,
      "learning_rate": 0.00010392547856813384,
      "loss": 0.9268,
      "step": 14010
    },
    {
      "epoch": 1.616866635902169,
      "grad_norm": 0.4114214777946472,
      "learning_rate": 0.00010385840997924887,
      "loss": 0.946,
      "step": 14015
    },
    {
      "epoch": 1.6174434702353484,
      "grad_norm": 0.40836742520332336,
      "learning_rate": 0.00010379133965214274,
      "loss": 0.907,
      "step": 14020
    },
    {
      "epoch": 1.618020304568528,
      "grad_norm": 0.4120984971523285,
      "learning_rate": 0.00010372426761703067,
      "loss": 0.9156,
      "step": 14025
    },
    {
      "epoch": 1.6185971389017073,
      "grad_norm": 0.3959936201572418,
      "learning_rate": 0.00010365719390412882,
      "loss": 0.862,
      "step": 14030
    },
    {
      "epoch": 1.6191739732348869,
      "grad_norm": 0.4160866439342499,
      "learning_rate": 0.00010359011854365397,
      "loss": 0.9078,
      "step": 14035
    },
    {
      "epoch": 1.6197508075680664,
      "grad_norm": 0.39500558376312256,
      "learning_rate": 0.00010352304156582376,
      "loss": 0.9374,
      "step": 14040
    },
    {
      "epoch": 1.620327641901246,
      "grad_norm": 0.39849910140037537,
      "learning_rate": 0.0001034559630008564,
      "loss": 0.885,
      "step": 14045
    },
    {
      "epoch": 1.6209044762344256,
      "grad_norm": 0.4136286973953247,
      "learning_rate": 0.00010338888287897102,
      "loss": 0.9146,
      "step": 14050
    },
    {
      "epoch": 1.6214813105676051,
      "grad_norm": 0.4256516396999359,
      "learning_rate": 0.0001033218012303873,
      "loss": 0.9246,
      "step": 14055
    },
    {
      "epoch": 1.6220581449007845,
      "grad_norm": 0.4077073633670807,
      "learning_rate": 0.00010325471808532566,
      "loss": 0.9234,
      "step": 14060
    },
    {
      "epoch": 1.622634979233964,
      "grad_norm": 0.42370501160621643,
      "learning_rate": 0.00010318763347400719,
      "loss": 0.9254,
      "step": 14065
    },
    {
      "epoch": 1.6232118135671434,
      "grad_norm": 0.3607090711593628,
      "learning_rate": 0.00010312054742665362,
      "loss": 0.9417,
      "step": 14070
    },
    {
      "epoch": 1.623788647900323,
      "grad_norm": 0.4143436551094055,
      "learning_rate": 0.00010305345997348736,
      "loss": 0.9438,
      "step": 14075
    },
    {
      "epoch": 1.6243654822335025,
      "grad_norm": 0.35811230540275574,
      "learning_rate": 0.00010298637114473144,
      "loss": 0.9488,
      "step": 14080
    },
    {
      "epoch": 1.624942316566682,
      "grad_norm": 0.4050202965736389,
      "learning_rate": 0.0001029192809706095,
      "loss": 0.9373,
      "step": 14085
    },
    {
      "epoch": 1.6255191508998617,
      "grad_norm": 0.4086194336414337,
      "learning_rate": 0.00010285218948134581,
      "loss": 0.983,
      "step": 14090
    },
    {
      "epoch": 1.6260959852330412,
      "grad_norm": 0.37604227662086487,
      "learning_rate": 0.00010278509670716518,
      "loss": 0.8987,
      "step": 14095
    },
    {
      "epoch": 1.6266728195662206,
      "grad_norm": 0.4030003845691681,
      "learning_rate": 0.00010271800267829308,
      "loss": 0.9356,
      "step": 14100
    },
    {
      "epoch": 1.6272496538994001,
      "grad_norm": 0.4271502196788788,
      "learning_rate": 0.00010265090742495546,
      "loss": 0.953,
      "step": 14105
    },
    {
      "epoch": 1.6278264882325795,
      "grad_norm": 0.41166070103645325,
      "learning_rate": 0.00010258381097737892,
      "loss": 0.9577,
      "step": 14110
    },
    {
      "epoch": 1.628403322565759,
      "grad_norm": 0.4195760488510132,
      "learning_rate": 0.00010251671336579048,
      "loss": 0.9357,
      "step": 14115
    },
    {
      "epoch": 1.6289801568989386,
      "grad_norm": 0.40065228939056396,
      "learning_rate": 0.00010244961462041777,
      "loss": 0.9749,
      "step": 14120
    },
    {
      "epoch": 1.6295569912321182,
      "grad_norm": 0.40311920642852783,
      "learning_rate": 0.0001023825147714889,
      "loss": 0.9107,
      "step": 14125
    },
    {
      "epoch": 1.6301338255652977,
      "grad_norm": 0.420380562543869,
      "learning_rate": 0.00010231541384923248,
      "loss": 0.9697,
      "step": 14130
    },
    {
      "epoch": 1.6307106598984773,
      "grad_norm": 0.3689268231391907,
      "learning_rate": 0.00010224831188387765,
      "loss": 0.9005,
      "step": 14135
    },
    {
      "epoch": 1.6312874942316566,
      "grad_norm": 0.4116344153881073,
      "learning_rate": 0.0001021812089056539,
      "loss": 0.8997,
      "step": 14140
    },
    {
      "epoch": 1.6318643285648362,
      "grad_norm": 0.37291398644447327,
      "learning_rate": 0.0001021141049447913,
      "loss": 0.9649,
      "step": 14145
    },
    {
      "epoch": 1.6324411628980156,
      "grad_norm": 0.3837934136390686,
      "learning_rate": 0.0001020470000315203,
      "loss": 0.9648,
      "step": 14150
    },
    {
      "epoch": 1.6330179972311951,
      "grad_norm": 0.3765500485897064,
      "learning_rate": 0.00010197989419607184,
      "loss": 0.8798,
      "step": 14155
    },
    {
      "epoch": 1.6335948315643747,
      "grad_norm": 0.36903536319732666,
      "learning_rate": 0.00010191278746867714,
      "loss": 0.9312,
      "step": 14160
    },
    {
      "epoch": 1.6341716658975542,
      "grad_norm": 0.41757214069366455,
      "learning_rate": 0.00010184567987956797,
      "loss": 0.9221,
      "step": 14165
    },
    {
      "epoch": 1.6347485002307338,
      "grad_norm": 0.38226452469825745,
      "learning_rate": 0.00010177857145897643,
      "loss": 0.9661,
      "step": 14170
    },
    {
      "epoch": 1.6353253345639134,
      "grad_norm": 0.3918800354003906,
      "learning_rate": 0.00010171146223713496,
      "loss": 0.9104,
      "step": 14175
    },
    {
      "epoch": 1.6359021688970927,
      "grad_norm": 0.3708522915840149,
      "learning_rate": 0.00010164435224427646,
      "loss": 0.9644,
      "step": 14180
    },
    {
      "epoch": 1.6364790032302723,
      "grad_norm": 0.3736863434314728,
      "learning_rate": 0.00010157724151063406,
      "loss": 0.9076,
      "step": 14185
    },
    {
      "epoch": 1.6370558375634516,
      "grad_norm": 0.38738158345222473,
      "learning_rate": 0.00010151013006644128,
      "loss": 0.9173,
      "step": 14190
    },
    {
      "epoch": 1.6376326718966312,
      "grad_norm": 0.41743552684783936,
      "learning_rate": 0.00010144301794193197,
      "loss": 0.9011,
      "step": 14195
    },
    {
      "epoch": 1.6382095062298108,
      "grad_norm": 0.3966219425201416,
      "learning_rate": 0.00010137590516734026,
      "loss": 0.9458,
      "step": 14200
    },
    {
      "epoch": 1.6387863405629903,
      "grad_norm": 0.3808024525642395,
      "learning_rate": 0.00010130879177290061,
      "loss": 0.9484,
      "step": 14205
    },
    {
      "epoch": 1.63936317489617,
      "grad_norm": 0.3857729434967041,
      "learning_rate": 0.00010124167778884767,
      "loss": 0.8844,
      "step": 14210
    },
    {
      "epoch": 1.6399400092293495,
      "grad_norm": 0.3597935438156128,
      "learning_rate": 0.00010117456324541652,
      "loss": 0.9238,
      "step": 14215
    },
    {
      "epoch": 1.6405168435625288,
      "grad_norm": 0.40039053559303284,
      "learning_rate": 0.00010110744817284232,
      "loss": 0.9245,
      "step": 14220
    },
    {
      "epoch": 1.6410936778957084,
      "grad_norm": 0.4076060354709625,
      "learning_rate": 0.00010104033260136056,
      "loss": 0.9351,
      "step": 14225
    },
    {
      "epoch": 1.6416705122288877,
      "grad_norm": 0.41253358125686646,
      "learning_rate": 0.00010097321656120695,
      "loss": 0.8715,
      "step": 14230
    },
    {
      "epoch": 1.6422473465620673,
      "grad_norm": 0.45987001061439514,
      "learning_rate": 0.00010090610008261738,
      "loss": 0.9585,
      "step": 14235
    },
    {
      "epoch": 1.6428241808952468,
      "grad_norm": 0.39234888553619385,
      "learning_rate": 0.00010083898319582795,
      "loss": 0.8901,
      "step": 14240
    },
    {
      "epoch": 1.6434010152284264,
      "grad_norm": 0.41099515557289124,
      "learning_rate": 0.00010077186593107495,
      "loss": 0.9179,
      "step": 14245
    },
    {
      "epoch": 1.643977849561606,
      "grad_norm": 0.43246176838874817,
      "learning_rate": 0.00010070474831859486,
      "loss": 0.9627,
      "step": 14250
    },
    {
      "epoch": 1.6445546838947855,
      "grad_norm": 0.37442895770072937,
      "learning_rate": 0.00010063763038862428,
      "loss": 0.9208,
      "step": 14255
    },
    {
      "epoch": 1.6451315182279649,
      "grad_norm": 0.38451138138771057,
      "learning_rate": 0.00010057051217139997,
      "loss": 0.9064,
      "step": 14260
    },
    {
      "epoch": 1.6457083525611444,
      "grad_norm": 0.3907986879348755,
      "learning_rate": 0.0001005033936971588,
      "loss": 0.9274,
      "step": 14265
    },
    {
      "epoch": 1.6462851868943238,
      "grad_norm": 0.41767701506614685,
      "learning_rate": 0.00010043627499613778,
      "loss": 0.8953,
      "step": 14270
    },
    {
      "epoch": 1.6468620212275034,
      "grad_norm": 0.42702117562294006,
      "learning_rate": 0.00010036915609857406,
      "loss": 0.9665,
      "step": 14275
    },
    {
      "epoch": 1.647438855560683,
      "grad_norm": 0.5105391144752502,
      "learning_rate": 0.00010030203703470477,
      "loss": 0.9829,
      "step": 14280
    },
    {
      "epoch": 1.6480156898938625,
      "grad_norm": 0.40759360790252686,
      "learning_rate": 0.00010023491783476724,
      "loss": 0.9246,
      "step": 14285
    },
    {
      "epoch": 1.648592524227042,
      "grad_norm": 0.445537269115448,
      "learning_rate": 0.00010016779852899873,
      "loss": 0.9676,
      "step": 14290
    },
    {
      "epoch": 1.6491693585602216,
      "grad_norm": 0.3821800947189331,
      "learning_rate": 0.00010010067914763668,
      "loss": 0.9158,
      "step": 14295
    },
    {
      "epoch": 1.649746192893401,
      "grad_norm": 0.39797648787498474,
      "learning_rate": 0.00010003355972091848,
      "loss": 0.9002,
      "step": 14300
    },
    {
      "epoch": 1.6503230272265805,
      "grad_norm": 0.39020875096321106,
      "learning_rate": 9.996644027908154e-05,
      "loss": 0.9021,
      "step": 14305
    },
    {
      "epoch": 1.6508998615597599,
      "grad_norm": 0.4043421745300293,
      "learning_rate": 9.989932085236334e-05,
      "loss": 0.9118,
      "step": 14310
    },
    {
      "epoch": 1.6514766958929394,
      "grad_norm": 0.39409375190734863,
      "learning_rate": 9.983220147100129e-05,
      "loss": 0.9214,
      "step": 14315
    },
    {
      "epoch": 1.652053530226119,
      "grad_norm": 0.4032192826271057,
      "learning_rate": 9.976508216523278e-05,
      "loss": 0.9188,
      "step": 14320
    },
    {
      "epoch": 1.6526303645592986,
      "grad_norm": 0.3894086182117462,
      "learning_rate": 9.969796296529525e-05,
      "loss": 1.036,
      "step": 14325
    },
    {
      "epoch": 1.6532071988924781,
      "grad_norm": 0.382533460855484,
      "learning_rate": 9.963084390142595e-05,
      "loss": 0.9062,
      "step": 14330
    },
    {
      "epoch": 1.6537840332256577,
      "grad_norm": 0.3999064564704895,
      "learning_rate": 9.956372500386222e-05,
      "loss": 0.9372,
      "step": 14335
    },
    {
      "epoch": 1.6543608675588373,
      "grad_norm": 0.4040856957435608,
      "learning_rate": 9.949660630284122e-05,
      "loss": 0.8744,
      "step": 14340
    },
    {
      "epoch": 1.6549377018920166,
      "grad_norm": 0.4170020818710327,
      "learning_rate": 9.942948782860008e-05,
      "loss": 0.9355,
      "step": 14345
    },
    {
      "epoch": 1.6555145362251962,
      "grad_norm": 0.39724001288414,
      "learning_rate": 9.936236961137575e-05,
      "loss": 0.9466,
      "step": 14350
    },
    {
      "epoch": 1.6560913705583755,
      "grad_norm": 0.4035636782646179,
      "learning_rate": 9.929525168140516e-05,
      "loss": 0.9407,
      "step": 14355
    },
    {
      "epoch": 1.656668204891555,
      "grad_norm": 0.40529632568359375,
      "learning_rate": 9.922813406892508e-05,
      "loss": 0.9191,
      "step": 14360
    },
    {
      "epoch": 1.6572450392247347,
      "grad_norm": 0.4170621633529663,
      "learning_rate": 9.916101680417208e-05,
      "loss": 0.9501,
      "step": 14365
    },
    {
      "epoch": 1.6578218735579142,
      "grad_norm": 0.367270290851593,
      "learning_rate": 9.909389991738263e-05,
      "loss": 0.9359,
      "step": 14370
    },
    {
      "epoch": 1.6583987078910938,
      "grad_norm": 0.36950138211250305,
      "learning_rate": 9.902678343879308e-05,
      "loss": 0.9433,
      "step": 14375
    },
    {
      "epoch": 1.6589755422242733,
      "grad_norm": 0.36553606390953064,
      "learning_rate": 9.895966739863947e-05,
      "loss": 0.8903,
      "step": 14380
    },
    {
      "epoch": 1.6595523765574527,
      "grad_norm": 0.379629522562027,
      "learning_rate": 9.889255182715769e-05,
      "loss": 0.878,
      "step": 14385
    },
    {
      "epoch": 1.6601292108906323,
      "grad_norm": 0.4937252104282379,
      "learning_rate": 9.88254367545835e-05,
      "loss": 0.9175,
      "step": 14390
    },
    {
      "epoch": 1.6607060452238116,
      "grad_norm": 0.4445362985134125,
      "learning_rate": 9.875832221115234e-05,
      "loss": 1.0388,
      "step": 14395
    },
    {
      "epoch": 1.6612828795569912,
      "grad_norm": 0.3727785050868988,
      "learning_rate": 9.869120822709946e-05,
      "loss": 0.9752,
      "step": 14400
    },
    {
      "epoch": 1.6618597138901707,
      "grad_norm": 0.4096865653991699,
      "learning_rate": 9.862409483265975e-05,
      "loss": 0.9334,
      "step": 14405
    },
    {
      "epoch": 1.6624365482233503,
      "grad_norm": 0.4165703058242798,
      "learning_rate": 9.855698205806805e-05,
      "loss": 0.9634,
      "step": 14410
    },
    {
      "epoch": 1.6630133825565299,
      "grad_norm": 0.4018830955028534,
      "learning_rate": 9.848986993355877e-05,
      "loss": 0.9348,
      "step": 14415
    },
    {
      "epoch": 1.6635902168897094,
      "grad_norm": 0.3881585896015167,
      "learning_rate": 9.842275848936595e-05,
      "loss": 0.9336,
      "step": 14420
    },
    {
      "epoch": 1.6641670512228888,
      "grad_norm": 0.39819440245628357,
      "learning_rate": 9.835564775572356e-05,
      "loss": 0.9359,
      "step": 14425
    },
    {
      "epoch": 1.6647438855560683,
      "grad_norm": 0.45221155881881714,
      "learning_rate": 9.828853776286505e-05,
      "loss": 0.9702,
      "step": 14430
    },
    {
      "epoch": 1.6653207198892477,
      "grad_norm": 0.3747832179069519,
      "learning_rate": 9.822142854102362e-05,
      "loss": 0.91,
      "step": 14435
    },
    {
      "epoch": 1.6658975542224272,
      "grad_norm": 0.4133490324020386,
      "learning_rate": 9.815432012043204e-05,
      "loss": 0.9482,
      "step": 14440
    },
    {
      "epoch": 1.6664743885556068,
      "grad_norm": 0.37815743684768677,
      "learning_rate": 9.808721253132289e-05,
      "loss": 0.9418,
      "step": 14445
    },
    {
      "epoch": 1.6670512228887864,
      "grad_norm": 0.3955732583999634,
      "learning_rate": 9.802010580392821e-05,
      "loss": 0.9085,
      "step": 14450
    },
    {
      "epoch": 1.667628057221966,
      "grad_norm": 0.3749368488788605,
      "learning_rate": 9.79529999684797e-05,
      "loss": 0.9217,
      "step": 14455
    },
    {
      "epoch": 1.6682048915551455,
      "grad_norm": 0.3877532184123993,
      "learning_rate": 9.78858950552087e-05,
      "loss": 0.9568,
      "step": 14460
    },
    {
      "epoch": 1.6687817258883249,
      "grad_norm": 0.3820559084415436,
      "learning_rate": 9.781879109434614e-05,
      "loss": 0.8994,
      "step": 14465
    },
    {
      "epoch": 1.6693585602215044,
      "grad_norm": 0.38123568892478943,
      "learning_rate": 9.77516881161224e-05,
      "loss": 0.9013,
      "step": 14470
    },
    {
      "epoch": 1.6699353945546838,
      "grad_norm": 0.41897571086883545,
      "learning_rate": 9.768458615076751e-05,
      "loss": 0.9194,
      "step": 14475
    },
    {
      "epoch": 1.6705122288878633,
      "grad_norm": 0.39537757635116577,
      "learning_rate": 9.761748522851112e-05,
      "loss": 0.9243,
      "step": 14480
    },
    {
      "epoch": 1.671089063221043,
      "grad_norm": 0.41747602820396423,
      "learning_rate": 9.755038537958226e-05,
      "loss": 0.9695,
      "step": 14485
    },
    {
      "epoch": 1.6716658975542225,
      "grad_norm": 0.41853514313697815,
      "learning_rate": 9.748328663420952e-05,
      "loss": 0.8954,
      "step": 14490
    },
    {
      "epoch": 1.672242731887402,
      "grad_norm": 0.4038637578487396,
      "learning_rate": 9.74161890226211e-05,
      "loss": 0.907,
      "step": 14495
    },
    {
      "epoch": 1.6728195662205816,
      "grad_norm": 0.3900943994522095,
      "learning_rate": 9.734909257504455e-05,
      "loss": 0.952,
      "step": 14500
    },
    {
      "epoch": 1.673396400553761,
      "grad_norm": 0.3971070349216461,
      "learning_rate": 9.728199732170696e-05,
      "loss": 0.9212,
      "step": 14505
    },
    {
      "epoch": 1.6739732348869405,
      "grad_norm": 0.4064873456954956,
      "learning_rate": 9.721490329283483e-05,
      "loss": 0.9615,
      "step": 14510
    },
    {
      "epoch": 1.6745500692201198,
      "grad_norm": 0.39063361287117004,
      "learning_rate": 9.714781051865421e-05,
      "loss": 0.9701,
      "step": 14515
    },
    {
      "epoch": 1.6751269035532994,
      "grad_norm": 0.38286256790161133,
      "learning_rate": 9.708071902939054e-05,
      "loss": 0.9138,
      "step": 14520
    },
    {
      "epoch": 1.675703737886479,
      "grad_norm": 0.3789064586162567,
      "learning_rate": 9.701362885526856e-05,
      "loss": 0.9297,
      "step": 14525
    },
    {
      "epoch": 1.6762805722196585,
      "grad_norm": 0.4091351628303528,
      "learning_rate": 9.694654002651266e-05,
      "loss": 0.971,
      "step": 14530
    },
    {
      "epoch": 1.676857406552838,
      "grad_norm": 0.38930168747901917,
      "learning_rate": 9.687945257334641e-05,
      "loss": 0.9607,
      "step": 14535
    },
    {
      "epoch": 1.6774342408860177,
      "grad_norm": 0.39753562211990356,
      "learning_rate": 9.681236652599286e-05,
      "loss": 0.9611,
      "step": 14540
    },
    {
      "epoch": 1.678011075219197,
      "grad_norm": 0.39889010787010193,
      "learning_rate": 9.674528191467434e-05,
      "loss": 0.9181,
      "step": 14545
    },
    {
      "epoch": 1.6785879095523766,
      "grad_norm": 0.40444427728652954,
      "learning_rate": 9.667819876961272e-05,
      "loss": 0.9109,
      "step": 14550
    },
    {
      "epoch": 1.679164743885556,
      "grad_norm": 0.41998520493507385,
      "learning_rate": 9.661111712102901e-05,
      "loss": 0.9045,
      "step": 14555
    },
    {
      "epoch": 1.6797415782187355,
      "grad_norm": 0.43666961789131165,
      "learning_rate": 9.654403699914363e-05,
      "loss": 0.8748,
      "step": 14560
    },
    {
      "epoch": 1.680318412551915,
      "grad_norm": 0.38630232214927673,
      "learning_rate": 9.647695843417628e-05,
      "loss": 0.9359,
      "step": 14565
    },
    {
      "epoch": 1.6808952468850946,
      "grad_norm": 0.4208744466304779,
      "learning_rate": 9.640988145634606e-05,
      "loss": 0.9247,
      "step": 14570
    },
    {
      "epoch": 1.6814720812182742,
      "grad_norm": 0.40704822540283203,
      "learning_rate": 9.634280609587123e-05,
      "loss": 0.955,
      "step": 14575
    },
    {
      "epoch": 1.6820489155514537,
      "grad_norm": 0.43808555603027344,
      "learning_rate": 9.627573238296933e-05,
      "loss": 0.9144,
      "step": 14580
    },
    {
      "epoch": 1.682625749884633,
      "grad_norm": 0.3939140737056732,
      "learning_rate": 9.620866034785728e-05,
      "loss": 0.9081,
      "step": 14585
    },
    {
      "epoch": 1.6832025842178127,
      "grad_norm": 0.38371533155441284,
      "learning_rate": 9.614159002075114e-05,
      "loss": 0.9486,
      "step": 14590
    },
    {
      "epoch": 1.683779418550992,
      "grad_norm": 0.3783860504627228,
      "learning_rate": 9.60745214318662e-05,
      "loss": 0.9226,
      "step": 14595
    },
    {
      "epoch": 1.6843562528841716,
      "grad_norm": 0.40141963958740234,
      "learning_rate": 9.600745461141696e-05,
      "loss": 0.9796,
      "step": 14600
    },
    {
      "epoch": 1.6849330872173511,
      "grad_norm": 0.4136003851890564,
      "learning_rate": 9.594038958961725e-05,
      "loss": 0.9096,
      "step": 14605
    },
    {
      "epoch": 1.6855099215505307,
      "grad_norm": 0.36876803636550903,
      "learning_rate": 9.58733263966799e-05,
      "loss": 0.8963,
      "step": 14610
    },
    {
      "epoch": 1.6860867558837103,
      "grad_norm": 0.3809376657009125,
      "learning_rate": 9.580626506281703e-05,
      "loss": 0.9696,
      "step": 14615
    },
    {
      "epoch": 1.6866635902168898,
      "grad_norm": 0.40277227759361267,
      "learning_rate": 9.573920561823995e-05,
      "loss": 0.9342,
      "step": 14620
    },
    {
      "epoch": 1.6872404245500692,
      "grad_norm": 0.43503326177597046,
      "learning_rate": 9.567214809315903e-05,
      "loss": 0.9483,
      "step": 14625
    },
    {
      "epoch": 1.6878172588832487,
      "grad_norm": 0.39147305488586426,
      "learning_rate": 9.560509251778383e-05,
      "loss": 0.9396,
      "step": 14630
    },
    {
      "epoch": 1.688394093216428,
      "grad_norm": 0.4333549439907074,
      "learning_rate": 9.553803892232296e-05,
      "loss": 0.9104,
      "step": 14635
    },
    {
      "epoch": 1.6889709275496076,
      "grad_norm": 0.39406928420066833,
      "learning_rate": 9.547098733698428e-05,
      "loss": 0.9478,
      "step": 14640
    },
    {
      "epoch": 1.6895477618827872,
      "grad_norm": 0.3725603222846985,
      "learning_rate": 9.540393779197464e-05,
      "loss": 0.8539,
      "step": 14645
    },
    {
      "epoch": 1.6901245962159668,
      "grad_norm": 0.37901946902275085,
      "learning_rate": 9.533689031749991e-05,
      "loss": 0.9301,
      "step": 14650
    },
    {
      "epoch": 1.6907014305491463,
      "grad_norm": 0.3747023642063141,
      "learning_rate": 9.526984494376524e-05,
      "loss": 0.9269,
      "step": 14655
    },
    {
      "epoch": 1.691278264882326,
      "grad_norm": 0.37854355573654175,
      "learning_rate": 9.520280170097462e-05,
      "loss": 0.867,
      "step": 14660
    },
    {
      "epoch": 1.6918550992155053,
      "grad_norm": 0.38338419795036316,
      "learning_rate": 9.513576061933118e-05,
      "loss": 0.9194,
      "step": 14665
    },
    {
      "epoch": 1.6924319335486848,
      "grad_norm": 0.3833276033401489,
      "learning_rate": 9.5068721729037e-05,
      "loss": 0.9376,
      "step": 14670
    },
    {
      "epoch": 1.6930087678818642,
      "grad_norm": 0.37685397267341614,
      "learning_rate": 9.500168506029334e-05,
      "loss": 0.9075,
      "step": 14675
    },
    {
      "epoch": 1.6935856022150437,
      "grad_norm": 0.4300825595855713,
      "learning_rate": 9.493465064330029e-05,
      "loss": 0.9386,
      "step": 14680
    },
    {
      "epoch": 1.6941624365482233,
      "grad_norm": 0.3908791244029999,
      "learning_rate": 9.486761850825694e-05,
      "loss": 0.9543,
      "step": 14685
    },
    {
      "epoch": 1.6947392708814029,
      "grad_norm": 0.3841436803340912,
      "learning_rate": 9.480058868536149e-05,
      "loss": 0.9034,
      "step": 14690
    },
    {
      "epoch": 1.6953161052145824,
      "grad_norm": 0.4477061927318573,
      "learning_rate": 9.473356120481098e-05,
      "loss": 0.9531,
      "step": 14695
    },
    {
      "epoch": 1.695892939547762,
      "grad_norm": 0.38848182559013367,
      "learning_rate": 9.466653609680137e-05,
      "loss": 0.9192,
      "step": 14700
    },
    {
      "epoch": 1.6964697738809416,
      "grad_norm": 0.42426785826683044,
      "learning_rate": 9.459951339152762e-05,
      "loss": 0.8993,
      "step": 14705
    },
    {
      "epoch": 1.697046608214121,
      "grad_norm": 0.4407934844493866,
      "learning_rate": 9.453249311918362e-05,
      "loss": 0.9737,
      "step": 14710
    },
    {
      "epoch": 1.6976234425473005,
      "grad_norm": 0.4233091175556183,
      "learning_rate": 9.446547530996214e-05,
      "loss": 0.9115,
      "step": 14715
    },
    {
      "epoch": 1.6982002768804798,
      "grad_norm": 0.38162070512771606,
      "learning_rate": 9.439845999405478e-05,
      "loss": 0.9048,
      "step": 14720
    },
    {
      "epoch": 1.6987771112136594,
      "grad_norm": 0.44918137788772583,
      "learning_rate": 9.43314472016521e-05,
      "loss": 0.8878,
      "step": 14725
    },
    {
      "epoch": 1.699353945546839,
      "grad_norm": 0.40736523270606995,
      "learning_rate": 9.426443696294351e-05,
      "loss": 0.9339,
      "step": 14730
    },
    {
      "epoch": 1.6999307798800185,
      "grad_norm": 0.3635164201259613,
      "learning_rate": 9.419742930811722e-05,
      "loss": 0.9033,
      "step": 14735
    },
    {
      "epoch": 1.700507614213198,
      "grad_norm": 0.3836980164051056,
      "learning_rate": 9.413042426736032e-05,
      "loss": 0.9218,
      "step": 14740
    },
    {
      "epoch": 1.7010844485463776,
      "grad_norm": 0.3805466294288635,
      "learning_rate": 9.406342187085875e-05,
      "loss": 0.8939,
      "step": 14745
    },
    {
      "epoch": 1.701661282879557,
      "grad_norm": 0.3768046200275421,
      "learning_rate": 9.399642214879717e-05,
      "loss": 0.9541,
      "step": 14750
    },
    {
      "epoch": 1.7022381172127365,
      "grad_norm": 0.42572706937789917,
      "learning_rate": 9.39294251313591e-05,
      "loss": 0.9927,
      "step": 14755
    },
    {
      "epoch": 1.702814951545916,
      "grad_norm": 0.4072059392929077,
      "learning_rate": 9.386243084872682e-05,
      "loss": 0.9676,
      "step": 14760
    },
    {
      "epoch": 1.7033917858790955,
      "grad_norm": 0.39494749903678894,
      "learning_rate": 9.379543933108141e-05,
      "loss": 0.9201,
      "step": 14765
    },
    {
      "epoch": 1.703968620212275,
      "grad_norm": 0.42822760343551636,
      "learning_rate": 9.372845060860264e-05,
      "loss": 0.9036,
      "step": 14770
    },
    {
      "epoch": 1.7045454545454546,
      "grad_norm": 0.3813650906085968,
      "learning_rate": 9.36614647114691e-05,
      "loss": 0.8976,
      "step": 14775
    },
    {
      "epoch": 1.7051222888786342,
      "grad_norm": 0.4004404544830322,
      "learning_rate": 9.359448166985806e-05,
      "loss": 0.9066,
      "step": 14780
    },
    {
      "epoch": 1.7056991232118137,
      "grad_norm": 0.39617690443992615,
      "learning_rate": 9.35275015139455e-05,
      "loss": 0.9306,
      "step": 14785
    },
    {
      "epoch": 1.706275957544993,
      "grad_norm": 0.3876945674419403,
      "learning_rate": 9.34605242739061e-05,
      "loss": 0.9129,
      "step": 14790
    },
    {
      "epoch": 1.7068527918781726,
      "grad_norm": 0.3834667205810547,
      "learning_rate": 9.339354997991325e-05,
      "loss": 0.9551,
      "step": 14795
    },
    {
      "epoch": 1.707429626211352,
      "grad_norm": 0.3988478481769562,
      "learning_rate": 9.3326578662139e-05,
      "loss": 0.9172,
      "step": 14800
    },
    {
      "epoch": 1.7080064605445315,
      "grad_norm": 0.40340641140937805,
      "learning_rate": 9.325961035075405e-05,
      "loss": 0.9626,
      "step": 14805
    },
    {
      "epoch": 1.708583294877711,
      "grad_norm": 0.44429507851600647,
      "learning_rate": 9.319264507592776e-05,
      "loss": 0.9331,
      "step": 14810
    },
    {
      "epoch": 1.7091601292108907,
      "grad_norm": 0.41271674633026123,
      "learning_rate": 9.312568286782815e-05,
      "loss": 0.9173,
      "step": 14815
    },
    {
      "epoch": 1.7097369635440702,
      "grad_norm": 0.3779640793800354,
      "learning_rate": 9.305872375662176e-05,
      "loss": 0.9004,
      "step": 14820
    },
    {
      "epoch": 1.7103137978772498,
      "grad_norm": 0.40028929710388184,
      "learning_rate": 9.299176777247386e-05,
      "loss": 0.8877,
      "step": 14825
    },
    {
      "epoch": 1.7108906322104291,
      "grad_norm": 0.3729310631752014,
      "learning_rate": 9.29248149455482e-05,
      "loss": 0.9557,
      "step": 14830
    },
    {
      "epoch": 1.7114674665436087,
      "grad_norm": 0.37121349573135376,
      "learning_rate": 9.285786530600718e-05,
      "loss": 0.9302,
      "step": 14835
    },
    {
      "epoch": 1.712044300876788,
      "grad_norm": 0.36666813492774963,
      "learning_rate": 9.279091888401179e-05,
      "loss": 0.9732,
      "step": 14840
    },
    {
      "epoch": 1.7126211352099676,
      "grad_norm": 0.4723244309425354,
      "learning_rate": 9.272397570972145e-05,
      "loss": 1.0227,
      "step": 14845
    },
    {
      "epoch": 1.7131979695431472,
      "grad_norm": 0.44618186354637146,
      "learning_rate": 9.26570358132942e-05,
      "loss": 0.946,
      "step": 14850
    },
    {
      "epoch": 1.7137748038763267,
      "grad_norm": 0.3813045024871826,
      "learning_rate": 9.259009922488665e-05,
      "loss": 0.9072,
      "step": 14855
    },
    {
      "epoch": 1.7143516382095063,
      "grad_norm": 0.40963122248649597,
      "learning_rate": 9.252316597465384e-05,
      "loss": 0.9181,
      "step": 14860
    },
    {
      "epoch": 1.7149284725426859,
      "grad_norm": 0.43289676308631897,
      "learning_rate": 9.245623609274928e-05,
      "loss": 0.9365,
      "step": 14865
    },
    {
      "epoch": 1.7155053068758652,
      "grad_norm": 0.4129135310649872,
      "learning_rate": 9.238930960932506e-05,
      "loss": 0.9466,
      "step": 14870
    },
    {
      "epoch": 1.7160821412090448,
      "grad_norm": 0.3834366202354431,
      "learning_rate": 9.23223865545317e-05,
      "loss": 0.9291,
      "step": 14875
    },
    {
      "epoch": 1.7166589755422241,
      "grad_norm": 0.380035400390625,
      "learning_rate": 9.225546695851815e-05,
      "loss": 0.9214,
      "step": 14880
    },
    {
      "epoch": 1.7172358098754037,
      "grad_norm": 0.406563937664032,
      "learning_rate": 9.218855085143176e-05,
      "loss": 0.9089,
      "step": 14885
    },
    {
      "epoch": 1.7178126442085833,
      "grad_norm": 0.40587136149406433,
      "learning_rate": 9.212163826341847e-05,
      "loss": 0.9524,
      "step": 14890
    },
    {
      "epoch": 1.7183894785417628,
      "grad_norm": 0.39258936047554016,
      "learning_rate": 9.20547292246225e-05,
      "loss": 0.9747,
      "step": 14895
    },
    {
      "epoch": 1.7189663128749424,
      "grad_norm": 0.3985291123390198,
      "learning_rate": 9.198782376518642e-05,
      "loss": 0.9987,
      "step": 14900
    },
    {
      "epoch": 1.719543147208122,
      "grad_norm": 0.3905879557132721,
      "learning_rate": 9.192092191525139e-05,
      "loss": 0.9427,
      "step": 14905
    },
    {
      "epoch": 1.7201199815413013,
      "grad_norm": 0.42461061477661133,
      "learning_rate": 9.185402370495677e-05,
      "loss": 0.9667,
      "step": 14910
    },
    {
      "epoch": 1.7206968158744809,
      "grad_norm": 0.3873096704483032,
      "learning_rate": 9.178712916444033e-05,
      "loss": 0.9011,
      "step": 14915
    },
    {
      "epoch": 1.7212736502076602,
      "grad_norm": 0.4047510027885437,
      "learning_rate": 9.172023832383816e-05,
      "loss": 0.9815,
      "step": 14920
    },
    {
      "epoch": 1.7218504845408398,
      "grad_norm": 0.4329635500907898,
      "learning_rate": 9.165335121328477e-05,
      "loss": 0.9505,
      "step": 14925
    },
    {
      "epoch": 1.7224273188740193,
      "grad_norm": 0.37576204538345337,
      "learning_rate": 9.158646786291292e-05,
      "loss": 0.9236,
      "step": 14930
    },
    {
      "epoch": 1.723004153207199,
      "grad_norm": 0.3973287343978882,
      "learning_rate": 9.151958830285366e-05,
      "loss": 0.8911,
      "step": 14935
    },
    {
      "epoch": 1.7235809875403785,
      "grad_norm": 0.4271901249885559,
      "learning_rate": 9.14527125632364e-05,
      "loss": 0.982,
      "step": 14940
    },
    {
      "epoch": 1.724157821873558,
      "grad_norm": 0.43565037846565247,
      "learning_rate": 9.138584067418878e-05,
      "loss": 0.9617,
      "step": 14945
    },
    {
      "epoch": 1.7247346562067374,
      "grad_norm": 0.4059095084667206,
      "learning_rate": 9.131897266583672e-05,
      "loss": 0.8999,
      "step": 14950
    },
    {
      "epoch": 1.725311490539917,
      "grad_norm": 0.437881201505661,
      "learning_rate": 9.125210856830433e-05,
      "loss": 0.9047,
      "step": 14955
    },
    {
      "epoch": 1.7258883248730963,
      "grad_norm": 0.3824065029621124,
      "learning_rate": 9.118524841171409e-05,
      "loss": 0.9533,
      "step": 14960
    },
    {
      "epoch": 1.7264651592062759,
      "grad_norm": 0.39069101214408875,
      "learning_rate": 9.111839222618663e-05,
      "loss": 0.9776,
      "step": 14965
    },
    {
      "epoch": 1.7270419935394554,
      "grad_norm": 0.43811115622520447,
      "learning_rate": 9.105154004184071e-05,
      "loss": 0.8873,
      "step": 14970
    },
    {
      "epoch": 1.727618827872635,
      "grad_norm": 0.40116867423057556,
      "learning_rate": 9.098469188879349e-05,
      "loss": 0.9287,
      "step": 14975
    },
    {
      "epoch": 1.7281956622058146,
      "grad_norm": 0.3768905699253082,
      "learning_rate": 9.091784779716011e-05,
      "loss": 0.8807,
      "step": 14980
    },
    {
      "epoch": 1.7287724965389941,
      "grad_norm": 0.38657596707344055,
      "learning_rate": 9.085100779705398e-05,
      "loss": 0.9756,
      "step": 14985
    },
    {
      "epoch": 1.7293493308721735,
      "grad_norm": 0.40648481249809265,
      "learning_rate": 9.078417191858662e-05,
      "loss": 0.8996,
      "step": 14990
    },
    {
      "epoch": 1.729926165205353,
      "grad_norm": 0.37395209074020386,
      "learning_rate": 9.071734019186778e-05,
      "loss": 0.8956,
      "step": 14995
    },
    {
      "epoch": 1.7305029995385324,
      "grad_norm": 0.4580058455467224,
      "learning_rate": 9.065051264700527e-05,
      "loss": 0.9537,
      "step": 15000
    },
    {
      "epoch": 1.731079833871712,
      "grad_norm": 0.3521862030029297,
      "learning_rate": 9.058368931410498e-05,
      "loss": 0.9096,
      "step": 15005
    },
    {
      "epoch": 1.7316566682048915,
      "grad_norm": 0.4156653881072998,
      "learning_rate": 9.051687022327103e-05,
      "loss": 0.8714,
      "step": 15010
    },
    {
      "epoch": 1.732233502538071,
      "grad_norm": 0.37951210141181946,
      "learning_rate": 9.045005540460552e-05,
      "loss": 0.9564,
      "step": 15015
    },
    {
      "epoch": 1.7328103368712506,
      "grad_norm": 0.41788360476493835,
      "learning_rate": 9.038324488820863e-05,
      "loss": 0.9462,
      "step": 15020
    },
    {
      "epoch": 1.7333871712044302,
      "grad_norm": 0.45095932483673096,
      "learning_rate": 9.031643870417861e-05,
      "loss": 0.9131,
      "step": 15025
    },
    {
      "epoch": 1.7339640055376098,
      "grad_norm": 0.3977847099304199,
      "learning_rate": 9.024963688261186e-05,
      "loss": 0.9116,
      "step": 15030
    },
    {
      "epoch": 1.734540839870789,
      "grad_norm": 0.4043447971343994,
      "learning_rate": 9.018283945360266e-05,
      "loss": 0.9271,
      "step": 15035
    },
    {
      "epoch": 1.7351176742039687,
      "grad_norm": 0.44181013107299805,
      "learning_rate": 9.011604644724342e-05,
      "loss": 0.9155,
      "step": 15040
    },
    {
      "epoch": 1.735694508537148,
      "grad_norm": 0.4287402927875519,
      "learning_rate": 9.004925789362446e-05,
      "loss": 0.9138,
      "step": 15045
    },
    {
      "epoch": 1.7362713428703276,
      "grad_norm": 0.3614751994609833,
      "learning_rate": 8.998247382283423e-05,
      "loss": 0.9452,
      "step": 15050
    },
    {
      "epoch": 1.7368481772035071,
      "grad_norm": 0.41066932678222656,
      "learning_rate": 8.991569426495905e-05,
      "loss": 0.898,
      "step": 15055
    },
    {
      "epoch": 1.7374250115366867,
      "grad_norm": 0.457390159368515,
      "learning_rate": 8.984891925008321e-05,
      "loss": 0.8802,
      "step": 15060
    },
    {
      "epoch": 1.7380018458698663,
      "grad_norm": 0.35870426893234253,
      "learning_rate": 8.978214880828903e-05,
      "loss": 0.8995,
      "step": 15065
    },
    {
      "epoch": 1.7385786802030458,
      "grad_norm": 0.39848119020462036,
      "learning_rate": 8.971538296965674e-05,
      "loss": 0.9064,
      "step": 15070
    },
    {
      "epoch": 1.7391555145362252,
      "grad_norm": 0.38636332750320435,
      "learning_rate": 8.964862176426443e-05,
      "loss": 0.953,
      "step": 15075
    },
    {
      "epoch": 1.7397323488694048,
      "grad_norm": 0.41869068145751953,
      "learning_rate": 8.958186522218813e-05,
      "loss": 0.9417,
      "step": 15080
    },
    {
      "epoch": 1.740309183202584,
      "grad_norm": 0.3965960144996643,
      "learning_rate": 8.951511337350188e-05,
      "loss": 0.9319,
      "step": 15085
    },
    {
      "epoch": 1.7408860175357637,
      "grad_norm": 0.38021016120910645,
      "learning_rate": 8.944836624827748e-05,
      "loss": 0.9162,
      "step": 15090
    },
    {
      "epoch": 1.7414628518689432,
      "grad_norm": 0.41196557879447937,
      "learning_rate": 8.938162387658458e-05,
      "loss": 0.9287,
      "step": 15095
    },
    {
      "epoch": 1.7420396862021228,
      "grad_norm": 0.3578120172023773,
      "learning_rate": 8.931488628849085e-05,
      "loss": 0.96,
      "step": 15100
    },
    {
      "epoch": 1.7426165205353024,
      "grad_norm": 0.4296330213546753,
      "learning_rate": 8.924815351406163e-05,
      "loss": 0.9434,
      "step": 15105
    },
    {
      "epoch": 1.743193354868482,
      "grad_norm": 0.3744300305843353,
      "learning_rate": 8.918142558336022e-05,
      "loss": 0.9601,
      "step": 15110
    },
    {
      "epoch": 1.7437701892016613,
      "grad_norm": 0.41023698449134827,
      "learning_rate": 8.91147025264476e-05,
      "loss": 0.9312,
      "step": 15115
    },
    {
      "epoch": 1.7443470235348408,
      "grad_norm": 0.40638571977615356,
      "learning_rate": 8.904798437338272e-05,
      "loss": 0.9658,
      "step": 15120
    },
    {
      "epoch": 1.7449238578680202,
      "grad_norm": 0.45808759331703186,
      "learning_rate": 8.898127115422221e-05,
      "loss": 0.9522,
      "step": 15125
    },
    {
      "epoch": 1.7455006922011997,
      "grad_norm": 0.4006982147693634,
      "learning_rate": 8.891456289902046e-05,
      "loss": 0.8894,
      "step": 15130
    },
    {
      "epoch": 1.7460775265343793,
      "grad_norm": 0.3888961970806122,
      "learning_rate": 8.884785963782975e-05,
      "loss": 0.9107,
      "step": 15135
    },
    {
      "epoch": 1.7466543608675589,
      "grad_norm": 0.4383211135864258,
      "learning_rate": 8.878116140069999e-05,
      "loss": 0.9225,
      "step": 15140
    },
    {
      "epoch": 1.7472311952007384,
      "grad_norm": 0.38487568497657776,
      "learning_rate": 8.871446821767888e-05,
      "loss": 0.9299,
      "step": 15145
    },
    {
      "epoch": 1.747808029533918,
      "grad_norm": 0.3626287877559662,
      "learning_rate": 8.864778011881175e-05,
      "loss": 0.9082,
      "step": 15150
    },
    {
      "epoch": 1.7483848638670973,
      "grad_norm": 0.4096532166004181,
      "learning_rate": 8.858109713414185e-05,
      "loss": 0.9614,
      "step": 15155
    },
    {
      "epoch": 1.748961698200277,
      "grad_norm": 0.3614518344402313,
      "learning_rate": 8.85144192937099e-05,
      "loss": 0.9248,
      "step": 15160
    },
    {
      "epoch": 1.7495385325334563,
      "grad_norm": 0.3788556754589081,
      "learning_rate": 8.84477466275544e-05,
      "loss": 0.8654,
      "step": 15165
    },
    {
      "epoch": 1.7501153668666358,
      "grad_norm": 0.3660648465156555,
      "learning_rate": 8.838107916571156e-05,
      "loss": 0.9343,
      "step": 15170
    },
    {
      "epoch": 1.7506922011998154,
      "grad_norm": 0.4170313775539398,
      "learning_rate": 8.831441693821519e-05,
      "loss": 0.948,
      "step": 15175
    },
    {
      "epoch": 1.751269035532995,
      "grad_norm": 0.3601408004760742,
      "learning_rate": 8.824775997509675e-05,
      "loss": 0.9301,
      "step": 15180
    },
    {
      "epoch": 1.7518458698661745,
      "grad_norm": 0.38346150517463684,
      "learning_rate": 8.818110830638528e-05,
      "loss": 0.9182,
      "step": 15185
    },
    {
      "epoch": 1.752422704199354,
      "grad_norm": 0.3932247757911682,
      "learning_rate": 8.811446196210757e-05,
      "loss": 0.9271,
      "step": 15190
    },
    {
      "epoch": 1.7529995385325334,
      "grad_norm": 0.40483179688453674,
      "learning_rate": 8.80478209722879e-05,
      "loss": 0.975,
      "step": 15195
    },
    {
      "epoch": 1.753576372865713,
      "grad_norm": 0.40138062834739685,
      "learning_rate": 8.798118536694813e-05,
      "loss": 0.9006,
      "step": 15200
    },
    {
      "epoch": 1.7541532071988923,
      "grad_norm": 0.4747352600097656,
      "learning_rate": 8.791455517610776e-05,
      "loss": 0.9065,
      "step": 15205
    },
    {
      "epoch": 1.754730041532072,
      "grad_norm": 0.4437057375907898,
      "learning_rate": 8.784793042978384e-05,
      "loss": 0.9266,
      "step": 15210
    },
    {
      "epoch": 1.7553068758652515,
      "grad_norm": 0.4352372884750366,
      "learning_rate": 8.778131115799093e-05,
      "loss": 1.0089,
      "step": 15215
    },
    {
      "epoch": 1.755883710198431,
      "grad_norm": 0.4017319083213806,
      "learning_rate": 8.771469739074107e-05,
      "loss": 0.9516,
      "step": 15220
    },
    {
      "epoch": 1.7564605445316106,
      "grad_norm": 0.4051602780818939,
      "learning_rate": 8.764808915804401e-05,
      "loss": 0.9162,
      "step": 15225
    },
    {
      "epoch": 1.7570373788647902,
      "grad_norm": 0.41565215587615967,
      "learning_rate": 8.758148648990684e-05,
      "loss": 0.9296,
      "step": 15230
    },
    {
      "epoch": 1.7576142131979695,
      "grad_norm": 0.4095606505870819,
      "learning_rate": 8.751488941633416e-05,
      "loss": 0.9492,
      "step": 15235
    },
    {
      "epoch": 1.758191047531149,
      "grad_norm": 0.36888161301612854,
      "learning_rate": 8.744829796732812e-05,
      "loss": 0.9687,
      "step": 15240
    },
    {
      "epoch": 1.7587678818643284,
      "grad_norm": 0.3901865482330322,
      "learning_rate": 8.738171217288831e-05,
      "loss": 0.8896,
      "step": 15245
    },
    {
      "epoch": 1.759344716197508,
      "grad_norm": 0.39652901887893677,
      "learning_rate": 8.73151320630117e-05,
      "loss": 0.9804,
      "step": 15250
    },
    {
      "epoch": 1.7599215505306876,
      "grad_norm": 0.4261581301689148,
      "learning_rate": 8.724855766769282e-05,
      "loss": 0.9291,
      "step": 15255
    },
    {
      "epoch": 1.7604983848638671,
      "grad_norm": 0.4231615960597992,
      "learning_rate": 8.718198901692354e-05,
      "loss": 0.8976,
      "step": 15260
    },
    {
      "epoch": 1.7610752191970467,
      "grad_norm": 0.38978713750839233,
      "learning_rate": 8.711542614069316e-05,
      "loss": 0.9492,
      "step": 15265
    },
    {
      "epoch": 1.7616520535302262,
      "grad_norm": 0.4083541929721832,
      "learning_rate": 8.70488690689884e-05,
      "loss": 0.9148,
      "step": 15270
    },
    {
      "epoch": 1.7622288878634056,
      "grad_norm": 0.3902876377105713,
      "learning_rate": 8.698231783179334e-05,
      "loss": 0.9179,
      "step": 15275
    },
    {
      "epoch": 1.7628057221965852,
      "grad_norm": 0.46301478147506714,
      "learning_rate": 8.691577245908947e-05,
      "loss": 0.9431,
      "step": 15280
    },
    {
      "epoch": 1.7633825565297645,
      "grad_norm": 0.39645519852638245,
      "learning_rate": 8.684923298085555e-05,
      "loss": 0.968,
      "step": 15285
    },
    {
      "epoch": 1.763959390862944,
      "grad_norm": 0.41923776268959045,
      "learning_rate": 8.678269942706777e-05,
      "loss": 0.9646,
      "step": 15290
    },
    {
      "epoch": 1.7645362251961236,
      "grad_norm": 0.36724886298179626,
      "learning_rate": 8.671617182769967e-05,
      "loss": 0.8777,
      "step": 15295
    },
    {
      "epoch": 1.7651130595293032,
      "grad_norm": 0.39986652135849,
      "learning_rate": 8.6649650212722e-05,
      "loss": 0.8661,
      "step": 15300
    },
    {
      "epoch": 1.7656898938624828,
      "grad_norm": 0.3949691653251648,
      "learning_rate": 8.658313461210294e-05,
      "loss": 0.8907,
      "step": 15305
    },
    {
      "epoch": 1.7662667281956623,
      "grad_norm": 0.380906879901886,
      "learning_rate": 8.651662505580785e-05,
      "loss": 0.8585,
      "step": 15310
    },
    {
      "epoch": 1.7668435625288417,
      "grad_norm": 0.4579210877418518,
      "learning_rate": 8.645012157379941e-05,
      "loss": 0.9024,
      "step": 15315
    },
    {
      "epoch": 1.7674203968620212,
      "grad_norm": 0.4028342068195343,
      "learning_rate": 8.63836241960376e-05,
      "loss": 0.8992,
      "step": 15320
    },
    {
      "epoch": 1.7679972311952006,
      "grad_norm": 0.39463692903518677,
      "learning_rate": 8.631713295247961e-05,
      "loss": 0.8753,
      "step": 15325
    },
    {
      "epoch": 1.7685740655283801,
      "grad_norm": 0.397307813167572,
      "learning_rate": 8.625064787307986e-05,
      "loss": 0.9066,
      "step": 15330
    },
    {
      "epoch": 1.7691508998615597,
      "grad_norm": 0.35301893949508667,
      "learning_rate": 8.618416898779e-05,
      "loss": 0.9314,
      "step": 15335
    },
    {
      "epoch": 1.7697277341947393,
      "grad_norm": 0.36975350975990295,
      "learning_rate": 8.611769632655896e-05,
      "loss": 0.92,
      "step": 15340
    },
    {
      "epoch": 1.7703045685279188,
      "grad_norm": 0.3862159550189972,
      "learning_rate": 8.605122991933271e-05,
      "loss": 0.9481,
      "step": 15345
    },
    {
      "epoch": 1.7708814028610984,
      "grad_norm": 0.3985309600830078,
      "learning_rate": 8.598476979605456e-05,
      "loss": 0.9441,
      "step": 15350
    },
    {
      "epoch": 1.7714582371942778,
      "grad_norm": 0.3654569089412689,
      "learning_rate": 8.591831598666491e-05,
      "loss": 0.9268,
      "step": 15355
    },
    {
      "epoch": 1.7720350715274573,
      "grad_norm": 0.4199677109718323,
      "learning_rate": 8.585186852110134e-05,
      "loss": 0.909,
      "step": 15360
    },
    {
      "epoch": 1.7726119058606367,
      "grad_norm": 0.47328895330429077,
      "learning_rate": 8.578542742929852e-05,
      "loss": 0.9117,
      "step": 15365
    },
    {
      "epoch": 1.7731887401938162,
      "grad_norm": 0.4488062560558319,
      "learning_rate": 8.571899274118835e-05,
      "loss": 0.8804,
      "step": 15370
    },
    {
      "epoch": 1.7737655745269958,
      "grad_norm": 0.4089053273200989,
      "learning_rate": 8.565256448669976e-05,
      "loss": 0.9349,
      "step": 15375
    },
    {
      "epoch": 1.7743424088601754,
      "grad_norm": 0.42348453402519226,
      "learning_rate": 8.558614269575878e-05,
      "loss": 0.9348,
      "step": 15380
    },
    {
      "epoch": 1.774919243193355,
      "grad_norm": 0.36207103729248047,
      "learning_rate": 8.551972739828861e-05,
      "loss": 0.9457,
      "step": 15385
    },
    {
      "epoch": 1.7754960775265345,
      "grad_norm": 0.40925994515419006,
      "learning_rate": 8.545331862420944e-05,
      "loss": 0.9087,
      "step": 15390
    },
    {
      "epoch": 1.776072911859714,
      "grad_norm": 0.3908781111240387,
      "learning_rate": 8.538691640343856e-05,
      "loss": 0.8865,
      "step": 15395
    },
    {
      "epoch": 1.7766497461928934,
      "grad_norm": 0.41950979828834534,
      "learning_rate": 8.532052076589025e-05,
      "loss": 0.9217,
      "step": 15400
    },
    {
      "epoch": 1.777226580526073,
      "grad_norm": 0.4013879597187042,
      "learning_rate": 8.525413174147598e-05,
      "loss": 0.9121,
      "step": 15405
    },
    {
      "epoch": 1.7778034148592523,
      "grad_norm": 0.40807363390922546,
      "learning_rate": 8.518774936010406e-05,
      "loss": 0.9025,
      "step": 15410
    },
    {
      "epoch": 1.7783802491924319,
      "grad_norm": 0.41417497396469116,
      "learning_rate": 8.512137365167986e-05,
      "loss": 0.9211,
      "step": 15415
    },
    {
      "epoch": 1.7789570835256114,
      "grad_norm": 0.38941362500190735,
      "learning_rate": 8.505500464610584e-05,
      "loss": 0.9524,
      "step": 15420
    },
    {
      "epoch": 1.779533917858791,
      "grad_norm": 0.3910941183567047,
      "learning_rate": 8.498864237328132e-05,
      "loss": 0.9518,
      "step": 15425
    },
    {
      "epoch": 1.7801107521919706,
      "grad_norm": 0.4338248372077942,
      "learning_rate": 8.492228686310266e-05,
      "loss": 0.9739,
      "step": 15430
    },
    {
      "epoch": 1.7806875865251501,
      "grad_norm": 0.37239012122154236,
      "learning_rate": 8.485593814546307e-05,
      "loss": 0.9104,
      "step": 15435
    },
    {
      "epoch": 1.7812644208583295,
      "grad_norm": 0.41907015442848206,
      "learning_rate": 8.478959625025288e-05,
      "loss": 0.9151,
      "step": 15440
    },
    {
      "epoch": 1.781841255191509,
      "grad_norm": 0.3942820429801941,
      "learning_rate": 8.47232612073592e-05,
      "loss": 0.9246,
      "step": 15445
    },
    {
      "epoch": 1.7824180895246884,
      "grad_norm": 0.3966974914073944,
      "learning_rate": 8.465693304666606e-05,
      "loss": 0.8847,
      "step": 15450
    },
    {
      "epoch": 1.782994923857868,
      "grad_norm": 0.4247607886791229,
      "learning_rate": 8.459061179805449e-05,
      "loss": 0.9161,
      "step": 15455
    },
    {
      "epoch": 1.7835717581910475,
      "grad_norm": 0.3746182918548584,
      "learning_rate": 8.452429749140233e-05,
      "loss": 0.9298,
      "step": 15460
    },
    {
      "epoch": 1.784148592524227,
      "grad_norm": 0.4121777415275574,
      "learning_rate": 8.445799015658427e-05,
      "loss": 0.8803,
      "step": 15465
    },
    {
      "epoch": 1.7847254268574066,
      "grad_norm": 0.46958792209625244,
      "learning_rate": 8.439168982347186e-05,
      "loss": 0.9259,
      "step": 15470
    },
    {
      "epoch": 1.7853022611905862,
      "grad_norm": 0.3878363072872162,
      "learning_rate": 8.432539652193363e-05,
      "loss": 0.9235,
      "step": 15475
    },
    {
      "epoch": 1.7858790955237656,
      "grad_norm": 0.3856402039527893,
      "learning_rate": 8.425911028183479e-05,
      "loss": 0.8902,
      "step": 15480
    },
    {
      "epoch": 1.7864559298569451,
      "grad_norm": 0.48076337575912476,
      "learning_rate": 8.419283113303742e-05,
      "loss": 0.9705,
      "step": 15485
    },
    {
      "epoch": 1.7870327641901245,
      "grad_norm": 0.3873134255409241,
      "learning_rate": 8.412655910540035e-05,
      "loss": 0.9599,
      "step": 15490
    },
    {
      "epoch": 1.787609598523304,
      "grad_norm": 0.3456037640571594,
      "learning_rate": 8.406029422877937e-05,
      "loss": 0.9236,
      "step": 15495
    },
    {
      "epoch": 1.7881864328564836,
      "grad_norm": 0.3917859196662903,
      "learning_rate": 8.399403653302687e-05,
      "loss": 0.8963,
      "step": 15500
    },
    {
      "epoch": 1.7887632671896632,
      "grad_norm": 0.43257713317871094,
      "learning_rate": 8.392778604799203e-05,
      "loss": 0.8965,
      "step": 15505
    },
    {
      "epoch": 1.7893401015228427,
      "grad_norm": 0.4102369546890259,
      "learning_rate": 8.386154280352094e-05,
      "loss": 0.9122,
      "step": 15510
    },
    {
      "epoch": 1.7899169358560223,
      "grad_norm": 0.40290239453315735,
      "learning_rate": 8.379530682945622e-05,
      "loss": 0.9344,
      "step": 15515
    },
    {
      "epoch": 1.7904937701892016,
      "grad_norm": 0.41029873490333557,
      "learning_rate": 8.372907815563733e-05,
      "loss": 0.9048,
      "step": 15520
    },
    {
      "epoch": 1.7910706045223812,
      "grad_norm": 0.4195650815963745,
      "learning_rate": 8.366285681190039e-05,
      "loss": 0.9081,
      "step": 15525
    },
    {
      "epoch": 1.7916474388555605,
      "grad_norm": 0.40104198455810547,
      "learning_rate": 8.359664282807829e-05,
      "loss": 0.9728,
      "step": 15530
    },
    {
      "epoch": 1.7922242731887401,
      "grad_norm": 0.40213140845298767,
      "learning_rate": 8.353043623400054e-05,
      "loss": 0.9563,
      "step": 15535
    },
    {
      "epoch": 1.7928011075219197,
      "grad_norm": 0.4082624912261963,
      "learning_rate": 8.34642370594933e-05,
      "loss": 0.8982,
      "step": 15540
    },
    {
      "epoch": 1.7933779418550992,
      "grad_norm": 0.3675086498260498,
      "learning_rate": 8.339804533437951e-05,
      "loss": 0.9356,
      "step": 15545
    },
    {
      "epoch": 1.7939547761882788,
      "grad_norm": 0.42921894788742065,
      "learning_rate": 8.333186108847862e-05,
      "loss": 0.8969,
      "step": 15550
    },
    {
      "epoch": 1.7945316105214584,
      "grad_norm": 0.3704732656478882,
      "learning_rate": 8.326568435160677e-05,
      "loss": 0.9421,
      "step": 15555
    },
    {
      "epoch": 1.7951084448546377,
      "grad_norm": 0.4511631429195404,
      "learning_rate": 8.319951515357666e-05,
      "loss": 0.9853,
      "step": 15560
    },
    {
      "epoch": 1.7956852791878173,
      "grad_norm": 0.42818620800971985,
      "learning_rate": 8.313335352419773e-05,
      "loss": 0.9735,
      "step": 15565
    },
    {
      "epoch": 1.7962621135209966,
      "grad_norm": 0.3656601011753082,
      "learning_rate": 8.306719949327588e-05,
      "loss": 0.9318,
      "step": 15570
    },
    {
      "epoch": 1.7968389478541762,
      "grad_norm": 0.40848276019096375,
      "learning_rate": 8.300105309061358e-05,
      "loss": 0.9316,
      "step": 15575
    },
    {
      "epoch": 1.7974157821873558,
      "grad_norm": 0.37792646884918213,
      "learning_rate": 8.293491434601003e-05,
      "loss": 0.9263,
      "step": 15580
    },
    {
      "epoch": 1.7979926165205353,
      "grad_norm": 0.3963736891746521,
      "learning_rate": 8.286878328926077e-05,
      "loss": 0.9002,
      "step": 15585
    },
    {
      "epoch": 1.798569450853715,
      "grad_norm": 0.4018113911151886,
      "learning_rate": 8.280265995015802e-05,
      "loss": 0.9209,
      "step": 15590
    },
    {
      "epoch": 1.7991462851868945,
      "grad_norm": 0.3702740967273712,
      "learning_rate": 8.27365443584904e-05,
      "loss": 0.8576,
      "step": 15595
    },
    {
      "epoch": 1.7997231195200738,
      "grad_norm": 0.40294185280799866,
      "learning_rate": 8.26704365440432e-05,
      "loss": 0.9255,
      "step": 15600
    },
    {
      "epoch": 1.8002999538532534,
      "grad_norm": 0.4672613739967346,
      "learning_rate": 8.260433653659809e-05,
      "loss": 0.9112,
      "step": 15605
    },
    {
      "epoch": 1.8008767881864327,
      "grad_norm": 0.3775807321071625,
      "learning_rate": 8.25382443659332e-05,
      "loss": 0.9228,
      "step": 15610
    },
    {
      "epoch": 1.8014536225196123,
      "grad_norm": 0.39842814207077026,
      "learning_rate": 8.247216006182326e-05,
      "loss": 0.9351,
      "step": 15615
    },
    {
      "epoch": 1.8020304568527918,
      "grad_norm": 0.37785279750823975,
      "learning_rate": 8.240608365403934e-05,
      "loss": 0.8772,
      "step": 15620
    },
    {
      "epoch": 1.8026072911859714,
      "grad_norm": 0.4134449362754822,
      "learning_rate": 8.234001517234901e-05,
      "loss": 0.8922,
      "step": 15625
    },
    {
      "epoch": 1.803184125519151,
      "grad_norm": 0.38116446137428284,
      "learning_rate": 8.227395464651618e-05,
      "loss": 0.901,
      "step": 15630
    },
    {
      "epoch": 1.8037609598523305,
      "grad_norm": 0.4059298038482666,
      "learning_rate": 8.220790210630134e-05,
      "loss": 0.9601,
      "step": 15635
    },
    {
      "epoch": 1.8043377941855099,
      "grad_norm": 0.3819584548473358,
      "learning_rate": 8.214185758146126e-05,
      "loss": 0.9207,
      "step": 15640
    },
    {
      "epoch": 1.8049146285186894,
      "grad_norm": 0.4297388195991516,
      "learning_rate": 8.20758211017491e-05,
      "loss": 0.9235,
      "step": 15645
    },
    {
      "epoch": 1.8054914628518688,
      "grad_norm": 0.41157621145248413,
      "learning_rate": 8.20097926969144e-05,
      "loss": 0.9497,
      "step": 15650
    },
    {
      "epoch": 1.8060682971850484,
      "grad_norm": 0.43935003876686096,
      "learning_rate": 8.194377239670317e-05,
      "loss": 0.9521,
      "step": 15655
    },
    {
      "epoch": 1.806645131518228,
      "grad_norm": 0.41185909509658813,
      "learning_rate": 8.187776023085762e-05,
      "loss": 0.9502,
      "step": 15660
    },
    {
      "epoch": 1.8072219658514075,
      "grad_norm": 0.4108864367008209,
      "learning_rate": 8.181175622911635e-05,
      "loss": 0.9256,
      "step": 15665
    },
    {
      "epoch": 1.807798800184587,
      "grad_norm": 0.38821250200271606,
      "learning_rate": 8.174576042121433e-05,
      "loss": 0.9331,
      "step": 15670
    },
    {
      "epoch": 1.8083756345177666,
      "grad_norm": 0.43762829899787903,
      "learning_rate": 8.167977283688282e-05,
      "loss": 0.9446,
      "step": 15675
    },
    {
      "epoch": 1.808952468850946,
      "grad_norm": 0.3885977268218994,
      "learning_rate": 8.16137935058493e-05,
      "loss": 0.9332,
      "step": 15680
    },
    {
      "epoch": 1.8095293031841255,
      "grad_norm": 0.3965728282928467,
      "learning_rate": 8.154782245783756e-05,
      "loss": 0.8907,
      "step": 15685
    },
    {
      "epoch": 1.8101061375173049,
      "grad_norm": 0.4613015651702881,
      "learning_rate": 8.148185972256778e-05,
      "loss": 0.9267,
      "step": 15690
    },
    {
      "epoch": 1.8106829718504844,
      "grad_norm": 0.38467276096343994,
      "learning_rate": 8.141590532975626e-05,
      "loss": 0.9241,
      "step": 15695
    },
    {
      "epoch": 1.811259806183664,
      "grad_norm": 0.3891170620918274,
      "learning_rate": 8.134995930911555e-05,
      "loss": 0.9751,
      "step": 15700
    },
    {
      "epoch": 1.8118366405168436,
      "grad_norm": 0.4085995852947235,
      "learning_rate": 8.128402169035451e-05,
      "loss": 0.9202,
      "step": 15705
    },
    {
      "epoch": 1.8124134748500231,
      "grad_norm": 0.4093764126300812,
      "learning_rate": 8.121809250317815e-05,
      "loss": 0.9358,
      "step": 15710
    },
    {
      "epoch": 1.8129903091832027,
      "grad_norm": 0.434633731842041,
      "learning_rate": 8.115217177728766e-05,
      "loss": 0.9862,
      "step": 15715
    },
    {
      "epoch": 1.813567143516382,
      "grad_norm": 0.3456355333328247,
      "learning_rate": 8.108625954238051e-05,
      "loss": 0.9446,
      "step": 15720
    },
    {
      "epoch": 1.8141439778495616,
      "grad_norm": 0.3649025857448578,
      "learning_rate": 8.102035582815026e-05,
      "loss": 0.9174,
      "step": 15725
    },
    {
      "epoch": 1.8147208121827412,
      "grad_norm": 0.37535566091537476,
      "learning_rate": 8.095446066428666e-05,
      "loss": 0.9541,
      "step": 15730
    },
    {
      "epoch": 1.8152976465159205,
      "grad_norm": 0.42814260721206665,
      "learning_rate": 8.088857408047562e-05,
      "loss": 0.9571,
      "step": 15735
    },
    {
      "epoch": 1.8158744808491,
      "grad_norm": 0.41946104168891907,
      "learning_rate": 8.082269610639919e-05,
      "loss": 0.9451,
      "step": 15740
    },
    {
      "epoch": 1.8164513151822796,
      "grad_norm": 0.4270831346511841,
      "learning_rate": 8.07568267717355e-05,
      "loss": 0.8814,
      "step": 15745
    },
    {
      "epoch": 1.8170281495154592,
      "grad_norm": 0.36242547631263733,
      "learning_rate": 8.06909661061588e-05,
      "loss": 0.9423,
      "step": 15750
    },
    {
      "epoch": 1.8176049838486388,
      "grad_norm": 0.40985921025276184,
      "learning_rate": 8.062511413933948e-05,
      "loss": 0.8786,
      "step": 15755
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 0.36799412965774536,
      "learning_rate": 8.055927090094397e-05,
      "loss": 0.9068,
      "step": 15760
    },
    {
      "epoch": 1.8187586525149977,
      "grad_norm": 0.41538429260253906,
      "learning_rate": 8.049343642063477e-05,
      "loss": 0.9299,
      "step": 15765
    },
    {
      "epoch": 1.8193354868481773,
      "grad_norm": 0.3922020494937897,
      "learning_rate": 8.042761072807045e-05,
      "loss": 0.9473,
      "step": 15770
    },
    {
      "epoch": 1.8199123211813566,
      "grad_norm": 0.40639728307724,
      "learning_rate": 8.036179385290561e-05,
      "loss": 0.8988,
      "step": 15775
    },
    {
      "epoch": 1.8204891555145362,
      "grad_norm": 0.4399595856666565,
      "learning_rate": 8.029598582479088e-05,
      "loss": 0.9281,
      "step": 15780
    },
    {
      "epoch": 1.8210659898477157,
      "grad_norm": 0.37775760889053345,
      "learning_rate": 8.023018667337291e-05,
      "loss": 0.8989,
      "step": 15785
    },
    {
      "epoch": 1.8216428241808953,
      "grad_norm": 0.4193717837333679,
      "learning_rate": 8.016439642829433e-05,
      "loss": 0.9235,
      "step": 15790
    },
    {
      "epoch": 1.8222196585140749,
      "grad_norm": 0.38701748847961426,
      "learning_rate": 8.00986151191938e-05,
      "loss": 0.9148,
      "step": 15795
    },
    {
      "epoch": 1.8227964928472544,
      "grad_norm": 0.40565016865730286,
      "learning_rate": 8.003284277570588e-05,
      "loss": 0.9378,
      "step": 15800
    },
    {
      "epoch": 1.8233733271804338,
      "grad_norm": 0.4248996078968048,
      "learning_rate": 7.99670794274612e-05,
      "loss": 0.9135,
      "step": 15805
    },
    {
      "epoch": 1.8239501615136133,
      "grad_norm": 0.3437690734863281,
      "learning_rate": 7.990132510408625e-05,
      "loss": 0.8809,
      "step": 15810
    },
    {
      "epoch": 1.8245269958467927,
      "grad_norm": 0.4016622304916382,
      "learning_rate": 7.983557983520345e-05,
      "loss": 0.9645,
      "step": 15815
    },
    {
      "epoch": 1.8251038301799722,
      "grad_norm": 0.3806969225406647,
      "learning_rate": 7.976984365043123e-05,
      "loss": 0.8848,
      "step": 15820
    },
    {
      "epoch": 1.8256806645131518,
      "grad_norm": 0.3845742642879486,
      "learning_rate": 7.970411657938381e-05,
      "loss": 0.9128,
      "step": 15825
    },
    {
      "epoch": 1.8262574988463314,
      "grad_norm": 0.40634384751319885,
      "learning_rate": 7.963839865167139e-05,
      "loss": 0.9613,
      "step": 15830
    },
    {
      "epoch": 1.826834333179511,
      "grad_norm": 0.4079045057296753,
      "learning_rate": 7.957268989690007e-05,
      "loss": 0.9268,
      "step": 15835
    },
    {
      "epoch": 1.8274111675126905,
      "grad_norm": 0.3828427493572235,
      "learning_rate": 7.95069903446717e-05,
      "loss": 0.8936,
      "step": 15840
    },
    {
      "epoch": 1.8279880018458698,
      "grad_norm": 0.40603169798851013,
      "learning_rate": 7.944130002458406e-05,
      "loss": 0.926,
      "step": 15845
    },
    {
      "epoch": 1.8285648361790494,
      "grad_norm": 0.41030603647232056,
      "learning_rate": 7.937561896623084e-05,
      "loss": 0.9358,
      "step": 15850
    },
    {
      "epoch": 1.8291416705122288,
      "grad_norm": 0.4036436975002289,
      "learning_rate": 7.930994719920142e-05,
      "loss": 0.9531,
      "step": 15855
    },
    {
      "epoch": 1.8297185048454083,
      "grad_norm": 0.40609830617904663,
      "learning_rate": 7.924428475308106e-05,
      "loss": 0.8767,
      "step": 15860
    },
    {
      "epoch": 1.8302953391785879,
      "grad_norm": 0.371971070766449,
      "learning_rate": 7.917863165745084e-05,
      "loss": 0.8763,
      "step": 15865
    },
    {
      "epoch": 1.8308721735117675,
      "grad_norm": 0.4002048969268799,
      "learning_rate": 7.911298794188761e-05,
      "loss": 0.9454,
      "step": 15870
    },
    {
      "epoch": 1.831449007844947,
      "grad_norm": 0.3957858085632324,
      "learning_rate": 7.904735363596401e-05,
      "loss": 0.9064,
      "step": 15875
    },
    {
      "epoch": 1.8320258421781266,
      "grad_norm": 0.3692780137062073,
      "learning_rate": 7.898172876924833e-05,
      "loss": 0.9281,
      "step": 15880
    },
    {
      "epoch": 1.832602676511306,
      "grad_norm": 0.42157161235809326,
      "learning_rate": 7.891611337130482e-05,
      "loss": 0.919,
      "step": 15885
    },
    {
      "epoch": 1.8331795108444855,
      "grad_norm": 0.40753528475761414,
      "learning_rate": 7.88505074716933e-05,
      "loss": 1.0049,
      "step": 15890
    },
    {
      "epoch": 1.8337563451776648,
      "grad_norm": 0.3804478645324707,
      "learning_rate": 7.878491109996928e-05,
      "loss": 0.9284,
      "step": 15895
    },
    {
      "epoch": 1.8343331795108444,
      "grad_norm": 0.40410223603248596,
      "learning_rate": 7.871932428568418e-05,
      "loss": 0.946,
      "step": 15900
    },
    {
      "epoch": 1.834910013844024,
      "grad_norm": 0.42021000385284424,
      "learning_rate": 7.865374705838493e-05,
      "loss": 0.9146,
      "step": 15905
    },
    {
      "epoch": 1.8354868481772035,
      "grad_norm": 0.4008379876613617,
      "learning_rate": 7.85881794476142e-05,
      "loss": 0.9104,
      "step": 15910
    },
    {
      "epoch": 1.836063682510383,
      "grad_norm": 0.41672542691230774,
      "learning_rate": 7.852262148291028e-05,
      "loss": 0.9564,
      "step": 15915
    },
    {
      "epoch": 1.8366405168435627,
      "grad_norm": 0.39630943536758423,
      "learning_rate": 7.845707319380723e-05,
      "loss": 0.9127,
      "step": 15920
    },
    {
      "epoch": 1.837217351176742,
      "grad_norm": 0.4240933358669281,
      "learning_rate": 7.839153460983468e-05,
      "loss": 0.9728,
      "step": 15925
    },
    {
      "epoch": 1.8377941855099216,
      "grad_norm": 0.4285568594932556,
      "learning_rate": 7.832600576051779e-05,
      "loss": 0.8806,
      "step": 15930
    },
    {
      "epoch": 1.838371019843101,
      "grad_norm": 0.4240418076515198,
      "learning_rate": 7.826048667537757e-05,
      "loss": 0.938,
      "step": 15935
    },
    {
      "epoch": 1.8389478541762805,
      "grad_norm": 0.3835122585296631,
      "learning_rate": 7.819497738393044e-05,
      "loss": 0.9463,
      "step": 15940
    },
    {
      "epoch": 1.83952468850946,
      "grad_norm": 0.4309677183628082,
      "learning_rate": 7.812947791568845e-05,
      "loss": 0.9371,
      "step": 15945
    },
    {
      "epoch": 1.8401015228426396,
      "grad_norm": 0.3694637417793274,
      "learning_rate": 7.806398830015921e-05,
      "loss": 0.9423,
      "step": 15950
    },
    {
      "epoch": 1.8406783571758192,
      "grad_norm": 0.3995886743068695,
      "learning_rate": 7.7998508566846e-05,
      "loss": 0.9003,
      "step": 15955
    },
    {
      "epoch": 1.8412551915089987,
      "grad_norm": 0.43725675344467163,
      "learning_rate": 7.793303874524752e-05,
      "loss": 0.8792,
      "step": 15960
    },
    {
      "epoch": 1.841832025842178,
      "grad_norm": 0.4083716571331024,
      "learning_rate": 7.786757886485806e-05,
      "loss": 0.9519,
      "step": 15965
    },
    {
      "epoch": 1.8424088601753577,
      "grad_norm": 0.3987749218940735,
      "learning_rate": 7.780212895516737e-05,
      "loss": 0.901,
      "step": 15970
    },
    {
      "epoch": 1.842985694508537,
      "grad_norm": 0.41949161887168884,
      "learning_rate": 7.773668904566085e-05,
      "loss": 0.8809,
      "step": 15975
    },
    {
      "epoch": 1.8435625288417166,
      "grad_norm": 0.4237136244773865,
      "learning_rate": 7.767125916581928e-05,
      "loss": 0.9537,
      "step": 15980
    },
    {
      "epoch": 1.8441393631748961,
      "grad_norm": 0.3790094256401062,
      "learning_rate": 7.760583934511887e-05,
      "loss": 0.9282,
      "step": 15985
    },
    {
      "epoch": 1.8447161975080757,
      "grad_norm": 0.36996859312057495,
      "learning_rate": 7.75404296130315e-05,
      "loss": 0.9472,
      "step": 15990
    },
    {
      "epoch": 1.8452930318412553,
      "grad_norm": 0.40153929591178894,
      "learning_rate": 7.747502999902433e-05,
      "loss": 0.9103,
      "step": 15995
    },
    {
      "epoch": 1.8458698661744348,
      "grad_norm": 0.3847443461418152,
      "learning_rate": 7.740964053255999e-05,
      "loss": 0.9051,
      "step": 16000
    },
    {
      "epoch": 1.8464467005076142,
      "grad_norm": 0.3627074956893921,
      "learning_rate": 7.734426124309656e-05,
      "loss": 0.9618,
      "step": 16005
    },
    {
      "epoch": 1.8470235348407937,
      "grad_norm": 0.42722293734550476,
      "learning_rate": 7.727889216008757e-05,
      "loss": 0.9784,
      "step": 16010
    },
    {
      "epoch": 1.847600369173973,
      "grad_norm": 0.4196968376636505,
      "learning_rate": 7.721353331298192e-05,
      "loss": 0.9048,
      "step": 16015
    },
    {
      "epoch": 1.8481772035071526,
      "grad_norm": 0.38322052359580994,
      "learning_rate": 7.714818473122385e-05,
      "loss": 0.9251,
      "step": 16020
    },
    {
      "epoch": 1.8487540378403322,
      "grad_norm": 0.4124515652656555,
      "learning_rate": 7.708284644425309e-05,
      "loss": 0.9276,
      "step": 16025
    },
    {
      "epoch": 1.8493308721735118,
      "grad_norm": 0.43157243728637695,
      "learning_rate": 7.701751848150462e-05,
      "loss": 0.9677,
      "step": 16030
    },
    {
      "epoch": 1.8499077065066913,
      "grad_norm": 0.3826289474964142,
      "learning_rate": 7.695220087240885e-05,
      "loss": 0.9283,
      "step": 16035
    },
    {
      "epoch": 1.850484540839871,
      "grad_norm": 0.39209648966789246,
      "learning_rate": 7.68868936463914e-05,
      "loss": 0.9227,
      "step": 16040
    },
    {
      "epoch": 1.8510613751730502,
      "grad_norm": 0.42012834548950195,
      "learning_rate": 7.682159683287345e-05,
      "loss": 0.9217,
      "step": 16045
    },
    {
      "epoch": 1.8516382095062298,
      "grad_norm": 0.38320961594581604,
      "learning_rate": 7.675631046127123e-05,
      "loss": 0.9327,
      "step": 16050
    },
    {
      "epoch": 1.8522150438394092,
      "grad_norm": 0.3654974400997162,
      "learning_rate": 7.669103456099637e-05,
      "loss": 0.9499,
      "step": 16055
    },
    {
      "epoch": 1.8527918781725887,
      "grad_norm": 0.3563767671585083,
      "learning_rate": 7.662576916145587e-05,
      "loss": 0.9016,
      "step": 16060
    },
    {
      "epoch": 1.8533687125057683,
      "grad_norm": 0.38693365454673767,
      "learning_rate": 7.656051429205188e-05,
      "loss": 0.9531,
      "step": 16065
    },
    {
      "epoch": 1.8539455468389479,
      "grad_norm": 0.4085317552089691,
      "learning_rate": 7.649526998218185e-05,
      "loss": 0.9219,
      "step": 16070
    },
    {
      "epoch": 1.8545223811721274,
      "grad_norm": 0.3921666741371155,
      "learning_rate": 7.643003626123841e-05,
      "loss": 0.913,
      "step": 16075
    },
    {
      "epoch": 1.855099215505307,
      "grad_norm": 0.382941335439682,
      "learning_rate": 7.636481315860958e-05,
      "loss": 0.9308,
      "step": 16080
    },
    {
      "epoch": 1.8556760498384866,
      "grad_norm": 0.3899093270301819,
      "learning_rate": 7.629960070367846e-05,
      "loss": 0.9103,
      "step": 16085
    },
    {
      "epoch": 1.856252884171666,
      "grad_norm": 0.5074906349182129,
      "learning_rate": 7.623439892582331e-05,
      "loss": 0.9146,
      "step": 16090
    },
    {
      "epoch": 1.8568297185048455,
      "grad_norm": 0.3871719241142273,
      "learning_rate": 7.616920785441777e-05,
      "loss": 0.9272,
      "step": 16095
    },
    {
      "epoch": 1.8574065528380248,
      "grad_norm": 0.4301646947860718,
      "learning_rate": 7.61040275188305e-05,
      "loss": 0.9237,
      "step": 16100
    },
    {
      "epoch": 1.8579833871712044,
      "grad_norm": 0.386033296585083,
      "learning_rate": 7.603885794842536e-05,
      "loss": 0.869,
      "step": 16105
    },
    {
      "epoch": 1.858560221504384,
      "grad_norm": 0.42225587368011475,
      "learning_rate": 7.597369917256132e-05,
      "loss": 0.8915,
      "step": 16110
    },
    {
      "epoch": 1.8591370558375635,
      "grad_norm": 0.40398165583610535,
      "learning_rate": 7.590855122059265e-05,
      "loss": 0.9254,
      "step": 16115
    },
    {
      "epoch": 1.859713890170743,
      "grad_norm": 0.4442918002605438,
      "learning_rate": 7.584341412186855e-05,
      "loss": 0.8927,
      "step": 16120
    },
    {
      "epoch": 1.8602907245039226,
      "grad_norm": 0.4120365083217621,
      "learning_rate": 7.577828790573345e-05,
      "loss": 0.9186,
      "step": 16125
    },
    {
      "epoch": 1.860867558837102,
      "grad_norm": 0.39227908849716187,
      "learning_rate": 7.571317260152675e-05,
      "loss": 0.9066,
      "step": 16130
    },
    {
      "epoch": 1.8614443931702815,
      "grad_norm": 0.44152939319610596,
      "learning_rate": 7.564806823858314e-05,
      "loss": 0.8705,
      "step": 16135
    },
    {
      "epoch": 1.8620212275034609,
      "grad_norm": 0.4071158468723297,
      "learning_rate": 7.55829748462322e-05,
      "loss": 0.9611,
      "step": 16140
    },
    {
      "epoch": 1.8625980618366405,
      "grad_norm": 0.4083629548549652,
      "learning_rate": 7.55178924537986e-05,
      "loss": 0.9647,
      "step": 16145
    },
    {
      "epoch": 1.86317489616982,
      "grad_norm": 0.4229707717895508,
      "learning_rate": 7.545282109060215e-05,
      "loss": 0.9201,
      "step": 16150
    },
    {
      "epoch": 1.8637517305029996,
      "grad_norm": 0.38979142904281616,
      "learning_rate": 7.538776078595762e-05,
      "loss": 0.9889,
      "step": 16155
    },
    {
      "epoch": 1.8643285648361791,
      "grad_norm": 0.4165305495262146,
      "learning_rate": 7.532271156917478e-05,
      "loss": 0.8899,
      "step": 16160
    },
    {
      "epoch": 1.8649053991693587,
      "grad_norm": 0.39025261998176575,
      "learning_rate": 7.525767346955837e-05,
      "loss": 0.9509,
      "step": 16165
    },
    {
      "epoch": 1.865482233502538,
      "grad_norm": 0.3888443112373352,
      "learning_rate": 7.519264651640829e-05,
      "loss": 0.9175,
      "step": 16170
    },
    {
      "epoch": 1.8660590678357176,
      "grad_norm": 0.4504176676273346,
      "learning_rate": 7.512763073901927e-05,
      "loss": 0.9471,
      "step": 16175
    },
    {
      "epoch": 1.866635902168897,
      "grad_norm": 0.41925010085105896,
      "learning_rate": 7.5062626166681e-05,
      "loss": 0.9074,
      "step": 16180
    },
    {
      "epoch": 1.8672127365020765,
      "grad_norm": 0.40411990880966187,
      "learning_rate": 7.499763282867823e-05,
      "loss": 0.9075,
      "step": 16185
    },
    {
      "epoch": 1.867789570835256,
      "grad_norm": 0.3905486464500427,
      "learning_rate": 7.493265075429056e-05,
      "loss": 0.9207,
      "step": 16190
    },
    {
      "epoch": 1.8683664051684357,
      "grad_norm": 0.35199257731437683,
      "learning_rate": 7.486767997279251e-05,
      "loss": 0.8817,
      "step": 16195
    },
    {
      "epoch": 1.8689432395016152,
      "grad_norm": 0.42351698875427246,
      "learning_rate": 7.480272051345358e-05,
      "loss": 0.9764,
      "step": 16200
    },
    {
      "epoch": 1.8695200738347948,
      "grad_norm": 0.413447767496109,
      "learning_rate": 7.473777240553814e-05,
      "loss": 0.9348,
      "step": 16205
    },
    {
      "epoch": 1.8700969081679741,
      "grad_norm": 0.42107057571411133,
      "learning_rate": 7.467283567830542e-05,
      "loss": 0.9551,
      "step": 16210
    },
    {
      "epoch": 1.8706737425011537,
      "grad_norm": 0.41672512888908386,
      "learning_rate": 7.460791036100952e-05,
      "loss": 0.9127,
      "step": 16215
    },
    {
      "epoch": 1.871250576834333,
      "grad_norm": 0.43511131405830383,
      "learning_rate": 7.454299648289946e-05,
      "loss": 0.9656,
      "step": 16220
    },
    {
      "epoch": 1.8718274111675126,
      "grad_norm": 0.37112078070640564,
      "learning_rate": 7.447809407321909e-05,
      "loss": 0.9245,
      "step": 16225
    },
    {
      "epoch": 1.8724042455006922,
      "grad_norm": 0.4278329908847809,
      "learning_rate": 7.4413203161207e-05,
      "loss": 0.9372,
      "step": 16230
    },
    {
      "epoch": 1.8729810798338717,
      "grad_norm": 0.4026067852973938,
      "learning_rate": 7.434832377609671e-05,
      "loss": 0.9019,
      "step": 16235
    },
    {
      "epoch": 1.8735579141670513,
      "grad_norm": 0.43685853481292725,
      "learning_rate": 7.428345594711652e-05,
      "loss": 0.9051,
      "step": 16240
    },
    {
      "epoch": 1.8741347485002309,
      "grad_norm": 0.4294734001159668,
      "learning_rate": 7.421859970348949e-05,
      "loss": 0.9445,
      "step": 16245
    },
    {
      "epoch": 1.8747115828334102,
      "grad_norm": 0.42335641384124756,
      "learning_rate": 7.41537550744335e-05,
      "loss": 0.9338,
      "step": 16250
    },
    {
      "epoch": 1.8752884171665898,
      "grad_norm": 0.3663158714771271,
      "learning_rate": 7.408892208916118e-05,
      "loss": 0.8959,
      "step": 16255
    },
    {
      "epoch": 1.8758652514997691,
      "grad_norm": 0.39420464634895325,
      "learning_rate": 7.402410077687993e-05,
      "loss": 0.9231,
      "step": 16260
    },
    {
      "epoch": 1.8764420858329487,
      "grad_norm": 0.3816054165363312,
      "learning_rate": 7.395929116679185e-05,
      "loss": 0.9217,
      "step": 16265
    },
    {
      "epoch": 1.8770189201661283,
      "grad_norm": 0.4024397134780884,
      "learning_rate": 7.38944932880938e-05,
      "loss": 0.9171,
      "step": 16270
    },
    {
      "epoch": 1.8775957544993078,
      "grad_norm": 0.37566685676574707,
      "learning_rate": 7.382970716997736e-05,
      "loss": 0.9604,
      "step": 16275
    },
    {
      "epoch": 1.8781725888324874,
      "grad_norm": 0.3700317144393921,
      "learning_rate": 7.37649328416288e-05,
      "loss": 0.9155,
      "step": 16280
    },
    {
      "epoch": 1.878749423165667,
      "grad_norm": 0.40654414892196655,
      "learning_rate": 7.370017033222912e-05,
      "loss": 0.9055,
      "step": 16285
    },
    {
      "epoch": 1.8793262574988463,
      "grad_norm": 0.3641127943992615,
      "learning_rate": 7.363541967095387e-05,
      "loss": 0.912,
      "step": 16290
    },
    {
      "epoch": 1.8799030918320259,
      "grad_norm": 0.4290393888950348,
      "learning_rate": 7.357068088697339e-05,
      "loss": 0.9601,
      "step": 16295
    },
    {
      "epoch": 1.8804799261652052,
      "grad_norm": 0.4048447012901306,
      "learning_rate": 7.350595400945268e-05,
      "loss": 0.9298,
      "step": 16300
    },
    {
      "epoch": 1.8810567604983848,
      "grad_norm": 0.37648749351501465,
      "learning_rate": 7.344123906755124e-05,
      "loss": 0.9371,
      "step": 16305
    },
    {
      "epoch": 1.8816335948315643,
      "grad_norm": 0.3663206994533539,
      "learning_rate": 7.337653609042332e-05,
      "loss": 0.8938,
      "step": 16310
    },
    {
      "epoch": 1.882210429164744,
      "grad_norm": 0.4044005274772644,
      "learning_rate": 7.331184510721776e-05,
      "loss": 0.9356,
      "step": 16315
    },
    {
      "epoch": 1.8827872634979235,
      "grad_norm": 0.37954995036125183,
      "learning_rate": 7.324716614707793e-05,
      "loss": 0.9178,
      "step": 16320
    },
    {
      "epoch": 1.883364097831103,
      "grad_norm": 0.3999575972557068,
      "learning_rate": 7.318249923914184e-05,
      "loss": 0.9632,
      "step": 16325
    },
    {
      "epoch": 1.8839409321642824,
      "grad_norm": 0.3797055184841156,
      "learning_rate": 7.311784441254207e-05,
      "loss": 0.9277,
      "step": 16330
    },
    {
      "epoch": 1.884517766497462,
      "grad_norm": 0.4123266935348511,
      "learning_rate": 7.305320169640575e-05,
      "loss": 0.9324,
      "step": 16335
    },
    {
      "epoch": 1.8850946008306413,
      "grad_norm": 0.38933947682380676,
      "learning_rate": 7.29885711198545e-05,
      "loss": 0.9254,
      "step": 16340
    },
    {
      "epoch": 1.8856714351638209,
      "grad_norm": 0.3938358724117279,
      "learning_rate": 7.292395271200459e-05,
      "loss": 0.9264,
      "step": 16345
    },
    {
      "epoch": 1.8862482694970004,
      "grad_norm": 0.49041980504989624,
      "learning_rate": 7.285934650196672e-05,
      "loss": 0.947,
      "step": 16350
    },
    {
      "epoch": 1.88682510383018,
      "grad_norm": 0.37116193771362305,
      "learning_rate": 7.279475251884609e-05,
      "loss": 0.8855,
      "step": 16355
    },
    {
      "epoch": 1.8874019381633595,
      "grad_norm": 0.3848974406719208,
      "learning_rate": 7.273017079174239e-05,
      "loss": 0.9428,
      "step": 16360
    },
    {
      "epoch": 1.8879787724965391,
      "grad_norm": 0.4379478394985199,
      "learning_rate": 7.266560134974989e-05,
      "loss": 0.9608,
      "step": 16365
    },
    {
      "epoch": 1.8885556068297185,
      "grad_norm": 0.404349148273468,
      "learning_rate": 7.260104422195721e-05,
      "loss": 0.9392,
      "step": 16370
    },
    {
      "epoch": 1.889132441162898,
      "grad_norm": 0.4757440984249115,
      "learning_rate": 7.253649943744742e-05,
      "loss": 0.9663,
      "step": 16375
    },
    {
      "epoch": 1.8897092754960774,
      "grad_norm": 0.4406890571117401,
      "learning_rate": 7.247196702529815e-05,
      "loss": 0.9497,
      "step": 16380
    },
    {
      "epoch": 1.890286109829257,
      "grad_norm": 0.3876641094684601,
      "learning_rate": 7.240744701458134e-05,
      "loss": 0.9348,
      "step": 16385
    },
    {
      "epoch": 1.8908629441624365,
      "grad_norm": 0.4099477231502533,
      "learning_rate": 7.234293943436338e-05,
      "loss": 0.9521,
      "step": 16390
    },
    {
      "epoch": 1.891439778495616,
      "grad_norm": 0.37708956003189087,
      "learning_rate": 7.227844431370502e-05,
      "loss": 0.9045,
      "step": 16395
    },
    {
      "epoch": 1.8920166128287956,
      "grad_norm": 0.37302008271217346,
      "learning_rate": 7.221396168166152e-05,
      "loss": 0.9244,
      "step": 16400
    },
    {
      "epoch": 1.8925934471619752,
      "grad_norm": 0.37601548433303833,
      "learning_rate": 7.214949156728239e-05,
      "loss": 0.9299,
      "step": 16405
    },
    {
      "epoch": 1.8931702814951545,
      "grad_norm": 0.4314630329608917,
      "learning_rate": 7.208503399961149e-05,
      "loss": 0.9369,
      "step": 16410
    },
    {
      "epoch": 1.893747115828334,
      "grad_norm": 0.3835648000240326,
      "learning_rate": 7.202058900768718e-05,
      "loss": 0.9245,
      "step": 16415
    },
    {
      "epoch": 1.8943239501615134,
      "grad_norm": 0.39860522747039795,
      "learning_rate": 7.1956156620542e-05,
      "loss": 0.898,
      "step": 16420
    },
    {
      "epoch": 1.894900784494693,
      "grad_norm": 0.3779727518558502,
      "learning_rate": 7.189173686720287e-05,
      "loss": 0.9415,
      "step": 16425
    },
    {
      "epoch": 1.8954776188278726,
      "grad_norm": 0.38131847977638245,
      "learning_rate": 7.182732977669098e-05,
      "loss": 0.9132,
      "step": 16430
    },
    {
      "epoch": 1.8960544531610521,
      "grad_norm": 0.45847800374031067,
      "learning_rate": 7.176293537802193e-05,
      "loss": 0.9214,
      "step": 16435
    },
    {
      "epoch": 1.8966312874942317,
      "grad_norm": 0.40811875462532043,
      "learning_rate": 7.169855370020547e-05,
      "loss": 0.925,
      "step": 16440
    },
    {
      "epoch": 1.8972081218274113,
      "grad_norm": 0.4316577613353729,
      "learning_rate": 7.163418477224567e-05,
      "loss": 0.9443,
      "step": 16445
    },
    {
      "epoch": 1.8977849561605908,
      "grad_norm": 0.42547932267189026,
      "learning_rate": 7.156982862314084e-05,
      "loss": 0.8999,
      "step": 16450
    },
    {
      "epoch": 1.8983617904937702,
      "grad_norm": 0.39770105481147766,
      "learning_rate": 7.15054852818836e-05,
      "loss": 0.8858,
      "step": 16455
    },
    {
      "epoch": 1.8989386248269498,
      "grad_norm": 0.3987480401992798,
      "learning_rate": 7.144115477746074e-05,
      "loss": 0.9079,
      "step": 16460
    },
    {
      "epoch": 1.899515459160129,
      "grad_norm": 0.45633745193481445,
      "learning_rate": 7.137683713885321e-05,
      "loss": 0.9337,
      "step": 16465
    },
    {
      "epoch": 1.9000922934933087,
      "grad_norm": 0.4088650941848755,
      "learning_rate": 7.131253239503635e-05,
      "loss": 0.9509,
      "step": 16470
    },
    {
      "epoch": 1.9006691278264882,
      "grad_norm": 0.4616527855396271,
      "learning_rate": 7.124824057497949e-05,
      "loss": 0.9329,
      "step": 16475
    },
    {
      "epoch": 1.9012459621596678,
      "grad_norm": 0.36398735642433167,
      "learning_rate": 7.118396170764623e-05,
      "loss": 0.9264,
      "step": 16480
    },
    {
      "epoch": 1.9018227964928474,
      "grad_norm": 0.4025936424732208,
      "learning_rate": 7.111969582199431e-05,
      "loss": 0.9527,
      "step": 16485
    },
    {
      "epoch": 1.902399630826027,
      "grad_norm": 0.3752637803554535,
      "learning_rate": 7.105544294697569e-05,
      "loss": 0.986,
      "step": 16490
    },
    {
      "epoch": 1.9029764651592063,
      "grad_norm": 0.4268634617328644,
      "learning_rate": 7.099120311153639e-05,
      "loss": 0.9546,
      "step": 16495
    },
    {
      "epoch": 1.9035532994923858,
      "grad_norm": 0.4121106266975403,
      "learning_rate": 7.092697634461654e-05,
      "loss": 0.9851,
      "step": 16500
    },
    {
      "epoch": 1.9041301338255652,
      "grad_norm": 0.4355171024799347,
      "learning_rate": 7.086276267515048e-05,
      "loss": 0.9322,
      "step": 16505
    },
    {
      "epoch": 1.9047069681587447,
      "grad_norm": 0.38573938608169556,
      "learning_rate": 7.07985621320666e-05,
      "loss": 0.923,
      "step": 16510
    },
    {
      "epoch": 1.9052838024919243,
      "grad_norm": 0.4271920919418335,
      "learning_rate": 7.073437474428732e-05,
      "loss": 0.9642,
      "step": 16515
    },
    {
      "epoch": 1.9058606368251039,
      "grad_norm": 0.40393730998039246,
      "learning_rate": 7.067020054072916e-05,
      "loss": 0.9754,
      "step": 16520
    },
    {
      "epoch": 1.9064374711582834,
      "grad_norm": 0.3705683648586273,
      "learning_rate": 7.060603955030283e-05,
      "loss": 0.8778,
      "step": 16525
    },
    {
      "epoch": 1.907014305491463,
      "grad_norm": 0.401284396648407,
      "learning_rate": 7.05418918019129e-05,
      "loss": 0.8879,
      "step": 16530
    },
    {
      "epoch": 1.9075911398246423,
      "grad_norm": 0.3965059518814087,
      "learning_rate": 7.047775732445805e-05,
      "loss": 0.9206,
      "step": 16535
    },
    {
      "epoch": 1.908167974157822,
      "grad_norm": 0.406692236661911,
      "learning_rate": 7.041363614683106e-05,
      "loss": 0.9312,
      "step": 16540
    },
    {
      "epoch": 1.9087448084910013,
      "grad_norm": 0.4251668453216553,
      "learning_rate": 7.034952829791858e-05,
      "loss": 0.9195,
      "step": 16545
    },
    {
      "epoch": 1.9093216428241808,
      "grad_norm": 0.4059813916683197,
      "learning_rate": 7.028543380660135e-05,
      "loss": 1.0009,
      "step": 16550
    },
    {
      "epoch": 1.9098984771573604,
      "grad_norm": 0.37833818793296814,
      "learning_rate": 7.022135270175401e-05,
      "loss": 0.9313,
      "step": 16555
    },
    {
      "epoch": 1.91047531149054,
      "grad_norm": 0.3625733554363251,
      "learning_rate": 7.01572850122453e-05,
      "loss": 0.9346,
      "step": 16560
    },
    {
      "epoch": 1.9110521458237195,
      "grad_norm": 0.38310348987579346,
      "learning_rate": 7.00932307669378e-05,
      "loss": 0.9007,
      "step": 16565
    },
    {
      "epoch": 1.911628980156899,
      "grad_norm": 0.39671164751052856,
      "learning_rate": 7.002918999468804e-05,
      "loss": 0.9106,
      "step": 16570
    },
    {
      "epoch": 1.9122058144900784,
      "grad_norm": 0.3802867531776428,
      "learning_rate": 6.996516272434658e-05,
      "loss": 0.8741,
      "step": 16575
    },
    {
      "epoch": 1.912782648823258,
      "grad_norm": 0.44518622756004333,
      "learning_rate": 6.990114898475782e-05,
      "loss": 0.9304,
      "step": 16580
    },
    {
      "epoch": 1.9133594831564373,
      "grad_norm": 0.3733896017074585,
      "learning_rate": 6.983714880476002e-05,
      "loss": 0.9097,
      "step": 16585
    },
    {
      "epoch": 1.913936317489617,
      "grad_norm": 0.3972858786582947,
      "learning_rate": 6.97731622131854e-05,
      "loss": 0.9595,
      "step": 16590
    },
    {
      "epoch": 1.9145131518227965,
      "grad_norm": 0.38236817717552185,
      "learning_rate": 6.97091892388601e-05,
      "loss": 0.9005,
      "step": 16595
    },
    {
      "epoch": 1.915089986155976,
      "grad_norm": 0.36411792039871216,
      "learning_rate": 6.964522991060404e-05,
      "loss": 0.9103,
      "step": 16600
    },
    {
      "epoch": 1.9156668204891556,
      "grad_norm": 0.43032193183898926,
      "learning_rate": 6.9581284257231e-05,
      "loss": 0.9137,
      "step": 16605
    },
    {
      "epoch": 1.9162436548223352,
      "grad_norm": 0.38933807611465454,
      "learning_rate": 6.951735230754859e-05,
      "loss": 0.8861,
      "step": 16610
    },
    {
      "epoch": 1.9168204891555145,
      "grad_norm": 0.40578311681747437,
      "learning_rate": 6.945343409035839e-05,
      "loss": 0.9433,
      "step": 16615
    },
    {
      "epoch": 1.917397323488694,
      "grad_norm": 0.4237358868122101,
      "learning_rate": 6.938952963445559e-05,
      "loss": 0.8664,
      "step": 16620
    },
    {
      "epoch": 1.9179741578218734,
      "grad_norm": 0.3827938437461853,
      "learning_rate": 6.932563896862926e-05,
      "loss": 0.8932,
      "step": 16625
    },
    {
      "epoch": 1.918550992155053,
      "grad_norm": 0.4397215247154236,
      "learning_rate": 6.926176212166234e-05,
      "loss": 0.962,
      "step": 16630
    },
    {
      "epoch": 1.9191278264882325,
      "grad_norm": 0.4293370246887207,
      "learning_rate": 6.919789912233146e-05,
      "loss": 0.935,
      "step": 16635
    },
    {
      "epoch": 1.9197046608214121,
      "grad_norm": 0.4171515703201294,
      "learning_rate": 6.913404999940698e-05,
      "loss": 0.9105,
      "step": 16640
    },
    {
      "epoch": 1.9202814951545917,
      "grad_norm": 0.4079015552997589,
      "learning_rate": 6.907021478165305e-05,
      "loss": 0.9212,
      "step": 16645
    },
    {
      "epoch": 1.9208583294877712,
      "grad_norm": 0.46125656366348267,
      "learning_rate": 6.900639349782762e-05,
      "loss": 0.892,
      "step": 16650
    },
    {
      "epoch": 1.9214351638209506,
      "grad_norm": 0.3586021959781647,
      "learning_rate": 6.894258617668229e-05,
      "loss": 0.955,
      "step": 16655
    },
    {
      "epoch": 1.9220119981541302,
      "grad_norm": 0.3966992497444153,
      "learning_rate": 6.887879284696232e-05,
      "loss": 0.9363,
      "step": 16660
    },
    {
      "epoch": 1.9225888324873095,
      "grad_norm": 0.4333534836769104,
      "learning_rate": 6.88150135374068e-05,
      "loss": 0.928,
      "step": 16665
    },
    {
      "epoch": 1.923165666820489,
      "grad_norm": 0.41055798530578613,
      "learning_rate": 6.875124827674841e-05,
      "loss": 0.9279,
      "step": 16670
    },
    {
      "epoch": 1.9237425011536686,
      "grad_norm": 0.4172268211841583,
      "learning_rate": 6.868749709371354e-05,
      "loss": 0.941,
      "step": 16675
    },
    {
      "epoch": 1.9243193354868482,
      "grad_norm": 0.42212820053100586,
      "learning_rate": 6.862376001702213e-05,
      "loss": 0.9339,
      "step": 16680
    },
    {
      "epoch": 1.9248961698200278,
      "grad_norm": 0.41261783242225647,
      "learning_rate": 6.8560037075388e-05,
      "loss": 0.9533,
      "step": 16685
    },
    {
      "epoch": 1.9254730041532073,
      "grad_norm": 0.4174567759037018,
      "learning_rate": 6.849632829751838e-05,
      "loss": 0.93,
      "step": 16690
    },
    {
      "epoch": 1.9260498384863867,
      "grad_norm": 0.38516369462013245,
      "learning_rate": 6.843263371211414e-05,
      "loss": 0.937,
      "step": 16695
    },
    {
      "epoch": 1.9266266728195662,
      "grad_norm": 0.420776903629303,
      "learning_rate": 6.836895334786996e-05,
      "loss": 0.9415,
      "step": 16700
    },
    {
      "epoch": 1.9272035071527456,
      "grad_norm": 0.38259801268577576,
      "learning_rate": 6.830528723347387e-05,
      "loss": 0.953,
      "step": 16705
    },
    {
      "epoch": 1.9277803414859251,
      "grad_norm": 0.38586026430130005,
      "learning_rate": 6.824163539760759e-05,
      "loss": 0.9387,
      "step": 16710
    },
    {
      "epoch": 1.9283571758191047,
      "grad_norm": 0.39584457874298096,
      "learning_rate": 6.81779978689464e-05,
      "loss": 0.9517,
      "step": 16715
    },
    {
      "epoch": 1.9289340101522843,
      "grad_norm": 0.39963656663894653,
      "learning_rate": 6.811437467615915e-05,
      "loss": 0.9289,
      "step": 16720
    },
    {
      "epoch": 1.9295108444854638,
      "grad_norm": 0.4218963384628296,
      "learning_rate": 6.805076584790818e-05,
      "loss": 0.8897,
      "step": 16725
    },
    {
      "epoch": 1.9300876788186434,
      "grad_norm": 0.3993114233016968,
      "learning_rate": 6.798717141284942e-05,
      "loss": 0.9396,
      "step": 16730
    },
    {
      "epoch": 1.9306645131518227,
      "grad_norm": 0.39297980070114136,
      "learning_rate": 6.792359139963228e-05,
      "loss": 0.9502,
      "step": 16735
    },
    {
      "epoch": 1.9312413474850023,
      "grad_norm": 0.353900671005249,
      "learning_rate": 6.786002583689968e-05,
      "loss": 0.9808,
      "step": 16740
    },
    {
      "epoch": 1.9318181818181817,
      "grad_norm": 0.37047556042671204,
      "learning_rate": 6.7796474753288e-05,
      "loss": 0.8961,
      "step": 16745
    },
    {
      "epoch": 1.9323950161513612,
      "grad_norm": 0.39696457982063293,
      "learning_rate": 6.773293817742717e-05,
      "loss": 0.8609,
      "step": 16750
    },
    {
      "epoch": 1.9329718504845408,
      "grad_norm": 0.3835957646369934,
      "learning_rate": 6.766941613794053e-05,
      "loss": 0.948,
      "step": 16755
    },
    {
      "epoch": 1.9335486848177204,
      "grad_norm": 0.4246508479118347,
      "learning_rate": 6.760590866344486e-05,
      "loss": 0.9271,
      "step": 16760
    },
    {
      "epoch": 1.9341255191509,
      "grad_norm": 0.35733363032341003,
      "learning_rate": 6.754241578255042e-05,
      "loss": 0.8943,
      "step": 16765
    },
    {
      "epoch": 1.9347023534840795,
      "grad_norm": 0.38427141308784485,
      "learning_rate": 6.747893752386088e-05,
      "loss": 0.9205,
      "step": 16770
    },
    {
      "epoch": 1.9352791878172588,
      "grad_norm": 0.4096218943595886,
      "learning_rate": 6.74154739159733e-05,
      "loss": 0.9333,
      "step": 16775
    },
    {
      "epoch": 1.9358560221504384,
      "grad_norm": 0.3812349736690521,
      "learning_rate": 6.73520249874782e-05,
      "loss": 0.9194,
      "step": 16780
    },
    {
      "epoch": 1.936432856483618,
      "grad_norm": 0.42356806993484497,
      "learning_rate": 6.728859076695938e-05,
      "loss": 0.9593,
      "step": 16785
    },
    {
      "epoch": 1.9370096908167973,
      "grad_norm": 0.386717826128006,
      "learning_rate": 6.722517128299414e-05,
      "loss": 0.9022,
      "step": 16790
    },
    {
      "epoch": 1.9375865251499769,
      "grad_norm": 0.4004405438899994,
      "learning_rate": 6.716176656415305e-05,
      "loss": 0.9745,
      "step": 16795
    },
    {
      "epoch": 1.9381633594831564,
      "grad_norm": 0.398300439119339,
      "learning_rate": 6.709837663900007e-05,
      "loss": 0.9484,
      "step": 16800
    },
    {
      "epoch": 1.938740193816336,
      "grad_norm": 0.4078962504863739,
      "learning_rate": 6.703500153609247e-05,
      "loss": 0.9304,
      "step": 16805
    },
    {
      "epoch": 1.9393170281495156,
      "grad_norm": 0.40395575761795044,
      "learning_rate": 6.697164128398088e-05,
      "loss": 0.9248,
      "step": 16810
    },
    {
      "epoch": 1.9398938624826951,
      "grad_norm": 0.40404340624809265,
      "learning_rate": 6.690829591120922e-05,
      "loss": 0.9578,
      "step": 16815
    },
    {
      "epoch": 1.9404706968158745,
      "grad_norm": 0.4161837100982666,
      "learning_rate": 6.684496544631466e-05,
      "loss": 0.8986,
      "step": 16820
    },
    {
      "epoch": 1.941047531149054,
      "grad_norm": 0.4277629852294922,
      "learning_rate": 6.678164991782772e-05,
      "loss": 0.9928,
      "step": 16825
    },
    {
      "epoch": 1.9416243654822334,
      "grad_norm": 0.38067278265953064,
      "learning_rate": 6.671834935427222e-05,
      "loss": 0.9516,
      "step": 16830
    },
    {
      "epoch": 1.942201199815413,
      "grad_norm": 0.37735915184020996,
      "learning_rate": 6.665506378416513e-05,
      "loss": 0.9199,
      "step": 16835
    },
    {
      "epoch": 1.9427780341485925,
      "grad_norm": 0.39840787649154663,
      "learning_rate": 6.65917932360167e-05,
      "loss": 0.9641,
      "step": 16840
    },
    {
      "epoch": 1.943354868481772,
      "grad_norm": 0.41158953309059143,
      "learning_rate": 6.652853773833052e-05,
      "loss": 0.9537,
      "step": 16845
    },
    {
      "epoch": 1.9439317028149516,
      "grad_norm": 0.367220014333725,
      "learning_rate": 6.646529731960324e-05,
      "loss": 0.8913,
      "step": 16850
    },
    {
      "epoch": 1.9445085371481312,
      "grad_norm": 0.3907991349697113,
      "learning_rate": 6.640207200832482e-05,
      "loss": 0.9216,
      "step": 16855
    },
    {
      "epoch": 1.9450853714813106,
      "grad_norm": 0.40876129269599915,
      "learning_rate": 6.633886183297838e-05,
      "loss": 0.9338,
      "step": 16860
    },
    {
      "epoch": 1.9456622058144901,
      "grad_norm": 0.41524142026901245,
      "learning_rate": 6.627566682204025e-05,
      "loss": 0.9301,
      "step": 16865
    },
    {
      "epoch": 1.9462390401476695,
      "grad_norm": 0.40617069602012634,
      "learning_rate": 6.621248700397989e-05,
      "loss": 0.9035,
      "step": 16870
    },
    {
      "epoch": 1.946815874480849,
      "grad_norm": 0.3806745707988739,
      "learning_rate": 6.614932240725989e-05,
      "loss": 0.9121,
      "step": 16875
    },
    {
      "epoch": 1.9473927088140286,
      "grad_norm": 0.3907873332500458,
      "learning_rate": 6.608617306033609e-05,
      "loss": 0.9058,
      "step": 16880
    },
    {
      "epoch": 1.9479695431472082,
      "grad_norm": 0.39370810985565186,
      "learning_rate": 6.602303899165737e-05,
      "loss": 0.9464,
      "step": 16885
    },
    {
      "epoch": 1.9485463774803877,
      "grad_norm": 0.36397796869277954,
      "learning_rate": 6.595992022966571e-05,
      "loss": 0.8671,
      "step": 16890
    },
    {
      "epoch": 1.9491232118135673,
      "grad_norm": 0.39594727754592896,
      "learning_rate": 6.58968168027963e-05,
      "loss": 0.9762,
      "step": 16895
    },
    {
      "epoch": 1.9497000461467466,
      "grad_norm": 0.3924747109413147,
      "learning_rate": 6.583372873947732e-05,
      "loss": 0.939,
      "step": 16900
    },
    {
      "epoch": 1.9502768804799262,
      "grad_norm": 0.3910292983055115,
      "learning_rate": 6.577065606813011e-05,
      "loss": 0.9245,
      "step": 16905
    },
    {
      "epoch": 1.9508537148131055,
      "grad_norm": 0.3986966907978058,
      "learning_rate": 6.570759881716892e-05,
      "loss": 0.922,
      "step": 16910
    },
    {
      "epoch": 1.951430549146285,
      "grad_norm": 0.37212473154067993,
      "learning_rate": 6.56445570150013e-05,
      "loss": 0.8996,
      "step": 16915
    },
    {
      "epoch": 1.9520073834794647,
      "grad_norm": 0.38835760951042175,
      "learning_rate": 6.558153069002764e-05,
      "loss": 0.9552,
      "step": 16920
    },
    {
      "epoch": 1.9525842178126442,
      "grad_norm": 0.40095987915992737,
      "learning_rate": 6.551851987064141e-05,
      "loss": 0.9248,
      "step": 16925
    },
    {
      "epoch": 1.9531610521458238,
      "grad_norm": 0.42613843083381653,
      "learning_rate": 6.54555245852291e-05,
      "loss": 0.9297,
      "step": 16930
    },
    {
      "epoch": 1.9537378864790034,
      "grad_norm": 0.39653289318084717,
      "learning_rate": 6.539254486217026e-05,
      "loss": 0.9619,
      "step": 16935
    },
    {
      "epoch": 1.9543147208121827,
      "grad_norm": 0.4119310677051544,
      "learning_rate": 6.532958072983734e-05,
      "loss": 0.9605,
      "step": 16940
    },
    {
      "epoch": 1.9548915551453623,
      "grad_norm": 0.3854370415210724,
      "learning_rate": 6.526663221659579e-05,
      "loss": 0.9268,
      "step": 16945
    },
    {
      "epoch": 1.9554683894785416,
      "grad_norm": 0.4038507640361786,
      "learning_rate": 6.520369935080411e-05,
      "loss": 0.8646,
      "step": 16950
    },
    {
      "epoch": 1.9560452238117212,
      "grad_norm": 0.3981791138648987,
      "learning_rate": 6.51407821608136e-05,
      "loss": 0.9104,
      "step": 16955
    },
    {
      "epoch": 1.9566220581449008,
      "grad_norm": 0.41779494285583496,
      "learning_rate": 6.507788067496863e-05,
      "loss": 0.9362,
      "step": 16960
    },
    {
      "epoch": 1.9571988924780803,
      "grad_norm": 0.39504823088645935,
      "learning_rate": 6.501499492160636e-05,
      "loss": 0.9438,
      "step": 16965
    },
    {
      "epoch": 1.9577757268112599,
      "grad_norm": 0.421944260597229,
      "learning_rate": 6.495212492905707e-05,
      "loss": 0.918,
      "step": 16970
    },
    {
      "epoch": 1.9583525611444395,
      "grad_norm": 0.40287306904792786,
      "learning_rate": 6.488927072564372e-05,
      "loss": 0.8682,
      "step": 16975
    },
    {
      "epoch": 1.9589293954776188,
      "grad_norm": 0.43788981437683105,
      "learning_rate": 6.482643233968224e-05,
      "loss": 0.908,
      "step": 16980
    },
    {
      "epoch": 1.9595062298107984,
      "grad_norm": 0.3697527348995209,
      "learning_rate": 6.476360979948153e-05,
      "loss": 0.9286,
      "step": 16985
    },
    {
      "epoch": 1.9600830641439777,
      "grad_norm": 0.38073477149009705,
      "learning_rate": 6.470080313334322e-05,
      "loss": 0.8957,
      "step": 16990
    },
    {
      "epoch": 1.9606598984771573,
      "grad_norm": 0.44243210554122925,
      "learning_rate": 6.463801236956184e-05,
      "loss": 0.9488,
      "step": 16995
    },
    {
      "epoch": 1.9612367328103368,
      "grad_norm": 0.39931437373161316,
      "learning_rate": 6.457523753642469e-05,
      "loss": 0.9382,
      "step": 17000
    },
    {
      "epoch": 1.9618135671435164,
      "grad_norm": 0.428107351064682,
      "learning_rate": 6.451247866221206e-05,
      "loss": 0.9399,
      "step": 17005
    },
    {
      "epoch": 1.962390401476696,
      "grad_norm": 0.4132627546787262,
      "learning_rate": 6.44497357751969e-05,
      "loss": 0.9203,
      "step": 17010
    },
    {
      "epoch": 1.9629672358098755,
      "grad_norm": 0.3902876079082489,
      "learning_rate": 6.438700890364496e-05,
      "loss": 0.9191,
      "step": 17015
    },
    {
      "epoch": 1.9635440701430549,
      "grad_norm": 0.41587990522384644,
      "learning_rate": 6.432429807581489e-05,
      "loss": 0.9854,
      "step": 17020
    },
    {
      "epoch": 1.9641209044762344,
      "grad_norm": 0.5028902888298035,
      "learning_rate": 6.426160331995801e-05,
      "loss": 0.8926,
      "step": 17025
    },
    {
      "epoch": 1.9646977388094138,
      "grad_norm": 0.4033501148223877,
      "learning_rate": 6.419892466431842e-05,
      "loss": 0.9497,
      "step": 17030
    },
    {
      "epoch": 1.9652745731425934,
      "grad_norm": 0.3764554262161255,
      "learning_rate": 6.413626213713295e-05,
      "loss": 0.9535,
      "step": 17035
    },
    {
      "epoch": 1.965851407475773,
      "grad_norm": 0.4067056179046631,
      "learning_rate": 6.407361576663124e-05,
      "loss": 0.928,
      "step": 17040
    },
    {
      "epoch": 1.9664282418089525,
      "grad_norm": 0.38903966546058655,
      "learning_rate": 6.401098558103563e-05,
      "loss": 0.8632,
      "step": 17045
    },
    {
      "epoch": 1.967005076142132,
      "grad_norm": 0.48693788051605225,
      "learning_rate": 6.394837160856105e-05,
      "loss": 0.8727,
      "step": 17050
    },
    {
      "epoch": 1.9675819104753116,
      "grad_norm": 0.39712145924568176,
      "learning_rate": 6.388577387741524e-05,
      "loss": 0.9145,
      "step": 17055
    },
    {
      "epoch": 1.968158744808491,
      "grad_norm": 0.43445274233818054,
      "learning_rate": 6.382319241579866e-05,
      "loss": 0.9578,
      "step": 17060
    },
    {
      "epoch": 1.9687355791416705,
      "grad_norm": 0.4607578217983246,
      "learning_rate": 6.376062725190435e-05,
      "loss": 0.9467,
      "step": 17065
    },
    {
      "epoch": 1.9693124134748499,
      "grad_norm": 0.4164617657661438,
      "learning_rate": 6.369807841391798e-05,
      "loss": 0.9318,
      "step": 17070
    },
    {
      "epoch": 1.9698892478080294,
      "grad_norm": 0.38452988862991333,
      "learning_rate": 6.363554593001801e-05,
      "loss": 0.9329,
      "step": 17075
    },
    {
      "epoch": 1.970466082141209,
      "grad_norm": 0.41434916853904724,
      "learning_rate": 6.357302982837543e-05,
      "loss": 0.9214,
      "step": 17080
    },
    {
      "epoch": 1.9710429164743886,
      "grad_norm": 0.3847775161266327,
      "learning_rate": 6.351053013715383e-05,
      "loss": 0.9322,
      "step": 17085
    },
    {
      "epoch": 1.9716197508075681,
      "grad_norm": 0.4423973262310028,
      "learning_rate": 6.344804688450941e-05,
      "loss": 0.9484,
      "step": 17090
    },
    {
      "epoch": 1.9721965851407477,
      "grad_norm": 0.4209598898887634,
      "learning_rate": 6.33855800985911e-05,
      "loss": 0.9023,
      "step": 17095
    },
    {
      "epoch": 1.972773419473927,
      "grad_norm": 0.4027029871940613,
      "learning_rate": 6.332312980754025e-05,
      "loss": 0.9565,
      "step": 17100
    },
    {
      "epoch": 1.9733502538071066,
      "grad_norm": 0.4411013722419739,
      "learning_rate": 6.326069603949079e-05,
      "loss": 0.9358,
      "step": 17105
    },
    {
      "epoch": 1.973927088140286,
      "grad_norm": 0.3684937655925751,
      "learning_rate": 6.319827882256935e-05,
      "loss": 0.9061,
      "step": 17110
    },
    {
      "epoch": 1.9745039224734655,
      "grad_norm": 0.4084828197956085,
      "learning_rate": 6.313587818489497e-05,
      "loss": 0.9066,
      "step": 17115
    },
    {
      "epoch": 1.975080756806645,
      "grad_norm": 0.4619320333003998,
      "learning_rate": 6.307349415457923e-05,
      "loss": 0.922,
      "step": 17120
    },
    {
      "epoch": 1.9756575911398246,
      "grad_norm": 0.37400147318840027,
      "learning_rate": 6.301112675972627e-05,
      "loss": 0.9151,
      "step": 17125
    },
    {
      "epoch": 1.9762344254730042,
      "grad_norm": 0.39188510179519653,
      "learning_rate": 6.294877602843275e-05,
      "loss": 0.9913,
      "step": 17130
    },
    {
      "epoch": 1.9768112598061838,
      "grad_norm": 0.4424871802330017,
      "learning_rate": 6.28864419887878e-05,
      "loss": 0.8995,
      "step": 17135
    },
    {
      "epoch": 1.9773880941393633,
      "grad_norm": 0.39406830072402954,
      "learning_rate": 6.282412466887293e-05,
      "loss": 0.8848,
      "step": 17140
    },
    {
      "epoch": 1.9779649284725427,
      "grad_norm": 0.41170790791511536,
      "learning_rate": 6.276182409676234e-05,
      "loss": 0.9644,
      "step": 17145
    },
    {
      "epoch": 1.9785417628057222,
      "grad_norm": 0.42084214091300964,
      "learning_rate": 6.269954030052252e-05,
      "loss": 0.9029,
      "step": 17150
    },
    {
      "epoch": 1.9791185971389016,
      "grad_norm": 0.4019472599029541,
      "learning_rate": 6.263727330821241e-05,
      "loss": 0.9181,
      "step": 17155
    },
    {
      "epoch": 1.9796954314720812,
      "grad_norm": 0.36666131019592285,
      "learning_rate": 6.25750231478834e-05,
      "loss": 0.9103,
      "step": 17160
    },
    {
      "epoch": 1.9802722658052607,
      "grad_norm": 0.4180550277233124,
      "learning_rate": 6.251278984757938e-05,
      "loss": 0.9464,
      "step": 17165
    },
    {
      "epoch": 1.9808491001384403,
      "grad_norm": 0.4247073233127594,
      "learning_rate": 6.245057343533653e-05,
      "loss": 0.9364,
      "step": 17170
    },
    {
      "epoch": 1.9814259344716199,
      "grad_norm": 0.4047488868236542,
      "learning_rate": 6.238837393918341e-05,
      "loss": 0.9444,
      "step": 17175
    },
    {
      "epoch": 1.9820027688047994,
      "grad_norm": 0.37309056520462036,
      "learning_rate": 6.232619138714112e-05,
      "loss": 0.9291,
      "step": 17180
    },
    {
      "epoch": 1.9825796031379788,
      "grad_norm": 0.4210014343261719,
      "learning_rate": 6.226402580722298e-05,
      "loss": 0.8786,
      "step": 17185
    },
    {
      "epoch": 1.9831564374711583,
      "grad_norm": 0.39095330238342285,
      "learning_rate": 6.220187722743466e-05,
      "loss": 0.9254,
      "step": 17190
    },
    {
      "epoch": 1.9837332718043377,
      "grad_norm": 0.4275166988372803,
      "learning_rate": 6.213974567577426e-05,
      "loss": 0.9888,
      "step": 17195
    },
    {
      "epoch": 1.9843101061375172,
      "grad_norm": 0.39710062742233276,
      "learning_rate": 6.207763118023218e-05,
      "loss": 0.9084,
      "step": 17200
    },
    {
      "epoch": 1.9848869404706968,
      "grad_norm": 0.5015316605567932,
      "learning_rate": 6.201553376879108e-05,
      "loss": 0.9358,
      "step": 17205
    },
    {
      "epoch": 1.9854637748038764,
      "grad_norm": 0.3706842064857483,
      "learning_rate": 6.195345346942599e-05,
      "loss": 0.9353,
      "step": 17210
    },
    {
      "epoch": 1.986040609137056,
      "grad_norm": 0.40164434909820557,
      "learning_rate": 6.189139031010416e-05,
      "loss": 0.9092,
      "step": 17215
    },
    {
      "epoch": 1.9866174434702355,
      "grad_norm": 0.4197016656398773,
      "learning_rate": 6.182934431878526e-05,
      "loss": 0.8762,
      "step": 17220
    },
    {
      "epoch": 1.9871942778034148,
      "grad_norm": 0.42161640524864197,
      "learning_rate": 6.176731552342104e-05,
      "loss": 0.9427,
      "step": 17225
    },
    {
      "epoch": 1.9877711121365944,
      "grad_norm": 0.42144665122032166,
      "learning_rate": 6.170530395195561e-05,
      "loss": 0.9246,
      "step": 17230
    },
    {
      "epoch": 1.9883479464697738,
      "grad_norm": 0.40069445967674255,
      "learning_rate": 6.164330963232535e-05,
      "loss": 0.9494,
      "step": 17235
    },
    {
      "epoch": 1.9889247808029533,
      "grad_norm": 0.3807680010795593,
      "learning_rate": 6.158133259245877e-05,
      "loss": 0.9212,
      "step": 17240
    },
    {
      "epoch": 1.9895016151361329,
      "grad_norm": 0.3835570812225342,
      "learning_rate": 6.151937286027669e-05,
      "loss": 0.9311,
      "step": 17245
    },
    {
      "epoch": 1.9900784494693124,
      "grad_norm": 0.40442800521850586,
      "learning_rate": 6.145743046369205e-05,
      "loss": 0.9444,
      "step": 17250
    },
    {
      "epoch": 1.990655283802492,
      "grad_norm": 0.4020007848739624,
      "learning_rate": 6.139550543061006e-05,
      "loss": 0.9084,
      "step": 17255
    },
    {
      "epoch": 1.9912321181356716,
      "grad_norm": 0.3811594545841217,
      "learning_rate": 6.133359778892802e-05,
      "loss": 0.9033,
      "step": 17260
    },
    {
      "epoch": 1.991808952468851,
      "grad_norm": 0.4268489480018616,
      "learning_rate": 6.127170756653546e-05,
      "loss": 0.9646,
      "step": 17265
    },
    {
      "epoch": 1.9923857868020305,
      "grad_norm": 0.43692687153816223,
      "learning_rate": 6.120983479131411e-05,
      "loss": 0.9556,
      "step": 17270
    },
    {
      "epoch": 1.9929626211352098,
      "grad_norm": 0.3834821581840515,
      "learning_rate": 6.114797949113767e-05,
      "loss": 0.901,
      "step": 17275
    },
    {
      "epoch": 1.9935394554683894,
      "grad_norm": 0.4126569628715515,
      "learning_rate": 6.108614169387215e-05,
      "loss": 0.9706,
      "step": 17280
    },
    {
      "epoch": 1.994116289801569,
      "grad_norm": 0.41819560527801514,
      "learning_rate": 6.102432142737555e-05,
      "loss": 0.8937,
      "step": 17285
    },
    {
      "epoch": 1.9946931241347485,
      "grad_norm": 0.40084108710289,
      "learning_rate": 6.096251871949804e-05,
      "loss": 0.9308,
      "step": 17290
    },
    {
      "epoch": 1.995269958467928,
      "grad_norm": 0.35687753558158875,
      "learning_rate": 6.090073359808188e-05,
      "loss": 0.9226,
      "step": 17295
    },
    {
      "epoch": 1.9958467928011077,
      "grad_norm": 0.393536776304245,
      "learning_rate": 6.0838966090961355e-05,
      "loss": 0.913,
      "step": 17300
    },
    {
      "epoch": 1.996423627134287,
      "grad_norm": 0.36556947231292725,
      "learning_rate": 6.077721622596287e-05,
      "loss": 0.9291,
      "step": 17305
    },
    {
      "epoch": 1.9970004614674666,
      "grad_norm": 0.3737342357635498,
      "learning_rate": 6.071548403090488e-05,
      "loss": 0.8971,
      "step": 17310
    },
    {
      "epoch": 1.997577295800646,
      "grad_norm": 0.41196849942207336,
      "learning_rate": 6.0653769533597804e-05,
      "loss": 0.9329,
      "step": 17315
    },
    {
      "epoch": 1.9981541301338255,
      "grad_norm": 0.40694162249565125,
      "learning_rate": 6.059207276184416e-05,
      "loss": 0.9129,
      "step": 17320
    },
    {
      "epoch": 1.998730964467005,
      "grad_norm": 0.4291395843029022,
      "learning_rate": 6.053039374343849e-05,
      "loss": 0.9066,
      "step": 17325
    },
    {
      "epoch": 1.9993077988001846,
      "grad_norm": 0.45420485734939575,
      "learning_rate": 6.046873250616729e-05,
      "loss": 0.9447,
      "step": 17330
    },
    {
      "epoch": 1.9998846331333642,
      "grad_norm": 0.4170719087123871,
      "learning_rate": 6.040708907780907e-05,
      "loss": 0.9487,
      "step": 17335
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.9607023000717163,
      "eval_runtime": 959.7881,
      "eval_samples_per_second": 15.993,
      "eval_steps_per_second": 1.0,
      "step": 17336
    },
    {
      "epoch": 2.0004614674665437,
      "grad_norm": 0.3912767767906189,
      "learning_rate": 6.0345463486134325e-05,
      "loss": 0.8925,
      "step": 17340
    },
    {
      "epoch": 2.0010383017997233,
      "grad_norm": 0.48886004090309143,
      "learning_rate": 6.0283855758905496e-05,
      "loss": 0.8874,
      "step": 17345
    },
    {
      "epoch": 2.0016151361329024,
      "grad_norm": 0.38123923540115356,
      "learning_rate": 6.0222265923876995e-05,
      "loss": 0.8842,
      "step": 17350
    },
    {
      "epoch": 2.002191970466082,
      "grad_norm": 0.3847237825393677,
      "learning_rate": 6.0160694008795114e-05,
      "loss": 0.8911,
      "step": 17355
    },
    {
      "epoch": 2.0027688047992616,
      "grad_norm": 0.3945702314376831,
      "learning_rate": 6.0099140041398205e-05,
      "loss": 0.8977,
      "step": 17360
    },
    {
      "epoch": 2.003345639132441,
      "grad_norm": 0.3811289370059967,
      "learning_rate": 6.0037604049416383e-05,
      "loss": 0.8462,
      "step": 17365
    },
    {
      "epoch": 2.0039224734656207,
      "grad_norm": 0.3992691934108734,
      "learning_rate": 5.9976086060571765e-05,
      "loss": 0.903,
      "step": 17370
    },
    {
      "epoch": 2.0044993077988003,
      "grad_norm": 0.4333883225917816,
      "learning_rate": 5.9914586102578284e-05,
      "loss": 0.9278,
      "step": 17375
    },
    {
      "epoch": 2.00507614213198,
      "grad_norm": 0.4005573093891144,
      "learning_rate": 5.9853104203141854e-05,
      "loss": 0.874,
      "step": 17380
    },
    {
      "epoch": 2.0056529764651594,
      "grad_norm": 0.4214078187942505,
      "learning_rate": 5.979164038996015e-05,
      "loss": 0.8595,
      "step": 17385
    },
    {
      "epoch": 2.0062298107983385,
      "grad_norm": 0.43147456645965576,
      "learning_rate": 5.973019469072272e-05,
      "loss": 0.8401,
      "step": 17390
    },
    {
      "epoch": 2.006806645131518,
      "grad_norm": 0.44270798563957214,
      "learning_rate": 5.966876713311103e-05,
      "loss": 0.8779,
      "step": 17395
    },
    {
      "epoch": 2.0073834794646976,
      "grad_norm": 0.424435555934906,
      "learning_rate": 5.960735774479826e-05,
      "loss": 0.8772,
      "step": 17400
    },
    {
      "epoch": 2.007960313797877,
      "grad_norm": 0.38282132148742676,
      "learning_rate": 5.954596655344951e-05,
      "loss": 0.8401,
      "step": 17405
    },
    {
      "epoch": 2.0085371481310568,
      "grad_norm": 0.40239688754081726,
      "learning_rate": 5.9484593586721546e-05,
      "loss": 0.8821,
      "step": 17410
    },
    {
      "epoch": 2.0091139824642363,
      "grad_norm": 0.4169721007347107,
      "learning_rate": 5.942323887226311e-05,
      "loss": 0.8924,
      "step": 17415
    },
    {
      "epoch": 2.009690816797416,
      "grad_norm": 0.4102388024330139,
      "learning_rate": 5.936190243771458e-05,
      "loss": 0.8869,
      "step": 17420
    },
    {
      "epoch": 2.0102676511305955,
      "grad_norm": 0.4065175950527191,
      "learning_rate": 5.9300584310708086e-05,
      "loss": 0.8357,
      "step": 17425
    },
    {
      "epoch": 2.0108444854637746,
      "grad_norm": 0.42205628752708435,
      "learning_rate": 5.923928451886767e-05,
      "loss": 0.8602,
      "step": 17430
    },
    {
      "epoch": 2.011421319796954,
      "grad_norm": 0.4142395555973053,
      "learning_rate": 5.917800308980892e-05,
      "loss": 0.8751,
      "step": 17435
    },
    {
      "epoch": 2.0119981541301337,
      "grad_norm": 0.3703550398349762,
      "learning_rate": 5.911674005113929e-05,
      "loss": 0.9026,
      "step": 17440
    },
    {
      "epoch": 2.0125749884633133,
      "grad_norm": 0.43400120735168457,
      "learning_rate": 5.905549543045783e-05,
      "loss": 0.8188,
      "step": 17445
    },
    {
      "epoch": 2.013151822796493,
      "grad_norm": 0.3862331509590149,
      "learning_rate": 5.899426925535545e-05,
      "loss": 0.8668,
      "step": 17450
    },
    {
      "epoch": 2.0137286571296724,
      "grad_norm": 0.36499011516571045,
      "learning_rate": 5.8933061553414614e-05,
      "loss": 0.8867,
      "step": 17455
    },
    {
      "epoch": 2.014305491462852,
      "grad_norm": 0.3892669975757599,
      "learning_rate": 5.887187235220948e-05,
      "loss": 0.8732,
      "step": 17460
    },
    {
      "epoch": 2.0148823257960315,
      "grad_norm": 0.4285455346107483,
      "learning_rate": 5.881070167930598e-05,
      "loss": 0.87,
      "step": 17465
    },
    {
      "epoch": 2.0154591601292107,
      "grad_norm": 0.42607060074806213,
      "learning_rate": 5.874954956226157e-05,
      "loss": 0.8865,
      "step": 17470
    },
    {
      "epoch": 2.0160359944623902,
      "grad_norm": 0.4194190502166748,
      "learning_rate": 5.868841602862541e-05,
      "loss": 0.8928,
      "step": 17475
    },
    {
      "epoch": 2.01661282879557,
      "grad_norm": 0.4271929860115051,
      "learning_rate": 5.862730110593824e-05,
      "loss": 0.8396,
      "step": 17480
    },
    {
      "epoch": 2.0171896631287494,
      "grad_norm": 0.3861948251724243,
      "learning_rate": 5.856620482173252e-05,
      "loss": 0.8878,
      "step": 17485
    },
    {
      "epoch": 2.017766497461929,
      "grad_norm": 0.39574941992759705,
      "learning_rate": 5.8505127203532216e-05,
      "loss": 0.8399,
      "step": 17490
    },
    {
      "epoch": 2.0183433317951085,
      "grad_norm": 0.40148764848709106,
      "learning_rate": 5.844406827885287e-05,
      "loss": 0.8678,
      "step": 17495
    },
    {
      "epoch": 2.018920166128288,
      "grad_norm": 0.396918922662735,
      "learning_rate": 5.838302807520171e-05,
      "loss": 0.8774,
      "step": 17500
    },
    {
      "epoch": 2.0194970004614676,
      "grad_norm": 0.45297619700431824,
      "learning_rate": 5.8322006620077426e-05,
      "loss": 0.848,
      "step": 17505
    },
    {
      "epoch": 2.020073834794647,
      "grad_norm": 0.4377535283565521,
      "learning_rate": 5.826100394097036e-05,
      "loss": 0.876,
      "step": 17510
    },
    {
      "epoch": 2.0206506691278263,
      "grad_norm": 0.423527330160141,
      "learning_rate": 5.8200020065362246e-05,
      "loss": 0.8914,
      "step": 17515
    },
    {
      "epoch": 2.021227503461006,
      "grad_norm": 0.4209001362323761,
      "learning_rate": 5.8139055020726494e-05,
      "loss": 0.8783,
      "step": 17520
    },
    {
      "epoch": 2.0218043377941854,
      "grad_norm": 0.4174181818962097,
      "learning_rate": 5.807810883452798e-05,
      "loss": 0.8553,
      "step": 17525
    },
    {
      "epoch": 2.022381172127365,
      "grad_norm": 0.4181443452835083,
      "learning_rate": 5.8017181534223096e-05,
      "loss": 0.9307,
      "step": 17530
    },
    {
      "epoch": 2.0229580064605446,
      "grad_norm": 0.4317053556442261,
      "learning_rate": 5.7956273147259645e-05,
      "loss": 0.8831,
      "step": 17535
    },
    {
      "epoch": 2.023534840793724,
      "grad_norm": 0.4345930516719818,
      "learning_rate": 5.789538370107701e-05,
      "loss": 0.8768,
      "step": 17540
    },
    {
      "epoch": 2.0241116751269037,
      "grad_norm": 0.39889758825302124,
      "learning_rate": 5.7834513223106004e-05,
      "loss": 0.8695,
      "step": 17545
    },
    {
      "epoch": 2.0246885094600833,
      "grad_norm": 0.45139816403388977,
      "learning_rate": 5.77736617407689e-05,
      "loss": 0.8829,
      "step": 17550
    },
    {
      "epoch": 2.0252653437932624,
      "grad_norm": 0.43973278999328613,
      "learning_rate": 5.771282928147941e-05,
      "loss": 0.8439,
      "step": 17555
    },
    {
      "epoch": 2.025842178126442,
      "grad_norm": 0.4227539896965027,
      "learning_rate": 5.765201587264271e-05,
      "loss": 0.8518,
      "step": 17560
    },
    {
      "epoch": 2.0264190124596215,
      "grad_norm": 0.4028456211090088,
      "learning_rate": 5.7591221541655285e-05,
      "loss": 0.8845,
      "step": 17565
    },
    {
      "epoch": 2.026995846792801,
      "grad_norm": 0.4572751820087433,
      "learning_rate": 5.753044631590513e-05,
      "loss": 0.8713,
      "step": 17570
    },
    {
      "epoch": 2.0275726811259807,
      "grad_norm": 0.42924416065216064,
      "learning_rate": 5.746969022277161e-05,
      "loss": 0.8404,
      "step": 17575
    },
    {
      "epoch": 2.0281495154591602,
      "grad_norm": 0.38023436069488525,
      "learning_rate": 5.74089532896255e-05,
      "loss": 0.8709,
      "step": 17580
    },
    {
      "epoch": 2.02872634979234,
      "grad_norm": 0.41763070225715637,
      "learning_rate": 5.7348235543828834e-05,
      "loss": 0.8754,
      "step": 17585
    },
    {
      "epoch": 2.0293031841255194,
      "grad_norm": 0.4525087773799896,
      "learning_rate": 5.7287537012735104e-05,
      "loss": 0.9014,
      "step": 17590
    },
    {
      "epoch": 2.0298800184586985,
      "grad_norm": 0.4048612117767334,
      "learning_rate": 5.722685772368912e-05,
      "loss": 0.8648,
      "step": 17595
    },
    {
      "epoch": 2.030456852791878,
      "grad_norm": 0.4235800802707672,
      "learning_rate": 5.716619770402707e-05,
      "loss": 0.8565,
      "step": 17600
    },
    {
      "epoch": 2.0310336871250576,
      "grad_norm": 0.38395988941192627,
      "learning_rate": 5.710555698107627e-05,
      "loss": 0.882,
      "step": 17605
    },
    {
      "epoch": 2.031610521458237,
      "grad_norm": 0.40724024176597595,
      "learning_rate": 5.704493558215567e-05,
      "loss": 0.8982,
      "step": 17610
    },
    {
      "epoch": 2.0321873557914167,
      "grad_norm": 0.41519051790237427,
      "learning_rate": 5.69843335345752e-05,
      "loss": 0.8696,
      "step": 17615
    },
    {
      "epoch": 2.0327641901245963,
      "grad_norm": 0.37952789664268494,
      "learning_rate": 5.692375086563622e-05,
      "loss": 0.8895,
      "step": 17620
    },
    {
      "epoch": 2.033341024457776,
      "grad_norm": 0.37834814190864563,
      "learning_rate": 5.6863187602631354e-05,
      "loss": 0.8362,
      "step": 17625
    },
    {
      "epoch": 2.0339178587909554,
      "grad_norm": 0.40140074491500854,
      "learning_rate": 5.680264377284451e-05,
      "loss": 0.8111,
      "step": 17630
    },
    {
      "epoch": 2.0344946931241346,
      "grad_norm": 0.41932836174964905,
      "learning_rate": 5.6742119403550733e-05,
      "loss": 0.906,
      "step": 17635
    },
    {
      "epoch": 2.035071527457314,
      "grad_norm": 0.3922612965106964,
      "learning_rate": 5.668161452201639e-05,
      "loss": 0.8498,
      "step": 17640
    },
    {
      "epoch": 2.0356483617904937,
      "grad_norm": 0.3891882300376892,
      "learning_rate": 5.6621129155499066e-05,
      "loss": 0.8675,
      "step": 17645
    },
    {
      "epoch": 2.0362251961236733,
      "grad_norm": 0.42834147810935974,
      "learning_rate": 5.6560663331247556e-05,
      "loss": 0.8348,
      "step": 17650
    },
    {
      "epoch": 2.036802030456853,
      "grad_norm": 0.43242147564888,
      "learning_rate": 5.650021707650173e-05,
      "loss": 0.8868,
      "step": 17655
    },
    {
      "epoch": 2.0373788647900324,
      "grad_norm": 0.4187890887260437,
      "learning_rate": 5.64397904184929e-05,
      "loss": 0.887,
      "step": 17660
    },
    {
      "epoch": 2.037955699123212,
      "grad_norm": 0.41034138202667236,
      "learning_rate": 5.6379383384443255e-05,
      "loss": 0.9019,
      "step": 17665
    },
    {
      "epoch": 2.0385325334563915,
      "grad_norm": 0.5285660624504089,
      "learning_rate": 5.6318996001566384e-05,
      "loss": 0.8967,
      "step": 17670
    },
    {
      "epoch": 2.0391093677895706,
      "grad_norm": 0.4344254434108734,
      "learning_rate": 5.625862829706679e-05,
      "loss": 0.9037,
      "step": 17675
    },
    {
      "epoch": 2.03968620212275,
      "grad_norm": 0.40379151701927185,
      "learning_rate": 5.6198280298140404e-05,
      "loss": 0.8866,
      "step": 17680
    },
    {
      "epoch": 2.0402630364559298,
      "grad_norm": 0.46100154519081116,
      "learning_rate": 5.613795203197401e-05,
      "loss": 0.8283,
      "step": 17685
    },
    {
      "epoch": 2.0408398707891093,
      "grad_norm": 0.45787981152534485,
      "learning_rate": 5.607764352574565e-05,
      "loss": 0.8646,
      "step": 17690
    },
    {
      "epoch": 2.041416705122289,
      "grad_norm": 0.4042724072933197,
      "learning_rate": 5.6017354806624344e-05,
      "loss": 0.8741,
      "step": 17695
    },
    {
      "epoch": 2.0419935394554685,
      "grad_norm": 0.3924694061279297,
      "learning_rate": 5.5957085901770424e-05,
      "loss": 0.8481,
      "step": 17700
    },
    {
      "epoch": 2.042570373788648,
      "grad_norm": 0.4196171164512634,
      "learning_rate": 5.589683683833502e-05,
      "loss": 0.9099,
      "step": 17705
    },
    {
      "epoch": 2.0431472081218276,
      "grad_norm": 0.4184753894805908,
      "learning_rate": 5.5836607643460504e-05,
      "loss": 0.8511,
      "step": 17710
    },
    {
      "epoch": 2.0437240424550067,
      "grad_norm": 0.3830004632472992,
      "learning_rate": 5.577639834428026e-05,
      "loss": 0.8416,
      "step": 17715
    },
    {
      "epoch": 2.0443008767881863,
      "grad_norm": 0.4424822926521301,
      "learning_rate": 5.571620896791869e-05,
      "loss": 0.8951,
      "step": 17720
    },
    {
      "epoch": 2.044877711121366,
      "grad_norm": 0.44922518730163574,
      "learning_rate": 5.565603954149118e-05,
      "loss": 0.8697,
      "step": 17725
    },
    {
      "epoch": 2.0454545454545454,
      "grad_norm": 0.4096304774284363,
      "learning_rate": 5.559589009210421e-05,
      "loss": 0.8638,
      "step": 17730
    },
    {
      "epoch": 2.046031379787725,
      "grad_norm": 0.41669782996177673,
      "learning_rate": 5.553576064685522e-05,
      "loss": 0.8459,
      "step": 17735
    },
    {
      "epoch": 2.0466082141209045,
      "grad_norm": 0.4168075621128082,
      "learning_rate": 5.547565123283267e-05,
      "loss": 0.8374,
      "step": 17740
    },
    {
      "epoch": 2.047185048454084,
      "grad_norm": 0.40191158652305603,
      "learning_rate": 5.5415561877115876e-05,
      "loss": 0.915,
      "step": 17745
    },
    {
      "epoch": 2.0477618827872637,
      "grad_norm": 0.40572503209114075,
      "learning_rate": 5.535549260677534e-05,
      "loss": 0.8361,
      "step": 17750
    },
    {
      "epoch": 2.048338717120443,
      "grad_norm": 0.36613893508911133,
      "learning_rate": 5.529544344887227e-05,
      "loss": 0.8305,
      "step": 17755
    },
    {
      "epoch": 2.0489155514536224,
      "grad_norm": 0.4345441460609436,
      "learning_rate": 5.523541443045904e-05,
      "loss": 0.8612,
      "step": 17760
    },
    {
      "epoch": 2.049492385786802,
      "grad_norm": 0.44153690338134766,
      "learning_rate": 5.517540557857869e-05,
      "loss": 0.8863,
      "step": 17765
    },
    {
      "epoch": 2.0500692201199815,
      "grad_norm": 0.3826152980327606,
      "learning_rate": 5.511541692026549e-05,
      "loss": 0.8733,
      "step": 17770
    },
    {
      "epoch": 2.050646054453161,
      "grad_norm": 0.44603461027145386,
      "learning_rate": 5.505544848254432e-05,
      "loss": 0.8998,
      "step": 17775
    },
    {
      "epoch": 2.0512228887863406,
      "grad_norm": 0.43969833850860596,
      "learning_rate": 5.4995500292431144e-05,
      "loss": 0.8992,
      "step": 17780
    },
    {
      "epoch": 2.05179972311952,
      "grad_norm": 0.4337633550167084,
      "learning_rate": 5.493557237693271e-05,
      "loss": 0.8735,
      "step": 17785
    },
    {
      "epoch": 2.0523765574526998,
      "grad_norm": 0.3976989984512329,
      "learning_rate": 5.4875664763046705e-05,
      "loss": 0.8607,
      "step": 17790
    },
    {
      "epoch": 2.052953391785879,
      "grad_norm": 0.44140127301216125,
      "learning_rate": 5.481577747776156e-05,
      "loss": 0.9202,
      "step": 17795
    },
    {
      "epoch": 2.0535302261190584,
      "grad_norm": 0.439375638961792,
      "learning_rate": 5.4755910548056666e-05,
      "loss": 0.8707,
      "step": 17800
    },
    {
      "epoch": 2.054107060452238,
      "grad_norm": 0.42562079429626465,
      "learning_rate": 5.4696064000902146e-05,
      "loss": 0.8643,
      "step": 17805
    },
    {
      "epoch": 2.0546838947854176,
      "grad_norm": 0.4260598421096802,
      "learning_rate": 5.463623786325907e-05,
      "loss": 0.8906,
      "step": 17810
    },
    {
      "epoch": 2.055260729118597,
      "grad_norm": 0.40893182158470154,
      "learning_rate": 5.457643216207907e-05,
      "loss": 0.8497,
      "step": 17815
    },
    {
      "epoch": 2.0558375634517767,
      "grad_norm": 0.3989853262901306,
      "learning_rate": 5.451664692430493e-05,
      "loss": 0.889,
      "step": 17820
    },
    {
      "epoch": 2.0564143977849563,
      "grad_norm": 0.38165777921676636,
      "learning_rate": 5.445688217686986e-05,
      "loss": 0.8788,
      "step": 17825
    },
    {
      "epoch": 2.056991232118136,
      "grad_norm": 0.4174046218395233,
      "learning_rate": 5.4397137946698106e-05,
      "loss": 0.8305,
      "step": 17830
    },
    {
      "epoch": 2.0575680664513154,
      "grad_norm": 0.4230709373950958,
      "learning_rate": 5.433741426070442e-05,
      "loss": 0.877,
      "step": 17835
    },
    {
      "epoch": 2.0581449007844945,
      "grad_norm": 0.44466516375541687,
      "learning_rate": 5.427771114579462e-05,
      "loss": 0.8842,
      "step": 17840
    },
    {
      "epoch": 2.058721735117674,
      "grad_norm": 0.42083120346069336,
      "learning_rate": 5.421802862886494e-05,
      "loss": 0.8768,
      "step": 17845
    },
    {
      "epoch": 2.0592985694508537,
      "grad_norm": 0.46251851320266724,
      "learning_rate": 5.415836673680253e-05,
      "loss": 0.91,
      "step": 17850
    },
    {
      "epoch": 2.059875403784033,
      "grad_norm": 0.41464483737945557,
      "learning_rate": 5.4098725496485116e-05,
      "loss": 0.8935,
      "step": 17855
    },
    {
      "epoch": 2.060452238117213,
      "grad_norm": 0.40952378511428833,
      "learning_rate": 5.4039104934781305e-05,
      "loss": 0.8705,
      "step": 17860
    },
    {
      "epoch": 2.0610290724503924,
      "grad_norm": 0.40670013427734375,
      "learning_rate": 5.3979505078550184e-05,
      "loss": 0.8536,
      "step": 17865
    },
    {
      "epoch": 2.061605906783572,
      "grad_norm": 0.4991162121295929,
      "learning_rate": 5.391992595464166e-05,
      "loss": 0.8463,
      "step": 17870
    },
    {
      "epoch": 2.0621827411167515,
      "grad_norm": 0.3962211012840271,
      "learning_rate": 5.38603675898962e-05,
      "loss": 0.8782,
      "step": 17875
    },
    {
      "epoch": 2.0627595754499306,
      "grad_norm": 0.41492053866386414,
      "learning_rate": 5.380083001114503e-05,
      "loss": 0.9583,
      "step": 17880
    },
    {
      "epoch": 2.06333640978311,
      "grad_norm": 0.44420069456100464,
      "learning_rate": 5.3741313245209854e-05,
      "loss": 0.896,
      "step": 17885
    },
    {
      "epoch": 2.0639132441162897,
      "grad_norm": 0.37794816493988037,
      "learning_rate": 5.368181731890316e-05,
      "loss": 0.9014,
      "step": 17890
    },
    {
      "epoch": 2.0644900784494693,
      "grad_norm": 0.47934192419052124,
      "learning_rate": 5.362234225902794e-05,
      "loss": 0.9153,
      "step": 17895
    },
    {
      "epoch": 2.065066912782649,
      "grad_norm": 0.4091241657733917,
      "learning_rate": 5.356288809237788e-05,
      "loss": 0.8829,
      "step": 17900
    },
    {
      "epoch": 2.0656437471158284,
      "grad_norm": 0.4090352952480316,
      "learning_rate": 5.350345484573709e-05,
      "loss": 0.8856,
      "step": 17905
    },
    {
      "epoch": 2.066220581449008,
      "grad_norm": 0.4243384003639221,
      "learning_rate": 5.3444042545880514e-05,
      "loss": 0.8999,
      "step": 17910
    },
    {
      "epoch": 2.0667974157821876,
      "grad_norm": 0.45561861991882324,
      "learning_rate": 5.338465121957338e-05,
      "loss": 0.8859,
      "step": 17915
    },
    {
      "epoch": 2.0673742501153667,
      "grad_norm": 0.4170970320701599,
      "learning_rate": 5.332528089357165e-05,
      "loss": 0.8182,
      "step": 17920
    },
    {
      "epoch": 2.0679510844485463,
      "grad_norm": 0.4319002628326416,
      "learning_rate": 5.3265931594621756e-05,
      "loss": 0.9013,
      "step": 17925
    },
    {
      "epoch": 2.068527918781726,
      "grad_norm": 0.43598082661628723,
      "learning_rate": 5.320660334946072e-05,
      "loss": 0.8559,
      "step": 17930
    },
    {
      "epoch": 2.0691047531149054,
      "grad_norm": 0.4153001308441162,
      "learning_rate": 5.3147296184815956e-05,
      "loss": 0.8746,
      "step": 17935
    },
    {
      "epoch": 2.069681587448085,
      "grad_norm": 0.42318597435951233,
      "learning_rate": 5.3088010127405496e-05,
      "loss": 0.8615,
      "step": 17940
    },
    {
      "epoch": 2.0702584217812645,
      "grad_norm": 0.41694238781929016,
      "learning_rate": 5.3028745203937825e-05,
      "loss": 0.8214,
      "step": 17945
    },
    {
      "epoch": 2.070835256114444,
      "grad_norm": 0.4185914993286133,
      "learning_rate": 5.296950144111195e-05,
      "loss": 0.8326,
      "step": 17950
    },
    {
      "epoch": 2.0714120904476236,
      "grad_norm": 0.38809898495674133,
      "learning_rate": 5.29102788656172e-05,
      "loss": 0.8329,
      "step": 17955
    },
    {
      "epoch": 2.0719889247808028,
      "grad_norm": 0.40080171823501587,
      "learning_rate": 5.285107750413353e-05,
      "loss": 0.8864,
      "step": 17960
    },
    {
      "epoch": 2.0725657591139823,
      "grad_norm": 0.4352705180644989,
      "learning_rate": 5.279189738333125e-05,
      "loss": 0.8707,
      "step": 17965
    },
    {
      "epoch": 2.073142593447162,
      "grad_norm": 0.41367441415786743,
      "learning_rate": 5.273273852987113e-05,
      "loss": 0.8819,
      "step": 17970
    },
    {
      "epoch": 2.0737194277803415,
      "grad_norm": 0.40614110231399536,
      "learning_rate": 5.2673600970404336e-05,
      "loss": 0.9156,
      "step": 17975
    },
    {
      "epoch": 2.074296262113521,
      "grad_norm": 0.41569119691848755,
      "learning_rate": 5.26144847315725e-05,
      "loss": 0.8798,
      "step": 17980
    },
    {
      "epoch": 2.0748730964467006,
      "grad_norm": 0.41030633449554443,
      "learning_rate": 5.255538984000753e-05,
      "loss": 0.8908,
      "step": 17985
    },
    {
      "epoch": 2.07544993077988,
      "grad_norm": 0.43785807490348816,
      "learning_rate": 5.249631632233182e-05,
      "loss": 0.9173,
      "step": 17990
    },
    {
      "epoch": 2.0760267651130597,
      "grad_norm": 0.40564823150634766,
      "learning_rate": 5.243726420515811e-05,
      "loss": 0.8732,
      "step": 17995
    },
    {
      "epoch": 2.076603599446239,
      "grad_norm": 0.3811871409416199,
      "learning_rate": 5.237823351508953e-05,
      "loss": 0.8702,
      "step": 18000
    },
    {
      "epoch": 2.0771804337794184,
      "grad_norm": 0.41942134499549866,
      "learning_rate": 5.231922427871945e-05,
      "loss": 0.895,
      "step": 18005
    },
    {
      "epoch": 2.077757268112598,
      "grad_norm": 0.4052973985671997,
      "learning_rate": 5.2260236522631665e-05,
      "loss": 0.8592,
      "step": 18010
    },
    {
      "epoch": 2.0783341024457775,
      "grad_norm": 0.4108360707759857,
      "learning_rate": 5.2201270273400296e-05,
      "loss": 0.8819,
      "step": 18015
    },
    {
      "epoch": 2.078910936778957,
      "grad_norm": 0.417239785194397,
      "learning_rate": 5.2142325557589753e-05,
      "loss": 0.8474,
      "step": 18020
    },
    {
      "epoch": 2.0794877711121367,
      "grad_norm": 0.42869383096694946,
      "learning_rate": 5.208340240175476e-05,
      "loss": 0.8416,
      "step": 18025
    },
    {
      "epoch": 2.0800646054453162,
      "grad_norm": 0.40606796741485596,
      "learning_rate": 5.202450083244026e-05,
      "loss": 0.8673,
      "step": 18030
    },
    {
      "epoch": 2.080641439778496,
      "grad_norm": 0.3967663049697876,
      "learning_rate": 5.1965620876181564e-05,
      "loss": 0.8634,
      "step": 18035
    },
    {
      "epoch": 2.081218274111675,
      "grad_norm": 0.4389677345752716,
      "learning_rate": 5.190676255950418e-05,
      "loss": 0.8818,
      "step": 18040
    },
    {
      "epoch": 2.0817951084448545,
      "grad_norm": 0.440441370010376,
      "learning_rate": 5.184792590892397e-05,
      "loss": 0.875,
      "step": 18045
    },
    {
      "epoch": 2.082371942778034,
      "grad_norm": 0.45011159777641296,
      "learning_rate": 5.178911095094685e-05,
      "loss": 0.8821,
      "step": 18050
    },
    {
      "epoch": 2.0829487771112136,
      "grad_norm": 0.43679890036582947,
      "learning_rate": 5.173031771206913e-05,
      "loss": 0.8342,
      "step": 18055
    },
    {
      "epoch": 2.083525611444393,
      "grad_norm": 0.41379445791244507,
      "learning_rate": 5.167154621877728e-05,
      "loss": 0.8426,
      "step": 18060
    },
    {
      "epoch": 2.0841024457775728,
      "grad_norm": 0.4258072078227997,
      "learning_rate": 5.161279649754796e-05,
      "loss": 0.9147,
      "step": 18065
    },
    {
      "epoch": 2.0846792801107523,
      "grad_norm": 0.45177289843559265,
      "learning_rate": 5.155406857484804e-05,
      "loss": 0.8421,
      "step": 18070
    },
    {
      "epoch": 2.085256114443932,
      "grad_norm": 0.3834865689277649,
      "learning_rate": 5.14953624771346e-05,
      "loss": 0.925,
      "step": 18075
    },
    {
      "epoch": 2.085832948777111,
      "grad_norm": 0.39812150597572327,
      "learning_rate": 5.143667823085477e-05,
      "loss": 0.8863,
      "step": 18080
    },
    {
      "epoch": 2.0864097831102906,
      "grad_norm": 0.3837997317314148,
      "learning_rate": 5.1378015862445975e-05,
      "loss": 0.8636,
      "step": 18085
    },
    {
      "epoch": 2.08698661744347,
      "grad_norm": 0.4225236773490906,
      "learning_rate": 5.131937539833571e-05,
      "loss": 0.9135,
      "step": 18090
    },
    {
      "epoch": 2.0875634517766497,
      "grad_norm": 0.41509759426116943,
      "learning_rate": 5.126075686494165e-05,
      "loss": 0.8911,
      "step": 18095
    },
    {
      "epoch": 2.0881402861098293,
      "grad_norm": 0.3711944818496704,
      "learning_rate": 5.1202160288671505e-05,
      "loss": 0.8855,
      "step": 18100
    },
    {
      "epoch": 2.088717120443009,
      "grad_norm": 0.40293386578559875,
      "learning_rate": 5.1143585695923166e-05,
      "loss": 0.8907,
      "step": 18105
    },
    {
      "epoch": 2.0892939547761884,
      "grad_norm": 0.3973406255245209,
      "learning_rate": 5.108503311308461e-05,
      "loss": 0.8828,
      "step": 18110
    },
    {
      "epoch": 2.089870789109368,
      "grad_norm": 0.46865829825401306,
      "learning_rate": 5.1026502566533917e-05,
      "loss": 0.8478,
      "step": 18115
    },
    {
      "epoch": 2.090447623442547,
      "grad_norm": 0.3833647668361664,
      "learning_rate": 5.09679940826391e-05,
      "loss": 0.8695,
      "step": 18120
    },
    {
      "epoch": 2.0910244577757267,
      "grad_norm": 0.44151002168655396,
      "learning_rate": 5.0909507687758515e-05,
      "loss": 0.8356,
      "step": 18125
    },
    {
      "epoch": 2.091601292108906,
      "grad_norm": 0.4126966595649719,
      "learning_rate": 5.085104340824027e-05,
      "loss": 0.8573,
      "step": 18130
    },
    {
      "epoch": 2.092178126442086,
      "grad_norm": 0.4035085141658783,
      "learning_rate": 5.079260127042267e-05,
      "loss": 0.8223,
      "step": 18135
    },
    {
      "epoch": 2.0927549607752653,
      "grad_norm": 0.4391005337238312,
      "learning_rate": 5.0734181300634024e-05,
      "loss": 0.9041,
      "step": 18140
    },
    {
      "epoch": 2.093331795108445,
      "grad_norm": 0.4382282495498657,
      "learning_rate": 5.067578352519267e-05,
      "loss": 0.8725,
      "step": 18145
    },
    {
      "epoch": 2.0939086294416245,
      "grad_norm": 0.3936323821544647,
      "learning_rate": 5.061740797040684e-05,
      "loss": 0.8838,
      "step": 18150
    },
    {
      "epoch": 2.094485463774804,
      "grad_norm": 0.4061303734779358,
      "learning_rate": 5.0559054662574876e-05,
      "loss": 0.88,
      "step": 18155
    },
    {
      "epoch": 2.095062298107983,
      "grad_norm": 0.47590842843055725,
      "learning_rate": 5.050072362798507e-05,
      "loss": 0.8818,
      "step": 18160
    },
    {
      "epoch": 2.0956391324411627,
      "grad_norm": 0.4334276020526886,
      "learning_rate": 5.044241489291569e-05,
      "loss": 0.8728,
      "step": 18165
    },
    {
      "epoch": 2.0962159667743423,
      "grad_norm": 0.4106757640838623,
      "learning_rate": 5.0384128483634875e-05,
      "loss": 0.8644,
      "step": 18170
    },
    {
      "epoch": 2.096792801107522,
      "grad_norm": 0.3826504051685333,
      "learning_rate": 5.032586442640077e-05,
      "loss": 0.8619,
      "step": 18175
    },
    {
      "epoch": 2.0973696354407014,
      "grad_norm": 0.48092758655548096,
      "learning_rate": 5.0267622747461487e-05,
      "loss": 0.8642,
      "step": 18180
    },
    {
      "epoch": 2.097946469773881,
      "grad_norm": 0.4312995672225952,
      "learning_rate": 5.020940347305503e-05,
      "loss": 0.8716,
      "step": 18185
    },
    {
      "epoch": 2.0985233041070606,
      "grad_norm": 0.391157865524292,
      "learning_rate": 5.0151206629409195e-05,
      "loss": 0.8319,
      "step": 18190
    },
    {
      "epoch": 2.09910013844024,
      "grad_norm": 0.42850786447525024,
      "learning_rate": 5.009303224274191e-05,
      "loss": 0.8552,
      "step": 18195
    },
    {
      "epoch": 2.0996769727734197,
      "grad_norm": 0.47858262062072754,
      "learning_rate": 5.0034880339260734e-05,
      "loss": 0.8636,
      "step": 18200
    },
    {
      "epoch": 2.100253807106599,
      "grad_norm": 0.38631346821784973,
      "learning_rate": 4.997675094516332e-05,
      "loss": 0.8758,
      "step": 18205
    },
    {
      "epoch": 2.1008306414397784,
      "grad_norm": 0.4535498023033142,
      "learning_rate": 4.991864408663692e-05,
      "loss": 0.855,
      "step": 18210
    },
    {
      "epoch": 2.101407475772958,
      "grad_norm": 0.41745612025260925,
      "learning_rate": 4.9860559789858965e-05,
      "loss": 0.8748,
      "step": 18215
    },
    {
      "epoch": 2.1019843101061375,
      "grad_norm": 0.4063878655433655,
      "learning_rate": 4.980249808099642e-05,
      "loss": 0.9213,
      "step": 18220
    },
    {
      "epoch": 2.102561144439317,
      "grad_norm": 0.436110258102417,
      "learning_rate": 4.974445898620622e-05,
      "loss": 0.8632,
      "step": 18225
    },
    {
      "epoch": 2.1031379787724966,
      "grad_norm": 0.39621496200561523,
      "learning_rate": 4.968644253163513e-05,
      "loss": 0.9058,
      "step": 18230
    },
    {
      "epoch": 2.103714813105676,
      "grad_norm": 0.4312610328197479,
      "learning_rate": 4.9628448743419675e-05,
      "loss": 0.8817,
      "step": 18235
    },
    {
      "epoch": 2.1042916474388558,
      "grad_norm": 0.42516598105430603,
      "learning_rate": 4.957047764768612e-05,
      "loss": 0.8902,
      "step": 18240
    },
    {
      "epoch": 2.104868481772035,
      "grad_norm": 0.4386134743690491,
      "learning_rate": 4.951252927055058e-05,
      "loss": 0.8838,
      "step": 18245
    },
    {
      "epoch": 2.1054453161052145,
      "grad_norm": 0.38619065284729004,
      "learning_rate": 4.945460363811891e-05,
      "loss": 0.8821,
      "step": 18250
    },
    {
      "epoch": 2.106022150438394,
      "grad_norm": 0.43006646633148193,
      "learning_rate": 4.939670077648676e-05,
      "loss": 0.827,
      "step": 18255
    },
    {
      "epoch": 2.1065989847715736,
      "grad_norm": 0.39681535959243774,
      "learning_rate": 4.933882071173939e-05,
      "loss": 0.8463,
      "step": 18260
    },
    {
      "epoch": 2.107175819104753,
      "grad_norm": 0.5474952459335327,
      "learning_rate": 4.9280963469952e-05,
      "loss": 0.8507,
      "step": 18265
    },
    {
      "epoch": 2.1077526534379327,
      "grad_norm": 0.39354756474494934,
      "learning_rate": 4.922312907718929e-05,
      "loss": 0.8959,
      "step": 18270
    },
    {
      "epoch": 2.1083294877711123,
      "grad_norm": 0.412794291973114,
      "learning_rate": 4.916531755950585e-05,
      "loss": 0.8846,
      "step": 18275
    },
    {
      "epoch": 2.108906322104292,
      "grad_norm": 0.4190261960029602,
      "learning_rate": 4.9107528942945754e-05,
      "loss": 0.8847,
      "step": 18280
    },
    {
      "epoch": 2.109483156437471,
      "grad_norm": 0.4136815667152405,
      "learning_rate": 4.9049763253543054e-05,
      "loss": 0.8194,
      "step": 18285
    },
    {
      "epoch": 2.1100599907706505,
      "grad_norm": 0.44577425718307495,
      "learning_rate": 4.8992020517321194e-05,
      "loss": 0.8366,
      "step": 18290
    },
    {
      "epoch": 2.11063682510383,
      "grad_norm": 0.4660280644893646,
      "learning_rate": 4.8934300760293396e-05,
      "loss": 0.8508,
      "step": 18295
    },
    {
      "epoch": 2.1112136594370097,
      "grad_norm": 0.43971607089042664,
      "learning_rate": 4.8876604008462554e-05,
      "loss": 0.9408,
      "step": 18300
    },
    {
      "epoch": 2.1117904937701892,
      "grad_norm": 0.4964000880718231,
      "learning_rate": 4.881893028782118e-05,
      "loss": 0.8984,
      "step": 18305
    },
    {
      "epoch": 2.112367328103369,
      "grad_norm": 0.4421519935131073,
      "learning_rate": 4.876127962435135e-05,
      "loss": 0.883,
      "step": 18310
    },
    {
      "epoch": 2.1129441624365484,
      "grad_norm": 0.4391144812107086,
      "learning_rate": 4.870365204402483e-05,
      "loss": 0.8759,
      "step": 18315
    },
    {
      "epoch": 2.113520996769728,
      "grad_norm": 0.43410804867744446,
      "learning_rate": 4.864604757280293e-05,
      "loss": 0.8744,
      "step": 18320
    },
    {
      "epoch": 2.114097831102907,
      "grad_norm": 0.4183378219604492,
      "learning_rate": 4.8588466236636656e-05,
      "loss": 0.846,
      "step": 18325
    },
    {
      "epoch": 2.1146746654360866,
      "grad_norm": 0.39406901597976685,
      "learning_rate": 4.8530908061466404e-05,
      "loss": 0.8236,
      "step": 18330
    },
    {
      "epoch": 2.115251499769266,
      "grad_norm": 0.4376058876514435,
      "learning_rate": 4.8473373073222294e-05,
      "loss": 0.8406,
      "step": 18335
    },
    {
      "epoch": 2.1158283341024458,
      "grad_norm": 0.4023524224758148,
      "learning_rate": 4.841586129782395e-05,
      "loss": 0.8672,
      "step": 18340
    },
    {
      "epoch": 2.1164051684356253,
      "grad_norm": 0.5077024698257446,
      "learning_rate": 4.835837276118058e-05,
      "loss": 0.8847,
      "step": 18345
    },
    {
      "epoch": 2.116982002768805,
      "grad_norm": 0.39667654037475586,
      "learning_rate": 4.830090748919076e-05,
      "loss": 0.8446,
      "step": 18350
    },
    {
      "epoch": 2.1175588371019844,
      "grad_norm": 0.4139440953731537,
      "learning_rate": 4.8243465507742866e-05,
      "loss": 0.8577,
      "step": 18355
    },
    {
      "epoch": 2.118135671435164,
      "grad_norm": 0.40672725439071655,
      "learning_rate": 4.8186046842714504e-05,
      "loss": 0.8879,
      "step": 18360
    },
    {
      "epoch": 2.118712505768343,
      "grad_norm": 0.40575653314590454,
      "learning_rate": 4.812865151997298e-05,
      "loss": 0.9089,
      "step": 18365
    },
    {
      "epoch": 2.1192893401015227,
      "grad_norm": 0.46657443046569824,
      "learning_rate": 4.807127956537487e-05,
      "loss": 0.8865,
      "step": 18370
    },
    {
      "epoch": 2.1198661744347023,
      "grad_norm": 0.49544504284858704,
      "learning_rate": 4.801393100476651e-05,
      "loss": 0.9071,
      "step": 18375
    },
    {
      "epoch": 2.120443008767882,
      "grad_norm": 0.462701678276062,
      "learning_rate": 4.795660586398342e-05,
      "loss": 0.8795,
      "step": 18380
    },
    {
      "epoch": 2.1210198431010614,
      "grad_norm": 0.41454997658729553,
      "learning_rate": 4.7899304168850734e-05,
      "loss": 0.8521,
      "step": 18385
    },
    {
      "epoch": 2.121596677434241,
      "grad_norm": 0.4203789234161377,
      "learning_rate": 4.784202594518298e-05,
      "loss": 0.8631,
      "step": 18390
    },
    {
      "epoch": 2.1221735117674205,
      "grad_norm": 0.38255804777145386,
      "learning_rate": 4.778477121878413e-05,
      "loss": 0.859,
      "step": 18395
    },
    {
      "epoch": 2.1227503461006,
      "grad_norm": 0.405504435300827,
      "learning_rate": 4.772754001544748e-05,
      "loss": 0.8923,
      "step": 18400
    },
    {
      "epoch": 2.123327180433779,
      "grad_norm": 0.48973825573921204,
      "learning_rate": 4.767033236095585e-05,
      "loss": 0.8781,
      "step": 18405
    },
    {
      "epoch": 2.123904014766959,
      "grad_norm": 0.43271970748901367,
      "learning_rate": 4.761314828108139e-05,
      "loss": 0.887,
      "step": 18410
    },
    {
      "epoch": 2.1244808491001383,
      "grad_norm": 0.49364644289016724,
      "learning_rate": 4.755598780158568e-05,
      "loss": 0.8777,
      "step": 18415
    },
    {
      "epoch": 2.125057683433318,
      "grad_norm": 0.42385977506637573,
      "learning_rate": 4.749885094821951e-05,
      "loss": 0.8959,
      "step": 18420
    },
    {
      "epoch": 2.1256345177664975,
      "grad_norm": 0.4609013795852661,
      "learning_rate": 4.744173774672329e-05,
      "loss": 0.8556,
      "step": 18425
    },
    {
      "epoch": 2.126211352099677,
      "grad_norm": 0.4490292966365814,
      "learning_rate": 4.738464822282652e-05,
      "loss": 0.8809,
      "step": 18430
    },
    {
      "epoch": 2.1267881864328566,
      "grad_norm": 0.39603060483932495,
      "learning_rate": 4.732758240224818e-05,
      "loss": 0.9149,
      "step": 18435
    },
    {
      "epoch": 2.127365020766036,
      "grad_norm": 0.41065889596939087,
      "learning_rate": 4.727054031069654e-05,
      "loss": 0.8645,
      "step": 18440
    },
    {
      "epoch": 2.1279418550992153,
      "grad_norm": 0.4732472002506256,
      "learning_rate": 4.7213521973869215e-05,
      "loss": 0.8702,
      "step": 18445
    },
    {
      "epoch": 2.128518689432395,
      "grad_norm": 0.42086437344551086,
      "learning_rate": 4.715652741745298e-05,
      "loss": 0.8654,
      "step": 18450
    },
    {
      "epoch": 2.1290955237655744,
      "grad_norm": 0.4273388981819153,
      "learning_rate": 4.709955666712404e-05,
      "loss": 0.8802,
      "step": 18455
    },
    {
      "epoch": 2.129672358098754,
      "grad_norm": 0.4001615047454834,
      "learning_rate": 4.704260974854784e-05,
      "loss": 0.8651,
      "step": 18460
    },
    {
      "epoch": 2.1302491924319336,
      "grad_norm": 0.38031452894210815,
      "learning_rate": 4.6985686687379103e-05,
      "loss": 0.8613,
      "step": 18465
    },
    {
      "epoch": 2.130826026765113,
      "grad_norm": 0.44780388474464417,
      "learning_rate": 4.692878750926171e-05,
      "loss": 0.8348,
      "step": 18470
    },
    {
      "epoch": 2.1314028610982927,
      "grad_norm": 0.44016191363334656,
      "learning_rate": 4.687191223982889e-05,
      "loss": 0.8832,
      "step": 18475
    },
    {
      "epoch": 2.1319796954314723,
      "grad_norm": 0.3917645514011383,
      "learning_rate": 4.6815060904703046e-05,
      "loss": 0.8561,
      "step": 18480
    },
    {
      "epoch": 2.132556529764652,
      "grad_norm": 0.4286423921585083,
      "learning_rate": 4.6758233529495846e-05,
      "loss": 0.8981,
      "step": 18485
    },
    {
      "epoch": 2.133133364097831,
      "grad_norm": 0.39710286259651184,
      "learning_rate": 4.670143013980814e-05,
      "loss": 0.8657,
      "step": 18490
    },
    {
      "epoch": 2.1337101984310105,
      "grad_norm": 0.49590003490448,
      "learning_rate": 4.664465076122991e-05,
      "loss": 0.8942,
      "step": 18495
    },
    {
      "epoch": 2.13428703276419,
      "grad_norm": 0.40663713216781616,
      "learning_rate": 4.658789541934041e-05,
      "loss": 0.851,
      "step": 18500
    },
    {
      "epoch": 2.1348638670973696,
      "grad_norm": 0.4643513262271881,
      "learning_rate": 4.653116413970803e-05,
      "loss": 0.8585,
      "step": 18505
    },
    {
      "epoch": 2.135440701430549,
      "grad_norm": 0.39103883504867554,
      "learning_rate": 4.647445694789032e-05,
      "loss": 0.8343,
      "step": 18510
    },
    {
      "epoch": 2.1360175357637288,
      "grad_norm": 0.49371686577796936,
      "learning_rate": 4.641777386943402e-05,
      "loss": 0.8586,
      "step": 18515
    },
    {
      "epoch": 2.1365943700969083,
      "grad_norm": 0.44037875533103943,
      "learning_rate": 4.6361114929874895e-05,
      "loss": 0.8757,
      "step": 18520
    },
    {
      "epoch": 2.1371712044300875,
      "grad_norm": 0.42881131172180176,
      "learning_rate": 4.630448015473794e-05,
      "loss": 0.9403,
      "step": 18525
    },
    {
      "epoch": 2.137748038763267,
      "grad_norm": 0.40794649720191956,
      "learning_rate": 4.6247869569537225e-05,
      "loss": 0.862,
      "step": 18530
    },
    {
      "epoch": 2.1383248730964466,
      "grad_norm": 0.41439539194107056,
      "learning_rate": 4.6191283199775946e-05,
      "loss": 0.8535,
      "step": 18535
    },
    {
      "epoch": 2.138901707429626,
      "grad_norm": 0.4385506212711334,
      "learning_rate": 4.613472107094641e-05,
      "loss": 0.8473,
      "step": 18540
    },
    {
      "epoch": 2.1394785417628057,
      "grad_norm": 0.39511799812316895,
      "learning_rate": 4.607818320852988e-05,
      "loss": 0.9115,
      "step": 18545
    },
    {
      "epoch": 2.1400553760959853,
      "grad_norm": 0.43139395117759705,
      "learning_rate": 4.60216696379968e-05,
      "loss": 0.8281,
      "step": 18550
    },
    {
      "epoch": 2.140632210429165,
      "grad_norm": 0.3915206491947174,
      "learning_rate": 4.596518038480667e-05,
      "loss": 0.851,
      "step": 18555
    },
    {
      "epoch": 2.1412090447623444,
      "grad_norm": 0.4159088134765625,
      "learning_rate": 4.590871547440804e-05,
      "loss": 0.885,
      "step": 18560
    },
    {
      "epoch": 2.141785879095524,
      "grad_norm": 0.43738165497779846,
      "learning_rate": 4.585227493223836e-05,
      "loss": 0.8766,
      "step": 18565
    },
    {
      "epoch": 2.142362713428703,
      "grad_norm": 0.4490554630756378,
      "learning_rate": 4.579585878372428e-05,
      "loss": 0.8452,
      "step": 18570
    },
    {
      "epoch": 2.1429395477618827,
      "grad_norm": 0.4256729185581207,
      "learning_rate": 4.573946705428136e-05,
      "loss": 0.8571,
      "step": 18575
    },
    {
      "epoch": 2.1435163820950622,
      "grad_norm": 0.43782299757003784,
      "learning_rate": 4.5683099769314185e-05,
      "loss": 0.9114,
      "step": 18580
    },
    {
      "epoch": 2.144093216428242,
      "grad_norm": 0.4350529909133911,
      "learning_rate": 4.562675695421634e-05,
      "loss": 0.8728,
      "step": 18585
    },
    {
      "epoch": 2.1446700507614214,
      "grad_norm": 0.4200105667114258,
      "learning_rate": 4.55704386343704e-05,
      "loss": 0.8267,
      "step": 18590
    },
    {
      "epoch": 2.145246885094601,
      "grad_norm": 0.4020532965660095,
      "learning_rate": 4.551414483514781e-05,
      "loss": 0.8791,
      "step": 18595
    },
    {
      "epoch": 2.1458237194277805,
      "grad_norm": 0.4043259918689728,
      "learning_rate": 4.545787558190907e-05,
      "loss": 0.8687,
      "step": 18600
    },
    {
      "epoch": 2.1464005537609596,
      "grad_norm": 0.4017442464828491,
      "learning_rate": 4.540163090000358e-05,
      "loss": 0.8764,
      "step": 18605
    },
    {
      "epoch": 2.146977388094139,
      "grad_norm": 0.4435402452945709,
      "learning_rate": 4.534541081476973e-05,
      "loss": 0.8345,
      "step": 18610
    },
    {
      "epoch": 2.1475542224273187,
      "grad_norm": 0.4666100740432739,
      "learning_rate": 4.5289215351534666e-05,
      "loss": 0.8801,
      "step": 18615
    },
    {
      "epoch": 2.1481310567604983,
      "grad_norm": 0.40481117367744446,
      "learning_rate": 4.5233044535614676e-05,
      "loss": 0.8678,
      "step": 18620
    },
    {
      "epoch": 2.148707891093678,
      "grad_norm": 0.4130145013332367,
      "learning_rate": 4.517689839231475e-05,
      "loss": 0.8754,
      "step": 18625
    },
    {
      "epoch": 2.1492847254268574,
      "grad_norm": 0.41234853863716125,
      "learning_rate": 4.512077694692888e-05,
      "loss": 0.8644,
      "step": 18630
    },
    {
      "epoch": 2.149861559760037,
      "grad_norm": 0.4444868862628937,
      "learning_rate": 4.5064680224739783e-05,
      "loss": 0.8812,
      "step": 18635
    },
    {
      "epoch": 2.1504383940932166,
      "grad_norm": 0.4467960596084595,
      "learning_rate": 4.50086082510193e-05,
      "loss": 0.8759,
      "step": 18640
    },
    {
      "epoch": 2.151015228426396,
      "grad_norm": 0.4122447371482849,
      "learning_rate": 4.495256105102784e-05,
      "loss": 0.8791,
      "step": 18645
    },
    {
      "epoch": 2.1515920627595753,
      "grad_norm": 0.5279197692871094,
      "learning_rate": 4.489653865001485e-05,
      "loss": 0.8049,
      "step": 18650
    },
    {
      "epoch": 2.152168897092755,
      "grad_norm": 0.3907780647277832,
      "learning_rate": 4.4840541073218433e-05,
      "loss": 0.9522,
      "step": 18655
    },
    {
      "epoch": 2.1527457314259344,
      "grad_norm": 0.4981468617916107,
      "learning_rate": 4.478456834586574e-05,
      "loss": 0.8748,
      "step": 18660
    },
    {
      "epoch": 2.153322565759114,
      "grad_norm": 0.4211672246456146,
      "learning_rate": 4.472862049317249e-05,
      "loss": 0.8337,
      "step": 18665
    },
    {
      "epoch": 2.1538994000922935,
      "grad_norm": 0.43220365047454834,
      "learning_rate": 4.467269754034333e-05,
      "loss": 0.8608,
      "step": 18670
    },
    {
      "epoch": 2.154476234425473,
      "grad_norm": 0.4173910617828369,
      "learning_rate": 4.4616799512571675e-05,
      "loss": 0.8675,
      "step": 18675
    },
    {
      "epoch": 2.1550530687586527,
      "grad_norm": 0.4267426133155823,
      "learning_rate": 4.456092643503972e-05,
      "loss": 0.8683,
      "step": 18680
    },
    {
      "epoch": 2.1556299030918322,
      "grad_norm": 0.3888731300830841,
      "learning_rate": 4.450507833291831e-05,
      "loss": 0.8461,
      "step": 18685
    },
    {
      "epoch": 2.1562067374250113,
      "grad_norm": 0.40398934483528137,
      "learning_rate": 4.4449255231367183e-05,
      "loss": 0.8115,
      "step": 18690
    },
    {
      "epoch": 2.156783571758191,
      "grad_norm": 0.4564119279384613,
      "learning_rate": 4.439345715553475e-05,
      "loss": 0.8732,
      "step": 18695
    },
    {
      "epoch": 2.1573604060913705,
      "grad_norm": 0.43610015511512756,
      "learning_rate": 4.433768413055818e-05,
      "loss": 0.8955,
      "step": 18700
    },
    {
      "epoch": 2.15793724042455,
      "grad_norm": 0.40108174085617065,
      "learning_rate": 4.428193618156322e-05,
      "loss": 0.8907,
      "step": 18705
    },
    {
      "epoch": 2.1585140747577296,
      "grad_norm": 0.43645983934402466,
      "learning_rate": 4.422621333366459e-05,
      "loss": 0.8627,
      "step": 18710
    },
    {
      "epoch": 2.159090909090909,
      "grad_norm": 0.3946572542190552,
      "learning_rate": 4.4170515611965415e-05,
      "loss": 0.8467,
      "step": 18715
    },
    {
      "epoch": 2.1596677434240887,
      "grad_norm": 0.40307918190956116,
      "learning_rate": 4.411484304155771e-05,
      "loss": 0.8717,
      "step": 18720
    },
    {
      "epoch": 2.1602445777572683,
      "grad_norm": 0.4881908893585205,
      "learning_rate": 4.405919564752195e-05,
      "loss": 0.9199,
      "step": 18725
    },
    {
      "epoch": 2.1608214120904474,
      "grad_norm": 0.40026798844337463,
      "learning_rate": 4.4003573454927585e-05,
      "loss": 0.8857,
      "step": 18730
    },
    {
      "epoch": 2.161398246423627,
      "grad_norm": 0.4110172986984253,
      "learning_rate": 4.394797648883236e-05,
      "loss": 0.8701,
      "step": 18735
    },
    {
      "epoch": 2.1619750807568066,
      "grad_norm": 0.44310736656188965,
      "learning_rate": 4.389240477428288e-05,
      "loss": 0.8547,
      "step": 18740
    },
    {
      "epoch": 2.162551915089986,
      "grad_norm": 0.4193337559700012,
      "learning_rate": 4.38368583363143e-05,
      "loss": 0.8378,
      "step": 18745
    },
    {
      "epoch": 2.1631287494231657,
      "grad_norm": 0.43070536851882935,
      "learning_rate": 4.378133719995044e-05,
      "loss": 0.9047,
      "step": 18750
    },
    {
      "epoch": 2.1637055837563453,
      "grad_norm": 0.4080751836299896,
      "learning_rate": 4.37258413902036e-05,
      "loss": 0.8502,
      "step": 18755
    },
    {
      "epoch": 2.164282418089525,
      "grad_norm": 0.538879930973053,
      "learning_rate": 4.367037093207481e-05,
      "loss": 0.8779,
      "step": 18760
    },
    {
      "epoch": 2.1648592524227044,
      "grad_norm": 0.40123456716537476,
      "learning_rate": 4.36149258505536e-05,
      "loss": 0.8619,
      "step": 18765
    },
    {
      "epoch": 2.1654360867558835,
      "grad_norm": 0.36256951093673706,
      "learning_rate": 4.3559506170618116e-05,
      "loss": 0.8444,
      "step": 18770
    },
    {
      "epoch": 2.166012921089063,
      "grad_norm": 0.4287301003932953,
      "learning_rate": 4.350411191723498e-05,
      "loss": 0.86,
      "step": 18775
    },
    {
      "epoch": 2.1665897554222426,
      "grad_norm": 0.40902701020240784,
      "learning_rate": 4.344874311535944e-05,
      "loss": 0.8722,
      "step": 18780
    },
    {
      "epoch": 2.167166589755422,
      "grad_norm": 0.4519880712032318,
      "learning_rate": 4.339339978993523e-05,
      "loss": 0.8985,
      "step": 18785
    },
    {
      "epoch": 2.1677434240886018,
      "grad_norm": 0.39673271775245667,
      "learning_rate": 4.333808196589468e-05,
      "loss": 0.9006,
      "step": 18790
    },
    {
      "epoch": 2.1683202584217813,
      "grad_norm": 0.441983163356781,
      "learning_rate": 4.3282789668158476e-05,
      "loss": 0.9035,
      "step": 18795
    },
    {
      "epoch": 2.168897092754961,
      "grad_norm": 0.41148027777671814,
      "learning_rate": 4.3227522921636044e-05,
      "loss": 0.8512,
      "step": 18800
    },
    {
      "epoch": 2.1694739270881405,
      "grad_norm": 0.5070381760597229,
      "learning_rate": 4.317228175122504e-05,
      "loss": 0.863,
      "step": 18805
    },
    {
      "epoch": 2.1700507614213196,
      "grad_norm": 0.453912615776062,
      "learning_rate": 4.31170661818118e-05,
      "loss": 0.8576,
      "step": 18810
    },
    {
      "epoch": 2.170627595754499,
      "grad_norm": 0.4003099203109741,
      "learning_rate": 4.306187623827095e-05,
      "loss": 0.8799,
      "step": 18815
    },
    {
      "epoch": 2.1712044300876787,
      "grad_norm": 0.4114973247051239,
      "learning_rate": 4.300671194546579e-05,
      "loss": 0.8716,
      "step": 18820
    },
    {
      "epoch": 2.1717812644208583,
      "grad_norm": 0.39984384179115295,
      "learning_rate": 4.295157332824785e-05,
      "loss": 0.8489,
      "step": 18825
    },
    {
      "epoch": 2.172358098754038,
      "grad_norm": 0.4752567410469055,
      "learning_rate": 4.289646041145721e-05,
      "loss": 0.8671,
      "step": 18830
    },
    {
      "epoch": 2.1729349330872174,
      "grad_norm": 0.3880119323730469,
      "learning_rate": 4.284137321992235e-05,
      "loss": 0.8626,
      "step": 18835
    },
    {
      "epoch": 2.173511767420397,
      "grad_norm": 0.4211719036102295,
      "learning_rate": 4.27863117784602e-05,
      "loss": 0.9373,
      "step": 18840
    },
    {
      "epoch": 2.1740886017535765,
      "grad_norm": 0.4246695935726166,
      "learning_rate": 4.273127611187595e-05,
      "loss": 0.8985,
      "step": 18845
    },
    {
      "epoch": 2.174665436086756,
      "grad_norm": 0.43371304869651794,
      "learning_rate": 4.2676266244963335e-05,
      "loss": 0.8991,
      "step": 18850
    },
    {
      "epoch": 2.1752422704199352,
      "grad_norm": 0.434150755405426,
      "learning_rate": 4.262128220250441e-05,
      "loss": 0.9137,
      "step": 18855
    },
    {
      "epoch": 2.175819104753115,
      "grad_norm": 0.39126837253570557,
      "learning_rate": 4.256632400926961e-05,
      "loss": 0.9046,
      "step": 18860
    },
    {
      "epoch": 2.1763959390862944,
      "grad_norm": 0.4539991617202759,
      "learning_rate": 4.251139169001761e-05,
      "loss": 0.8592,
      "step": 18865
    },
    {
      "epoch": 2.176972773419474,
      "grad_norm": 0.41316598653793335,
      "learning_rate": 4.245648526949567e-05,
      "loss": 0.8825,
      "step": 18870
    },
    {
      "epoch": 2.1775496077526535,
      "grad_norm": 0.41215020418167114,
      "learning_rate": 4.240160477243913e-05,
      "loss": 0.8553,
      "step": 18875
    },
    {
      "epoch": 2.178126442085833,
      "grad_norm": 0.4877290725708008,
      "learning_rate": 4.234675022357184e-05,
      "loss": 0.8485,
      "step": 18880
    },
    {
      "epoch": 2.1787032764190126,
      "grad_norm": 0.436401903629303,
      "learning_rate": 4.229192164760576e-05,
      "loss": 0.8947,
      "step": 18885
    },
    {
      "epoch": 2.1792801107521917,
      "grad_norm": 0.4835764467716217,
      "learning_rate": 4.223711906924143e-05,
      "loss": 0.8144,
      "step": 18890
    },
    {
      "epoch": 2.1798569450853713,
      "grad_norm": 0.36747488379478455,
      "learning_rate": 4.2182342513167395e-05,
      "loss": 0.8148,
      "step": 18895
    },
    {
      "epoch": 2.180433779418551,
      "grad_norm": 0.43172943592071533,
      "learning_rate": 4.212759200406065e-05,
      "loss": 0.9065,
      "step": 18900
    },
    {
      "epoch": 2.1810106137517304,
      "grad_norm": 0.4256831109523773,
      "learning_rate": 4.2072867566586394e-05,
      "loss": 0.8635,
      "step": 18905
    },
    {
      "epoch": 2.18158744808491,
      "grad_norm": 0.5033872127532959,
      "learning_rate": 4.201816922539814e-05,
      "loss": 0.8843,
      "step": 18910
    },
    {
      "epoch": 2.1821642824180896,
      "grad_norm": 0.47121378779411316,
      "learning_rate": 4.1963497005137516e-05,
      "loss": 0.8994,
      "step": 18915
    },
    {
      "epoch": 2.182741116751269,
      "grad_norm": 0.42762336134910583,
      "learning_rate": 4.1908850930434506e-05,
      "loss": 0.9069,
      "step": 18920
    },
    {
      "epoch": 2.1833179510844487,
      "grad_norm": 0.4158373177051544,
      "learning_rate": 4.185423102590726e-05,
      "loss": 0.8502,
      "step": 18925
    },
    {
      "epoch": 2.1838947854176283,
      "grad_norm": 0.4415193498134613,
      "learning_rate": 4.179963731616221e-05,
      "loss": 0.8994,
      "step": 18930
    },
    {
      "epoch": 2.1844716197508074,
      "grad_norm": 0.4090315103530884,
      "learning_rate": 4.1745069825793836e-05,
      "loss": 0.8678,
      "step": 18935
    },
    {
      "epoch": 2.185048454083987,
      "grad_norm": 0.48849308490753174,
      "learning_rate": 4.1690528579384935e-05,
      "loss": 0.867,
      "step": 18940
    },
    {
      "epoch": 2.1856252884171665,
      "grad_norm": 0.4298642575740814,
      "learning_rate": 4.163601360150646e-05,
      "loss": 0.8641,
      "step": 18945
    },
    {
      "epoch": 2.186202122750346,
      "grad_norm": 0.4962320625782013,
      "learning_rate": 4.1581524916717515e-05,
      "loss": 0.8793,
      "step": 18950
    },
    {
      "epoch": 2.1867789570835257,
      "grad_norm": 0.39665916562080383,
      "learning_rate": 4.1527062549565344e-05,
      "loss": 0.8694,
      "step": 18955
    },
    {
      "epoch": 2.187355791416705,
      "grad_norm": 0.4870767295360565,
      "learning_rate": 4.147262652458539e-05,
      "loss": 0.9127,
      "step": 18960
    },
    {
      "epoch": 2.187932625749885,
      "grad_norm": 0.4359199106693268,
      "learning_rate": 4.141821686630112e-05,
      "loss": 0.8448,
      "step": 18965
    },
    {
      "epoch": 2.1885094600830644,
      "grad_norm": 0.4554474651813507,
      "learning_rate": 4.1363833599224225e-05,
      "loss": 0.899,
      "step": 18970
    },
    {
      "epoch": 2.1890862944162435,
      "grad_norm": 0.3968992531299591,
      "learning_rate": 4.130947674785447e-05,
      "loss": 0.8589,
      "step": 18975
    },
    {
      "epoch": 2.189663128749423,
      "grad_norm": 0.3888143002986908,
      "learning_rate": 4.1255146336679764e-05,
      "loss": 0.8657,
      "step": 18980
    },
    {
      "epoch": 2.1902399630826026,
      "grad_norm": 0.4677968919277191,
      "learning_rate": 4.1200842390175985e-05,
      "loss": 0.8921,
      "step": 18985
    },
    {
      "epoch": 2.190816797415782,
      "grad_norm": 0.39660555124282837,
      "learning_rate": 4.114656493280721e-05,
      "loss": 0.8675,
      "step": 18990
    },
    {
      "epoch": 2.1913936317489617,
      "grad_norm": 0.4506629705429077,
      "learning_rate": 4.1092313989025534e-05,
      "loss": 0.853,
      "step": 18995
    },
    {
      "epoch": 2.1919704660821413,
      "grad_norm": 0.45868542790412903,
      "learning_rate": 4.103808958327111e-05,
      "loss": 0.8326,
      "step": 19000
    },
    {
      "epoch": 2.192547300415321,
      "grad_norm": 0.40273788571357727,
      "learning_rate": 4.098389173997218e-05,
      "loss": 0.8579,
      "step": 19005
    },
    {
      "epoch": 2.1931241347485004,
      "grad_norm": 0.4366571605205536,
      "learning_rate": 4.092972048354491e-05,
      "loss": 0.9013,
      "step": 19010
    },
    {
      "epoch": 2.1937009690816796,
      "grad_norm": 0.44341322779655457,
      "learning_rate": 4.0875575838393574e-05,
      "loss": 0.9058,
      "step": 19015
    },
    {
      "epoch": 2.194277803414859,
      "grad_norm": 0.4099046289920807,
      "learning_rate": 4.082145782891046e-05,
      "loss": 0.8404,
      "step": 19020
    },
    {
      "epoch": 2.1948546377480387,
      "grad_norm": 0.43949276208877563,
      "learning_rate": 4.076736647947583e-05,
      "loss": 0.8557,
      "step": 19025
    },
    {
      "epoch": 2.1954314720812182,
      "grad_norm": 0.39623621106147766,
      "learning_rate": 4.071330181445795e-05,
      "loss": 0.8837,
      "step": 19030
    },
    {
      "epoch": 2.196008306414398,
      "grad_norm": 0.4332854747772217,
      "learning_rate": 4.065926385821307e-05,
      "loss": 0.8099,
      "step": 19035
    },
    {
      "epoch": 2.1965851407475774,
      "grad_norm": 0.49497532844543457,
      "learning_rate": 4.0605252635085345e-05,
      "loss": 0.8714,
      "step": 19040
    },
    {
      "epoch": 2.197161975080757,
      "grad_norm": 0.5062466859817505,
      "learning_rate": 4.055126816940697e-05,
      "loss": 0.8715,
      "step": 19045
    },
    {
      "epoch": 2.1977388094139365,
      "grad_norm": 0.43262168765068054,
      "learning_rate": 4.049731048549804e-05,
      "loss": 0.916,
      "step": 19050
    },
    {
      "epoch": 2.1983156437471156,
      "grad_norm": 0.3961028456687927,
      "learning_rate": 4.044337960766663e-05,
      "loss": 0.8536,
      "step": 19055
    },
    {
      "epoch": 2.198892478080295,
      "grad_norm": 0.4137302339076996,
      "learning_rate": 4.0389475560208653e-05,
      "loss": 0.8527,
      "step": 19060
    },
    {
      "epoch": 2.1994693124134748,
      "grad_norm": 0.39934080839157104,
      "learning_rate": 4.033559836740801e-05,
      "loss": 0.8487,
      "step": 19065
    },
    {
      "epoch": 2.2000461467466543,
      "grad_norm": 0.4289592206478119,
      "learning_rate": 4.028174805353648e-05,
      "loss": 0.8969,
      "step": 19070
    },
    {
      "epoch": 2.200622981079834,
      "grad_norm": 0.4357072114944458,
      "learning_rate": 4.0227924642853786e-05,
      "loss": 0.8291,
      "step": 19075
    },
    {
      "epoch": 2.2011998154130135,
      "grad_norm": 0.41731470823287964,
      "learning_rate": 4.017412815960735e-05,
      "loss": 0.8866,
      "step": 19080
    },
    {
      "epoch": 2.201776649746193,
      "grad_norm": 0.3993309736251831,
      "learning_rate": 4.012035862803275e-05,
      "loss": 0.9265,
      "step": 19085
    },
    {
      "epoch": 2.2023534840793726,
      "grad_norm": 0.3996540307998657,
      "learning_rate": 4.006661607235316e-05,
      "loss": 0.8724,
      "step": 19090
    },
    {
      "epoch": 2.2029303184125517,
      "grad_norm": 0.4056003987789154,
      "learning_rate": 4.001290051677975e-05,
      "loss": 0.8479,
      "step": 19095
    },
    {
      "epoch": 2.2035071527457313,
      "grad_norm": 0.42227908968925476,
      "learning_rate": 3.9959211985511415e-05,
      "loss": 0.9067,
      "step": 19100
    },
    {
      "epoch": 2.204083987078911,
      "grad_norm": 0.4420729875564575,
      "learning_rate": 3.990555050273506e-05,
      "loss": 0.9141,
      "step": 19105
    },
    {
      "epoch": 2.2046608214120904,
      "grad_norm": 0.42280176281929016,
      "learning_rate": 3.985191609262519e-05,
      "loss": 0.887,
      "step": 19110
    },
    {
      "epoch": 2.20523765574527,
      "grad_norm": 0.3818375766277313,
      "learning_rate": 3.979830877934423e-05,
      "loss": 0.8428,
      "step": 19115
    },
    {
      "epoch": 2.2058144900784495,
      "grad_norm": 0.4037393629550934,
      "learning_rate": 3.97447285870424e-05,
      "loss": 0.8572,
      "step": 19120
    },
    {
      "epoch": 2.206391324411629,
      "grad_norm": 0.4422479271888733,
      "learning_rate": 3.969117553985772e-05,
      "loss": 0.8919,
      "step": 19125
    },
    {
      "epoch": 2.2069681587448087,
      "grad_norm": 0.3848304748535156,
      "learning_rate": 3.9637649661915844e-05,
      "loss": 0.9447,
      "step": 19130
    },
    {
      "epoch": 2.207544993077988,
      "grad_norm": 0.37574300169944763,
      "learning_rate": 3.958415097733035e-05,
      "loss": 0.8699,
      "step": 19135
    },
    {
      "epoch": 2.2081218274111674,
      "grad_norm": 0.4617113471031189,
      "learning_rate": 3.9530679510202476e-05,
      "loss": 0.9728,
      "step": 19140
    },
    {
      "epoch": 2.208698661744347,
      "grad_norm": 0.42802298069000244,
      "learning_rate": 3.947723528462126e-05,
      "loss": 0.9185,
      "step": 19145
    },
    {
      "epoch": 2.2092754960775265,
      "grad_norm": 0.4145731031894684,
      "learning_rate": 3.942381832466334e-05,
      "loss": 0.8572,
      "step": 19150
    },
    {
      "epoch": 2.209852330410706,
      "grad_norm": 0.4005712568759918,
      "learning_rate": 3.9370428654393296e-05,
      "loss": 0.8823,
      "step": 19155
    },
    {
      "epoch": 2.2104291647438856,
      "grad_norm": 0.4595177471637726,
      "learning_rate": 3.931706629786317e-05,
      "loss": 0.8639,
      "step": 19160
    },
    {
      "epoch": 2.211005999077065,
      "grad_norm": 0.40236470103263855,
      "learning_rate": 3.926373127911287e-05,
      "loss": 0.8691,
      "step": 19165
    },
    {
      "epoch": 2.2115828334102448,
      "grad_norm": 0.4109460413455963,
      "learning_rate": 3.921042362216983e-05,
      "loss": 0.8875,
      "step": 19170
    },
    {
      "epoch": 2.212159667743424,
      "grad_norm": 0.4146732687950134,
      "learning_rate": 3.9157143351049386e-05,
      "loss": 0.9115,
      "step": 19175
    },
    {
      "epoch": 2.2127365020766034,
      "grad_norm": 0.38754817843437195,
      "learning_rate": 3.910389048975431e-05,
      "loss": 0.9114,
      "step": 19180
    },
    {
      "epoch": 2.213313336409783,
      "grad_norm": 0.4098663330078125,
      "learning_rate": 3.905066506227515e-05,
      "loss": 0.8382,
      "step": 19185
    },
    {
      "epoch": 2.2138901707429626,
      "grad_norm": 0.43350058794021606,
      "learning_rate": 3.8997467092590056e-05,
      "loss": 0.8488,
      "step": 19190
    },
    {
      "epoch": 2.214467005076142,
      "grad_norm": 0.44083696603775024,
      "learning_rate": 3.8944296604664866e-05,
      "loss": 0.8266,
      "step": 19195
    },
    {
      "epoch": 2.2150438394093217,
      "grad_norm": 0.44283217191696167,
      "learning_rate": 3.8891153622452904e-05,
      "loss": 0.8576,
      "step": 19200
    },
    {
      "epoch": 2.2156206737425013,
      "grad_norm": 0.4850544035434723,
      "learning_rate": 3.883803816989523e-05,
      "loss": 0.8542,
      "step": 19205
    },
    {
      "epoch": 2.216197508075681,
      "grad_norm": 0.5047842264175415,
      "learning_rate": 3.8784950270920465e-05,
      "loss": 0.8928,
      "step": 19210
    },
    {
      "epoch": 2.2167743424088604,
      "grad_norm": 0.464539498090744,
      "learning_rate": 3.873188994944483e-05,
      "loss": 0.94,
      "step": 19215
    },
    {
      "epoch": 2.2173511767420395,
      "grad_norm": 0.4273037314414978,
      "learning_rate": 3.867885722937201e-05,
      "loss": 0.8738,
      "step": 19220
    },
    {
      "epoch": 2.217928011075219,
      "grad_norm": 0.4209919273853302,
      "learning_rate": 3.862585213459349e-05,
      "loss": 0.8648,
      "step": 19225
    },
    {
      "epoch": 2.2185048454083987,
      "grad_norm": 0.420584112405777,
      "learning_rate": 3.857287468898806e-05,
      "loss": 0.8516,
      "step": 19230
    },
    {
      "epoch": 2.219081679741578,
      "grad_norm": 0.4526803493499756,
      "learning_rate": 3.851992491642222e-05,
      "loss": 0.8924,
      "step": 19235
    },
    {
      "epoch": 2.219658514074758,
      "grad_norm": 0.446689635515213,
      "learning_rate": 3.846700284074987e-05,
      "loss": 0.8676,
      "step": 19240
    },
    {
      "epoch": 2.2202353484079373,
      "grad_norm": 0.4283064901828766,
      "learning_rate": 3.8414108485812613e-05,
      "loss": 0.8934,
      "step": 19245
    },
    {
      "epoch": 2.220812182741117,
      "grad_norm": 0.43577268719673157,
      "learning_rate": 3.836124187543938e-05,
      "loss": 0.8238,
      "step": 19250
    },
    {
      "epoch": 2.221389017074296,
      "grad_norm": 0.41376611590385437,
      "learning_rate": 3.830840303344675e-05,
      "loss": 0.8273,
      "step": 19255
    },
    {
      "epoch": 2.2219658514074756,
      "grad_norm": 0.42846664786338806,
      "learning_rate": 3.825559198363861e-05,
      "loss": 0.9316,
      "step": 19260
    },
    {
      "epoch": 2.222542685740655,
      "grad_norm": 0.4213767945766449,
      "learning_rate": 3.820280874980658e-05,
      "loss": 0.8694,
      "step": 19265
    },
    {
      "epoch": 2.2231195200738347,
      "grad_norm": 0.43722113966941833,
      "learning_rate": 3.8150053355729495e-05,
      "loss": 0.8672,
      "step": 19270
    },
    {
      "epoch": 2.2236963544070143,
      "grad_norm": 0.4323386251926422,
      "learning_rate": 3.8097325825173826e-05,
      "loss": 0.8544,
      "step": 19275
    },
    {
      "epoch": 2.224273188740194,
      "grad_norm": 0.4062696695327759,
      "learning_rate": 3.80446261818934e-05,
      "loss": 0.8761,
      "step": 19280
    },
    {
      "epoch": 2.2248500230733734,
      "grad_norm": 0.4705086648464203,
      "learning_rate": 3.799195444962956e-05,
      "loss": 0.8459,
      "step": 19285
    },
    {
      "epoch": 2.225426857406553,
      "grad_norm": 0.4575541317462921,
      "learning_rate": 3.793931065211096e-05,
      "loss": 0.8433,
      "step": 19290
    },
    {
      "epoch": 2.2260036917397326,
      "grad_norm": 0.4377269446849823,
      "learning_rate": 3.788669481305376e-05,
      "loss": 0.8728,
      "step": 19295
    },
    {
      "epoch": 2.2265805260729117,
      "grad_norm": 0.41368991136550903,
      "learning_rate": 3.783410695616149e-05,
      "loss": 0.8454,
      "step": 19300
    },
    {
      "epoch": 2.2271573604060912,
      "grad_norm": 0.43097037076950073,
      "learning_rate": 3.778154710512512e-05,
      "loss": 0.866,
      "step": 19305
    },
    {
      "epoch": 2.227734194739271,
      "grad_norm": 0.4154130518436432,
      "learning_rate": 3.772901528362287e-05,
      "loss": 0.8629,
      "step": 19310
    },
    {
      "epoch": 2.2283110290724504,
      "grad_norm": 0.47126176953315735,
      "learning_rate": 3.767651151532057e-05,
      "loss": 0.8565,
      "step": 19315
    },
    {
      "epoch": 2.22888786340563,
      "grad_norm": 0.411214143037796,
      "learning_rate": 3.762403582387114e-05,
      "loss": 0.9093,
      "step": 19320
    },
    {
      "epoch": 2.2294646977388095,
      "grad_norm": 0.42424070835113525,
      "learning_rate": 3.757158823291508e-05,
      "loss": 0.8569,
      "step": 19325
    },
    {
      "epoch": 2.230041532071989,
      "grad_norm": 0.43448081612586975,
      "learning_rate": 3.751916876608001e-05,
      "loss": 0.9002,
      "step": 19330
    },
    {
      "epoch": 2.2306183664051686,
      "grad_norm": 0.442730575799942,
      "learning_rate": 3.746677744698114e-05,
      "loss": 0.9348,
      "step": 19335
    },
    {
      "epoch": 2.2311952007383478,
      "grad_norm": 0.437593936920166,
      "learning_rate": 3.741441429922078e-05,
      "loss": 0.8935,
      "step": 19340
    },
    {
      "epoch": 2.2317720350715273,
      "grad_norm": 0.41157275438308716,
      "learning_rate": 3.736207934638864e-05,
      "loss": 0.8402,
      "step": 19345
    },
    {
      "epoch": 2.232348869404707,
      "grad_norm": 0.4042685329914093,
      "learning_rate": 3.730977261206171e-05,
      "loss": 0.8719,
      "step": 19350
    },
    {
      "epoch": 2.2329257037378865,
      "grad_norm": 0.42768070101737976,
      "learning_rate": 3.725749411980435e-05,
      "loss": 0.8603,
      "step": 19355
    },
    {
      "epoch": 2.233502538071066,
      "grad_norm": 0.44104745984077454,
      "learning_rate": 3.720524389316802e-05,
      "loss": 0.8971,
      "step": 19360
    },
    {
      "epoch": 2.2340793724042456,
      "grad_norm": 0.4265894293785095,
      "learning_rate": 3.715302195569159e-05,
      "loss": 0.872,
      "step": 19365
    },
    {
      "epoch": 2.234656206737425,
      "grad_norm": 0.45184656977653503,
      "learning_rate": 3.710082833090115e-05,
      "loss": 0.8893,
      "step": 19370
    },
    {
      "epoch": 2.2352330410706047,
      "grad_norm": 0.4579407572746277,
      "learning_rate": 3.7048663042310063e-05,
      "loss": 0.8494,
      "step": 19375
    },
    {
      "epoch": 2.235809875403784,
      "grad_norm": 0.44718435406684875,
      "learning_rate": 3.69965261134188e-05,
      "loss": 0.9108,
      "step": 19380
    },
    {
      "epoch": 2.2363867097369634,
      "grad_norm": 0.44501993060112,
      "learning_rate": 3.694441756771525e-05,
      "loss": 0.8604,
      "step": 19385
    },
    {
      "epoch": 2.236963544070143,
      "grad_norm": 0.4550657272338867,
      "learning_rate": 3.689233742867435e-05,
      "loss": 0.8502,
      "step": 19390
    },
    {
      "epoch": 2.2375403784033225,
      "grad_norm": 0.38818812370300293,
      "learning_rate": 3.684028571975836e-05,
      "loss": 0.8862,
      "step": 19395
    },
    {
      "epoch": 2.238117212736502,
      "grad_norm": 0.43724438548088074,
      "learning_rate": 3.678826246441658e-05,
      "loss": 0.8706,
      "step": 19400
    },
    {
      "epoch": 2.2386940470696817,
      "grad_norm": 0.4609769582748413,
      "learning_rate": 3.67362676860857e-05,
      "loss": 0.8821,
      "step": 19405
    },
    {
      "epoch": 2.2392708814028612,
      "grad_norm": 0.4721742272377014,
      "learning_rate": 3.6684301408189406e-05,
      "loss": 0.8705,
      "step": 19410
    },
    {
      "epoch": 2.239847715736041,
      "grad_norm": 0.44866010546684265,
      "learning_rate": 3.66323636541386e-05,
      "loss": 0.8934,
      "step": 19415
    },
    {
      "epoch": 2.24042455006922,
      "grad_norm": 0.4057766795158386,
      "learning_rate": 3.658045444733137e-05,
      "loss": 0.8711,
      "step": 19420
    },
    {
      "epoch": 2.2410013844023995,
      "grad_norm": 0.43573522567749023,
      "learning_rate": 3.652857381115293e-05,
      "loss": 0.8508,
      "step": 19425
    },
    {
      "epoch": 2.241578218735579,
      "grad_norm": 0.3890364468097687,
      "learning_rate": 3.6476721768975554e-05,
      "loss": 0.8354,
      "step": 19430
    },
    {
      "epoch": 2.2421550530687586,
      "grad_norm": 0.44802749156951904,
      "learning_rate": 3.642489834415872e-05,
      "loss": 0.913,
      "step": 19435
    },
    {
      "epoch": 2.242731887401938,
      "grad_norm": 0.41781002283096313,
      "learning_rate": 3.637310356004897e-05,
      "loss": 0.8546,
      "step": 19440
    },
    {
      "epoch": 2.2433087217351178,
      "grad_norm": 0.4287906885147095,
      "learning_rate": 3.632133743998001e-05,
      "loss": 0.9148,
      "step": 19445
    },
    {
      "epoch": 2.2438855560682973,
      "grad_norm": 0.4324365556240082,
      "learning_rate": 3.6269600007272485e-05,
      "loss": 0.8566,
      "step": 19450
    },
    {
      "epoch": 2.244462390401477,
      "grad_norm": 0.47438672184944153,
      "learning_rate": 3.6217891285234265e-05,
      "loss": 0.8796,
      "step": 19455
    },
    {
      "epoch": 2.245039224734656,
      "grad_norm": 0.4307745397090912,
      "learning_rate": 3.6166211297160215e-05,
      "loss": 0.8702,
      "step": 19460
    },
    {
      "epoch": 2.2456160590678356,
      "grad_norm": 0.39747270941734314,
      "learning_rate": 3.611456006633228e-05,
      "loss": 0.9056,
      "step": 19465
    },
    {
      "epoch": 2.246192893401015,
      "grad_norm": 0.3949414789676666,
      "learning_rate": 3.6062937616019433e-05,
      "loss": 0.8409,
      "step": 19470
    },
    {
      "epoch": 2.2467697277341947,
      "grad_norm": 0.45473381876945496,
      "learning_rate": 3.601134396947773e-05,
      "loss": 0.8587,
      "step": 19475
    },
    {
      "epoch": 2.2473465620673743,
      "grad_norm": 0.5197227001190186,
      "learning_rate": 3.595977914995014e-05,
      "loss": 0.8988,
      "step": 19480
    },
    {
      "epoch": 2.247923396400554,
      "grad_norm": 0.4385240972042084,
      "learning_rate": 3.5908243180666734e-05,
      "loss": 0.9216,
      "step": 19485
    },
    {
      "epoch": 2.2485002307337334,
      "grad_norm": 0.45504510402679443,
      "learning_rate": 3.585673608484458e-05,
      "loss": 0.8773,
      "step": 19490
    },
    {
      "epoch": 2.249077065066913,
      "grad_norm": 0.419172465801239,
      "learning_rate": 3.5805257885687726e-05,
      "loss": 0.883,
      "step": 19495
    },
    {
      "epoch": 2.2496538994000925,
      "grad_norm": 0.41316547989845276,
      "learning_rate": 3.5753808606387226e-05,
      "loss": 0.8475,
      "step": 19500
    },
    {
      "epoch": 2.2502307337332716,
      "grad_norm": 0.3854261338710785,
      "learning_rate": 3.570238827012102e-05,
      "loss": 0.8527,
      "step": 19505
    },
    {
      "epoch": 2.250807568066451,
      "grad_norm": 0.39473459124565125,
      "learning_rate": 3.565099690005411e-05,
      "loss": 0.8726,
      "step": 19510
    },
    {
      "epoch": 2.251384402399631,
      "grad_norm": 0.43062132596969604,
      "learning_rate": 3.55996345193384e-05,
      "loss": 0.8598,
      "step": 19515
    },
    {
      "epoch": 2.2519612367328103,
      "grad_norm": 0.37857621908187866,
      "learning_rate": 3.5548301151112774e-05,
      "loss": 0.8552,
      "step": 19520
    },
    {
      "epoch": 2.25253807106599,
      "grad_norm": 0.4191773533821106,
      "learning_rate": 3.549699681850294e-05,
      "loss": 0.8459,
      "step": 19525
    },
    {
      "epoch": 2.2531149053991695,
      "grad_norm": 0.43940702080726624,
      "learning_rate": 3.544572154462165e-05,
      "loss": 0.8592,
      "step": 19530
    },
    {
      "epoch": 2.253691739732349,
      "grad_norm": 0.4216030538082123,
      "learning_rate": 3.539447535256851e-05,
      "loss": 0.845,
      "step": 19535
    },
    {
      "epoch": 2.254268574065528,
      "grad_norm": 0.4699593186378479,
      "learning_rate": 3.534325826543002e-05,
      "loss": 0.8323,
      "step": 19540
    },
    {
      "epoch": 2.2548454083987077,
      "grad_norm": 0.4556678235530853,
      "learning_rate": 3.529207030627959e-05,
      "loss": 0.8951,
      "step": 19545
    },
    {
      "epoch": 2.2554222427318873,
      "grad_norm": 0.433390736579895,
      "learning_rate": 3.524091149817752e-05,
      "loss": 0.8777,
      "step": 19550
    },
    {
      "epoch": 2.255999077065067,
      "grad_norm": 0.421514630317688,
      "learning_rate": 3.518978186417089e-05,
      "loss": 0.9213,
      "step": 19555
    },
    {
      "epoch": 2.2565759113982464,
      "grad_norm": 0.39421340823173523,
      "learning_rate": 3.513868142729373e-05,
      "loss": 0.8982,
      "step": 19560
    },
    {
      "epoch": 2.257152745731426,
      "grad_norm": 0.4324612319469452,
      "learning_rate": 3.508761021056689e-05,
      "loss": 0.9017,
      "step": 19565
    },
    {
      "epoch": 2.2577295800646056,
      "grad_norm": 0.4661559760570526,
      "learning_rate": 3.503656823699809e-05,
      "loss": 0.9211,
      "step": 19570
    },
    {
      "epoch": 2.258306414397785,
      "grad_norm": 0.4388502538204193,
      "learning_rate": 3.498555552958176e-05,
      "loss": 0.901,
      "step": 19575
    },
    {
      "epoch": 2.2588832487309647,
      "grad_norm": 0.3959125578403473,
      "learning_rate": 3.4934572111299254e-05,
      "loss": 0.9145,
      "step": 19580
    },
    {
      "epoch": 2.259460083064144,
      "grad_norm": 0.39172422885894775,
      "learning_rate": 3.4883618005118724e-05,
      "loss": 0.8521,
      "step": 19585
    },
    {
      "epoch": 2.2600369173973234,
      "grad_norm": 0.500770628452301,
      "learning_rate": 3.483269323399512e-05,
      "loss": 0.9168,
      "step": 19590
    },
    {
      "epoch": 2.260613751730503,
      "grad_norm": 0.43640342354774475,
      "learning_rate": 3.4781797820870036e-05,
      "loss": 0.893,
      "step": 19595
    },
    {
      "epoch": 2.2611905860636825,
      "grad_norm": 0.4276639223098755,
      "learning_rate": 3.473093178867208e-05,
      "loss": 0.885,
      "step": 19600
    },
    {
      "epoch": 2.261767420396862,
      "grad_norm": 0.4681278169155121,
      "learning_rate": 3.468009516031644e-05,
      "loss": 0.9085,
      "step": 19605
    },
    {
      "epoch": 2.2623442547300416,
      "grad_norm": 0.4059450328350067,
      "learning_rate": 3.462928795870515e-05,
      "loss": 0.8732,
      "step": 19610
    },
    {
      "epoch": 2.262921089063221,
      "grad_norm": 0.42977821826934814,
      "learning_rate": 3.4578510206726856e-05,
      "loss": 0.8538,
      "step": 19615
    },
    {
      "epoch": 2.2634979233964003,
      "grad_norm": 0.4566701054573059,
      "learning_rate": 3.452776192725717e-05,
      "loss": 0.9053,
      "step": 19620
    },
    {
      "epoch": 2.26407475772958,
      "grad_norm": 0.43263301253318787,
      "learning_rate": 3.4477043143158204e-05,
      "loss": 0.8835,
      "step": 19625
    },
    {
      "epoch": 2.2646515920627595,
      "grad_norm": 0.46123167872428894,
      "learning_rate": 3.4426353877278885e-05,
      "loss": 0.8861,
      "step": 19630
    },
    {
      "epoch": 2.265228426395939,
      "grad_norm": 0.4144291281700134,
      "learning_rate": 3.437569415245483e-05,
      "loss": 0.8385,
      "step": 19635
    },
    {
      "epoch": 2.2658052607291186,
      "grad_norm": 0.4805741310119629,
      "learning_rate": 3.432506399150839e-05,
      "loss": 0.7951,
      "step": 19640
    },
    {
      "epoch": 2.266382095062298,
      "grad_norm": 0.4421154260635376,
      "learning_rate": 3.427446341724846e-05,
      "loss": 0.9346,
      "step": 19645
    },
    {
      "epoch": 2.2669589293954777,
      "grad_norm": 0.4587462246417999,
      "learning_rate": 3.422389245247075e-05,
      "loss": 0.9087,
      "step": 19650
    },
    {
      "epoch": 2.2675357637286573,
      "grad_norm": 0.40255850553512573,
      "learning_rate": 3.4173351119957585e-05,
      "loss": 0.8269,
      "step": 19655
    },
    {
      "epoch": 2.268112598061837,
      "grad_norm": 0.41475871205329895,
      "learning_rate": 3.412283944247794e-05,
      "loss": 0.8546,
      "step": 19660
    },
    {
      "epoch": 2.268689432395016,
      "grad_norm": 0.39724406599998474,
      "learning_rate": 3.407235744278734e-05,
      "loss": 0.8428,
      "step": 19665
    },
    {
      "epoch": 2.2692662667281955,
      "grad_norm": 0.4867123067378998,
      "learning_rate": 3.4021905143628166e-05,
      "loss": 0.8971,
      "step": 19670
    },
    {
      "epoch": 2.269843101061375,
      "grad_norm": 0.4419727325439453,
      "learning_rate": 3.397148256772916e-05,
      "loss": 0.8865,
      "step": 19675
    },
    {
      "epoch": 2.2704199353945547,
      "grad_norm": 0.39697927236557007,
      "learning_rate": 3.3921089737805866e-05,
      "loss": 0.8884,
      "step": 19680
    },
    {
      "epoch": 2.2709967697277342,
      "grad_norm": 0.43786725401878357,
      "learning_rate": 3.387072667656025e-05,
      "loss": 0.9046,
      "step": 19685
    },
    {
      "epoch": 2.271573604060914,
      "grad_norm": 0.3968237638473511,
      "learning_rate": 3.3820393406681104e-05,
      "loss": 0.8644,
      "step": 19690
    },
    {
      "epoch": 2.2721504383940934,
      "grad_norm": 0.405393123626709,
      "learning_rate": 3.3770089950843564e-05,
      "loss": 0.872,
      "step": 19695
    },
    {
      "epoch": 2.2727272727272725,
      "grad_norm": 0.40466177463531494,
      "learning_rate": 3.3719816331709465e-05,
      "loss": 0.8794,
      "step": 19700
    },
    {
      "epoch": 2.273304107060452,
      "grad_norm": 0.39831775426864624,
      "learning_rate": 3.3669572571927167e-05,
      "loss": 0.8519,
      "step": 19705
    },
    {
      "epoch": 2.2738809413936316,
      "grad_norm": 0.4163300395011902,
      "learning_rate": 3.361935869413163e-05,
      "loss": 0.8841,
      "step": 19710
    },
    {
      "epoch": 2.274457775726811,
      "grad_norm": 0.4293881058692932,
      "learning_rate": 3.356917472094422e-05,
      "loss": 0.8701,
      "step": 19715
    },
    {
      "epoch": 2.2750346100599907,
      "grad_norm": 0.3978329002857208,
      "learning_rate": 3.3519020674972966e-05,
      "loss": 0.8271,
      "step": 19720
    },
    {
      "epoch": 2.2756114443931703,
      "grad_norm": 0.4234081208705902,
      "learning_rate": 3.3468896578812344e-05,
      "loss": 0.876,
      "step": 19725
    },
    {
      "epoch": 2.27618827872635,
      "grad_norm": 0.4614051580429077,
      "learning_rate": 3.341880245504341e-05,
      "loss": 0.8424,
      "step": 19730
    },
    {
      "epoch": 2.2767651130595294,
      "grad_norm": 0.3951602578163147,
      "learning_rate": 3.3368738326233576e-05,
      "loss": 0.8786,
      "step": 19735
    },
    {
      "epoch": 2.277341947392709,
      "grad_norm": 0.45089635252952576,
      "learning_rate": 3.331870421493688e-05,
      "loss": 0.8766,
      "step": 19740
    },
    {
      "epoch": 2.277918781725888,
      "grad_norm": 0.41349610686302185,
      "learning_rate": 3.3268700143693775e-05,
      "loss": 0.9101,
      "step": 19745
    },
    {
      "epoch": 2.2784956160590677,
      "grad_norm": 0.5059412717819214,
      "learning_rate": 3.321872613503123e-05,
      "loss": 0.8829,
      "step": 19750
    },
    {
      "epoch": 2.2790724503922473,
      "grad_norm": 0.4013746976852417,
      "learning_rate": 3.316878221146253e-05,
      "loss": 0.8798,
      "step": 19755
    },
    {
      "epoch": 2.279649284725427,
      "grad_norm": 0.47415482997894287,
      "learning_rate": 3.311886839548767e-05,
      "loss": 0.9138,
      "step": 19760
    },
    {
      "epoch": 2.2802261190586064,
      "grad_norm": 0.43062588572502136,
      "learning_rate": 3.306898470959278e-05,
      "loss": 0.8784,
      "step": 19765
    },
    {
      "epoch": 2.280802953391786,
      "grad_norm": 0.46922576427459717,
      "learning_rate": 3.301913117625065e-05,
      "loss": 0.8598,
      "step": 19770
    },
    {
      "epoch": 2.2813797877249655,
      "grad_norm": 0.4475753903388977,
      "learning_rate": 3.296930781792028e-05,
      "loss": 0.8808,
      "step": 19775
    },
    {
      "epoch": 2.281956622058145,
      "grad_norm": 0.4148850739002228,
      "learning_rate": 3.2919514657047334e-05,
      "loss": 0.9151,
      "step": 19780
    },
    {
      "epoch": 2.2825334563913247,
      "grad_norm": 0.4225980043411255,
      "learning_rate": 3.286975171606362e-05,
      "loss": 0.8396,
      "step": 19785
    },
    {
      "epoch": 2.2831102907245038,
      "grad_norm": 0.41470444202423096,
      "learning_rate": 3.282001901738749e-05,
      "loss": 0.8742,
      "step": 19790
    },
    {
      "epoch": 2.2836871250576833,
      "grad_norm": 0.4593305289745331,
      "learning_rate": 3.27703165834236e-05,
      "loss": 0.911,
      "step": 19795
    },
    {
      "epoch": 2.284263959390863,
      "grad_norm": 0.46919670701026917,
      "learning_rate": 3.272064443656303e-05,
      "loss": 0.8615,
      "step": 19800
    },
    {
      "epoch": 2.2848407937240425,
      "grad_norm": 0.4396454095840454,
      "learning_rate": 3.267100259918313e-05,
      "loss": 0.8898,
      "step": 19805
    },
    {
      "epoch": 2.285417628057222,
      "grad_norm": 0.3871668577194214,
      "learning_rate": 3.262139109364766e-05,
      "loss": 0.8499,
      "step": 19810
    },
    {
      "epoch": 2.2859944623904016,
      "grad_norm": 0.4604567885398865,
      "learning_rate": 3.257180994230671e-05,
      "loss": 0.8922,
      "step": 19815
    },
    {
      "epoch": 2.286571296723581,
      "grad_norm": 0.3920028507709503,
      "learning_rate": 3.2522259167496706e-05,
      "loss": 0.8444,
      "step": 19820
    },
    {
      "epoch": 2.2871481310567603,
      "grad_norm": 0.4583243131637573,
      "learning_rate": 3.247273879154028e-05,
      "loss": 0.8847,
      "step": 19825
    },
    {
      "epoch": 2.28772496538994,
      "grad_norm": 0.4010569453239441,
      "learning_rate": 3.2423248836746575e-05,
      "loss": 0.8818,
      "step": 19830
    },
    {
      "epoch": 2.2883017997231194,
      "grad_norm": 0.49284353852272034,
      "learning_rate": 3.237378932541084e-05,
      "loss": 0.9396,
      "step": 19835
    },
    {
      "epoch": 2.288878634056299,
      "grad_norm": 0.4109579026699066,
      "learning_rate": 3.2324360279814734e-05,
      "loss": 0.8614,
      "step": 19840
    },
    {
      "epoch": 2.2894554683894786,
      "grad_norm": 0.3795565366744995,
      "learning_rate": 3.227496172222603e-05,
      "loss": 0.9212,
      "step": 19845
    },
    {
      "epoch": 2.290032302722658,
      "grad_norm": 0.41086894273757935,
      "learning_rate": 3.222559367489901e-05,
      "loss": 0.8607,
      "step": 19850
    },
    {
      "epoch": 2.2906091370558377,
      "grad_norm": 0.4576627314090729,
      "learning_rate": 3.217625616007399e-05,
      "loss": 0.8155,
      "step": 19855
    },
    {
      "epoch": 2.2911859713890173,
      "grad_norm": 0.45540130138397217,
      "learning_rate": 3.212694919997764e-05,
      "loss": 0.8947,
      "step": 19860
    },
    {
      "epoch": 2.291762805722197,
      "grad_norm": 0.4258350431919098,
      "learning_rate": 3.2077672816822836e-05,
      "loss": 0.8913,
      "step": 19865
    },
    {
      "epoch": 2.292339640055376,
      "grad_norm": 0.4477665424346924,
      "learning_rate": 3.202842703280872e-05,
      "loss": 0.9329,
      "step": 19870
    },
    {
      "epoch": 2.2929164743885555,
      "grad_norm": 0.40553560853004456,
      "learning_rate": 3.197921187012055e-05,
      "loss": 0.8641,
      "step": 19875
    },
    {
      "epoch": 2.293493308721735,
      "grad_norm": 0.4320368468761444,
      "learning_rate": 3.193002735092989e-05,
      "loss": 0.8611,
      "step": 19880
    },
    {
      "epoch": 2.2940701430549146,
      "grad_norm": 0.43080151081085205,
      "learning_rate": 3.188087349739446e-05,
      "loss": 0.8702,
      "step": 19885
    },
    {
      "epoch": 2.294646977388094,
      "grad_norm": 0.4172769784927368,
      "learning_rate": 3.1831750331658196e-05,
      "loss": 0.8789,
      "step": 19890
    },
    {
      "epoch": 2.2952238117212738,
      "grad_norm": 0.465230792760849,
      "learning_rate": 3.178265787585112e-05,
      "loss": 0.8691,
      "step": 19895
    },
    {
      "epoch": 2.2958006460544533,
      "grad_norm": 0.41405051946640015,
      "learning_rate": 3.1733596152089495e-05,
      "loss": 0.9003,
      "step": 19900
    },
    {
      "epoch": 2.2963774803876325,
      "grad_norm": 0.40615877509117126,
      "learning_rate": 3.168456518247575e-05,
      "loss": 0.8408,
      "step": 19905
    },
    {
      "epoch": 2.296954314720812,
      "grad_norm": 0.49301356077194214,
      "learning_rate": 3.163556498909843e-05,
      "loss": 0.8775,
      "step": 19910
    },
    {
      "epoch": 2.2975311490539916,
      "grad_norm": 0.4456724524497986,
      "learning_rate": 3.1586595594032154e-05,
      "loss": 0.898,
      "step": 19915
    },
    {
      "epoch": 2.298107983387171,
      "grad_norm": 0.415539413690567,
      "learning_rate": 3.153765701933784e-05,
      "loss": 0.8291,
      "step": 19920
    },
    {
      "epoch": 2.2986848177203507,
      "grad_norm": 0.41856181621551514,
      "learning_rate": 3.148874928706232e-05,
      "loss": 0.9107,
      "step": 19925
    },
    {
      "epoch": 2.2992616520535303,
      "grad_norm": 0.44639578461647034,
      "learning_rate": 3.1439872419238666e-05,
      "loss": 0.8456,
      "step": 19930
    },
    {
      "epoch": 2.29983848638671,
      "grad_norm": 0.4922926723957062,
      "learning_rate": 3.1391026437885984e-05,
      "loss": 0.8511,
      "step": 19935
    },
    {
      "epoch": 2.3004153207198894,
      "grad_norm": 0.39371350407600403,
      "learning_rate": 3.134221136500952e-05,
      "loss": 0.8905,
      "step": 19940
    },
    {
      "epoch": 2.300992155053069,
      "grad_norm": 0.45283186435699463,
      "learning_rate": 3.1293427222600504e-05,
      "loss": 0.9106,
      "step": 19945
    },
    {
      "epoch": 2.301568989386248,
      "grad_norm": 0.48746415972709656,
      "learning_rate": 3.12446740326363e-05,
      "loss": 0.8145,
      "step": 19950
    },
    {
      "epoch": 2.3021458237194277,
      "grad_norm": 0.39407244324684143,
      "learning_rate": 3.119595181708034e-05,
      "loss": 0.8061,
      "step": 19955
    },
    {
      "epoch": 2.3027226580526072,
      "grad_norm": 0.4312855005264282,
      "learning_rate": 3.114726059788206e-05,
      "loss": 0.9428,
      "step": 19960
    },
    {
      "epoch": 2.303299492385787,
      "grad_norm": 0.40344172716140747,
      "learning_rate": 3.109860039697699e-05,
      "loss": 0.8769,
      "step": 19965
    },
    {
      "epoch": 2.3038763267189664,
      "grad_norm": 0.47457730770111084,
      "learning_rate": 3.1049971236286566e-05,
      "loss": 0.8066,
      "step": 19970
    },
    {
      "epoch": 2.304453161052146,
      "grad_norm": 0.4646870791912079,
      "learning_rate": 3.100137313771838e-05,
      "loss": 0.8744,
      "step": 19975
    },
    {
      "epoch": 2.3050299953853255,
      "grad_norm": 0.42959192395210266,
      "learning_rate": 3.0952806123165945e-05,
      "loss": 0.8513,
      "step": 19980
    },
    {
      "epoch": 2.3056068297185046,
      "grad_norm": 0.43078503012657166,
      "learning_rate": 3.09042702145088e-05,
      "loss": 0.9291,
      "step": 19985
    },
    {
      "epoch": 2.306183664051684,
      "grad_norm": 0.40327394008636475,
      "learning_rate": 3.0855765433612504e-05,
      "loss": 0.8477,
      "step": 19990
    },
    {
      "epoch": 2.3067604983848637,
      "grad_norm": 0.43422433733940125,
      "learning_rate": 3.0807291802328494e-05,
      "loss": 0.8785,
      "step": 19995
    },
    {
      "epoch": 2.3073373327180433,
      "grad_norm": 0.48264971375465393,
      "learning_rate": 3.075884934249426e-05,
      "loss": 0.9068,
      "step": 20000
    },
    {
      "epoch": 2.307914167051223,
      "grad_norm": 0.43897566199302673,
      "learning_rate": 3.0710438075933225e-05,
      "loss": 0.9054,
      "step": 20005
    },
    {
      "epoch": 2.3084910013844024,
      "grad_norm": 0.4612114727497101,
      "learning_rate": 3.066205802445477e-05,
      "loss": 0.9348,
      "step": 20010
    },
    {
      "epoch": 2.309067835717582,
      "grad_norm": 0.4375527799129486,
      "learning_rate": 3.0613709209854214e-05,
      "loss": 0.8707,
      "step": 20015
    },
    {
      "epoch": 2.3096446700507616,
      "grad_norm": 0.420188844203949,
      "learning_rate": 3.056539165391275e-05,
      "loss": 0.883,
      "step": 20020
    },
    {
      "epoch": 2.310221504383941,
      "grad_norm": 0.44482848048210144,
      "learning_rate": 3.0517105378397536e-05,
      "loss": 0.8882,
      "step": 20025
    },
    {
      "epoch": 2.3107983387171203,
      "grad_norm": 0.439638614654541,
      "learning_rate": 3.0468850405061668e-05,
      "loss": 0.8514,
      "step": 20030
    },
    {
      "epoch": 2.3113751730503,
      "grad_norm": 0.45619815587997437,
      "learning_rate": 3.04206267556441e-05,
      "loss": 0.8587,
      "step": 20035
    },
    {
      "epoch": 2.3119520073834794,
      "grad_norm": 0.4407486915588379,
      "learning_rate": 3.037243445186965e-05,
      "loss": 0.8501,
      "step": 20040
    },
    {
      "epoch": 2.312528841716659,
      "grad_norm": 0.4541487693786621,
      "learning_rate": 3.0324273515449052e-05,
      "loss": 0.8347,
      "step": 20045
    },
    {
      "epoch": 2.3131056760498385,
      "grad_norm": 0.43526491522789,
      "learning_rate": 3.027614396807892e-05,
      "loss": 0.8815,
      "step": 20050
    },
    {
      "epoch": 2.313682510383018,
      "grad_norm": 0.4115588665008545,
      "learning_rate": 3.0228045831441733e-05,
      "loss": 0.8109,
      "step": 20055
    },
    {
      "epoch": 2.3142593447161977,
      "grad_norm": 0.4194267988204956,
      "learning_rate": 3.0179979127205692e-05,
      "loss": 0.9207,
      "step": 20060
    },
    {
      "epoch": 2.3148361790493768,
      "grad_norm": 0.38986527919769287,
      "learning_rate": 3.0131943877025072e-05,
      "loss": 0.856,
      "step": 20065
    },
    {
      "epoch": 2.3154130133825563,
      "grad_norm": 0.43422049283981323,
      "learning_rate": 3.0083940102539763e-05,
      "loss": 0.8778,
      "step": 20070
    },
    {
      "epoch": 2.315989847715736,
      "grad_norm": 0.41118425130844116,
      "learning_rate": 3.0035967825375577e-05,
      "loss": 0.8238,
      "step": 20075
    },
    {
      "epoch": 2.3165666820489155,
      "grad_norm": 0.4259694516658783,
      "learning_rate": 2.998802706714412e-05,
      "loss": 0.8975,
      "step": 20080
    },
    {
      "epoch": 2.317143516382095,
      "grad_norm": 0.39787614345550537,
      "learning_rate": 2.994011784944284e-05,
      "loss": 0.8057,
      "step": 20085
    },
    {
      "epoch": 2.3177203507152746,
      "grad_norm": 0.42141956090927124,
      "learning_rate": 2.9892240193854858e-05,
      "loss": 0.8747,
      "step": 20090
    },
    {
      "epoch": 2.318297185048454,
      "grad_norm": 0.43193697929382324,
      "learning_rate": 2.9844394121949182e-05,
      "loss": 0.8747,
      "step": 20095
    },
    {
      "epoch": 2.3188740193816337,
      "grad_norm": 0.430708646774292,
      "learning_rate": 2.9796579655280576e-05,
      "loss": 0.8928,
      "step": 20100
    },
    {
      "epoch": 2.3194508537148133,
      "grad_norm": 0.39962413907051086,
      "learning_rate": 2.9748796815389556e-05,
      "loss": 0.8059,
      "step": 20105
    },
    {
      "epoch": 2.3200276880479924,
      "grad_norm": 0.4007127285003662,
      "learning_rate": 2.9701045623802315e-05,
      "loss": 0.8972,
      "step": 20110
    },
    {
      "epoch": 2.320604522381172,
      "grad_norm": 0.39141660928726196,
      "learning_rate": 2.9653326102030964e-05,
      "loss": 0.8787,
      "step": 20115
    },
    {
      "epoch": 2.3211813567143516,
      "grad_norm": 0.48572033643722534,
      "learning_rate": 2.9605638271573166e-05,
      "loss": 0.8466,
      "step": 20120
    },
    {
      "epoch": 2.321758191047531,
      "grad_norm": 0.43861210346221924,
      "learning_rate": 2.9557982153912424e-05,
      "loss": 0.8933,
      "step": 20125
    },
    {
      "epoch": 2.3223350253807107,
      "grad_norm": 0.44550949335098267,
      "learning_rate": 2.9510357770517825e-05,
      "loss": 0.9044,
      "step": 20130
    },
    {
      "epoch": 2.3229118597138902,
      "grad_norm": 0.3988287150859833,
      "learning_rate": 2.9462765142844384e-05,
      "loss": 0.8629,
      "step": 20135
    },
    {
      "epoch": 2.32348869404707,
      "grad_norm": 0.45350053906440735,
      "learning_rate": 2.9415204292332565e-05,
      "loss": 0.8538,
      "step": 20140
    },
    {
      "epoch": 2.3240655283802494,
      "grad_norm": 0.4375722408294678,
      "learning_rate": 2.9367675240408654e-05,
      "loss": 0.8715,
      "step": 20145
    },
    {
      "epoch": 2.324642362713429,
      "grad_norm": 0.390643835067749,
      "learning_rate": 2.9320178008484587e-05,
      "loss": 0.835,
      "step": 20150
    },
    {
      "epoch": 2.325219197046608,
      "grad_norm": 0.4461374282836914,
      "learning_rate": 2.9272712617957997e-05,
      "loss": 0.8254,
      "step": 20155
    },
    {
      "epoch": 2.3257960313797876,
      "grad_norm": 0.4431370496749878,
      "learning_rate": 2.9225279090212067e-05,
      "loss": 0.904,
      "step": 20160
    },
    {
      "epoch": 2.326372865712967,
      "grad_norm": 0.4003266990184784,
      "learning_rate": 2.917787744661571e-05,
      "loss": 0.8588,
      "step": 20165
    },
    {
      "epoch": 2.3269497000461468,
      "grad_norm": 0.4321753978729248,
      "learning_rate": 2.913050770852348e-05,
      "loss": 0.9173,
      "step": 20170
    },
    {
      "epoch": 2.3275265343793263,
      "grad_norm": 0.39779239892959595,
      "learning_rate": 2.9083169897275552e-05,
      "loss": 0.8776,
      "step": 20175
    },
    {
      "epoch": 2.328103368712506,
      "grad_norm": 0.4392048716545105,
      "learning_rate": 2.9035864034197623e-05,
      "loss": 0.9053,
      "step": 20180
    },
    {
      "epoch": 2.3286802030456855,
      "grad_norm": 0.41199496388435364,
      "learning_rate": 2.898859014060119e-05,
      "loss": 0.8739,
      "step": 20185
    },
    {
      "epoch": 2.3292570373788646,
      "grad_norm": 0.42873457074165344,
      "learning_rate": 2.894134823778315e-05,
      "loss": 0.8805,
      "step": 20190
    },
    {
      "epoch": 2.329833871712044,
      "grad_norm": 0.4416216015815735,
      "learning_rate": 2.8894138347026125e-05,
      "loss": 0.9102,
      "step": 20195
    },
    {
      "epoch": 2.3304107060452237,
      "grad_norm": 0.4041585624217987,
      "learning_rate": 2.8846960489598173e-05,
      "loss": 0.8466,
      "step": 20200
    },
    {
      "epoch": 2.3309875403784033,
      "grad_norm": 0.45262736082077026,
      "learning_rate": 2.8799814686753134e-05,
      "loss": 0.8963,
      "step": 20205
    },
    {
      "epoch": 2.331564374711583,
      "grad_norm": 0.38636353611946106,
      "learning_rate": 2.8752700959730193e-05,
      "loss": 0.788,
      "step": 20210
    },
    {
      "epoch": 2.3321412090447624,
      "grad_norm": 0.4599827826023102,
      "learning_rate": 2.870561932975424e-05,
      "loss": 0.8385,
      "step": 20215
    },
    {
      "epoch": 2.332718043377942,
      "grad_norm": 0.4050966799259186,
      "learning_rate": 2.8658569818035542e-05,
      "loss": 0.8615,
      "step": 20220
    },
    {
      "epoch": 2.3332948777111215,
      "grad_norm": 0.38030165433883667,
      "learning_rate": 2.8611552445770127e-05,
      "loss": 0.8425,
      "step": 20225
    },
    {
      "epoch": 2.333871712044301,
      "grad_norm": 0.3842707574367523,
      "learning_rate": 2.8564567234139304e-05,
      "loss": 0.8763,
      "step": 20230
    },
    {
      "epoch": 2.3344485463774802,
      "grad_norm": 0.4096066653728485,
      "learning_rate": 2.851761420431006e-05,
      "loss": 0.8119,
      "step": 20235
    },
    {
      "epoch": 2.33502538071066,
      "grad_norm": 0.4566209614276886,
      "learning_rate": 2.8470693377434797e-05,
      "loss": 0.8707,
      "step": 20240
    },
    {
      "epoch": 2.3356022150438394,
      "grad_norm": 0.4076927602291107,
      "learning_rate": 2.8423804774651496e-05,
      "loss": 0.8344,
      "step": 20245
    },
    {
      "epoch": 2.336179049377019,
      "grad_norm": 0.4491892457008362,
      "learning_rate": 2.8376948417083483e-05,
      "loss": 0.841,
      "step": 20250
    },
    {
      "epoch": 2.3367558837101985,
      "grad_norm": 0.42058318853378296,
      "learning_rate": 2.833012432583968e-05,
      "loss": 0.872,
      "step": 20255
    },
    {
      "epoch": 2.337332718043378,
      "grad_norm": 0.4171206057071686,
      "learning_rate": 2.8283332522014427e-05,
      "loss": 0.916,
      "step": 20260
    },
    {
      "epoch": 2.3379095523765576,
      "grad_norm": 0.37775084376335144,
      "learning_rate": 2.823657302668755e-05,
      "loss": 0.8729,
      "step": 20265
    },
    {
      "epoch": 2.3384863867097367,
      "grad_norm": 0.4589167535305023,
      "learning_rate": 2.81898458609242e-05,
      "loss": 0.9139,
      "step": 20270
    },
    {
      "epoch": 2.3390632210429163,
      "grad_norm": 0.3988697826862335,
      "learning_rate": 2.8143151045775196e-05,
      "loss": 0.8507,
      "step": 20275
    },
    {
      "epoch": 2.339640055376096,
      "grad_norm": 0.39948388934135437,
      "learning_rate": 2.8096488602276528e-05,
      "loss": 0.9003,
      "step": 20280
    },
    {
      "epoch": 2.3402168897092754,
      "grad_norm": 0.4345165193080902,
      "learning_rate": 2.804985855144979e-05,
      "loss": 0.887,
      "step": 20285
    },
    {
      "epoch": 2.340793724042455,
      "grad_norm": 0.4340234696865082,
      "learning_rate": 2.800326091430182e-05,
      "loss": 0.8868,
      "step": 20290
    },
    {
      "epoch": 2.3413705583756346,
      "grad_norm": 0.4125678539276123,
      "learning_rate": 2.7956695711825075e-05,
      "loss": 0.8698,
      "step": 20295
    },
    {
      "epoch": 2.341947392708814,
      "grad_norm": 0.45810467004776,
      "learning_rate": 2.7910162964997154e-05,
      "loss": 0.8384,
      "step": 20300
    },
    {
      "epoch": 2.3425242270419937,
      "grad_norm": 0.43330734968185425,
      "learning_rate": 2.78636626947812e-05,
      "loss": 0.8722,
      "step": 20305
    },
    {
      "epoch": 2.3431010613751733,
      "grad_norm": 0.392790824174881,
      "learning_rate": 2.7817194922125666e-05,
      "loss": 0.8737,
      "step": 20310
    },
    {
      "epoch": 2.3436778957083524,
      "grad_norm": 0.4088856875896454,
      "learning_rate": 2.777075966796442e-05,
      "loss": 0.9155,
      "step": 20315
    },
    {
      "epoch": 2.344254730041532,
      "grad_norm": 0.41359519958496094,
      "learning_rate": 2.7724356953216545e-05,
      "loss": 0.8969,
      "step": 20320
    },
    {
      "epoch": 2.3448315643747115,
      "grad_norm": 0.41587385535240173,
      "learning_rate": 2.7677986798786615e-05,
      "loss": 0.8366,
      "step": 20325
    },
    {
      "epoch": 2.345408398707891,
      "grad_norm": 0.460124671459198,
      "learning_rate": 2.763164922556445e-05,
      "loss": 0.8809,
      "step": 20330
    },
    {
      "epoch": 2.3459852330410707,
      "grad_norm": 0.37693455815315247,
      "learning_rate": 2.758534425442526e-05,
      "loss": 0.8692,
      "step": 20335
    },
    {
      "epoch": 2.34656206737425,
      "grad_norm": 0.4221893548965454,
      "learning_rate": 2.753907190622944e-05,
      "loss": 0.8756,
      "step": 20340
    },
    {
      "epoch": 2.34713890170743,
      "grad_norm": 0.4988071322441101,
      "learning_rate": 2.7492832201822882e-05,
      "loss": 0.8969,
      "step": 20345
    },
    {
      "epoch": 2.347715736040609,
      "grad_norm": 0.46341338753700256,
      "learning_rate": 2.7446625162036577e-05,
      "loss": 0.8354,
      "step": 20350
    },
    {
      "epoch": 2.3482925703737885,
      "grad_norm": 0.43746891617774963,
      "learning_rate": 2.7400450807686938e-05,
      "loss": 0.871,
      "step": 20355
    },
    {
      "epoch": 2.348869404706968,
      "grad_norm": 0.4257577359676361,
      "learning_rate": 2.735430915957553e-05,
      "loss": 0.8351,
      "step": 20360
    },
    {
      "epoch": 2.3494462390401476,
      "grad_norm": 0.40961867570877075,
      "learning_rate": 2.7308200238489367e-05,
      "loss": 0.8615,
      "step": 20365
    },
    {
      "epoch": 2.350023073373327,
      "grad_norm": 0.4807041585445404,
      "learning_rate": 2.726212406520051e-05,
      "loss": 0.8729,
      "step": 20370
    },
    {
      "epoch": 2.3505999077065067,
      "grad_norm": 0.4345334470272064,
      "learning_rate": 2.7216080660466403e-05,
      "loss": 0.8955,
      "step": 20375
    },
    {
      "epoch": 2.3511767420396863,
      "grad_norm": 0.4259088337421417,
      "learning_rate": 2.717007004502968e-05,
      "loss": 0.8409,
      "step": 20380
    },
    {
      "epoch": 2.351753576372866,
      "grad_norm": 0.4218992590904236,
      "learning_rate": 2.712409223961826e-05,
      "loss": 0.8594,
      "step": 20385
    },
    {
      "epoch": 2.3523304107060454,
      "grad_norm": 0.44640931487083435,
      "learning_rate": 2.7078147264945153e-05,
      "loss": 0.8873,
      "step": 20390
    },
    {
      "epoch": 2.3529072450392245,
      "grad_norm": 0.4130471646785736,
      "learning_rate": 2.7032235141708704e-05,
      "loss": 0.8687,
      "step": 20395
    },
    {
      "epoch": 2.353484079372404,
      "grad_norm": 0.37854447960853577,
      "learning_rate": 2.698635589059242e-05,
      "loss": 0.8776,
      "step": 20400
    },
    {
      "epoch": 2.3540609137055837,
      "grad_norm": 0.4220699667930603,
      "learning_rate": 2.6940509532265003e-05,
      "loss": 0.8527,
      "step": 20405
    },
    {
      "epoch": 2.3546377480387632,
      "grad_norm": 0.4551481306552887,
      "learning_rate": 2.689469608738028e-05,
      "loss": 0.8891,
      "step": 20410
    },
    {
      "epoch": 2.355214582371943,
      "grad_norm": 0.4928138852119446,
      "learning_rate": 2.6848915576577317e-05,
      "loss": 0.8627,
      "step": 20415
    },
    {
      "epoch": 2.3557914167051224,
      "grad_norm": 0.40893441438674927,
      "learning_rate": 2.680316802048034e-05,
      "loss": 0.903,
      "step": 20420
    },
    {
      "epoch": 2.356368251038302,
      "grad_norm": 0.42715442180633545,
      "learning_rate": 2.67574534396987e-05,
      "loss": 0.8748,
      "step": 20425
    },
    {
      "epoch": 2.356945085371481,
      "grad_norm": 0.4622029662132263,
      "learning_rate": 2.6711771854826905e-05,
      "loss": 0.8926,
      "step": 20430
    },
    {
      "epoch": 2.3575219197046606,
      "grad_norm": 0.43145161867141724,
      "learning_rate": 2.6666123286444623e-05,
      "loss": 0.9149,
      "step": 20435
    },
    {
      "epoch": 2.35809875403784,
      "grad_norm": 0.4071784019470215,
      "learning_rate": 2.6620507755116574e-05,
      "loss": 0.8695,
      "step": 20440
    },
    {
      "epoch": 2.3586755883710198,
      "grad_norm": 0.430257648229599,
      "learning_rate": 2.657492528139268e-05,
      "loss": 0.83,
      "step": 20445
    },
    {
      "epoch": 2.3592524227041993,
      "grad_norm": 0.39387521147727966,
      "learning_rate": 2.6529375885807915e-05,
      "loss": 0.8472,
      "step": 20450
    },
    {
      "epoch": 2.359829257037379,
      "grad_norm": 0.4547233581542969,
      "learning_rate": 2.6483859588882408e-05,
      "loss": 0.8741,
      "step": 20455
    },
    {
      "epoch": 2.3604060913705585,
      "grad_norm": 0.420403391122818,
      "learning_rate": 2.643837641112128e-05,
      "loss": 0.9036,
      "step": 20460
    },
    {
      "epoch": 2.360982925703738,
      "grad_norm": 0.4206133484840393,
      "learning_rate": 2.6392926373014825e-05,
      "loss": 0.8512,
      "step": 20465
    },
    {
      "epoch": 2.3615597600369176,
      "grad_norm": 0.42972996830940247,
      "learning_rate": 2.634750949503837e-05,
      "loss": 0.8849,
      "step": 20470
    },
    {
      "epoch": 2.3621365943700967,
      "grad_norm": 0.43919214606285095,
      "learning_rate": 2.630212579765231e-05,
      "loss": 0.8254,
      "step": 20475
    },
    {
      "epoch": 2.3627134287032763,
      "grad_norm": 0.43563053011894226,
      "learning_rate": 2.6256775301302115e-05,
      "loss": 0.8661,
      "step": 20480
    },
    {
      "epoch": 2.363290263036456,
      "grad_norm": 0.4127313792705536,
      "learning_rate": 2.6211458026418212e-05,
      "loss": 0.8734,
      "step": 20485
    },
    {
      "epoch": 2.3638670973696354,
      "grad_norm": 0.42784926295280457,
      "learning_rate": 2.6166173993416154e-05,
      "loss": 0.8856,
      "step": 20490
    },
    {
      "epoch": 2.364443931702815,
      "grad_norm": 0.4503290355205536,
      "learning_rate": 2.612092322269648e-05,
      "loss": 0.8888,
      "step": 20495
    },
    {
      "epoch": 2.3650207660359945,
      "grad_norm": 0.39504754543304443,
      "learning_rate": 2.6075705734644796e-05,
      "loss": 0.8398,
      "step": 20500
    },
    {
      "epoch": 2.365597600369174,
      "grad_norm": 0.4576849937438965,
      "learning_rate": 2.603052154963158e-05,
      "loss": 0.9339,
      "step": 20505
    },
    {
      "epoch": 2.3661744347023537,
      "grad_norm": 0.41642168164253235,
      "learning_rate": 2.5985370688012457e-05,
      "loss": 0.8616,
      "step": 20510
    },
    {
      "epoch": 2.3667512690355332,
      "grad_norm": 0.4202212989330292,
      "learning_rate": 2.594025317012796e-05,
      "loss": 0.865,
      "step": 20515
    },
    {
      "epoch": 2.3673281033687124,
      "grad_norm": 0.4365369379520416,
      "learning_rate": 2.5895169016303623e-05,
      "loss": 0.8556,
      "step": 20520
    },
    {
      "epoch": 2.367904937701892,
      "grad_norm": 0.4484837055206299,
      "learning_rate": 2.5850118246849942e-05,
      "loss": 0.8848,
      "step": 20525
    },
    {
      "epoch": 2.3684817720350715,
      "grad_norm": 0.36972007155418396,
      "learning_rate": 2.5805100882062416e-05,
      "loss": 0.8821,
      "step": 20530
    },
    {
      "epoch": 2.369058606368251,
      "grad_norm": 0.4523972272872925,
      "learning_rate": 2.576011694222139e-05,
      "loss": 0.916,
      "step": 20535
    },
    {
      "epoch": 2.3696354407014306,
      "grad_norm": 0.40989214181900024,
      "learning_rate": 2.571516644759223e-05,
      "loss": 0.8602,
      "step": 20540
    },
    {
      "epoch": 2.37021227503461,
      "grad_norm": 0.4344289004802704,
      "learning_rate": 2.567024941842525e-05,
      "loss": 0.8708,
      "step": 20545
    },
    {
      "epoch": 2.3707891093677897,
      "grad_norm": 0.4548048675060272,
      "learning_rate": 2.5625365874955674e-05,
      "loss": 0.8366,
      "step": 20550
    },
    {
      "epoch": 2.371365943700969,
      "grad_norm": 0.5554631352424622,
      "learning_rate": 2.5580515837403563e-05,
      "loss": 0.8741,
      "step": 20555
    },
    {
      "epoch": 2.3719427780341484,
      "grad_norm": 0.45449256896972656,
      "learning_rate": 2.5535699325973983e-05,
      "loss": 0.8692,
      "step": 20560
    },
    {
      "epoch": 2.372519612367328,
      "grad_norm": 0.43762558698654175,
      "learning_rate": 2.5490916360856853e-05,
      "loss": 0.8752,
      "step": 20565
    },
    {
      "epoch": 2.3730964467005076,
      "grad_norm": 0.4865242540836334,
      "learning_rate": 2.5446166962227023e-05,
      "loss": 0.8788,
      "step": 20570
    },
    {
      "epoch": 2.373673281033687,
      "grad_norm": 0.43669965863227844,
      "learning_rate": 2.5401451150244105e-05,
      "loss": 0.8666,
      "step": 20575
    },
    {
      "epoch": 2.3742501153668667,
      "grad_norm": 0.4120925962924957,
      "learning_rate": 2.5356768945052745e-05,
      "loss": 0.8631,
      "step": 20580
    },
    {
      "epoch": 2.3748269497000463,
      "grad_norm": 0.4763656258583069,
      "learning_rate": 2.531212036678231e-05,
      "loss": 0.8937,
      "step": 20585
    },
    {
      "epoch": 2.375403784033226,
      "grad_norm": 0.41615408658981323,
      "learning_rate": 2.5267505435547078e-05,
      "loss": 0.8541,
      "step": 20590
    },
    {
      "epoch": 2.3759806183664054,
      "grad_norm": 0.44895505905151367,
      "learning_rate": 2.522292417144617e-05,
      "loss": 0.8887,
      "step": 20595
    },
    {
      "epoch": 2.3765574526995845,
      "grad_norm": 0.46258002519607544,
      "learning_rate": 2.5178376594563556e-05,
      "loss": 0.8727,
      "step": 20600
    },
    {
      "epoch": 2.377134287032764,
      "grad_norm": 0.4195844233036041,
      "learning_rate": 2.513386272496796e-05,
      "loss": 0.823,
      "step": 20605
    },
    {
      "epoch": 2.3777111213659436,
      "grad_norm": 0.41249099373817444,
      "learning_rate": 2.5089382582712994e-05,
      "loss": 0.836,
      "step": 20610
    },
    {
      "epoch": 2.378287955699123,
      "grad_norm": 0.39638757705688477,
      "learning_rate": 2.5044936187837044e-05,
      "loss": 0.8776,
      "step": 20615
    },
    {
      "epoch": 2.378864790032303,
      "grad_norm": 0.5457749962806702,
      "learning_rate": 2.5000523560363322e-05,
      "loss": 0.9153,
      "step": 20620
    },
    {
      "epoch": 2.3794416243654823,
      "grad_norm": 0.3871450424194336,
      "learning_rate": 2.4956144720299712e-05,
      "loss": 0.8258,
      "step": 20625
    },
    {
      "epoch": 2.380018458698662,
      "grad_norm": 0.4669581353664398,
      "learning_rate": 2.4911799687639102e-05,
      "loss": 0.8968,
      "step": 20630
    },
    {
      "epoch": 2.380595293031841,
      "grad_norm": 0.44625550508499146,
      "learning_rate": 2.4867488482358923e-05,
      "loss": 0.8696,
      "step": 20635
    },
    {
      "epoch": 2.3811721273650206,
      "grad_norm": 0.4140227437019348,
      "learning_rate": 2.482321112442151e-05,
      "loss": 0.867,
      "step": 20640
    },
    {
      "epoch": 2.3817489616982,
      "grad_norm": 0.4040051996707916,
      "learning_rate": 2.477896763377382e-05,
      "loss": 0.882,
      "step": 20645
    },
    {
      "epoch": 2.3823257960313797,
      "grad_norm": 0.41291922330856323,
      "learning_rate": 2.4734758030347738e-05,
      "loss": 0.8418,
      "step": 20650
    },
    {
      "epoch": 2.3829026303645593,
      "grad_norm": 0.42268839478492737,
      "learning_rate": 2.4690582334059685e-05,
      "loss": 0.8842,
      "step": 20655
    },
    {
      "epoch": 2.383479464697739,
      "grad_norm": 0.46346721053123474,
      "learning_rate": 2.464644056481098e-05,
      "loss": 0.8908,
      "step": 20660
    },
    {
      "epoch": 2.3840562990309184,
      "grad_norm": 0.3883967995643616,
      "learning_rate": 2.4602332742487476e-05,
      "loss": 0.8437,
      "step": 20665
    },
    {
      "epoch": 2.384633133364098,
      "grad_norm": 0.4117676913738251,
      "learning_rate": 2.455825888695994e-05,
      "loss": 0.9205,
      "step": 20670
    },
    {
      "epoch": 2.3852099676972776,
      "grad_norm": 0.3966183066368103,
      "learning_rate": 2.451421901808365e-05,
      "loss": 0.8497,
      "step": 20675
    },
    {
      "epoch": 2.3857868020304567,
      "grad_norm": 0.42475172877311707,
      "learning_rate": 2.4470213155698683e-05,
      "loss": 0.8584,
      "step": 20680
    },
    {
      "epoch": 2.3863636363636362,
      "grad_norm": 0.4242252707481384,
      "learning_rate": 2.4426241319629772e-05,
      "loss": 0.8568,
      "step": 20685
    },
    {
      "epoch": 2.386940470696816,
      "grad_norm": 0.4951821565628052,
      "learning_rate": 2.4382303529686324e-05,
      "loss": 0.8503,
      "step": 20690
    },
    {
      "epoch": 2.3875173050299954,
      "grad_norm": 0.4654747247695923,
      "learning_rate": 2.433839980566236e-05,
      "loss": 0.8316,
      "step": 20695
    },
    {
      "epoch": 2.388094139363175,
      "grad_norm": 0.4673211872577667,
      "learning_rate": 2.4294530167336615e-05,
      "loss": 0.884,
      "step": 20700
    },
    {
      "epoch": 2.3886709736963545,
      "grad_norm": 0.45259878039360046,
      "learning_rate": 2.425069463447245e-05,
      "loss": 0.9021,
      "step": 20705
    },
    {
      "epoch": 2.389247808029534,
      "grad_norm": 0.4560452997684479,
      "learning_rate": 2.4206893226817884e-05,
      "loss": 0.911,
      "step": 20710
    },
    {
      "epoch": 2.389824642362713,
      "grad_norm": 0.420181006193161,
      "learning_rate": 2.4163125964105448e-05,
      "loss": 0.882,
      "step": 20715
    },
    {
      "epoch": 2.3904014766958928,
      "grad_norm": 0.43059244751930237,
      "learning_rate": 2.4119392866052492e-05,
      "loss": 0.8786,
      "step": 20720
    },
    {
      "epoch": 2.3909783110290723,
      "grad_norm": 0.4146776497364044,
      "learning_rate": 2.407569395236079e-05,
      "loss": 0.8795,
      "step": 20725
    },
    {
      "epoch": 2.391555145362252,
      "grad_norm": 0.3845740556716919,
      "learning_rate": 2.4032029242716826e-05,
      "loss": 0.8814,
      "step": 20730
    },
    {
      "epoch": 2.3921319796954315,
      "grad_norm": 0.3898622691631317,
      "learning_rate": 2.398839875679155e-05,
      "loss": 0.8552,
      "step": 20735
    },
    {
      "epoch": 2.392708814028611,
      "grad_norm": 0.4128072261810303,
      "learning_rate": 2.3944802514240726e-05,
      "loss": 0.9095,
      "step": 20740
    },
    {
      "epoch": 2.3932856483617906,
      "grad_norm": 0.4304887652397156,
      "learning_rate": 2.390124053470443e-05,
      "loss": 0.8821,
      "step": 20745
    },
    {
      "epoch": 2.39386248269497,
      "grad_norm": 0.40331536531448364,
      "learning_rate": 2.3857712837807454e-05,
      "loss": 0.8946,
      "step": 20750
    },
    {
      "epoch": 2.3944393170281497,
      "grad_norm": 0.40375304222106934,
      "learning_rate": 2.3814219443159115e-05,
      "loss": 0.8667,
      "step": 20755
    },
    {
      "epoch": 2.395016151361329,
      "grad_norm": 0.4574378430843353,
      "learning_rate": 2.3770760370353294e-05,
      "loss": 0.8721,
      "step": 20760
    },
    {
      "epoch": 2.3955929856945084,
      "grad_norm": 0.4717426002025604,
      "learning_rate": 2.372733563896834e-05,
      "loss": 0.8803,
      "step": 20765
    },
    {
      "epoch": 2.396169820027688,
      "grad_norm": 0.49499765038490295,
      "learning_rate": 2.3683945268567197e-05,
      "loss": 0.9142,
      "step": 20770
    },
    {
      "epoch": 2.3967466543608675,
      "grad_norm": 0.40809836983680725,
      "learning_rate": 2.364058927869732e-05,
      "loss": 0.9058,
      "step": 20775
    },
    {
      "epoch": 2.397323488694047,
      "grad_norm": 0.40131765604019165,
      "learning_rate": 2.35972676888907e-05,
      "loss": 0.8766,
      "step": 20780
    },
    {
      "epoch": 2.3979003230272267,
      "grad_norm": 0.43188804388046265,
      "learning_rate": 2.35539805186637e-05,
      "loss": 0.8498,
      "step": 20785
    },
    {
      "epoch": 2.3984771573604062,
      "grad_norm": 0.4838411509990692,
      "learning_rate": 2.3510727787517382e-05,
      "loss": 0.8694,
      "step": 20790
    },
    {
      "epoch": 2.3990539916935854,
      "grad_norm": 0.43422776460647583,
      "learning_rate": 2.3467509514937126e-05,
      "loss": 0.8835,
      "step": 20795
    },
    {
      "epoch": 2.3996308260267654,
      "grad_norm": 0.4064343571662903,
      "learning_rate": 2.3424325720392882e-05,
      "loss": 0.8297,
      "step": 20800
    },
    {
      "epoch": 2.4002076603599445,
      "grad_norm": 0.42274659872055054,
      "learning_rate": 2.3381176423338956e-05,
      "loss": 0.8689,
      "step": 20805
    },
    {
      "epoch": 2.400784494693124,
      "grad_norm": 0.5127131938934326,
      "learning_rate": 2.3338061643214316e-05,
      "loss": 0.852,
      "step": 20810
    },
    {
      "epoch": 2.4013613290263036,
      "grad_norm": 0.4743903577327728,
      "learning_rate": 2.3294981399442138e-05,
      "loss": 0.8761,
      "step": 20815
    },
    {
      "epoch": 2.401938163359483,
      "grad_norm": 0.4744824767112732,
      "learning_rate": 2.325193571143024e-05,
      "loss": 0.8937,
      "step": 20820
    },
    {
      "epoch": 2.4025149976926627,
      "grad_norm": 0.4018848240375519,
      "learning_rate": 2.3208924598570702e-05,
      "loss": 0.8439,
      "step": 20825
    },
    {
      "epoch": 2.4030918320258423,
      "grad_norm": 0.44422465562820435,
      "learning_rate": 2.3165948080240206e-05,
      "loss": 0.8936,
      "step": 20830
    },
    {
      "epoch": 2.403668666359022,
      "grad_norm": 0.4371984302997589,
      "learning_rate": 2.31230061757997e-05,
      "loss": 0.848,
      "step": 20835
    },
    {
      "epoch": 2.404245500692201,
      "grad_norm": 0.438287615776062,
      "learning_rate": 2.3080098904594603e-05,
      "loss": 0.8541,
      "step": 20840
    },
    {
      "epoch": 2.4048223350253806,
      "grad_norm": 0.4366404712200165,
      "learning_rate": 2.303722628595474e-05,
      "loss": 0.8519,
      "step": 20845
    },
    {
      "epoch": 2.40539916935856,
      "grad_norm": 0.3852338194847107,
      "learning_rate": 2.299438833919432e-05,
      "loss": 0.8386,
      "step": 20850
    },
    {
      "epoch": 2.4059760036917397,
      "grad_norm": 0.42250022292137146,
      "learning_rate": 2.295158508361188e-05,
      "loss": 0.8668,
      "step": 20855
    },
    {
      "epoch": 2.4065528380249193,
      "grad_norm": 0.45582088828086853,
      "learning_rate": 2.2908816538490385e-05,
      "loss": 0.9077,
      "step": 20860
    },
    {
      "epoch": 2.407129672358099,
      "grad_norm": 0.447816401720047,
      "learning_rate": 2.286608272309716e-05,
      "loss": 0.8893,
      "step": 20865
    },
    {
      "epoch": 2.4077065066912784,
      "grad_norm": 0.5000319480895996,
      "learning_rate": 2.2823383656683904e-05,
      "loss": 0.933,
      "step": 20870
    },
    {
      "epoch": 2.408283341024458,
      "grad_norm": 0.43965062499046326,
      "learning_rate": 2.2780719358486524e-05,
      "loss": 0.8496,
      "step": 20875
    },
    {
      "epoch": 2.4088601753576375,
      "grad_norm": 0.44113633036613464,
      "learning_rate": 2.2738089847725497e-05,
      "loss": 0.8328,
      "step": 20880
    },
    {
      "epoch": 2.4094370096908166,
      "grad_norm": 0.4106186628341675,
      "learning_rate": 2.2695495143605416e-05,
      "loss": 0.8419,
      "step": 20885
    },
    {
      "epoch": 2.410013844023996,
      "grad_norm": 0.41286739706993103,
      "learning_rate": 2.26529352653153e-05,
      "loss": 0.8693,
      "step": 20890
    },
    {
      "epoch": 2.4105906783571758,
      "grad_norm": 0.42569470405578613,
      "learning_rate": 2.2610410232028467e-05,
      "loss": 0.8539,
      "step": 20895
    },
    {
      "epoch": 2.4111675126903553,
      "grad_norm": 0.39974820613861084,
      "learning_rate": 2.2567920062902546e-05,
      "loss": 0.8694,
      "step": 20900
    },
    {
      "epoch": 2.411744347023535,
      "grad_norm": 0.42251622676849365,
      "learning_rate": 2.2525464777079398e-05,
      "loss": 0.8916,
      "step": 20905
    },
    {
      "epoch": 2.4123211813567145,
      "grad_norm": 0.4560185670852661,
      "learning_rate": 2.2483044393685215e-05,
      "loss": 0.8593,
      "step": 20910
    },
    {
      "epoch": 2.412898015689894,
      "grad_norm": 0.40557077527046204,
      "learning_rate": 2.244065893183048e-05,
      "loss": 0.8338,
      "step": 20915
    },
    {
      "epoch": 2.413474850023073,
      "grad_norm": 0.41522446274757385,
      "learning_rate": 2.239830841060996e-05,
      "loss": 0.9022,
      "step": 20920
    },
    {
      "epoch": 2.4140516843562527,
      "grad_norm": 0.39065486192703247,
      "learning_rate": 2.235599284910258e-05,
      "loss": 0.8769,
      "step": 20925
    },
    {
      "epoch": 2.4146285186894323,
      "grad_norm": 0.4300009310245514,
      "learning_rate": 2.231371226637161e-05,
      "loss": 0.8506,
      "step": 20930
    },
    {
      "epoch": 2.415205353022612,
      "grad_norm": 0.4822119176387787,
      "learning_rate": 2.2271466681464547e-05,
      "loss": 0.8726,
      "step": 20935
    },
    {
      "epoch": 2.4157821873557914,
      "grad_norm": 0.4451304078102112,
      "learning_rate": 2.2229256113413087e-05,
      "loss": 0.8631,
      "step": 20940
    },
    {
      "epoch": 2.416359021688971,
      "grad_norm": 0.4610227644443512,
      "learning_rate": 2.21870805812332e-05,
      "loss": 0.9034,
      "step": 20945
    },
    {
      "epoch": 2.4169358560221506,
      "grad_norm": 0.40991950035095215,
      "learning_rate": 2.214494010392505e-05,
      "loss": 0.8939,
      "step": 20950
    },
    {
      "epoch": 2.41751269035533,
      "grad_norm": 0.4460807740688324,
      "learning_rate": 2.210283470047296e-05,
      "loss": 0.8765,
      "step": 20955
    },
    {
      "epoch": 2.4180895246885097,
      "grad_norm": 0.43716761469841003,
      "learning_rate": 2.2060764389845534e-05,
      "loss": 0.887,
      "step": 20960
    },
    {
      "epoch": 2.418666359021689,
      "grad_norm": 0.4361554980278015,
      "learning_rate": 2.2018729190995514e-05,
      "loss": 0.8302,
      "step": 20965
    },
    {
      "epoch": 2.4192431933548684,
      "grad_norm": 0.3899691700935364,
      "learning_rate": 2.1976729122859864e-05,
      "loss": 0.8283,
      "step": 20970
    },
    {
      "epoch": 2.419820027688048,
      "grad_norm": 0.4203675091266632,
      "learning_rate": 2.1934764204359648e-05,
      "loss": 0.883,
      "step": 20975
    },
    {
      "epoch": 2.4203968620212275,
      "grad_norm": 0.48073139786720276,
      "learning_rate": 2.1892834454400167e-05,
      "loss": 0.8684,
      "step": 20980
    },
    {
      "epoch": 2.420973696354407,
      "grad_norm": 0.4376397132873535,
      "learning_rate": 2.185093989187087e-05,
      "loss": 0.8898,
      "step": 20985
    },
    {
      "epoch": 2.4215505306875866,
      "grad_norm": 0.39727121591567993,
      "learning_rate": 2.1809080535645323e-05,
      "loss": 0.8615,
      "step": 20990
    },
    {
      "epoch": 2.422127365020766,
      "grad_norm": 0.4959402084350586,
      "learning_rate": 2.176725640458127e-05,
      "loss": 0.8504,
      "step": 20995
    },
    {
      "epoch": 2.4227041993539453,
      "grad_norm": 0.3901364207267761,
      "learning_rate": 2.1725467517520526e-05,
      "loss": 0.8537,
      "step": 21000
    },
    {
      "epoch": 2.423281033687125,
      "grad_norm": 0.4253600239753723,
      "learning_rate": 2.1683713893289094e-05,
      "loss": 0.8638,
      "step": 21005
    },
    {
      "epoch": 2.4238578680203045,
      "grad_norm": 0.43812429904937744,
      "learning_rate": 2.1641995550697038e-05,
      "loss": 0.8103,
      "step": 21010
    },
    {
      "epoch": 2.424434702353484,
      "grad_norm": 0.38738441467285156,
      "learning_rate": 2.1600312508538602e-05,
      "loss": 0.8374,
      "step": 21015
    },
    {
      "epoch": 2.4250115366866636,
      "grad_norm": 0.4647636115550995,
      "learning_rate": 2.155866478559202e-05,
      "loss": 0.8965,
      "step": 21020
    },
    {
      "epoch": 2.425588371019843,
      "grad_norm": 0.4338874816894531,
      "learning_rate": 2.1517052400619696e-05,
      "loss": 0.8724,
      "step": 21025
    },
    {
      "epoch": 2.4261652053530227,
      "grad_norm": 0.4465797543525696,
      "learning_rate": 2.1475475372368094e-05,
      "loss": 0.8758,
      "step": 21030
    },
    {
      "epoch": 2.4267420396862023,
      "grad_norm": 0.427768349647522,
      "learning_rate": 2.1433933719567745e-05,
      "loss": 0.8586,
      "step": 21035
    },
    {
      "epoch": 2.427318874019382,
      "grad_norm": 0.42242631316185,
      "learning_rate": 2.139242746093323e-05,
      "loss": 0.9039,
      "step": 21040
    },
    {
      "epoch": 2.427895708352561,
      "grad_norm": 0.4240041673183441,
      "learning_rate": 2.1350956615163254e-05,
      "loss": 0.8989,
      "step": 21045
    },
    {
      "epoch": 2.4284725426857405,
      "grad_norm": 0.4253004789352417,
      "learning_rate": 2.130952120094044e-05,
      "loss": 0.8653,
      "step": 21050
    },
    {
      "epoch": 2.42904937701892,
      "grad_norm": 0.4393162429332733,
      "learning_rate": 2.126812123693155e-05,
      "loss": 0.8465,
      "step": 21055
    },
    {
      "epoch": 2.4296262113520997,
      "grad_norm": 0.43222832679748535,
      "learning_rate": 2.1226756741787356e-05,
      "loss": 0.8754,
      "step": 21060
    },
    {
      "epoch": 2.4302030456852792,
      "grad_norm": 0.46488478779792786,
      "learning_rate": 2.1185427734142682e-05,
      "loss": 0.8392,
      "step": 21065
    },
    {
      "epoch": 2.430779880018459,
      "grad_norm": 0.39256176352500916,
      "learning_rate": 2.114413423261622e-05,
      "loss": 0.8815,
      "step": 21070
    },
    {
      "epoch": 2.4313567143516384,
      "grad_norm": 0.42439353466033936,
      "learning_rate": 2.1102876255810887e-05,
      "loss": 0.8672,
      "step": 21075
    },
    {
      "epoch": 2.4319335486848175,
      "grad_norm": 0.476100355386734,
      "learning_rate": 2.106165382231341e-05,
      "loss": 0.9268,
      "step": 21080
    },
    {
      "epoch": 2.432510383017997,
      "grad_norm": 0.4509223997592926,
      "learning_rate": 2.102046695069463e-05,
      "loss": 0.8451,
      "step": 21085
    },
    {
      "epoch": 2.4330872173511766,
      "grad_norm": 0.44436001777648926,
      "learning_rate": 2.0979315659509223e-05,
      "loss": 0.8915,
      "step": 21090
    },
    {
      "epoch": 2.433664051684356,
      "grad_norm": 0.4077122211456299,
      "learning_rate": 2.0938199967296036e-05,
      "loss": 0.8599,
      "step": 21095
    },
    {
      "epoch": 2.4342408860175357,
      "grad_norm": 0.4311679005622864,
      "learning_rate": 2.0897119892577698e-05,
      "loss": 0.8531,
      "step": 21100
    },
    {
      "epoch": 2.4348177203507153,
      "grad_norm": 0.4484409987926483,
      "learning_rate": 2.085607545386088e-05,
      "loss": 0.8949,
      "step": 21105
    },
    {
      "epoch": 2.435394554683895,
      "grad_norm": 0.42149195075035095,
      "learning_rate": 2.0815066669636174e-05,
      "loss": 0.8299,
      "step": 21110
    },
    {
      "epoch": 2.4359713890170744,
      "grad_norm": 0.40608465671539307,
      "learning_rate": 2.077409355837816e-05,
      "loss": 0.8374,
      "step": 21115
    },
    {
      "epoch": 2.436548223350254,
      "grad_norm": 0.41218310594558716,
      "learning_rate": 2.0733156138545252e-05,
      "loss": 0.862,
      "step": 21120
    },
    {
      "epoch": 2.437125057683433,
      "grad_norm": 0.42621365189552307,
      "learning_rate": 2.069225442857984e-05,
      "loss": 0.9075,
      "step": 21125
    },
    {
      "epoch": 2.4377018920166127,
      "grad_norm": 0.4423667788505554,
      "learning_rate": 2.0651388446908248e-05,
      "loss": 0.8844,
      "step": 21130
    },
    {
      "epoch": 2.4382787263497923,
      "grad_norm": 0.41898277401924133,
      "learning_rate": 2.0610558211940702e-05,
      "loss": 0.8856,
      "step": 21135
    },
    {
      "epoch": 2.438855560682972,
      "grad_norm": 0.39691296219825745,
      "learning_rate": 2.0569763742071236e-05,
      "loss": 0.8571,
      "step": 21140
    },
    {
      "epoch": 2.4394323950161514,
      "grad_norm": 0.43472006916999817,
      "learning_rate": 2.0529005055677884e-05,
      "loss": 0.829,
      "step": 21145
    },
    {
      "epoch": 2.440009229349331,
      "grad_norm": 0.4144620895385742,
      "learning_rate": 2.0488282171122498e-05,
      "loss": 0.8758,
      "step": 21150
    },
    {
      "epoch": 2.4405860636825105,
      "grad_norm": 0.4102288782596588,
      "learning_rate": 2.044759510675086e-05,
      "loss": 0.9259,
      "step": 21155
    },
    {
      "epoch": 2.44116289801569,
      "grad_norm": 0.4925200045108795,
      "learning_rate": 2.040694388089247e-05,
      "loss": 0.889,
      "step": 21160
    },
    {
      "epoch": 2.4417397323488697,
      "grad_norm": 0.42156094312667847,
      "learning_rate": 2.036632851186091e-05,
      "loss": 0.8579,
      "step": 21165
    },
    {
      "epoch": 2.4423165666820488,
      "grad_norm": 0.43018943071365356,
      "learning_rate": 2.0325749017953387e-05,
      "loss": 0.8924,
      "step": 21170
    },
    {
      "epoch": 2.4428934010152283,
      "grad_norm": 0.3908984959125519,
      "learning_rate": 2.0285205417451115e-05,
      "loss": 0.8549,
      "step": 21175
    },
    {
      "epoch": 2.443470235348408,
      "grad_norm": 0.4131862223148346,
      "learning_rate": 2.0244697728618966e-05,
      "loss": 0.8871,
      "step": 21180
    },
    {
      "epoch": 2.4440470696815875,
      "grad_norm": 0.42835119366645813,
      "learning_rate": 2.0204225969705846e-05,
      "loss": 0.8736,
      "step": 21185
    },
    {
      "epoch": 2.444623904014767,
      "grad_norm": 0.43167683482170105,
      "learning_rate": 2.0163790158944283e-05,
      "loss": 0.8479,
      "step": 21190
    },
    {
      "epoch": 2.4452007383479466,
      "grad_norm": 0.3958747982978821,
      "learning_rate": 2.0123390314550717e-05,
      "loss": 0.8444,
      "step": 21195
    },
    {
      "epoch": 2.445777572681126,
      "grad_norm": 0.4122403562068939,
      "learning_rate": 2.0083026454725364e-05,
      "loss": 0.8428,
      "step": 21200
    },
    {
      "epoch": 2.4463544070143053,
      "grad_norm": 0.4481377899646759,
      "learning_rate": 2.0042698597652222e-05,
      "loss": 0.8766,
      "step": 21205
    },
    {
      "epoch": 2.446931241347485,
      "grad_norm": 0.4057758152484894,
      "learning_rate": 2.000240676149904e-05,
      "loss": 0.8332,
      "step": 21210
    },
    {
      "epoch": 2.4475080756806644,
      "grad_norm": 0.4093271791934967,
      "learning_rate": 1.9962150964417382e-05,
      "loss": 0.86,
      "step": 21215
    },
    {
      "epoch": 2.448084910013844,
      "grad_norm": 0.42271873354911804,
      "learning_rate": 1.9921931224542567e-05,
      "loss": 0.8735,
      "step": 21220
    },
    {
      "epoch": 2.4486617443470236,
      "grad_norm": 0.4053228795528412,
      "learning_rate": 1.9881747559993703e-05,
      "loss": 0.8769,
      "step": 21225
    },
    {
      "epoch": 2.449238578680203,
      "grad_norm": 0.4581349790096283,
      "learning_rate": 1.9841599988873517e-05,
      "loss": 0.8737,
      "step": 21230
    },
    {
      "epoch": 2.4498154130133827,
      "grad_norm": 0.37571778893470764,
      "learning_rate": 1.9801488529268673e-05,
      "loss": 0.8509,
      "step": 21235
    },
    {
      "epoch": 2.4503922473465622,
      "grad_norm": 0.4023771286010742,
      "learning_rate": 1.976141319924939e-05,
      "loss": 0.8249,
      "step": 21240
    },
    {
      "epoch": 2.450969081679742,
      "grad_norm": 0.4219907224178314,
      "learning_rate": 1.972137401686973e-05,
      "loss": 0.872,
      "step": 21245
    },
    {
      "epoch": 2.451545916012921,
      "grad_norm": 0.4790753722190857,
      "learning_rate": 1.9681371000167348e-05,
      "loss": 0.9489,
      "step": 21250
    },
    {
      "epoch": 2.4521227503461005,
      "grad_norm": 0.4022129476070404,
      "learning_rate": 1.964140416716379e-05,
      "loss": 0.832,
      "step": 21255
    },
    {
      "epoch": 2.45269958467928,
      "grad_norm": 0.4099849462509155,
      "learning_rate": 1.960147353586409e-05,
      "loss": 0.8369,
      "step": 21260
    },
    {
      "epoch": 2.4532764190124596,
      "grad_norm": 0.45280352234840393,
      "learning_rate": 1.9561579124257133e-05,
      "loss": 0.9413,
      "step": 21265
    },
    {
      "epoch": 2.453853253345639,
      "grad_norm": 0.40902838110923767,
      "learning_rate": 1.9521720950315403e-05,
      "loss": 0.865,
      "step": 21270
    },
    {
      "epoch": 2.4544300876788188,
      "grad_norm": 0.39801719784736633,
      "learning_rate": 1.9481899031995133e-05,
      "loss": 0.8029,
      "step": 21275
    },
    {
      "epoch": 2.4550069220119983,
      "grad_norm": 0.42373040318489075,
      "learning_rate": 1.9442113387236105e-05,
      "loss": 0.9056,
      "step": 21280
    },
    {
      "epoch": 2.4555837563451774,
      "grad_norm": 0.4700776934623718,
      "learning_rate": 1.940236403396186e-05,
      "loss": 0.8751,
      "step": 21285
    },
    {
      "epoch": 2.456160590678357,
      "grad_norm": 0.47902897000312805,
      "learning_rate": 1.9362650990079566e-05,
      "loss": 0.8409,
      "step": 21290
    },
    {
      "epoch": 2.4567374250115366,
      "grad_norm": 0.41159096360206604,
      "learning_rate": 1.9322974273480054e-05,
      "loss": 0.8969,
      "step": 21295
    },
    {
      "epoch": 2.457314259344716,
      "grad_norm": 0.46974456310272217,
      "learning_rate": 1.9283333902037694e-05,
      "loss": 0.8648,
      "step": 21300
    },
    {
      "epoch": 2.4578910936778957,
      "grad_norm": 0.3993097245693207,
      "learning_rate": 1.9243729893610597e-05,
      "loss": 0.8908,
      "step": 21305
    },
    {
      "epoch": 2.4584679280110753,
      "grad_norm": 0.4157516360282898,
      "learning_rate": 1.9204162266040425e-05,
      "loss": 0.8746,
      "step": 21310
    },
    {
      "epoch": 2.459044762344255,
      "grad_norm": 0.46683579683303833,
      "learning_rate": 1.9164631037152513e-05,
      "loss": 0.8727,
      "step": 21315
    },
    {
      "epoch": 2.4596215966774344,
      "grad_norm": 0.44361960887908936,
      "learning_rate": 1.912513622475567e-05,
      "loss": 0.8974,
      "step": 21320
    },
    {
      "epoch": 2.460198431010614,
      "grad_norm": 0.46571844816207886,
      "learning_rate": 1.9085677846642492e-05,
      "loss": 0.8774,
      "step": 21325
    },
    {
      "epoch": 2.460775265343793,
      "grad_norm": 0.4342435896396637,
      "learning_rate": 1.9046255920588985e-05,
      "loss": 0.8723,
      "step": 21330
    },
    {
      "epoch": 2.4613520996769727,
      "grad_norm": 0.4246952533721924,
      "learning_rate": 1.9006870464354853e-05,
      "loss": 0.8366,
      "step": 21335
    },
    {
      "epoch": 2.4619289340101522,
      "grad_norm": 0.41374680399894714,
      "learning_rate": 1.896752149568323e-05,
      "loss": 0.8464,
      "step": 21340
    },
    {
      "epoch": 2.462505768343332,
      "grad_norm": 0.48700255155563354,
      "learning_rate": 1.8928209032301013e-05,
      "loss": 0.8893,
      "step": 21345
    },
    {
      "epoch": 2.4630826026765114,
      "grad_norm": 0.3991559147834778,
      "learning_rate": 1.888893309191847e-05,
      "loss": 0.8556,
      "step": 21350
    },
    {
      "epoch": 2.463659437009691,
      "grad_norm": 0.4696345925331116,
      "learning_rate": 1.884969369222952e-05,
      "loss": 0.8444,
      "step": 21355
    },
    {
      "epoch": 2.4642362713428705,
      "grad_norm": 0.39918190240859985,
      "learning_rate": 1.8810490850911577e-05,
      "loss": 0.8522,
      "step": 21360
    },
    {
      "epoch": 2.4648131056760496,
      "grad_norm": 0.42222800850868225,
      "learning_rate": 1.8771324585625627e-05,
      "loss": 0.8798,
      "step": 21365
    },
    {
      "epoch": 2.465389940009229,
      "grad_norm": 0.4302942454814911,
      "learning_rate": 1.87321949140161e-05,
      "loss": 0.8588,
      "step": 21370
    },
    {
      "epoch": 2.4659667743424087,
      "grad_norm": 0.463563472032547,
      "learning_rate": 1.8693101853711004e-05,
      "loss": 0.8984,
      "step": 21375
    },
    {
      "epoch": 2.4665436086755883,
      "grad_norm": 0.40896716713905334,
      "learning_rate": 1.8654045422321863e-05,
      "loss": 0.9366,
      "step": 21380
    },
    {
      "epoch": 2.467120443008768,
      "grad_norm": 0.3967675268650055,
      "learning_rate": 1.8615025637443673e-05,
      "loss": 0.8929,
      "step": 21385
    },
    {
      "epoch": 2.4676972773419474,
      "grad_norm": 0.41192832589149475,
      "learning_rate": 1.857604251665487e-05,
      "loss": 0.895,
      "step": 21390
    },
    {
      "epoch": 2.468274111675127,
      "grad_norm": 0.4389733672142029,
      "learning_rate": 1.8537096077517502e-05,
      "loss": 0.911,
      "step": 21395
    },
    {
      "epoch": 2.4688509460083066,
      "grad_norm": 0.42747360467910767,
      "learning_rate": 1.8498186337576972e-05,
      "loss": 0.8929,
      "step": 21400
    },
    {
      "epoch": 2.469427780341486,
      "grad_norm": 0.41354596614837646,
      "learning_rate": 1.845931331436219e-05,
      "loss": 0.8477,
      "step": 21405
    },
    {
      "epoch": 2.4700046146746653,
      "grad_norm": 0.45903632044792175,
      "learning_rate": 1.842047702538554e-05,
      "loss": 0.8825,
      "step": 21410
    },
    {
      "epoch": 2.470581449007845,
      "grad_norm": 0.45671504735946655,
      "learning_rate": 1.838167748814288e-05,
      "loss": 0.871,
      "step": 21415
    },
    {
      "epoch": 2.4711582833410244,
      "grad_norm": 0.42325904965400696,
      "learning_rate": 1.8342914720113404e-05,
      "loss": 0.8901,
      "step": 21420
    },
    {
      "epoch": 2.471735117674204,
      "grad_norm": 0.404392272233963,
      "learning_rate": 1.8304188738759864e-05,
      "loss": 0.9069,
      "step": 21425
    },
    {
      "epoch": 2.4723119520073835,
      "grad_norm": 0.4236079156398773,
      "learning_rate": 1.8265499561528377e-05,
      "loss": 0.8515,
      "step": 21430
    },
    {
      "epoch": 2.472888786340563,
      "grad_norm": 0.5150244832038879,
      "learning_rate": 1.822684720584852e-05,
      "loss": 0.9117,
      "step": 21435
    },
    {
      "epoch": 2.4734656206737426,
      "grad_norm": 0.40461140871047974,
      "learning_rate": 1.8188231689133207e-05,
      "loss": 0.8762,
      "step": 21440
    },
    {
      "epoch": 2.4740424550069218,
      "grad_norm": 0.4534957706928253,
      "learning_rate": 1.8149653028778813e-05,
      "loss": 0.8637,
      "step": 21445
    },
    {
      "epoch": 2.4746192893401013,
      "grad_norm": 0.42262566089630127,
      "learning_rate": 1.8111111242165124e-05,
      "loss": 0.851,
      "step": 21450
    },
    {
      "epoch": 2.475196123673281,
      "grad_norm": 0.41150304675102234,
      "learning_rate": 1.8072606346655274e-05,
      "loss": 0.8881,
      "step": 21455
    },
    {
      "epoch": 2.4757729580064605,
      "grad_norm": 0.42715293169021606,
      "learning_rate": 1.803413835959583e-05,
      "loss": 0.8618,
      "step": 21460
    },
    {
      "epoch": 2.47634979233964,
      "grad_norm": 0.4450746178627014,
      "learning_rate": 1.7995707298316632e-05,
      "loss": 0.8519,
      "step": 21465
    },
    {
      "epoch": 2.4769266266728196,
      "grad_norm": 0.3851078748703003,
      "learning_rate": 1.7957313180130986e-05,
      "loss": 0.8565,
      "step": 21470
    },
    {
      "epoch": 2.477503461005999,
      "grad_norm": 0.4541506767272949,
      "learning_rate": 1.79189560223355e-05,
      "loss": 0.8453,
      "step": 21475
    },
    {
      "epoch": 2.4780802953391787,
      "grad_norm": 0.39562171697616577,
      "learning_rate": 1.788063584221017e-05,
      "loss": 0.8445,
      "step": 21480
    },
    {
      "epoch": 2.4786571296723583,
      "grad_norm": 0.41531699895858765,
      "learning_rate": 1.7842352657018313e-05,
      "loss": 0.8581,
      "step": 21485
    },
    {
      "epoch": 2.4792339640055374,
      "grad_norm": 0.4400758743286133,
      "learning_rate": 1.7804106484006543e-05,
      "loss": 0.9219,
      "step": 21490
    },
    {
      "epoch": 2.479810798338717,
      "grad_norm": 0.4268995225429535,
      "learning_rate": 1.776589734040487e-05,
      "loss": 0.889,
      "step": 21495
    },
    {
      "epoch": 2.4803876326718965,
      "grad_norm": 0.4040692150592804,
      "learning_rate": 1.7727725243426564e-05,
      "loss": 0.849,
      "step": 21500
    },
    {
      "epoch": 2.480964467005076,
      "grad_norm": 0.40337270498275757,
      "learning_rate": 1.7689590210268235e-05,
      "loss": 0.9017,
      "step": 21505
    },
    {
      "epoch": 2.4815413013382557,
      "grad_norm": 0.40132004022598267,
      "learning_rate": 1.7651492258109835e-05,
      "loss": 0.8515,
      "step": 21510
    },
    {
      "epoch": 2.4821181356714352,
      "grad_norm": 0.3863861858844757,
      "learning_rate": 1.7613431404114487e-05,
      "loss": 0.8567,
      "step": 21515
    },
    {
      "epoch": 2.482694970004615,
      "grad_norm": 0.4050062894821167,
      "learning_rate": 1.7575407665428735e-05,
      "loss": 0.8635,
      "step": 21520
    },
    {
      "epoch": 2.4832718043377944,
      "grad_norm": 0.4430626630783081,
      "learning_rate": 1.7537421059182314e-05,
      "loss": 0.8908,
      "step": 21525
    },
    {
      "epoch": 2.483848638670974,
      "grad_norm": 0.4688493609428406,
      "learning_rate": 1.7499471602488316e-05,
      "loss": 0.8878,
      "step": 21530
    },
    {
      "epoch": 2.484425473004153,
      "grad_norm": 0.42432737350463867,
      "learning_rate": 1.7461559312442953e-05,
      "loss": 0.8621,
      "step": 21535
    },
    {
      "epoch": 2.4850023073373326,
      "grad_norm": 0.4097782373428345,
      "learning_rate": 1.74236842061259e-05,
      "loss": 0.8953,
      "step": 21540
    },
    {
      "epoch": 2.485579141670512,
      "grad_norm": 0.3850114345550537,
      "learning_rate": 1.7385846300599885e-05,
      "loss": 0.8483,
      "step": 21545
    },
    {
      "epoch": 2.4861559760036918,
      "grad_norm": 0.43694594502449036,
      "learning_rate": 1.7348045612910978e-05,
      "loss": 0.8793,
      "step": 21550
    },
    {
      "epoch": 2.4867328103368713,
      "grad_norm": 0.39885058999061584,
      "learning_rate": 1.7310282160088465e-05,
      "loss": 0.8459,
      "step": 21555
    },
    {
      "epoch": 2.487309644670051,
      "grad_norm": 0.4454430043697357,
      "learning_rate": 1.7272555959144888e-05,
      "loss": 0.9015,
      "step": 21560
    },
    {
      "epoch": 2.4878864790032305,
      "grad_norm": 0.4712061285972595,
      "learning_rate": 1.7234867027075906e-05,
      "loss": 0.8686,
      "step": 21565
    },
    {
      "epoch": 2.4884633133364096,
      "grad_norm": 0.4153970777988434,
      "learning_rate": 1.7197215380860497e-05,
      "loss": 0.8595,
      "step": 21570
    },
    {
      "epoch": 2.489040147669589,
      "grad_norm": 0.3996500074863434,
      "learning_rate": 1.7159601037460805e-05,
      "loss": 0.8391,
      "step": 21575
    },
    {
      "epoch": 2.4896169820027687,
      "grad_norm": 0.43990781903266907,
      "learning_rate": 1.712202401382217e-05,
      "loss": 0.8731,
      "step": 21580
    },
    {
      "epoch": 2.4901938163359483,
      "grad_norm": 0.46257680654525757,
      "learning_rate": 1.7084484326873062e-05,
      "loss": 0.8236,
      "step": 21585
    },
    {
      "epoch": 2.490770650669128,
      "grad_norm": 0.4220145344734192,
      "learning_rate": 1.704698199352527e-05,
      "loss": 0.869,
      "step": 21590
    },
    {
      "epoch": 2.4913474850023074,
      "grad_norm": 0.4439292550086975,
      "learning_rate": 1.7009517030673584e-05,
      "loss": 0.8021,
      "step": 21595
    },
    {
      "epoch": 2.491924319335487,
      "grad_norm": 0.4120362102985382,
      "learning_rate": 1.6972089455196115e-05,
      "loss": 0.9061,
      "step": 21600
    },
    {
      "epoch": 2.4925011536686665,
      "grad_norm": 0.39903098344802856,
      "learning_rate": 1.6934699283953968e-05,
      "loss": 0.8302,
      "step": 21605
    },
    {
      "epoch": 2.493077988001846,
      "grad_norm": 0.4350089132785797,
      "learning_rate": 1.6897346533791592e-05,
      "loss": 0.8538,
      "step": 21610
    },
    {
      "epoch": 2.4936548223350252,
      "grad_norm": 0.3748519718647003,
      "learning_rate": 1.6860031221536398e-05,
      "loss": 0.8907,
      "step": 21615
    },
    {
      "epoch": 2.494231656668205,
      "grad_norm": 0.40715354681015015,
      "learning_rate": 1.6822753363999066e-05,
      "loss": 0.8603,
      "step": 21620
    },
    {
      "epoch": 2.4948084910013844,
      "grad_norm": 0.44604524970054626,
      "learning_rate": 1.678551297797325e-05,
      "loss": 0.8541,
      "step": 21625
    },
    {
      "epoch": 2.495385325334564,
      "grad_norm": 0.45316800475120544,
      "learning_rate": 1.674831008023594e-05,
      "loss": 0.8733,
      "step": 21630
    },
    {
      "epoch": 2.4959621596677435,
      "grad_norm": 0.42586323618888855,
      "learning_rate": 1.671114468754702e-05,
      "loss": 0.8808,
      "step": 21635
    },
    {
      "epoch": 2.496538994000923,
      "grad_norm": 0.40052857995033264,
      "learning_rate": 1.6674016816649595e-05,
      "loss": 0.8482,
      "step": 21640
    },
    {
      "epoch": 2.4971158283341026,
      "grad_norm": 0.3876071572303772,
      "learning_rate": 1.6636926484269855e-05,
      "loss": 0.8543,
      "step": 21645
    },
    {
      "epoch": 2.4976926626672817,
      "grad_norm": 0.3747224509716034,
      "learning_rate": 1.6599873707117087e-05,
      "loss": 0.8397,
      "step": 21650
    },
    {
      "epoch": 2.4982694970004613,
      "grad_norm": 0.41739127039909363,
      "learning_rate": 1.6562858501883595e-05,
      "loss": 0.8634,
      "step": 21655
    },
    {
      "epoch": 2.498846331333641,
      "grad_norm": 0.4056764841079712,
      "learning_rate": 1.6525880885244815e-05,
      "loss": 0.8947,
      "step": 21660
    },
    {
      "epoch": 2.4994231656668204,
      "grad_norm": 0.3982926905155182,
      "learning_rate": 1.648894087385925e-05,
      "loss": 0.8889,
      "step": 21665
    },
    {
      "epoch": 2.5,
      "grad_norm": 0.46115100383758545,
      "learning_rate": 1.6452038484368447e-05,
      "loss": 0.8877,
      "step": 21670
    },
    {
      "epoch": 2.5005768343331796,
      "grad_norm": 0.40610867738723755,
      "learning_rate": 1.641517373339696e-05,
      "loss": 0.9315,
      "step": 21675
    },
    {
      "epoch": 2.501153668666359,
      "grad_norm": 0.4094102084636688,
      "learning_rate": 1.6378346637552512e-05,
      "loss": 0.869,
      "step": 21680
    },
    {
      "epoch": 2.5017305029995387,
      "grad_norm": 0.3844679892063141,
      "learning_rate": 1.6341557213425708e-05,
      "loss": 0.822,
      "step": 21685
    },
    {
      "epoch": 2.5023073373327183,
      "grad_norm": 0.39057260751724243,
      "learning_rate": 1.6304805477590312e-05,
      "loss": 0.8623,
      "step": 21690
    },
    {
      "epoch": 2.5028841716658974,
      "grad_norm": 0.40054717659950256,
      "learning_rate": 1.6268091446602973e-05,
      "loss": 0.8537,
      "step": 21695
    },
    {
      "epoch": 2.503461005999077,
      "grad_norm": 0.4297622740268707,
      "learning_rate": 1.6231415137003537e-05,
      "loss": 0.8425,
      "step": 21700
    },
    {
      "epoch": 2.5040378403322565,
      "grad_norm": 0.4544278383255005,
      "learning_rate": 1.6194776565314672e-05,
      "loss": 0.8456,
      "step": 21705
    },
    {
      "epoch": 2.504614674665436,
      "grad_norm": 0.42204564809799194,
      "learning_rate": 1.6158175748042147e-05,
      "loss": 0.9345,
      "step": 21710
    },
    {
      "epoch": 2.5051915089986156,
      "grad_norm": 0.40919971466064453,
      "learning_rate": 1.6121612701674725e-05,
      "loss": 0.8399,
      "step": 21715
    },
    {
      "epoch": 2.505768343331795,
      "grad_norm": 0.44898590445518494,
      "learning_rate": 1.6085087442684122e-05,
      "loss": 0.9533,
      "step": 21720
    },
    {
      "epoch": 2.5063451776649748,
      "grad_norm": 0.4511086940765381,
      "learning_rate": 1.6048599987525015e-05,
      "loss": 0.8747,
      "step": 21725
    },
    {
      "epoch": 2.506922011998154,
      "grad_norm": 0.37081825733184814,
      "learning_rate": 1.601215035263508e-05,
      "loss": 0.8409,
      "step": 21730
    },
    {
      "epoch": 2.507498846331334,
      "grad_norm": 0.4122322201728821,
      "learning_rate": 1.597573855443497e-05,
      "loss": 0.853,
      "step": 21735
    },
    {
      "epoch": 2.508075680664513,
      "grad_norm": 0.45850232243537903,
      "learning_rate": 1.5939364609328265e-05,
      "loss": 0.8752,
      "step": 21740
    },
    {
      "epoch": 2.5086525149976926,
      "grad_norm": 0.43158847093582153,
      "learning_rate": 1.5903028533701457e-05,
      "loss": 0.8965,
      "step": 21745
    },
    {
      "epoch": 2.509229349330872,
      "grad_norm": 0.41096311807632446,
      "learning_rate": 1.5866730343924085e-05,
      "loss": 0.9114,
      "step": 21750
    },
    {
      "epoch": 2.5098061836640517,
      "grad_norm": 0.4422203004360199,
      "learning_rate": 1.5830470056348513e-05,
      "loss": 0.8432,
      "step": 21755
    },
    {
      "epoch": 2.5103830179972313,
      "grad_norm": 0.42912983894348145,
      "learning_rate": 1.5794247687310093e-05,
      "loss": 0.8377,
      "step": 21760
    },
    {
      "epoch": 2.510959852330411,
      "grad_norm": 0.4608105719089508,
      "learning_rate": 1.575806325312702e-05,
      "loss": 0.8773,
      "step": 21765
    },
    {
      "epoch": 2.5115366866635904,
      "grad_norm": 0.43036070466041565,
      "learning_rate": 1.5721916770100532e-05,
      "loss": 0.8908,
      "step": 21770
    },
    {
      "epoch": 2.5121135209967695,
      "grad_norm": 0.44745394587516785,
      "learning_rate": 1.5685808254514634e-05,
      "loss": 0.8709,
      "step": 21775
    },
    {
      "epoch": 2.512690355329949,
      "grad_norm": 0.4649929702281952,
      "learning_rate": 1.5649737722636315e-05,
      "loss": 0.849,
      "step": 21780
    },
    {
      "epoch": 2.5132671896631287,
      "grad_norm": 0.4303283989429474,
      "learning_rate": 1.5613705190715356e-05,
      "loss": 0.8852,
      "step": 21785
    },
    {
      "epoch": 2.5138440239963082,
      "grad_norm": 0.5020930171012878,
      "learning_rate": 1.557771067498458e-05,
      "loss": 0.9027,
      "step": 21790
    },
    {
      "epoch": 2.514420858329488,
      "grad_norm": 0.359627902507782,
      "learning_rate": 1.554175419165951e-05,
      "loss": 0.8415,
      "step": 21795
    },
    {
      "epoch": 2.5149976926626674,
      "grad_norm": 0.41575315594673157,
      "learning_rate": 1.5505835756938636e-05,
      "loss": 0.8718,
      "step": 21800
    },
    {
      "epoch": 2.515574526995847,
      "grad_norm": 0.400100439786911,
      "learning_rate": 1.546995538700329e-05,
      "loss": 0.8759,
      "step": 21805
    },
    {
      "epoch": 2.516151361329026,
      "grad_norm": 0.40466317534446716,
      "learning_rate": 1.5434113098017667e-05,
      "loss": 0.8871,
      "step": 21810
    },
    {
      "epoch": 2.516728195662206,
      "grad_norm": 0.4189120829105377,
      "learning_rate": 1.5398308906128735e-05,
      "loss": 0.8984,
      "step": 21815
    },
    {
      "epoch": 2.517305029995385,
      "grad_norm": 0.4149659276008606,
      "learning_rate": 1.5362542827466387e-05,
      "loss": 0.8664,
      "step": 21820
    },
    {
      "epoch": 2.5178818643285648,
      "grad_norm": 0.41705700755119324,
      "learning_rate": 1.5326814878143304e-05,
      "loss": 0.8438,
      "step": 21825
    },
    {
      "epoch": 2.5184586986617443,
      "grad_norm": 0.3998613953590393,
      "learning_rate": 1.529112507425502e-05,
      "loss": 0.869,
      "step": 21830
    },
    {
      "epoch": 2.519035532994924,
      "grad_norm": 0.4276602566242218,
      "learning_rate": 1.5255473431879785e-05,
      "loss": 0.8712,
      "step": 21835
    },
    {
      "epoch": 2.5196123673281035,
      "grad_norm": 0.4247130751609802,
      "learning_rate": 1.5219859967078854e-05,
      "loss": 0.8362,
      "step": 21840
    },
    {
      "epoch": 2.520189201661283,
      "grad_norm": 0.42334526777267456,
      "learning_rate": 1.5184284695896056e-05,
      "loss": 0.8558,
      "step": 21845
    },
    {
      "epoch": 2.5207660359944626,
      "grad_norm": 0.4518727958202362,
      "learning_rate": 1.5148747634358185e-05,
      "loss": 0.8547,
      "step": 21850
    },
    {
      "epoch": 2.5213428703276417,
      "grad_norm": 0.39660370349884033,
      "learning_rate": 1.5113248798474689e-05,
      "loss": 0.8924,
      "step": 21855
    },
    {
      "epoch": 2.5219197046608213,
      "grad_norm": 0.431823194026947,
      "learning_rate": 1.5077788204237952e-05,
      "loss": 0.8683,
      "step": 21860
    },
    {
      "epoch": 2.522496538994001,
      "grad_norm": 0.4138554036617279,
      "learning_rate": 1.5042365867622976e-05,
      "loss": 0.8733,
      "step": 21865
    },
    {
      "epoch": 2.5230733733271804,
      "grad_norm": 0.4500097334384918,
      "learning_rate": 1.5006981804587595e-05,
      "loss": 0.8854,
      "step": 21870
    },
    {
      "epoch": 2.52365020766036,
      "grad_norm": 0.3916674256324768,
      "learning_rate": 1.4971636031072422e-05,
      "loss": 0.8951,
      "step": 21875
    },
    {
      "epoch": 2.5242270419935395,
      "grad_norm": 0.4572630524635315,
      "learning_rate": 1.4936328563000812e-05,
      "loss": 0.8475,
      "step": 21880
    },
    {
      "epoch": 2.524803876326719,
      "grad_norm": 0.44522368907928467,
      "learning_rate": 1.4901059416278806e-05,
      "loss": 0.8946,
      "step": 21885
    },
    {
      "epoch": 2.525380710659898,
      "grad_norm": 0.5151233673095703,
      "learning_rate": 1.4865828606795241e-05,
      "loss": 0.8551,
      "step": 21890
    },
    {
      "epoch": 2.5259575449930782,
      "grad_norm": 0.43266403675079346,
      "learning_rate": 1.4830636150421662e-05,
      "loss": 0.8539,
      "step": 21895
    },
    {
      "epoch": 2.5265343793262574,
      "grad_norm": 0.4494539499282837,
      "learning_rate": 1.4795482063012367e-05,
      "loss": 0.8631,
      "step": 21900
    },
    {
      "epoch": 2.527111213659437,
      "grad_norm": 0.44507652521133423,
      "learning_rate": 1.4760366360404266e-05,
      "loss": 0.9034,
      "step": 21905
    },
    {
      "epoch": 2.5276880479926165,
      "grad_norm": 0.44144436717033386,
      "learning_rate": 1.4725289058417158e-05,
      "loss": 0.8356,
      "step": 21910
    },
    {
      "epoch": 2.528264882325796,
      "grad_norm": 0.3664838969707489,
      "learning_rate": 1.4690250172853348e-05,
      "loss": 0.8655,
      "step": 21915
    },
    {
      "epoch": 2.5288417166589756,
      "grad_norm": 0.4298860728740692,
      "learning_rate": 1.4655249719497965e-05,
      "loss": 0.8781,
      "step": 21920
    },
    {
      "epoch": 2.529418550992155,
      "grad_norm": 0.43058478832244873,
      "learning_rate": 1.4620287714118764e-05,
      "loss": 0.9014,
      "step": 21925
    },
    {
      "epoch": 2.5299953853253347,
      "grad_norm": 0.43566563725471497,
      "learning_rate": 1.4585364172466231e-05,
      "loss": 0.8951,
      "step": 21930
    },
    {
      "epoch": 2.530572219658514,
      "grad_norm": 0.4238441586494446,
      "learning_rate": 1.4550479110273429e-05,
      "loss": 0.9008,
      "step": 21935
    },
    {
      "epoch": 2.5311490539916934,
      "grad_norm": 0.514903724193573,
      "learning_rate": 1.4515632543256197e-05,
      "loss": 0.8527,
      "step": 21940
    },
    {
      "epoch": 2.531725888324873,
      "grad_norm": 0.3885857164859772,
      "learning_rate": 1.4480824487112943e-05,
      "loss": 0.8232,
      "step": 21945
    },
    {
      "epoch": 2.5323027226580526,
      "grad_norm": 0.4228072166442871,
      "learning_rate": 1.4446054957524802e-05,
      "loss": 0.9215,
      "step": 21950
    },
    {
      "epoch": 2.532879556991232,
      "grad_norm": 0.4234387278556824,
      "learning_rate": 1.441132397015551e-05,
      "loss": 0.886,
      "step": 21955
    },
    {
      "epoch": 2.5334563913244117,
      "grad_norm": 0.39268434047698975,
      "learning_rate": 1.437663154065142e-05,
      "loss": 0.8399,
      "step": 21960
    },
    {
      "epoch": 2.5340332256575913,
      "grad_norm": 0.4078601896762848,
      "learning_rate": 1.4341977684641539e-05,
      "loss": 0.8798,
      "step": 21965
    },
    {
      "epoch": 2.5346100599907704,
      "grad_norm": 0.4169177711009979,
      "learning_rate": 1.4307362417737512e-05,
      "loss": 0.8964,
      "step": 21970
    },
    {
      "epoch": 2.5351868943239504,
      "grad_norm": 0.40286895632743835,
      "learning_rate": 1.4272785755533601e-05,
      "loss": 0.8619,
      "step": 21975
    },
    {
      "epoch": 2.5357637286571295,
      "grad_norm": 0.43225333094596863,
      "learning_rate": 1.4238247713606622e-05,
      "loss": 0.8598,
      "step": 21980
    },
    {
      "epoch": 2.536340562990309,
      "grad_norm": 0.5134694576263428,
      "learning_rate": 1.4203748307516052e-05,
      "loss": 0.862,
      "step": 21985
    },
    {
      "epoch": 2.5369173973234886,
      "grad_norm": 0.5272102355957031,
      "learning_rate": 1.4169287552803923e-05,
      "loss": 0.876,
      "step": 21990
    },
    {
      "epoch": 2.537494231656668,
      "grad_norm": 0.44927138090133667,
      "learning_rate": 1.4134865464994894e-05,
      "loss": 0.8765,
      "step": 21995
    },
    {
      "epoch": 2.5380710659898478,
      "grad_norm": 0.44614723324775696,
      "learning_rate": 1.4100482059596177e-05,
      "loss": 0.8442,
      "step": 22000
    },
    {
      "epoch": 2.5386479003230273,
      "grad_norm": 0.4107849895954132,
      "learning_rate": 1.4066137352097575e-05,
      "loss": 0.8332,
      "step": 22005
    },
    {
      "epoch": 2.539224734656207,
      "grad_norm": 0.3938974440097809,
      "learning_rate": 1.4031831357971414e-05,
      "loss": 0.8645,
      "step": 22010
    },
    {
      "epoch": 2.539801568989386,
      "grad_norm": 0.4226299822330475,
      "learning_rate": 1.3997564092672622e-05,
      "loss": 0.8903,
      "step": 22015
    },
    {
      "epoch": 2.540378403322566,
      "grad_norm": 0.4413795471191406,
      "learning_rate": 1.396333557163868e-05,
      "loss": 0.9201,
      "step": 22020
    },
    {
      "epoch": 2.540955237655745,
      "grad_norm": 0.4873966872692108,
      "learning_rate": 1.3929145810289612e-05,
      "loss": 0.884,
      "step": 22025
    },
    {
      "epoch": 2.5415320719889247,
      "grad_norm": 0.435316264629364,
      "learning_rate": 1.3894994824027951e-05,
      "loss": 0.8776,
      "step": 22030
    },
    {
      "epoch": 2.5421089063221043,
      "grad_norm": 0.4176023006439209,
      "learning_rate": 1.3860882628238781e-05,
      "loss": 0.8666,
      "step": 22035
    },
    {
      "epoch": 2.542685740655284,
      "grad_norm": 0.4190213084220886,
      "learning_rate": 1.3826809238289717e-05,
      "loss": 0.894,
      "step": 22040
    },
    {
      "epoch": 2.5432625749884634,
      "grad_norm": 0.45858487486839294,
      "learning_rate": 1.3792774669530917e-05,
      "loss": 0.8955,
      "step": 22045
    },
    {
      "epoch": 2.543839409321643,
      "grad_norm": 0.44214770197868347,
      "learning_rate": 1.3758778937294947e-05,
      "loss": 0.8868,
      "step": 22050
    },
    {
      "epoch": 2.5444162436548226,
      "grad_norm": 0.4137255549430847,
      "learning_rate": 1.3724822056897046e-05,
      "loss": 0.8208,
      "step": 22055
    },
    {
      "epoch": 2.5449930779880017,
      "grad_norm": 0.4444392919540405,
      "learning_rate": 1.369090404363479e-05,
      "loss": 0.8669,
      "step": 22060
    },
    {
      "epoch": 2.5455699123211812,
      "grad_norm": 0.433173269033432,
      "learning_rate": 1.365702491278833e-05,
      "loss": 0.8687,
      "step": 22065
    },
    {
      "epoch": 2.546146746654361,
      "grad_norm": 0.4325042963027954,
      "learning_rate": 1.3623184679620272e-05,
      "loss": 0.8709,
      "step": 22070
    },
    {
      "epoch": 2.5467235809875404,
      "grad_norm": 0.4476592540740967,
      "learning_rate": 1.358938335937574e-05,
      "loss": 0.9137,
      "step": 22075
    },
    {
      "epoch": 2.54730041532072,
      "grad_norm": 0.42596349120140076,
      "learning_rate": 1.3555620967282235e-05,
      "loss": 0.8615,
      "step": 22080
    },
    {
      "epoch": 2.5478772496538995,
      "grad_norm": 0.38326412439346313,
      "learning_rate": 1.352189751854982e-05,
      "loss": 0.8753,
      "step": 22085
    },
    {
      "epoch": 2.548454083987079,
      "grad_norm": 0.4048885405063629,
      "learning_rate": 1.3488213028370967e-05,
      "loss": 0.8891,
      "step": 22090
    },
    {
      "epoch": 2.549030918320258,
      "grad_norm": 0.39084184169769287,
      "learning_rate": 1.3454567511920634e-05,
      "loss": 0.8896,
      "step": 22095
    },
    {
      "epoch": 2.549607752653438,
      "grad_norm": 0.41996973752975464,
      "learning_rate": 1.3420960984356134e-05,
      "loss": 0.8452,
      "step": 22100
    },
    {
      "epoch": 2.5501845869866173,
      "grad_norm": 0.47061657905578613,
      "learning_rate": 1.33873934608173e-05,
      "loss": 0.8706,
      "step": 22105
    },
    {
      "epoch": 2.550761421319797,
      "grad_norm": 0.4455969035625458,
      "learning_rate": 1.3353864956426366e-05,
      "loss": 0.8736,
      "step": 22110
    },
    {
      "epoch": 2.5513382556529764,
      "grad_norm": 0.4241873025894165,
      "learning_rate": 1.3320375486288017e-05,
      "loss": 0.8226,
      "step": 22115
    },
    {
      "epoch": 2.551915089986156,
      "grad_norm": 0.3960394561290741,
      "learning_rate": 1.3286925065489253e-05,
      "loss": 0.8473,
      "step": 22120
    },
    {
      "epoch": 2.5524919243193356,
      "grad_norm": 0.43792304396629333,
      "learning_rate": 1.3253513709099652e-05,
      "loss": 0.8635,
      "step": 22125
    },
    {
      "epoch": 2.553068758652515,
      "grad_norm": 0.44686734676361084,
      "learning_rate": 1.3220141432171007e-05,
      "loss": 0.8711,
      "step": 22130
    },
    {
      "epoch": 2.5536455929856947,
      "grad_norm": 0.4718668460845947,
      "learning_rate": 1.3186808249737658e-05,
      "loss": 0.8091,
      "step": 22135
    },
    {
      "epoch": 2.554222427318874,
      "grad_norm": 0.42717134952545166,
      "learning_rate": 1.3153514176816195e-05,
      "loss": 0.8444,
      "step": 22140
    },
    {
      "epoch": 2.5547992616520534,
      "grad_norm": 0.403614342212677,
      "learning_rate": 1.3120259228405751e-05,
      "loss": 0.7916,
      "step": 22145
    },
    {
      "epoch": 2.555376095985233,
      "grad_norm": 0.4558524489402771,
      "learning_rate": 1.3087043419487676e-05,
      "loss": 0.8748,
      "step": 22150
    },
    {
      "epoch": 2.5559529303184125,
      "grad_norm": 0.47509682178497314,
      "learning_rate": 1.305386676502578e-05,
      "loss": 0.8737,
      "step": 22155
    },
    {
      "epoch": 2.556529764651592,
      "grad_norm": 0.4862813651561737,
      "learning_rate": 1.3020729279966215e-05,
      "loss": 0.9136,
      "step": 22160
    },
    {
      "epoch": 2.5571065989847717,
      "grad_norm": 0.413223534822464,
      "learning_rate": 1.2987630979237509e-05,
      "loss": 0.9088,
      "step": 22165
    },
    {
      "epoch": 2.5576834333179512,
      "grad_norm": 0.4112166464328766,
      "learning_rate": 1.2954571877750443e-05,
      "loss": 0.864,
      "step": 22170
    },
    {
      "epoch": 2.5582602676511303,
      "grad_norm": 0.4073093831539154,
      "learning_rate": 1.2921551990398262e-05,
      "loss": 0.8841,
      "step": 22175
    },
    {
      "epoch": 2.5588371019843104,
      "grad_norm": 0.4392577111721039,
      "learning_rate": 1.2888571332056464e-05,
      "loss": 0.8658,
      "step": 22180
    },
    {
      "epoch": 2.5594139363174895,
      "grad_norm": 0.3867861032485962,
      "learning_rate": 1.2855629917582935e-05,
      "loss": 0.8429,
      "step": 22185
    },
    {
      "epoch": 2.559990770650669,
      "grad_norm": 0.43121537566185,
      "learning_rate": 1.2822727761817776e-05,
      "loss": 0.8772,
      "step": 22190
    },
    {
      "epoch": 2.5605676049838486,
      "grad_norm": 0.4013991355895996,
      "learning_rate": 1.2789864879583557e-05,
      "loss": 0.8684,
      "step": 22195
    },
    {
      "epoch": 2.561144439317028,
      "grad_norm": 0.38427436351776123,
      "learning_rate": 1.2757041285685011e-05,
      "loss": 0.8633,
      "step": 22200
    },
    {
      "epoch": 2.5617212736502077,
      "grad_norm": 0.41684120893478394,
      "learning_rate": 1.2724256994909268e-05,
      "loss": 0.8734,
      "step": 22205
    },
    {
      "epoch": 2.5622981079833873,
      "grad_norm": 0.4538641571998596,
      "learning_rate": 1.2691512022025653e-05,
      "loss": 0.856,
      "step": 22210
    },
    {
      "epoch": 2.562874942316567,
      "grad_norm": 0.39726805686950684,
      "learning_rate": 1.2658806381785926e-05,
      "loss": 0.8719,
      "step": 22215
    },
    {
      "epoch": 2.563451776649746,
      "grad_norm": 0.4542260468006134,
      "learning_rate": 1.2626140088923987e-05,
      "loss": 0.8779,
      "step": 22220
    },
    {
      "epoch": 2.5640286109829256,
      "grad_norm": 0.4026002883911133,
      "learning_rate": 1.2593513158156089e-05,
      "loss": 0.8843,
      "step": 22225
    },
    {
      "epoch": 2.564605445316105,
      "grad_norm": 0.43637779355049133,
      "learning_rate": 1.2560925604180673e-05,
      "loss": 0.877,
      "step": 22230
    },
    {
      "epoch": 2.5651822796492847,
      "grad_norm": 0.4000450670719147,
      "learning_rate": 1.2528377441678585e-05,
      "loss": 0.8963,
      "step": 22235
    },
    {
      "epoch": 2.5657591139824643,
      "grad_norm": 0.41496726870536804,
      "learning_rate": 1.2495868685312761e-05,
      "loss": 0.9021,
      "step": 22240
    },
    {
      "epoch": 2.566335948315644,
      "grad_norm": 0.4522967040538788,
      "learning_rate": 1.2463399349728488e-05,
      "loss": 0.8862,
      "step": 22245
    },
    {
      "epoch": 2.5669127826488234,
      "grad_norm": 0.3863905966281891,
      "learning_rate": 1.2430969449553276e-05,
      "loss": 0.8659,
      "step": 22250
    },
    {
      "epoch": 2.5674896169820025,
      "grad_norm": 0.48074695467948914,
      "learning_rate": 1.2398578999396848e-05,
      "loss": 0.8919,
      "step": 22255
    },
    {
      "epoch": 2.5680664513151825,
      "grad_norm": 0.38571128249168396,
      "learning_rate": 1.2366228013851156e-05,
      "loss": 0.8478,
      "step": 22260
    },
    {
      "epoch": 2.5686432856483616,
      "grad_norm": 0.4104611873626709,
      "learning_rate": 1.2333916507490384e-05,
      "loss": 0.8858,
      "step": 22265
    },
    {
      "epoch": 2.569220119981541,
      "grad_norm": 0.45108452439308167,
      "learning_rate": 1.2301644494870934e-05,
      "loss": 0.8982,
      "step": 22270
    },
    {
      "epoch": 2.5697969543147208,
      "grad_norm": 0.38730573654174805,
      "learning_rate": 1.2269411990531421e-05,
      "loss": 0.8801,
      "step": 22275
    },
    {
      "epoch": 2.5703737886479003,
      "grad_norm": 0.40812236070632935,
      "learning_rate": 1.2237219008992607e-05,
      "loss": 0.8772,
      "step": 22280
    },
    {
      "epoch": 2.57095062298108,
      "grad_norm": 0.4362446367740631,
      "learning_rate": 1.2205065564757568e-05,
      "loss": 0.8175,
      "step": 22285
    },
    {
      "epoch": 2.5715274573142595,
      "grad_norm": 0.47301381826400757,
      "learning_rate": 1.2172951672311427e-05,
      "loss": 0.8879,
      "step": 22290
    },
    {
      "epoch": 2.572104291647439,
      "grad_norm": 0.4802537262439728,
      "learning_rate": 1.2140877346121604e-05,
      "loss": 0.8624,
      "step": 22295
    },
    {
      "epoch": 2.572681125980618,
      "grad_norm": 0.39373570680618286,
      "learning_rate": 1.2108842600637571e-05,
      "loss": 0.8733,
      "step": 22300
    },
    {
      "epoch": 2.5732579603137977,
      "grad_norm": 0.38379108905792236,
      "learning_rate": 1.207684745029114e-05,
      "loss": 0.8824,
      "step": 22305
    },
    {
      "epoch": 2.5738347946469773,
      "grad_norm": 0.44027572870254517,
      "learning_rate": 1.2044891909496126e-05,
      "loss": 0.8337,
      "step": 22310
    },
    {
      "epoch": 2.574411628980157,
      "grad_norm": 0.4214009642601013,
      "learning_rate": 1.2012975992648568e-05,
      "loss": 0.8312,
      "step": 22315
    },
    {
      "epoch": 2.5749884633133364,
      "grad_norm": 0.46107718348503113,
      "learning_rate": 1.1981099714126654e-05,
      "loss": 0.8527,
      "step": 22320
    },
    {
      "epoch": 2.575565297646516,
      "grad_norm": 0.4151912331581116,
      "learning_rate": 1.1949263088290742e-05,
      "loss": 0.8836,
      "step": 22325
    },
    {
      "epoch": 2.5761421319796955,
      "grad_norm": 0.4159373939037323,
      "learning_rate": 1.1917466129483246e-05,
      "loss": 0.8533,
      "step": 22330
    },
    {
      "epoch": 2.5767189663128747,
      "grad_norm": 0.4287991523742676,
      "learning_rate": 1.1885708852028777e-05,
      "loss": 0.894,
      "step": 22335
    },
    {
      "epoch": 2.5772958006460547,
      "grad_norm": 0.4311448335647583,
      "learning_rate": 1.1853991270234043e-05,
      "loss": 0.8697,
      "step": 22340
    },
    {
      "epoch": 2.577872634979234,
      "grad_norm": 0.43213003873825073,
      "learning_rate": 1.1822313398387919e-05,
      "loss": 0.8727,
      "step": 22345
    },
    {
      "epoch": 2.5784494693124134,
      "grad_norm": 0.42621949315071106,
      "learning_rate": 1.1790675250761263e-05,
      "loss": 0.8978,
      "step": 22350
    },
    {
      "epoch": 2.579026303645593,
      "grad_norm": 0.4279092848300934,
      "learning_rate": 1.1759076841607208e-05,
      "loss": 0.8577,
      "step": 22355
    },
    {
      "epoch": 2.5796031379787725,
      "grad_norm": 0.4713653326034546,
      "learning_rate": 1.1727518185160847e-05,
      "loss": 0.8832,
      "step": 22360
    },
    {
      "epoch": 2.580179972311952,
      "grad_norm": 0.43556147813796997,
      "learning_rate": 1.1695999295639459e-05,
      "loss": 0.8847,
      "step": 22365
    },
    {
      "epoch": 2.5807568066451316,
      "grad_norm": 0.4158504605293274,
      "learning_rate": 1.1664520187242289e-05,
      "loss": 0.878,
      "step": 22370
    },
    {
      "epoch": 2.581333640978311,
      "grad_norm": 0.5004168152809143,
      "learning_rate": 1.1633080874150826e-05,
      "loss": 0.8739,
      "step": 22375
    },
    {
      "epoch": 2.5819104753114903,
      "grad_norm": 0.42205938696861267,
      "learning_rate": 1.1601681370528484e-05,
      "loss": 0.8659,
      "step": 22380
    },
    {
      "epoch": 2.5824873096446703,
      "grad_norm": 0.4189571738243103,
      "learning_rate": 1.1570321690520824e-05,
      "loss": 0.923,
      "step": 22385
    },
    {
      "epoch": 2.5830641439778494,
      "grad_norm": 0.49161621928215027,
      "learning_rate": 1.1539001848255426e-05,
      "loss": 0.8671,
      "step": 22390
    },
    {
      "epoch": 2.583640978311029,
      "grad_norm": 0.42730411887168884,
      "learning_rate": 1.150772185784198e-05,
      "loss": 0.9117,
      "step": 22395
    },
    {
      "epoch": 2.5842178126442086,
      "grad_norm": 0.40977731347084045,
      "learning_rate": 1.1476481733372134e-05,
      "loss": 0.8231,
      "step": 22400
    },
    {
      "epoch": 2.584794646977388,
      "grad_norm": 0.423208624124527,
      "learning_rate": 1.1445281488919645e-05,
      "loss": 0.868,
      "step": 22405
    },
    {
      "epoch": 2.5853714813105677,
      "grad_norm": 0.4165674149990082,
      "learning_rate": 1.1414121138540279e-05,
      "loss": 0.8439,
      "step": 22410
    },
    {
      "epoch": 2.5859483156437473,
      "grad_norm": 0.5030677914619446,
      "learning_rate": 1.138300069627184e-05,
      "loss": 0.844,
      "step": 22415
    },
    {
      "epoch": 2.586525149976927,
      "grad_norm": 0.49662232398986816,
      "learning_rate": 1.1351920176134168e-05,
      "loss": 0.8879,
      "step": 22420
    },
    {
      "epoch": 2.587101984310106,
      "grad_norm": 0.40264540910720825,
      "learning_rate": 1.1320879592129052e-05,
      "loss": 0.8523,
      "step": 22425
    },
    {
      "epoch": 2.5876788186432855,
      "grad_norm": 0.4302821457386017,
      "learning_rate": 1.1289878958240364e-05,
      "loss": 0.8544,
      "step": 22430
    },
    {
      "epoch": 2.588255652976465,
      "grad_norm": 0.44366252422332764,
      "learning_rate": 1.125891828843394e-05,
      "loss": 0.8559,
      "step": 22435
    },
    {
      "epoch": 2.5888324873096447,
      "grad_norm": 0.4585106074810028,
      "learning_rate": 1.1227997596657636e-05,
      "loss": 0.8834,
      "step": 22440
    },
    {
      "epoch": 2.5894093216428242,
      "grad_norm": 0.41217270493507385,
      "learning_rate": 1.1197116896841297e-05,
      "loss": 0.8668,
      "step": 22445
    },
    {
      "epoch": 2.589986155976004,
      "grad_norm": 0.39120951294898987,
      "learning_rate": 1.1166276202896698e-05,
      "loss": 0.8655,
      "step": 22450
    },
    {
      "epoch": 2.5905629903091834,
      "grad_norm": 0.44525665044784546,
      "learning_rate": 1.1135475528717642e-05,
      "loss": 0.8944,
      "step": 22455
    },
    {
      "epoch": 2.5911398246423625,
      "grad_norm": 0.4466302990913391,
      "learning_rate": 1.1104714888179901e-05,
      "loss": 0.8592,
      "step": 22460
    },
    {
      "epoch": 2.5917166589755425,
      "grad_norm": 0.41439515352249146,
      "learning_rate": 1.107399429514121e-05,
      "loss": 0.8623,
      "step": 22465
    },
    {
      "epoch": 2.5922934933087216,
      "grad_norm": 0.41282886266708374,
      "learning_rate": 1.1043313763441277e-05,
      "loss": 0.8325,
      "step": 22470
    },
    {
      "epoch": 2.592870327641901,
      "grad_norm": 0.4122515022754669,
      "learning_rate": 1.1012673306901689e-05,
      "loss": 0.8651,
      "step": 22475
    },
    {
      "epoch": 2.5934471619750807,
      "grad_norm": 0.4221882224082947,
      "learning_rate": 1.098207293932605e-05,
      "loss": 0.8378,
      "step": 22480
    },
    {
      "epoch": 2.5940239963082603,
      "grad_norm": 0.43227624893188477,
      "learning_rate": 1.0951512674499898e-05,
      "loss": 0.8472,
      "step": 22485
    },
    {
      "epoch": 2.59460083064144,
      "grad_norm": 0.4147707223892212,
      "learning_rate": 1.092099252619071e-05,
      "loss": 0.8718,
      "step": 22490
    },
    {
      "epoch": 2.5951776649746194,
      "grad_norm": 0.45452365279197693,
      "learning_rate": 1.0890512508147821e-05,
      "loss": 0.8584,
      "step": 22495
    },
    {
      "epoch": 2.595754499307799,
      "grad_norm": 0.3966226875782013,
      "learning_rate": 1.0860072634102569e-05,
      "loss": 0.8835,
      "step": 22500
    },
    {
      "epoch": 2.596331333640978,
      "grad_norm": 0.4434650242328644,
      "learning_rate": 1.0829672917768175e-05,
      "loss": 0.8187,
      "step": 22505
    },
    {
      "epoch": 2.5969081679741577,
      "grad_norm": 0.5329386591911316,
      "learning_rate": 1.0799313372839759e-05,
      "loss": 0.9135,
      "step": 22510
    },
    {
      "epoch": 2.5974850023073373,
      "grad_norm": 0.6921077966690063,
      "learning_rate": 1.0768994012994371e-05,
      "loss": 0.8589,
      "step": 22515
    },
    {
      "epoch": 2.598061836640517,
      "grad_norm": 0.4159179627895355,
      "learning_rate": 1.0738714851890963e-05,
      "loss": 0.8675,
      "step": 22520
    },
    {
      "epoch": 2.5986386709736964,
      "grad_norm": 0.5262424945831299,
      "learning_rate": 1.07084759031703e-05,
      "loss": 0.9055,
      "step": 22525
    },
    {
      "epoch": 2.599215505306876,
      "grad_norm": 0.4461279511451721,
      "learning_rate": 1.0678277180455109e-05,
      "loss": 0.835,
      "step": 22530
    },
    {
      "epoch": 2.5997923396400555,
      "grad_norm": 0.4341142177581787,
      "learning_rate": 1.0648118697349996e-05,
      "loss": 0.8783,
      "step": 22535
    },
    {
      "epoch": 2.6003691739732346,
      "grad_norm": 0.4575253427028656,
      "learning_rate": 1.0618000467441402e-05,
      "loss": 0.9055,
      "step": 22540
    },
    {
      "epoch": 2.6009460083064146,
      "grad_norm": 0.4164150059223175,
      "learning_rate": 1.0587922504297642e-05,
      "loss": 0.868,
      "step": 22545
    },
    {
      "epoch": 2.6015228426395938,
      "grad_norm": 0.431892067193985,
      "learning_rate": 1.0557884821468899e-05,
      "loss": 0.8345,
      "step": 22550
    },
    {
      "epoch": 2.6020996769727733,
      "grad_norm": 0.48790407180786133,
      "learning_rate": 1.0527887432487204e-05,
      "loss": 0.8831,
      "step": 22555
    },
    {
      "epoch": 2.602676511305953,
      "grad_norm": 0.44928449392318726,
      "learning_rate": 1.049793035086647e-05,
      "loss": 0.838,
      "step": 22560
    },
    {
      "epoch": 2.6032533456391325,
      "grad_norm": 0.3979651927947998,
      "learning_rate": 1.0468013590102355e-05,
      "loss": 0.8959,
      "step": 22565
    },
    {
      "epoch": 2.603830179972312,
      "grad_norm": 0.4415774345397949,
      "learning_rate": 1.043813716367249e-05,
      "loss": 0.8473,
      "step": 22570
    },
    {
      "epoch": 2.6044070143054916,
      "grad_norm": 0.4301419258117676,
      "learning_rate": 1.040830108503622e-05,
      "loss": 0.8718,
      "step": 22575
    },
    {
      "epoch": 2.604983848638671,
      "grad_norm": 0.4541887640953064,
      "learning_rate": 1.0378505367634794e-05,
      "loss": 0.8461,
      "step": 22580
    },
    {
      "epoch": 2.6055606829718503,
      "grad_norm": 0.4886419177055359,
      "learning_rate": 1.0348750024891162e-05,
      "loss": 0.8319,
      "step": 22585
    },
    {
      "epoch": 2.60613751730503,
      "grad_norm": 0.4782902002334595,
      "learning_rate": 1.031903507021027e-05,
      "loss": 0.877,
      "step": 22590
    },
    {
      "epoch": 2.6067143516382094,
      "grad_norm": 0.45295077562332153,
      "learning_rate": 1.0289360516978686e-05,
      "loss": 0.856,
      "step": 22595
    },
    {
      "epoch": 2.607291185971389,
      "grad_norm": 0.4169815480709076,
      "learning_rate": 1.0259726378564871e-05,
      "loss": 0.8554,
      "step": 22600
    },
    {
      "epoch": 2.6078680203045685,
      "grad_norm": 0.46750664710998535,
      "learning_rate": 1.0230132668319082e-05,
      "loss": 0.911,
      "step": 22605
    },
    {
      "epoch": 2.608444854637748,
      "grad_norm": 0.41136133670806885,
      "learning_rate": 1.020057939957334e-05,
      "loss": 0.8669,
      "step": 22610
    },
    {
      "epoch": 2.6090216889709277,
      "grad_norm": 0.46126261353492737,
      "learning_rate": 1.017106658564142e-05,
      "loss": 0.8655,
      "step": 22615
    },
    {
      "epoch": 2.609598523304107,
      "grad_norm": 0.4138302206993103,
      "learning_rate": 1.014159423981893e-05,
      "loss": 0.8647,
      "step": 22620
    },
    {
      "epoch": 2.610175357637287,
      "grad_norm": 0.4453456401824951,
      "learning_rate": 1.0112162375383205e-05,
      "loss": 0.8721,
      "step": 22625
    },
    {
      "epoch": 2.610752191970466,
      "grad_norm": 0.4735527038574219,
      "learning_rate": 1.0082771005593394e-05,
      "loss": 0.8572,
      "step": 22630
    },
    {
      "epoch": 2.6113290263036455,
      "grad_norm": 0.4040181338787079,
      "learning_rate": 1.0053420143690284e-05,
      "loss": 0.8589,
      "step": 22635
    },
    {
      "epoch": 2.611905860636825,
      "grad_norm": 0.44362005591392517,
      "learning_rate": 1.0024109802896597e-05,
      "loss": 0.9095,
      "step": 22640
    },
    {
      "epoch": 2.6124826949700046,
      "grad_norm": 0.43080490827560425,
      "learning_rate": 9.994839996416628e-06,
      "loss": 0.8964,
      "step": 22645
    },
    {
      "epoch": 2.613059529303184,
      "grad_norm": 0.4174696207046509,
      "learning_rate": 9.965610737436515e-06,
      "loss": 0.8707,
      "step": 22650
    },
    {
      "epoch": 2.6136363636363638,
      "grad_norm": 0.40807998180389404,
      "learning_rate": 9.936422039124049e-06,
      "loss": 0.8727,
      "step": 22655
    },
    {
      "epoch": 2.6142131979695433,
      "grad_norm": 0.46339353919029236,
      "learning_rate": 9.907273914628879e-06,
      "loss": 0.8609,
      "step": 22660
    },
    {
      "epoch": 2.6147900323027224,
      "grad_norm": 0.4206438958644867,
      "learning_rate": 9.87816637708221e-06,
      "loss": 0.857,
      "step": 22665
    },
    {
      "epoch": 2.615366866635902,
      "grad_norm": 0.45879271626472473,
      "learning_rate": 9.849099439597087e-06,
      "loss": 0.8676,
      "step": 22670
    },
    {
      "epoch": 2.6159437009690816,
      "grad_norm": 0.4163302481174469,
      "learning_rate": 9.820073115268213e-06,
      "loss": 0.8893,
      "step": 22675
    },
    {
      "epoch": 2.616520535302261,
      "grad_norm": 0.4696640074253082,
      "learning_rate": 9.791087417172019e-06,
      "loss": 0.9178,
      "step": 22680
    },
    {
      "epoch": 2.6170973696354407,
      "grad_norm": 0.39774516224861145,
      "learning_rate": 9.762142358366588e-06,
      "loss": 0.8646,
      "step": 22685
    },
    {
      "epoch": 2.6176742039686203,
      "grad_norm": 0.42907288670539856,
      "learning_rate": 9.733237951891728e-06,
      "loss": 0.8773,
      "step": 22690
    },
    {
      "epoch": 2.6182510383018,
      "grad_norm": 0.4168655574321747,
      "learning_rate": 9.704374210768952e-06,
      "loss": 0.8637,
      "step": 22695
    },
    {
      "epoch": 2.618827872634979,
      "grad_norm": 0.4312590956687927,
      "learning_rate": 9.675551148001439e-06,
      "loss": 0.8503,
      "step": 22700
    },
    {
      "epoch": 2.619404706968159,
      "grad_norm": 0.4251955449581146,
      "learning_rate": 9.646768776574e-06,
      "loss": 0.8621,
      "step": 22705
    },
    {
      "epoch": 2.619981541301338,
      "grad_norm": 0.4650317430496216,
      "learning_rate": 9.618027109453176e-06,
      "loss": 0.8369,
      "step": 22710
    },
    {
      "epoch": 2.6205583756345177,
      "grad_norm": 0.39958468079566956,
      "learning_rate": 9.58932615958712e-06,
      "loss": 0.8926,
      "step": 22715
    },
    {
      "epoch": 2.621135209967697,
      "grad_norm": 0.4295012652873993,
      "learning_rate": 9.560665939905711e-06,
      "loss": 0.8615,
      "step": 22720
    },
    {
      "epoch": 2.621712044300877,
      "grad_norm": 0.3877478539943695,
      "learning_rate": 9.532046463320365e-06,
      "loss": 0.8575,
      "step": 22725
    },
    {
      "epoch": 2.6222888786340564,
      "grad_norm": 0.4100772738456726,
      "learning_rate": 9.503467742724292e-06,
      "loss": 0.9196,
      "step": 22730
    },
    {
      "epoch": 2.622865712967236,
      "grad_norm": 0.4218401312828064,
      "learning_rate": 9.474929790992193e-06,
      "loss": 0.8966,
      "step": 22735
    },
    {
      "epoch": 2.6234425473004155,
      "grad_norm": 0.4165334701538086,
      "learning_rate": 9.446432620980517e-06,
      "loss": 0.8652,
      "step": 22740
    },
    {
      "epoch": 2.6240193816335946,
      "grad_norm": 0.36938929557800293,
      "learning_rate": 9.417976245527238e-06,
      "loss": 0.8635,
      "step": 22745
    },
    {
      "epoch": 2.6245962159667746,
      "grad_norm": 0.45037856698036194,
      "learning_rate": 9.389560677452092e-06,
      "loss": 0.8718,
      "step": 22750
    },
    {
      "epoch": 2.6251730502999537,
      "grad_norm": 0.5322123765945435,
      "learning_rate": 9.361185929556282e-06,
      "loss": 0.8919,
      "step": 22755
    },
    {
      "epoch": 2.6257498846331333,
      "grad_norm": 0.4363367557525635,
      "learning_rate": 9.332852014622706e-06,
      "loss": 0.8882,
      "step": 22760
    },
    {
      "epoch": 2.626326718966313,
      "grad_norm": 0.3961765170097351,
      "learning_rate": 9.304558945415842e-06,
      "loss": 0.8816,
      "step": 22765
    },
    {
      "epoch": 2.6269035532994924,
      "grad_norm": 0.4418666660785675,
      "learning_rate": 9.276306734681805e-06,
      "loss": 0.851,
      "step": 22770
    },
    {
      "epoch": 2.627480387632672,
      "grad_norm": 0.4532480239868164,
      "learning_rate": 9.248095395148226e-06,
      "loss": 0.8243,
      "step": 22775
    },
    {
      "epoch": 2.6280572219658516,
      "grad_norm": 0.45740392804145813,
      "learning_rate": 9.219924939524383e-06,
      "loss": 0.901,
      "step": 22780
    },
    {
      "epoch": 2.628634056299031,
      "grad_norm": 0.4594760239124298,
      "learning_rate": 9.191795380501134e-06,
      "loss": 0.8952,
      "step": 22785
    },
    {
      "epoch": 2.6292108906322103,
      "grad_norm": 0.41434475779533386,
      "learning_rate": 9.163706730750909e-06,
      "loss": 0.8527,
      "step": 22790
    },
    {
      "epoch": 2.62978772496539,
      "grad_norm": 0.40205803513526917,
      "learning_rate": 9.135659002927643e-06,
      "loss": 0.875,
      "step": 22795
    },
    {
      "epoch": 2.6303645592985694,
      "grad_norm": 0.4774476885795593,
      "learning_rate": 9.107652209666973e-06,
      "loss": 0.8499,
      "step": 22800
    },
    {
      "epoch": 2.630941393631749,
      "grad_norm": 0.42177462577819824,
      "learning_rate": 9.079686363585949e-06,
      "loss": 0.8989,
      "step": 22805
    },
    {
      "epoch": 2.6315182279649285,
      "grad_norm": 0.4283266067504883,
      "learning_rate": 9.051761477283283e-06,
      "loss": 0.8996,
      "step": 22810
    },
    {
      "epoch": 2.632095062298108,
      "grad_norm": 0.38501039147377014,
      "learning_rate": 9.023877563339134e-06,
      "loss": 0.8513,
      "step": 22815
    },
    {
      "epoch": 2.6326718966312876,
      "grad_norm": 0.43188706040382385,
      "learning_rate": 8.996034634315332e-06,
      "loss": 0.9017,
      "step": 22820
    },
    {
      "epoch": 2.6332487309644668,
      "grad_norm": 0.43529829382896423,
      "learning_rate": 8.968232702755119e-06,
      "loss": 0.8545,
      "step": 22825
    },
    {
      "epoch": 2.6338255652976468,
      "grad_norm": 0.3926048278808594,
      "learning_rate": 8.940471781183335e-06,
      "loss": 0.8873,
      "step": 22830
    },
    {
      "epoch": 2.634402399630826,
      "grad_norm": 0.437465101480484,
      "learning_rate": 8.912751882106318e-06,
      "loss": 0.8341,
      "step": 22835
    },
    {
      "epoch": 2.6349792339640055,
      "grad_norm": 0.44119659066200256,
      "learning_rate": 8.88507301801198e-06,
      "loss": 0.9208,
      "step": 22840
    },
    {
      "epoch": 2.635556068297185,
      "grad_norm": 0.4317054748535156,
      "learning_rate": 8.857435201369645e-06,
      "loss": 0.8602,
      "step": 22845
    },
    {
      "epoch": 2.6361329026303646,
      "grad_norm": 0.40922558307647705,
      "learning_rate": 8.829838444630234e-06,
      "loss": 0.8744,
      "step": 22850
    },
    {
      "epoch": 2.636709736963544,
      "grad_norm": 0.4548785984516144,
      "learning_rate": 8.802282760226132e-06,
      "loss": 0.8525,
      "step": 22855
    },
    {
      "epoch": 2.6372865712967237,
      "grad_norm": 0.42294684052467346,
      "learning_rate": 8.774768160571257e-06,
      "loss": 0.8859,
      "step": 22860
    },
    {
      "epoch": 2.6378634056299033,
      "grad_norm": 0.4373004734516144,
      "learning_rate": 8.747294658060934e-06,
      "loss": 0.8366,
      "step": 22865
    },
    {
      "epoch": 2.6384402399630824,
      "grad_norm": 0.4728624224662781,
      "learning_rate": 8.719862265072065e-06,
      "loss": 0.8719,
      "step": 22870
    },
    {
      "epoch": 2.639017074296262,
      "grad_norm": 0.4086358845233917,
      "learning_rate": 8.692470993962987e-06,
      "loss": 0.8158,
      "step": 22875
    },
    {
      "epoch": 2.6395939086294415,
      "grad_norm": 0.4760802090167999,
      "learning_rate": 8.665120857073528e-06,
      "loss": 0.9017,
      "step": 22880
    },
    {
      "epoch": 2.640170742962621,
      "grad_norm": 0.39884287118911743,
      "learning_rate": 8.637811866724977e-06,
      "loss": 0.8547,
      "step": 22885
    },
    {
      "epoch": 2.6407475772958007,
      "grad_norm": 0.40861549973487854,
      "learning_rate": 8.610544035220103e-06,
      "loss": 0.8679,
      "step": 22890
    },
    {
      "epoch": 2.6413244116289802,
      "grad_norm": 0.43178048729896545,
      "learning_rate": 8.58331737484308e-06,
      "loss": 0.8428,
      "step": 22895
    },
    {
      "epoch": 2.64190124596216,
      "grad_norm": 0.46922725439071655,
      "learning_rate": 8.556131897859587e-06,
      "loss": 0.8974,
      "step": 22900
    },
    {
      "epoch": 2.642478080295339,
      "grad_norm": 0.4884260594844818,
      "learning_rate": 8.528987616516748e-06,
      "loss": 0.8894,
      "step": 22905
    },
    {
      "epoch": 2.643054914628519,
      "grad_norm": 0.3909541368484497,
      "learning_rate": 8.501884543043114e-06,
      "loss": 0.8726,
      "step": 22910
    },
    {
      "epoch": 2.643631748961698,
      "grad_norm": 0.4605487585067749,
      "learning_rate": 8.474822689648643e-06,
      "loss": 0.841,
      "step": 22915
    },
    {
      "epoch": 2.6442085832948776,
      "grad_norm": 0.3980555832386017,
      "learning_rate": 8.44780206852478e-06,
      "loss": 0.8423,
      "step": 22920
    },
    {
      "epoch": 2.644785417628057,
      "grad_norm": 0.3951432704925537,
      "learning_rate": 8.420822691844354e-06,
      "loss": 0.8701,
      "step": 22925
    },
    {
      "epoch": 2.6453622519612368,
      "grad_norm": 0.3847266137599945,
      "learning_rate": 8.393884571761645e-06,
      "loss": 0.925,
      "step": 22930
    },
    {
      "epoch": 2.6459390862944163,
      "grad_norm": 0.414840430021286,
      "learning_rate": 8.366987720412322e-06,
      "loss": 0.8733,
      "step": 22935
    },
    {
      "epoch": 2.646515920627596,
      "grad_norm": 0.5108116269111633,
      "learning_rate": 8.340132149913448e-06,
      "loss": 0.8971,
      "step": 22940
    },
    {
      "epoch": 2.6470927549607755,
      "grad_norm": 0.4190194308757782,
      "learning_rate": 8.313317872363524e-06,
      "loss": 0.855,
      "step": 22945
    },
    {
      "epoch": 2.6476695892939546,
      "grad_norm": 0.4162110984325409,
      "learning_rate": 8.286544899842441e-06,
      "loss": 0.8875,
      "step": 22950
    },
    {
      "epoch": 2.648246423627134,
      "grad_norm": 0.459574431180954,
      "learning_rate": 8.259813244411463e-06,
      "loss": 0.9131,
      "step": 22955
    },
    {
      "epoch": 2.6488232579603137,
      "grad_norm": 0.4537857472896576,
      "learning_rate": 8.233122918113278e-06,
      "loss": 0.8846,
      "step": 22960
    },
    {
      "epoch": 2.6494000922934933,
      "grad_norm": 0.42362692952156067,
      "learning_rate": 8.206473932971903e-06,
      "loss": 0.8929,
      "step": 22965
    },
    {
      "epoch": 2.649976926626673,
      "grad_norm": 0.42241352796554565,
      "learning_rate": 8.179866300992756e-06,
      "loss": 0.8383,
      "step": 22970
    },
    {
      "epoch": 2.6505537609598524,
      "grad_norm": 0.43495357036590576,
      "learning_rate": 8.153300034162637e-06,
      "loss": 0.8805,
      "step": 22975
    },
    {
      "epoch": 2.651130595293032,
      "grad_norm": 0.43217211961746216,
      "learning_rate": 8.126775144449705e-06,
      "loss": 0.8806,
      "step": 22980
    },
    {
      "epoch": 2.651707429626211,
      "grad_norm": 0.4123685956001282,
      "learning_rate": 8.100291643803493e-06,
      "loss": 0.8525,
      "step": 22985
    },
    {
      "epoch": 2.652284263959391,
      "grad_norm": 0.4632377326488495,
      "learning_rate": 8.07384954415482e-06,
      "loss": 0.9339,
      "step": 22990
    },
    {
      "epoch": 2.65286109829257,
      "grad_norm": 0.44759467244148254,
      "learning_rate": 8.04744885741593e-06,
      "loss": 0.8938,
      "step": 22995
    },
    {
      "epoch": 2.65343793262575,
      "grad_norm": 0.46450936794281006,
      "learning_rate": 8.021089595480391e-06,
      "loss": 0.8825,
      "step": 23000
    },
    {
      "epoch": 2.6540147669589293,
      "grad_norm": 0.3885645568370819,
      "learning_rate": 7.994771770223108e-06,
      "loss": 0.8601,
      "step": 23005
    },
    {
      "epoch": 2.654591601292109,
      "grad_norm": 0.4260353446006775,
      "learning_rate": 7.968495393500285e-06,
      "loss": 0.8432,
      "step": 23010
    },
    {
      "epoch": 2.6551684356252885,
      "grad_norm": 0.49735400080680847,
      "learning_rate": 7.9422604771495e-06,
      "loss": 0.8832,
      "step": 23015
    },
    {
      "epoch": 2.655745269958468,
      "grad_norm": 0.4212798476219177,
      "learning_rate": 7.916067032989628e-06,
      "loss": 0.884,
      "step": 23020
    },
    {
      "epoch": 2.6563221042916476,
      "grad_norm": 0.4518694281578064,
      "learning_rate": 7.889915072820874e-06,
      "loss": 0.8523,
      "step": 23025
    },
    {
      "epoch": 2.6568989386248267,
      "grad_norm": 0.41297948360443115,
      "learning_rate": 7.863804608424718e-06,
      "loss": 0.8713,
      "step": 23030
    },
    {
      "epoch": 2.6574757729580063,
      "grad_norm": 0.41061973571777344,
      "learning_rate": 7.837735651564037e-06,
      "loss": 0.8119,
      "step": 23035
    },
    {
      "epoch": 2.658052607291186,
      "grad_norm": 0.45814794301986694,
      "learning_rate": 7.811708213982883e-06,
      "loss": 0.8421,
      "step": 23040
    },
    {
      "epoch": 2.6586294416243654,
      "grad_norm": 0.4035818576812744,
      "learning_rate": 7.785722307406684e-06,
      "loss": 0.9158,
      "step": 23045
    },
    {
      "epoch": 2.659206275957545,
      "grad_norm": 0.46518614888191223,
      "learning_rate": 7.759777943542157e-06,
      "loss": 0.88,
      "step": 23050
    },
    {
      "epoch": 2.6597831102907246,
      "grad_norm": 0.47394055128097534,
      "learning_rate": 7.733875134077307e-06,
      "loss": 0.8324,
      "step": 23055
    },
    {
      "epoch": 2.660359944623904,
      "grad_norm": 0.44765111804008484,
      "learning_rate": 7.708013890681343e-06,
      "loss": 0.9019,
      "step": 23060
    },
    {
      "epoch": 2.6609367789570832,
      "grad_norm": 0.43634724617004395,
      "learning_rate": 7.682194225004845e-06,
      "loss": 0.8848,
      "step": 23065
    },
    {
      "epoch": 2.6615136132902633,
      "grad_norm": 0.43800708651542664,
      "learning_rate": 7.656416148679612e-06,
      "loss": 0.9194,
      "step": 23070
    },
    {
      "epoch": 2.6620904476234424,
      "grad_norm": 0.42345482110977173,
      "learning_rate": 7.630679673318742e-06,
      "loss": 0.9358,
      "step": 23075
    },
    {
      "epoch": 2.662667281956622,
      "grad_norm": 0.4679677486419678,
      "learning_rate": 7.604984810516514e-06,
      "loss": 0.9458,
      "step": 23080
    },
    {
      "epoch": 2.6632441162898015,
      "grad_norm": 0.4194246530532837,
      "learning_rate": 7.579331571848569e-06,
      "loss": 0.8814,
      "step": 23085
    },
    {
      "epoch": 2.663820950622981,
      "grad_norm": 0.3922426402568817,
      "learning_rate": 7.5537199688716975e-06,
      "loss": 0.8442,
      "step": 23090
    },
    {
      "epoch": 2.6643977849561606,
      "grad_norm": 0.42661118507385254,
      "learning_rate": 7.528150013124024e-06,
      "loss": 0.8797,
      "step": 23095
    },
    {
      "epoch": 2.66497461928934,
      "grad_norm": 0.41264429688453674,
      "learning_rate": 7.502621716124791e-06,
      "loss": 0.8877,
      "step": 23100
    },
    {
      "epoch": 2.6655514536225198,
      "grad_norm": 0.3934708833694458,
      "learning_rate": 7.477135089374631e-06,
      "loss": 0.8582,
      "step": 23105
    },
    {
      "epoch": 2.666128287955699,
      "grad_norm": 0.4392053782939911,
      "learning_rate": 7.451690144355261e-06,
      "loss": 0.8686,
      "step": 23110
    },
    {
      "epoch": 2.666705122288879,
      "grad_norm": 0.44735977053642273,
      "learning_rate": 7.4262868925296995e-06,
      "loss": 0.8693,
      "step": 23115
    },
    {
      "epoch": 2.667281956622058,
      "grad_norm": 0.41240233182907104,
      "learning_rate": 7.400925345342147e-06,
      "loss": 0.8983,
      "step": 23120
    },
    {
      "epoch": 2.6678587909552376,
      "grad_norm": 0.42448821663856506,
      "learning_rate": 7.375605514218065e-06,
      "loss": 0.8409,
      "step": 23125
    },
    {
      "epoch": 2.668435625288417,
      "grad_norm": 0.4222618341445923,
      "learning_rate": 7.35032741056404e-06,
      "loss": 0.8585,
      "step": 23130
    },
    {
      "epoch": 2.6690124596215967,
      "grad_norm": 0.4419105648994446,
      "learning_rate": 7.32509104576794e-06,
      "loss": 0.8444,
      "step": 23135
    },
    {
      "epoch": 2.6695892939547763,
      "grad_norm": 0.4208768308162689,
      "learning_rate": 7.299896431198772e-06,
      "loss": 0.8427,
      "step": 23140
    },
    {
      "epoch": 2.670166128287956,
      "grad_norm": 0.41719236969947815,
      "learning_rate": 7.274743578206788e-06,
      "loss": 0.8716,
      "step": 23145
    },
    {
      "epoch": 2.6707429626211354,
      "grad_norm": 0.4166914224624634,
      "learning_rate": 7.24963249812336e-06,
      "loss": 0.857,
      "step": 23150
    },
    {
      "epoch": 2.6713197969543145,
      "grad_norm": 0.45486098527908325,
      "learning_rate": 7.224563202261125e-06,
      "loss": 0.8793,
      "step": 23155
    },
    {
      "epoch": 2.671896631287494,
      "grad_norm": 0.39285019040107727,
      "learning_rate": 7.199535701913806e-06,
      "loss": 0.8704,
      "step": 23160
    },
    {
      "epoch": 2.6724734656206737,
      "grad_norm": 0.3915071487426758,
      "learning_rate": 7.174550008356374e-06,
      "loss": 0.9232,
      "step": 23165
    },
    {
      "epoch": 2.6730502999538532,
      "grad_norm": 0.40840792655944824,
      "learning_rate": 7.149606132844888e-06,
      "loss": 0.8931,
      "step": 23170
    },
    {
      "epoch": 2.673627134287033,
      "grad_norm": 0.42678335309028625,
      "learning_rate": 7.124704086616684e-06,
      "loss": 0.8597,
      "step": 23175
    },
    {
      "epoch": 2.6742039686202124,
      "grad_norm": 0.44222939014434814,
      "learning_rate": 7.0998438808901115e-06,
      "loss": 0.8811,
      "step": 23180
    },
    {
      "epoch": 2.674780802953392,
      "grad_norm": 0.4381917417049408,
      "learning_rate": 7.075025526864798e-06,
      "loss": 0.924,
      "step": 23185
    },
    {
      "epoch": 2.675357637286571,
      "grad_norm": 0.3970070481300354,
      "learning_rate": 7.05024903572139e-06,
      "loss": 0.8886,
      "step": 23190
    },
    {
      "epoch": 2.675934471619751,
      "grad_norm": 0.4008063077926636,
      "learning_rate": 7.025514418621826e-06,
      "loss": 0.8771,
      "step": 23195
    },
    {
      "epoch": 2.67651130595293,
      "grad_norm": 0.4886510372161865,
      "learning_rate": 7.000821686709036e-06,
      "loss": 0.8932,
      "step": 23200
    },
    {
      "epoch": 2.6770881402861098,
      "grad_norm": 0.4655059576034546,
      "learning_rate": 6.976170851107178e-06,
      "loss": 0.8692,
      "step": 23205
    },
    {
      "epoch": 2.6776649746192893,
      "grad_norm": 0.5062782168388367,
      "learning_rate": 6.95156192292149e-06,
      "loss": 0.9053,
      "step": 23210
    },
    {
      "epoch": 2.678241808952469,
      "grad_norm": 0.41502344608306885,
      "learning_rate": 6.9269949132383606e-06,
      "loss": 0.7778,
      "step": 23215
    },
    {
      "epoch": 2.6788186432856484,
      "grad_norm": 0.45568451285362244,
      "learning_rate": 6.902469833125236e-06,
      "loss": 0.8275,
      "step": 23220
    },
    {
      "epoch": 2.679395477618828,
      "grad_norm": 0.4134273827075958,
      "learning_rate": 6.877986693630745e-06,
      "loss": 0.8874,
      "step": 23225
    },
    {
      "epoch": 2.6799723119520076,
      "grad_norm": 0.39337506890296936,
      "learning_rate": 6.853545505784575e-06,
      "loss": 0.8619,
      "step": 23230
    },
    {
      "epoch": 2.6805491462851867,
      "grad_norm": 0.436361163854599,
      "learning_rate": 6.8291462805975535e-06,
      "loss": 0.8283,
      "step": 23235
    },
    {
      "epoch": 2.6811259806183663,
      "grad_norm": 0.4170958995819092,
      "learning_rate": 6.804789029061531e-06,
      "loss": 0.8482,
      "step": 23240
    },
    {
      "epoch": 2.681702814951546,
      "grad_norm": 0.3744247853755951,
      "learning_rate": 6.780473762149553e-06,
      "loss": 0.8689,
      "step": 23245
    },
    {
      "epoch": 2.6822796492847254,
      "grad_norm": 0.45442360639572144,
      "learning_rate": 6.756200490815645e-06,
      "loss": 0.8552,
      "step": 23250
    },
    {
      "epoch": 2.682856483617905,
      "grad_norm": 0.4478665888309479,
      "learning_rate": 6.731969225995005e-06,
      "loss": 0.87,
      "step": 23255
    },
    {
      "epoch": 2.6834333179510845,
      "grad_norm": 0.4316500723361969,
      "learning_rate": 6.707779978603823e-06,
      "loss": 0.8455,
      "step": 23260
    },
    {
      "epoch": 2.684010152284264,
      "grad_norm": 0.40314388275146484,
      "learning_rate": 6.683632759539449e-06,
      "loss": 0.8341,
      "step": 23265
    },
    {
      "epoch": 2.684586986617443,
      "grad_norm": 0.4224727153778076,
      "learning_rate": 6.659527579680203e-06,
      "loss": 0.8857,
      "step": 23270
    },
    {
      "epoch": 2.6851638209506232,
      "grad_norm": 0.3884515166282654,
      "learning_rate": 6.635464449885542e-06,
      "loss": 0.834,
      "step": 23275
    },
    {
      "epoch": 2.6857406552838023,
      "grad_norm": 0.464347243309021,
      "learning_rate": 6.611443380995963e-06,
      "loss": 0.8886,
      "step": 23280
    },
    {
      "epoch": 2.686317489616982,
      "grad_norm": 0.4518876075744629,
      "learning_rate": 6.587464383832998e-06,
      "loss": 0.927,
      "step": 23285
    },
    {
      "epoch": 2.6868943239501615,
      "grad_norm": 0.40290123224258423,
      "learning_rate": 6.563527469199205e-06,
      "loss": 0.8613,
      "step": 23290
    },
    {
      "epoch": 2.687471158283341,
      "grad_norm": 0.4083547294139862,
      "learning_rate": 6.5396326478782465e-06,
      "loss": 0.858,
      "step": 23295
    },
    {
      "epoch": 2.6880479926165206,
      "grad_norm": 0.45641326904296875,
      "learning_rate": 6.515779930634757e-06,
      "loss": 0.8951,
      "step": 23300
    },
    {
      "epoch": 2.6886248269497,
      "grad_norm": 0.4491538107395172,
      "learning_rate": 6.491969328214464e-06,
      "loss": 0.8756,
      "step": 23305
    },
    {
      "epoch": 2.6892016612828797,
      "grad_norm": 0.3983515202999115,
      "learning_rate": 6.468200851344042e-06,
      "loss": 0.8785,
      "step": 23310
    },
    {
      "epoch": 2.689778495616059,
      "grad_norm": 0.4467414319515228,
      "learning_rate": 6.444474510731302e-06,
      "loss": 0.8429,
      "step": 23315
    },
    {
      "epoch": 2.6903553299492384,
      "grad_norm": 0.4083499610424042,
      "learning_rate": 6.42079031706495e-06,
      "loss": 0.8667,
      "step": 23320
    },
    {
      "epoch": 2.690932164282418,
      "grad_norm": 0.43478161096572876,
      "learning_rate": 6.397148281014798e-06,
      "loss": 0.8544,
      "step": 23325
    },
    {
      "epoch": 2.6915089986155976,
      "grad_norm": 0.41683241724967957,
      "learning_rate": 6.373548413231589e-06,
      "loss": 0.8836,
      "step": 23330
    },
    {
      "epoch": 2.692085832948777,
      "grad_norm": 0.4447765350341797,
      "learning_rate": 6.349990724347155e-06,
      "loss": 0.843,
      "step": 23335
    },
    {
      "epoch": 2.6926626672819567,
      "grad_norm": 0.41929274797439575,
      "learning_rate": 6.326475224974249e-06,
      "loss": 0.8597,
      "step": 23340
    },
    {
      "epoch": 2.6932395016151363,
      "grad_norm": 0.42952585220336914,
      "learning_rate": 6.303001925706664e-06,
      "loss": 0.865,
      "step": 23345
    },
    {
      "epoch": 2.6938163359483154,
      "grad_norm": 0.43239691853523254,
      "learning_rate": 6.279570837119164e-06,
      "loss": 0.9176,
      "step": 23350
    },
    {
      "epoch": 2.6943931702814954,
      "grad_norm": 0.38875019550323486,
      "learning_rate": 6.256181969767505e-06,
      "loss": 0.8258,
      "step": 23355
    },
    {
      "epoch": 2.6949700046146745,
      "grad_norm": 0.4094666540622711,
      "learning_rate": 6.2328353341884025e-06,
      "loss": 0.8804,
      "step": 23360
    },
    {
      "epoch": 2.695546838947854,
      "grad_norm": 0.3923322260379791,
      "learning_rate": 6.209530940899566e-06,
      "loss": 0.8529,
      "step": 23365
    },
    {
      "epoch": 2.6961236732810336,
      "grad_norm": 0.39394620060920715,
      "learning_rate": 6.186268800399675e-06,
      "loss": 0.8437,
      "step": 23370
    },
    {
      "epoch": 2.696700507614213,
      "grad_norm": 0.4831443428993225,
      "learning_rate": 6.163048923168391e-06,
      "loss": 0.8895,
      "step": 23375
    },
    {
      "epoch": 2.6972773419473928,
      "grad_norm": 0.4099637269973755,
      "learning_rate": 6.139871319666269e-06,
      "loss": 0.8729,
      "step": 23380
    },
    {
      "epoch": 2.6978541762805723,
      "grad_norm": 0.41623443365097046,
      "learning_rate": 6.116736000334888e-06,
      "loss": 0.8712,
      "step": 23385
    },
    {
      "epoch": 2.698431010613752,
      "grad_norm": 0.4021301865577698,
      "learning_rate": 6.0936429755967475e-06,
      "loss": 0.8658,
      "step": 23390
    },
    {
      "epoch": 2.699007844946931,
      "grad_norm": 0.4065121114253998,
      "learning_rate": 6.070592255855312e-06,
      "loss": 0.8956,
      "step": 23395
    },
    {
      "epoch": 2.6995846792801106,
      "grad_norm": 0.42467519640922546,
      "learning_rate": 6.047583851494965e-06,
      "loss": 0.8458,
      "step": 23400
    },
    {
      "epoch": 2.70016151361329,
      "grad_norm": 0.48021507263183594,
      "learning_rate": 6.024617772881058e-06,
      "loss": 0.8981,
      "step": 23405
    },
    {
      "epoch": 2.7007383479464697,
      "grad_norm": 0.42031559348106384,
      "learning_rate": 6.001694030359828e-06,
      "loss": 0.8129,
      "step": 23410
    },
    {
      "epoch": 2.7013151822796493,
      "grad_norm": 0.43568867444992065,
      "learning_rate": 5.978812634258468e-06,
      "loss": 0.9101,
      "step": 23415
    },
    {
      "epoch": 2.701892016612829,
      "grad_norm": 0.4510698616504669,
      "learning_rate": 5.955973594885111e-06,
      "loss": 0.8683,
      "step": 23420
    },
    {
      "epoch": 2.7024688509460084,
      "grad_norm": 0.4196416139602661,
      "learning_rate": 5.9331769225287825e-06,
      "loss": 0.826,
      "step": 23425
    },
    {
      "epoch": 2.703045685279188,
      "grad_norm": 0.40882837772369385,
      "learning_rate": 5.910422627459411e-06,
      "loss": 0.8191,
      "step": 23430
    },
    {
      "epoch": 2.7036225196123675,
      "grad_norm": 0.4428280293941498,
      "learning_rate": 5.887710719927853e-06,
      "loss": 0.8544,
      "step": 23435
    },
    {
      "epoch": 2.7041993539455467,
      "grad_norm": 0.40964657068252563,
      "learning_rate": 5.865041210165878e-06,
      "loss": 0.876,
      "step": 23440
    },
    {
      "epoch": 2.7047761882787262,
      "grad_norm": 0.5035736560821533,
      "learning_rate": 5.842414108386151e-06,
      "loss": 0.8761,
      "step": 23445
    },
    {
      "epoch": 2.705353022611906,
      "grad_norm": 0.416984498500824,
      "learning_rate": 5.8198294247822304e-06,
      "loss": 0.8908,
      "step": 23450
    },
    {
      "epoch": 2.7059298569450854,
      "grad_norm": 0.4158463776111603,
      "learning_rate": 5.7972871695285205e-06,
      "loss": 0.8577,
      "step": 23455
    },
    {
      "epoch": 2.706506691278265,
      "grad_norm": 0.42650651931762695,
      "learning_rate": 5.774787352780387e-06,
      "loss": 0.905,
      "step": 23460
    },
    {
      "epoch": 2.7070835256114445,
      "grad_norm": 0.4276556670665741,
      "learning_rate": 5.752329984674032e-06,
      "loss": 0.9039,
      "step": 23465
    },
    {
      "epoch": 2.707660359944624,
      "grad_norm": 0.4451688230037689,
      "learning_rate": 5.729915075326531e-06,
      "loss": 0.8185,
      "step": 23470
    },
    {
      "epoch": 2.708237194277803,
      "grad_norm": 0.40357574820518494,
      "learning_rate": 5.707542634835883e-06,
      "loss": 0.8972,
      "step": 23475
    },
    {
      "epoch": 2.708814028610983,
      "grad_norm": 0.44075727462768555,
      "learning_rate": 5.685212673280871e-06,
      "loss": 0.8165,
      "step": 23480
    },
    {
      "epoch": 2.7093908629441623,
      "grad_norm": 0.3642112612724304,
      "learning_rate": 5.662925200721203e-06,
      "loss": 0.8017,
      "step": 23485
    },
    {
      "epoch": 2.709967697277342,
      "grad_norm": 0.4557955861091614,
      "learning_rate": 5.640680227197426e-06,
      "loss": 0.8327,
      "step": 23490
    },
    {
      "epoch": 2.7105445316105214,
      "grad_norm": 0.43093356490135193,
      "learning_rate": 5.618477762730956e-06,
      "loss": 0.8696,
      "step": 23495
    },
    {
      "epoch": 2.711121365943701,
      "grad_norm": 0.4352542757987976,
      "learning_rate": 5.596317817324048e-06,
      "loss": 0.8514,
      "step": 23500
    },
    {
      "epoch": 2.7116982002768806,
      "grad_norm": 0.4431220293045044,
      "learning_rate": 5.574200400959773e-06,
      "loss": 0.8733,
      "step": 23505
    },
    {
      "epoch": 2.71227503461006,
      "grad_norm": 0.39745551347732544,
      "learning_rate": 5.552125523602092e-06,
      "loss": 0.8409,
      "step": 23510
    },
    {
      "epoch": 2.7128518689432397,
      "grad_norm": 0.44975340366363525,
      "learning_rate": 5.530093195195774e-06,
      "loss": 0.8785,
      "step": 23515
    },
    {
      "epoch": 2.713428703276419,
      "grad_norm": 0.4235374629497528,
      "learning_rate": 5.5081034256664445e-06,
      "loss": 0.8495,
      "step": 23520
    },
    {
      "epoch": 2.7140055376095984,
      "grad_norm": 0.42339780926704407,
      "learning_rate": 5.4861562249204916e-06,
      "loss": 0.8342,
      "step": 23525
    },
    {
      "epoch": 2.714582371942778,
      "grad_norm": 0.42378342151641846,
      "learning_rate": 5.464251602845238e-06,
      "loss": 0.8429,
      "step": 23530
    },
    {
      "epoch": 2.7151592062759575,
      "grad_norm": 0.43994879722595215,
      "learning_rate": 5.442389569308703e-06,
      "loss": 0.8321,
      "step": 23535
    },
    {
      "epoch": 2.715736040609137,
      "grad_norm": 0.42166078090667725,
      "learning_rate": 5.420570134159797e-06,
      "loss": 0.8969,
      "step": 23540
    },
    {
      "epoch": 2.7163128749423167,
      "grad_norm": 0.49834781885147095,
      "learning_rate": 5.3987933072282e-06,
      "loss": 0.8835,
      "step": 23545
    },
    {
      "epoch": 2.7168897092754962,
      "grad_norm": 0.470022052526474,
      "learning_rate": 5.377059098324455e-06,
      "loss": 0.833,
      "step": 23550
    },
    {
      "epoch": 2.7174665436086753,
      "grad_norm": 0.41667890548706055,
      "learning_rate": 5.355367517239829e-06,
      "loss": 0.8216,
      "step": 23555
    },
    {
      "epoch": 2.7180433779418554,
      "grad_norm": 0.39226892590522766,
      "learning_rate": 5.333718573746426e-06,
      "loss": 0.8741,
      "step": 23560
    },
    {
      "epoch": 2.7186202122750345,
      "grad_norm": 0.44253507256507874,
      "learning_rate": 5.312112277597159e-06,
      "loss": 0.8915,
      "step": 23565
    },
    {
      "epoch": 2.719197046608214,
      "grad_norm": 0.40449512004852295,
      "learning_rate": 5.290548638525694e-06,
      "loss": 0.8604,
      "step": 23570
    },
    {
      "epoch": 2.7197738809413936,
      "grad_norm": 0.43502479791641235,
      "learning_rate": 5.269027666246473e-06,
      "loss": 0.8704,
      "step": 23575
    },
    {
      "epoch": 2.720350715274573,
      "grad_norm": 0.4082026779651642,
      "learning_rate": 5.247549370454763e-06,
      "loss": 0.8301,
      "step": 23580
    },
    {
      "epoch": 2.7209275496077527,
      "grad_norm": 0.4308987557888031,
      "learning_rate": 5.2261137608265675e-06,
      "loss": 0.868,
      "step": 23585
    },
    {
      "epoch": 2.7215043839409323,
      "grad_norm": 0.4319988191127777,
      "learning_rate": 5.204720847018674e-06,
      "loss": 0.8218,
      "step": 23590
    },
    {
      "epoch": 2.722081218274112,
      "grad_norm": 0.42464616894721985,
      "learning_rate": 5.183370638668616e-06,
      "loss": 0.9011,
      "step": 23595
    },
    {
      "epoch": 2.722658052607291,
      "grad_norm": 0.3950149416923523,
      "learning_rate": 5.162063145394736e-06,
      "loss": 0.8079,
      "step": 23600
    },
    {
      "epoch": 2.7232348869404706,
      "grad_norm": 0.4139585793018341,
      "learning_rate": 5.140798376796064e-06,
      "loss": 0.8729,
      "step": 23605
    },
    {
      "epoch": 2.72381172127365,
      "grad_norm": 0.42863261699676514,
      "learning_rate": 5.119576342452459e-06,
      "loss": 0.8763,
      "step": 23610
    },
    {
      "epoch": 2.7243885556068297,
      "grad_norm": 0.4117303788661957,
      "learning_rate": 5.098397051924441e-06,
      "loss": 0.8344,
      "step": 23615
    },
    {
      "epoch": 2.7249653899400093,
      "grad_norm": 0.49408987164497375,
      "learning_rate": 5.077260514753379e-06,
      "loss": 0.9043,
      "step": 23620
    },
    {
      "epoch": 2.725542224273189,
      "grad_norm": 0.3946788012981415,
      "learning_rate": 5.056166740461265e-06,
      "loss": 0.8488,
      "step": 23625
    },
    {
      "epoch": 2.7261190586063684,
      "grad_norm": 0.38921356201171875,
      "learning_rate": 5.035115738550933e-06,
      "loss": 0.8801,
      "step": 23630
    },
    {
      "epoch": 2.7266958929395475,
      "grad_norm": 0.4450054168701172,
      "learning_rate": 5.014107518505862e-06,
      "loss": 0.8416,
      "step": 23635
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 0.41446584463119507,
      "learning_rate": 4.993142089790337e-06,
      "loss": 0.8843,
      "step": 23640
    },
    {
      "epoch": 2.7278495616059066,
      "grad_norm": 0.4286811053752899,
      "learning_rate": 4.972219461849293e-06,
      "loss": 0.8531,
      "step": 23645
    },
    {
      "epoch": 2.728426395939086,
      "grad_norm": 0.45309022068977356,
      "learning_rate": 4.951339644108422e-06,
      "loss": 0.9202,
      "step": 23650
    },
    {
      "epoch": 2.7290032302722658,
      "grad_norm": 0.4311707615852356,
      "learning_rate": 4.9305026459741224e-06,
      "loss": 0.8921,
      "step": 23655
    },
    {
      "epoch": 2.7295800646054453,
      "grad_norm": 0.4332069754600525,
      "learning_rate": 4.909708476833519e-06,
      "loss": 0.8678,
      "step": 23660
    },
    {
      "epoch": 2.730156898938625,
      "grad_norm": 0.438365638256073,
      "learning_rate": 4.888957146054407e-06,
      "loss": 0.8514,
      "step": 23665
    },
    {
      "epoch": 2.7307337332718045,
      "grad_norm": 0.4583643078804016,
      "learning_rate": 4.8682486629852975e-06,
      "loss": 0.9301,
      "step": 23670
    },
    {
      "epoch": 2.731310567604984,
      "grad_norm": 0.4006592631340027,
      "learning_rate": 4.8475830369554056e-06,
      "loss": 0.8692,
      "step": 23675
    },
    {
      "epoch": 2.731887401938163,
      "grad_norm": 0.4375672936439514,
      "learning_rate": 4.826960277274662e-06,
      "loss": 0.8924,
      "step": 23680
    },
    {
      "epoch": 2.7324642362713427,
      "grad_norm": 0.4388617277145386,
      "learning_rate": 4.8063803932336114e-06,
      "loss": 0.9116,
      "step": 23685
    },
    {
      "epoch": 2.7330410706045223,
      "grad_norm": 0.3963301479816437,
      "learning_rate": 4.785843394103584e-06,
      "loss": 0.8852,
      "step": 23690
    },
    {
      "epoch": 2.733617904937702,
      "grad_norm": 0.436459481716156,
      "learning_rate": 4.7653492891365005e-06,
      "loss": 0.8846,
      "step": 23695
    },
    {
      "epoch": 2.7341947392708814,
      "grad_norm": 0.4878678321838379,
      "learning_rate": 4.74489808756502e-06,
      "loss": 0.9057,
      "step": 23700
    },
    {
      "epoch": 2.734771573604061,
      "grad_norm": 0.4217132329940796,
      "learning_rate": 4.7244897986024165e-06,
      "loss": 0.9,
      "step": 23705
    },
    {
      "epoch": 2.7353484079372405,
      "grad_norm": 0.43869632482528687,
      "learning_rate": 4.704124431442702e-06,
      "loss": 0.912,
      "step": 23710
    },
    {
      "epoch": 2.7359252422704197,
      "grad_norm": 0.4051395654678345,
      "learning_rate": 4.683801995260484e-06,
      "loss": 0.8338,
      "step": 23715
    },
    {
      "epoch": 2.7365020766035997,
      "grad_norm": 0.4011788070201874,
      "learning_rate": 4.663522499211081e-06,
      "loss": 0.8586,
      "step": 23720
    },
    {
      "epoch": 2.737078910936779,
      "grad_norm": 0.4310953617095947,
      "learning_rate": 4.643285952430432e-06,
      "loss": 0.8509,
      "step": 23725
    },
    {
      "epoch": 2.7376557452699584,
      "grad_norm": 0.457690566778183,
      "learning_rate": 4.623092364035153e-06,
      "loss": 0.8965,
      "step": 23730
    },
    {
      "epoch": 2.738232579603138,
      "grad_norm": 0.4056735038757324,
      "learning_rate": 4.602941743122469e-06,
      "loss": 0.8441,
      "step": 23735
    },
    {
      "epoch": 2.7388094139363175,
      "grad_norm": 0.48004797101020813,
      "learning_rate": 4.5828340987703055e-06,
      "loss": 0.8624,
      "step": 23740
    },
    {
      "epoch": 2.739386248269497,
      "grad_norm": 0.4055866599082947,
      "learning_rate": 4.562769440037174e-06,
      "loss": 0.8006,
      "step": 23745
    },
    {
      "epoch": 2.7399630826026766,
      "grad_norm": 0.46271732449531555,
      "learning_rate": 4.542747775962264e-06,
      "loss": 0.9206,
      "step": 23750
    },
    {
      "epoch": 2.740539916935856,
      "grad_norm": 0.4342441260814667,
      "learning_rate": 4.5227691155653284e-06,
      "loss": 0.8854,
      "step": 23755
    },
    {
      "epoch": 2.7411167512690353,
      "grad_norm": 0.40607601404190063,
      "learning_rate": 4.502833467846857e-06,
      "loss": 0.8357,
      "step": 23760
    },
    {
      "epoch": 2.741693585602215,
      "grad_norm": 0.39423415064811707,
      "learning_rate": 4.4829408417878526e-06,
      "loss": 0.8733,
      "step": 23765
    },
    {
      "epoch": 2.7422704199353944,
      "grad_norm": 0.42036357522010803,
      "learning_rate": 4.4630912463500045e-06,
      "loss": 0.8773,
      "step": 23770
    },
    {
      "epoch": 2.742847254268574,
      "grad_norm": 0.4077562689781189,
      "learning_rate": 4.443284690475558e-06,
      "loss": 0.8332,
      "step": 23775
    },
    {
      "epoch": 2.7434240886017536,
      "grad_norm": 0.4172145426273346,
      "learning_rate": 4.423521183087453e-06,
      "loss": 0.8823,
      "step": 23780
    },
    {
      "epoch": 2.744000922934933,
      "grad_norm": 0.4038824141025543,
      "learning_rate": 4.40380073308917e-06,
      "loss": 0.8898,
      "step": 23785
    },
    {
      "epoch": 2.7445777572681127,
      "grad_norm": 0.4768836498260498,
      "learning_rate": 4.384123349364788e-06,
      "loss": 0.8761,
      "step": 23790
    },
    {
      "epoch": 2.7451545916012923,
      "grad_norm": 0.41172316670417786,
      "learning_rate": 4.364489040779029e-06,
      "loss": 0.8762,
      "step": 23795
    },
    {
      "epoch": 2.745731425934472,
      "grad_norm": 0.42793089151382446,
      "learning_rate": 4.344897816177207e-06,
      "loss": 0.8787,
      "step": 23800
    },
    {
      "epoch": 2.746308260267651,
      "grad_norm": 0.39413848519325256,
      "learning_rate": 4.32534968438516e-06,
      "loss": 0.8793,
      "step": 23805
    },
    {
      "epoch": 2.7468850946008305,
      "grad_norm": 0.6779045462608337,
      "learning_rate": 4.30584465420939e-06,
      "loss": 0.8953,
      "step": 23810
    },
    {
      "epoch": 2.74746192893401,
      "grad_norm": 0.4348139762878418,
      "learning_rate": 4.286382734436933e-06,
      "loss": 0.8643,
      "step": 23815
    },
    {
      "epoch": 2.7480387632671897,
      "grad_norm": 0.45245304703712463,
      "learning_rate": 4.266963933835455e-06,
      "loss": 0.8175,
      "step": 23820
    },
    {
      "epoch": 2.748615597600369,
      "grad_norm": 0.44413793087005615,
      "learning_rate": 4.2475882611531235e-06,
      "loss": 0.8779,
      "step": 23825
    },
    {
      "epoch": 2.749192431933549,
      "grad_norm": 0.41267192363739014,
      "learning_rate": 4.228255725118735e-06,
      "loss": 0.8591,
      "step": 23830
    },
    {
      "epoch": 2.7497692662667284,
      "grad_norm": 0.40713363885879517,
      "learning_rate": 4.208966334441633e-06,
      "loss": 0.8831,
      "step": 23835
    },
    {
      "epoch": 2.7503461005999075,
      "grad_norm": 0.4391236901283264,
      "learning_rate": 4.189720097811745e-06,
      "loss": 0.8798,
      "step": 23840
    },
    {
      "epoch": 2.7509229349330875,
      "grad_norm": 0.3802451491355896,
      "learning_rate": 4.1705170238994894e-06,
      "loss": 0.9073,
      "step": 23845
    },
    {
      "epoch": 2.7514997692662666,
      "grad_norm": 0.41070911288261414,
      "learning_rate": 4.151357121355947e-06,
      "loss": 0.8754,
      "step": 23850
    },
    {
      "epoch": 2.752076603599446,
      "grad_norm": 0.4319693446159363,
      "learning_rate": 4.132240398812648e-06,
      "loss": 0.8826,
      "step": 23855
    },
    {
      "epoch": 2.7526534379326257,
      "grad_norm": 0.40889811515808105,
      "learning_rate": 4.113166864881723e-06,
      "loss": 0.941,
      "step": 23860
    },
    {
      "epoch": 2.7532302722658053,
      "grad_norm": 0.46315550804138184,
      "learning_rate": 4.0941365281558454e-06,
      "loss": 0.8335,
      "step": 23865
    },
    {
      "epoch": 2.753807106598985,
      "grad_norm": 0.4564089775085449,
      "learning_rate": 4.075149397208222e-06,
      "loss": 0.8596,
      "step": 23870
    },
    {
      "epoch": 2.7543839409321644,
      "grad_norm": 0.4790186882019043,
      "learning_rate": 4.056205480592579e-06,
      "loss": 0.8531,
      "step": 23875
    },
    {
      "epoch": 2.754960775265344,
      "grad_norm": 0.3872862458229065,
      "learning_rate": 4.037304786843188e-06,
      "loss": 0.8556,
      "step": 23880
    },
    {
      "epoch": 2.755537609598523,
      "grad_norm": 0.4773821234703064,
      "learning_rate": 4.018447324474861e-06,
      "loss": 0.8746,
      "step": 23885
    },
    {
      "epoch": 2.7561144439317027,
      "grad_norm": 0.4576581120491028,
      "learning_rate": 3.9996331019829245e-06,
      "loss": 0.8682,
      "step": 23890
    },
    {
      "epoch": 2.7566912782648822,
      "grad_norm": 0.3763803541660309,
      "learning_rate": 3.980862127843199e-06,
      "loss": 0.8513,
      "step": 23895
    },
    {
      "epoch": 2.757268112598062,
      "grad_norm": 0.426574170589447,
      "learning_rate": 3.962134410512064e-06,
      "loss": 0.8845,
      "step": 23900
    },
    {
      "epoch": 2.7578449469312414,
      "grad_norm": 0.4308461844921112,
      "learning_rate": 3.9434499584263705e-06,
      "loss": 0.8892,
      "step": 23905
    },
    {
      "epoch": 2.758421781264421,
      "grad_norm": 0.40955811738967896,
      "learning_rate": 3.924808780003531e-06,
      "loss": 0.9009,
      "step": 23910
    },
    {
      "epoch": 2.7589986155976005,
      "grad_norm": 0.38951027393341064,
      "learning_rate": 3.906210883641415e-06,
      "loss": 0.836,
      "step": 23915
    },
    {
      "epoch": 2.7595754499307796,
      "grad_norm": 0.4530879259109497,
      "learning_rate": 3.887656277718432e-06,
      "loss": 0.858,
      "step": 23920
    },
    {
      "epoch": 2.7601522842639596,
      "grad_norm": 0.4421103000640869,
      "learning_rate": 3.86914497059343e-06,
      "loss": 0.8089,
      "step": 23925
    },
    {
      "epoch": 2.7607291185971388,
      "grad_norm": 0.4470920264720917,
      "learning_rate": 3.850676970605815e-06,
      "loss": 0.8756,
      "step": 23930
    },
    {
      "epoch": 2.7613059529303183,
      "grad_norm": 0.4560685157775879,
      "learning_rate": 3.832252286075444e-06,
      "loss": 0.8843,
      "step": 23935
    },
    {
      "epoch": 2.761882787263498,
      "grad_norm": 0.44257691502571106,
      "learning_rate": 3.813870925302698e-06,
      "loss": 0.8533,
      "step": 23940
    },
    {
      "epoch": 2.7624596215966775,
      "grad_norm": 0.42879223823547363,
      "learning_rate": 3.7955328965683877e-06,
      "loss": 0.8343,
      "step": 23945
    },
    {
      "epoch": 2.763036455929857,
      "grad_norm": 0.4552403688430786,
      "learning_rate": 3.7772382081338377e-06,
      "loss": 0.9233,
      "step": 23950
    },
    {
      "epoch": 2.7636132902630366,
      "grad_norm": 0.4159514009952545,
      "learning_rate": 3.7589868682408434e-06,
      "loss": 0.8426,
      "step": 23955
    },
    {
      "epoch": 2.764190124596216,
      "grad_norm": 0.4519195854663849,
      "learning_rate": 3.7407788851116845e-06,
      "loss": 0.9192,
      "step": 23960
    },
    {
      "epoch": 2.7647669589293953,
      "grad_norm": 0.40860262513160706,
      "learning_rate": 3.722614266949076e-06,
      "loss": 0.8953,
      "step": 23965
    },
    {
      "epoch": 2.765343793262575,
      "grad_norm": 0.4030444622039795,
      "learning_rate": 3.7044930219362063e-06,
      "loss": 0.8673,
      "step": 23970
    },
    {
      "epoch": 2.7659206275957544,
      "grad_norm": 0.4095665216445923,
      "learning_rate": 3.6864151582367446e-06,
      "loss": 0.8867,
      "step": 23975
    },
    {
      "epoch": 2.766497461928934,
      "grad_norm": 0.4742783308029175,
      "learning_rate": 3.668380683994799e-06,
      "loss": 0.8197,
      "step": 23980
    },
    {
      "epoch": 2.7670742962621135,
      "grad_norm": 0.41096723079681396,
      "learning_rate": 3.6503896073349587e-06,
      "loss": 0.8778,
      "step": 23985
    },
    {
      "epoch": 2.767651130595293,
      "grad_norm": 0.38448628783226013,
      "learning_rate": 3.632441936362174e-06,
      "loss": 0.8749,
      "step": 23990
    },
    {
      "epoch": 2.7682279649284727,
      "grad_norm": 0.38678210973739624,
      "learning_rate": 3.614537679161989e-06,
      "loss": 0.8455,
      "step": 23995
    },
    {
      "epoch": 2.768804799261652,
      "grad_norm": 0.37491893768310547,
      "learning_rate": 3.5966768438002507e-06,
      "loss": 0.8531,
      "step": 24000
    },
    {
      "epoch": 2.769381633594832,
      "grad_norm": 0.41927438974380493,
      "learning_rate": 3.5788594383233122e-06,
      "loss": 0.8561,
      "step": 24005
    },
    {
      "epoch": 2.769958467928011,
      "grad_norm": 0.44681841135025024,
      "learning_rate": 3.5610854707579523e-06,
      "loss": 0.8804,
      "step": 24010
    },
    {
      "epoch": 2.7705353022611905,
      "grad_norm": 0.39751601219177246,
      "learning_rate": 3.5433549491113884e-06,
      "loss": 0.8692,
      "step": 24015
    },
    {
      "epoch": 2.77111213659437,
      "grad_norm": 0.43277502059936523,
      "learning_rate": 3.5256678813712417e-06,
      "loss": 0.8762,
      "step": 24020
    },
    {
      "epoch": 2.7716889709275496,
      "grad_norm": 0.41634657979011536,
      "learning_rate": 3.5080242755055726e-06,
      "loss": 0.8523,
      "step": 24025
    },
    {
      "epoch": 2.772265805260729,
      "grad_norm": 0.3915756344795227,
      "learning_rate": 3.4904241394628557e-06,
      "loss": 0.8583,
      "step": 24030
    },
    {
      "epoch": 2.7728426395939088,
      "grad_norm": 0.4474189579486847,
      "learning_rate": 3.472867481172004e-06,
      "loss": 0.8383,
      "step": 24035
    },
    {
      "epoch": 2.7734194739270883,
      "grad_norm": 0.4305979907512665,
      "learning_rate": 3.455354308542291e-06,
      "loss": 0.8492,
      "step": 24040
    },
    {
      "epoch": 2.7739963082602674,
      "grad_norm": 0.4478662610054016,
      "learning_rate": 3.4378846294634835e-06,
      "loss": 0.8993,
      "step": 24045
    },
    {
      "epoch": 2.774573142593447,
      "grad_norm": 0.3863767683506012,
      "learning_rate": 3.4204584518056747e-06,
      "loss": 0.861,
      "step": 24050
    },
    {
      "epoch": 2.7751499769266266,
      "grad_norm": 0.4080348312854767,
      "learning_rate": 3.403075783419407e-06,
      "loss": 0.8817,
      "step": 24055
    },
    {
      "epoch": 2.775726811259806,
      "grad_norm": 0.4075169861316681,
      "learning_rate": 3.3857366321355722e-06,
      "loss": 0.8641,
      "step": 24060
    },
    {
      "epoch": 2.7763036455929857,
      "grad_norm": 0.45938998460769653,
      "learning_rate": 3.3684410057655435e-06,
      "loss": 0.868,
      "step": 24065
    },
    {
      "epoch": 2.7768804799261653,
      "grad_norm": 0.416753351688385,
      "learning_rate": 3.3511889121009886e-06,
      "loss": 0.8145,
      "step": 24070
    },
    {
      "epoch": 2.777457314259345,
      "grad_norm": 0.39491215348243713,
      "learning_rate": 3.3339803589140352e-06,
      "loss": 0.8632,
      "step": 24075
    },
    {
      "epoch": 2.778034148592524,
      "grad_norm": 0.41789379715919495,
      "learning_rate": 3.316815353957159e-06,
      "loss": 0.8214,
      "step": 24080
    },
    {
      "epoch": 2.778610982925704,
      "grad_norm": 0.3917028307914734,
      "learning_rate": 3.2996939049632415e-06,
      "loss": 0.8763,
      "step": 24085
    },
    {
      "epoch": 2.779187817258883,
      "grad_norm": 0.47759175300598145,
      "learning_rate": 3.2826160196455123e-06,
      "loss": 0.8576,
      "step": 24090
    },
    {
      "epoch": 2.7797646515920627,
      "grad_norm": 0.40511590242385864,
      "learning_rate": 3.2655817056975957e-06,
      "loss": 0.8361,
      "step": 24095
    },
    {
      "epoch": 2.780341485925242,
      "grad_norm": 0.4438006579875946,
      "learning_rate": 3.248590970793486e-06,
      "loss": 0.8489,
      "step": 24100
    },
    {
      "epoch": 2.780918320258422,
      "grad_norm": 0.4250805974006653,
      "learning_rate": 3.23164382258756e-06,
      "loss": 0.9377,
      "step": 24105
    },
    {
      "epoch": 2.7814951545916013,
      "grad_norm": 0.41761264204978943,
      "learning_rate": 3.214740268714511e-06,
      "loss": 0.8414,
      "step": 24110
    },
    {
      "epoch": 2.782071988924781,
      "grad_norm": 0.44590649008750916,
      "learning_rate": 3.1978803167894365e-06,
      "loss": 0.8881,
      "step": 24115
    },
    {
      "epoch": 2.7826488232579605,
      "grad_norm": 0.5200037360191345,
      "learning_rate": 3.181063974407772e-06,
      "loss": 0.8552,
      "step": 24120
    },
    {
      "epoch": 2.7832256575911396,
      "grad_norm": 0.4275122284889221,
      "learning_rate": 3.1642912491453346e-06,
      "loss": 0.8344,
      "step": 24125
    },
    {
      "epoch": 2.7838024919243196,
      "grad_norm": 0.4023871123790741,
      "learning_rate": 3.1475621485582253e-06,
      "loss": 0.8803,
      "step": 24130
    },
    {
      "epoch": 2.7843793262574987,
      "grad_norm": 0.43436017632484436,
      "learning_rate": 3.1308766801829926e-06,
      "loss": 0.8767,
      "step": 24135
    },
    {
      "epoch": 2.7849561605906783,
      "grad_norm": 0.4553993046283722,
      "learning_rate": 3.114234851536435e-06,
      "loss": 0.9139,
      "step": 24140
    },
    {
      "epoch": 2.785532994923858,
      "grad_norm": 0.44283974170684814,
      "learning_rate": 3.0976366701157445e-06,
      "loss": 0.8655,
      "step": 24145
    },
    {
      "epoch": 2.7861098292570374,
      "grad_norm": 0.46823441982269287,
      "learning_rate": 3.081082143398395e-06,
      "loss": 0.8996,
      "step": 24150
    },
    {
      "epoch": 2.786686663590217,
      "grad_norm": 0.46041712164878845,
      "learning_rate": 3.0645712788422985e-06,
      "loss": 0.8655,
      "step": 24155
    },
    {
      "epoch": 2.7872634979233966,
      "grad_norm": 0.4426816999912262,
      "learning_rate": 3.0481040838855833e-06,
      "loss": 0.9124,
      "step": 24160
    },
    {
      "epoch": 2.787840332256576,
      "grad_norm": 0.48756763339042664,
      "learning_rate": 3.0316805659467705e-06,
      "loss": 0.8486,
      "step": 24165
    },
    {
      "epoch": 2.7884171665897552,
      "grad_norm": 0.40607529878616333,
      "learning_rate": 3.015300732424686e-06,
      "loss": 0.8413,
      "step": 24170
    },
    {
      "epoch": 2.788994000922935,
      "grad_norm": 0.4447746276855469,
      "learning_rate": 2.998964590698483e-06,
      "loss": 0.8516,
      "step": 24175
    },
    {
      "epoch": 2.7895708352561144,
      "grad_norm": 0.4920103847980499,
      "learning_rate": 2.9826721481276077e-06,
      "loss": 0.8483,
      "step": 24180
    },
    {
      "epoch": 2.790147669589294,
      "grad_norm": 0.6388232111930847,
      "learning_rate": 2.9664234120518442e-06,
      "loss": 0.886,
      "step": 24185
    },
    {
      "epoch": 2.7907245039224735,
      "grad_norm": 0.4191949665546417,
      "learning_rate": 2.950218389791293e-06,
      "loss": 0.8321,
      "step": 24190
    },
    {
      "epoch": 2.791301338255653,
      "grad_norm": 0.45636507868766785,
      "learning_rate": 2.934057088646336e-06,
      "loss": 0.8973,
      "step": 24195
    },
    {
      "epoch": 2.7918781725888326,
      "grad_norm": 0.3961809277534485,
      "learning_rate": 2.91793951589765e-06,
      "loss": 0.8602,
      "step": 24200
    },
    {
      "epoch": 2.7924550069220118,
      "grad_norm": 0.3874358534812927,
      "learning_rate": 2.9018656788062813e-06,
      "loss": 0.8962,
      "step": 24205
    },
    {
      "epoch": 2.7930318412551918,
      "grad_norm": 0.40755221247673035,
      "learning_rate": 2.8858355846134944e-06,
      "loss": 0.8433,
      "step": 24210
    },
    {
      "epoch": 2.793608675588371,
      "grad_norm": 0.40966683626174927,
      "learning_rate": 2.8698492405408783e-06,
      "loss": 0.8909,
      "step": 24215
    },
    {
      "epoch": 2.7941855099215505,
      "grad_norm": 0.45877885818481445,
      "learning_rate": 2.8539066537903057e-06,
      "loss": 0.832,
      "step": 24220
    },
    {
      "epoch": 2.79476234425473,
      "grad_norm": 0.42089635133743286,
      "learning_rate": 2.8380078315439653e-06,
      "loss": 0.8511,
      "step": 24225
    },
    {
      "epoch": 2.7953391785879096,
      "grad_norm": 0.41457778215408325,
      "learning_rate": 2.8221527809642933e-06,
      "loss": 0.8538,
      "step": 24230
    },
    {
      "epoch": 2.795916012921089,
      "grad_norm": 0.4614168405532837,
      "learning_rate": 2.8063415091940216e-06,
      "loss": 0.8687,
      "step": 24235
    },
    {
      "epoch": 2.7964928472542687,
      "grad_norm": 0.4669603705406189,
      "learning_rate": 2.790574023356163e-06,
      "loss": 0.902,
      "step": 24240
    },
    {
      "epoch": 2.7970696815874483,
      "grad_norm": 0.42838254570961,
      "learning_rate": 2.774850330554002e-06,
      "loss": 0.8776,
      "step": 24245
    },
    {
      "epoch": 2.7976465159206274,
      "grad_norm": 0.4778376519680023,
      "learning_rate": 2.7591704378710836e-06,
      "loss": 0.8457,
      "step": 24250
    },
    {
      "epoch": 2.798223350253807,
      "grad_norm": 0.4371931850910187,
      "learning_rate": 2.7435343523712242e-06,
      "loss": 0.8841,
      "step": 24255
    },
    {
      "epoch": 2.7988001845869865,
      "grad_norm": 0.4283929169178009,
      "learning_rate": 2.7279420810985335e-06,
      "loss": 0.8337,
      "step": 24260
    },
    {
      "epoch": 2.799377018920166,
      "grad_norm": 0.4353237450122833,
      "learning_rate": 2.712393631077359e-06,
      "loss": 0.8101,
      "step": 24265
    },
    {
      "epoch": 2.7999538532533457,
      "grad_norm": 0.4581010043621063,
      "learning_rate": 2.6968890093122754e-06,
      "loss": 0.8645,
      "step": 24270
    },
    {
      "epoch": 2.8005306875865252,
      "grad_norm": 0.43251487612724304,
      "learning_rate": 2.681428222788174e-06,
      "loss": 0.8927,
      "step": 24275
    },
    {
      "epoch": 2.801107521919705,
      "grad_norm": 0.46299970149993896,
      "learning_rate": 2.6660112784701706e-06,
      "loss": 0.9245,
      "step": 24280
    },
    {
      "epoch": 2.801684356252884,
      "grad_norm": 0.41510728001594543,
      "learning_rate": 2.650638183303611e-06,
      "loss": 0.8558,
      "step": 24285
    },
    {
      "epoch": 2.802261190586064,
      "grad_norm": 0.4396783113479614,
      "learning_rate": 2.63530894421411e-06,
      "loss": 0.8827,
      "step": 24290
    },
    {
      "epoch": 2.802838024919243,
      "grad_norm": 0.47082120180130005,
      "learning_rate": 2.6200235681075324e-06,
      "loss": 0.8778,
      "step": 24295
    },
    {
      "epoch": 2.8034148592524226,
      "grad_norm": 0.40990665555000305,
      "learning_rate": 2.6047820618699592e-06,
      "loss": 0.8656,
      "step": 24300
    },
    {
      "epoch": 2.803991693585602,
      "grad_norm": 0.4030509293079376,
      "learning_rate": 2.58958443236772e-06,
      "loss": 0.8368,
      "step": 24305
    },
    {
      "epoch": 2.8045685279187818,
      "grad_norm": 0.39093419909477234,
      "learning_rate": 2.57443068644736e-06,
      "loss": 0.8536,
      "step": 24310
    },
    {
      "epoch": 2.8051453622519613,
      "grad_norm": 0.398468017578125,
      "learning_rate": 2.5593208309357187e-06,
      "loss": 0.8371,
      "step": 24315
    },
    {
      "epoch": 2.805722196585141,
      "grad_norm": 0.41183245182037354,
      "learning_rate": 2.544254872639762e-06,
      "loss": 0.8466,
      "step": 24320
    },
    {
      "epoch": 2.8062990309183204,
      "grad_norm": 0.4442862570285797,
      "learning_rate": 2.5292328183467606e-06,
      "loss": 0.8567,
      "step": 24325
    },
    {
      "epoch": 2.8068758652514996,
      "grad_norm": 0.43638312816619873,
      "learning_rate": 2.514254674824168e-06,
      "loss": 0.8596,
      "step": 24330
    },
    {
      "epoch": 2.807452699584679,
      "grad_norm": 0.4281933307647705,
      "learning_rate": 2.4993204488196865e-06,
      "loss": 0.9114,
      "step": 24335
    },
    {
      "epoch": 2.8080295339178587,
      "grad_norm": 0.4679451882839203,
      "learning_rate": 2.4844301470612007e-06,
      "loss": 0.8452,
      "step": 24340
    },
    {
      "epoch": 2.8086063682510383,
      "grad_norm": 0.438088059425354,
      "learning_rate": 2.469583776256812e-06,
      "loss": 0.8585,
      "step": 24345
    },
    {
      "epoch": 2.809183202584218,
      "grad_norm": 0.4836006760597229,
      "learning_rate": 2.4547813430948473e-06,
      "loss": 0.8732,
      "step": 24350
    },
    {
      "epoch": 2.8097600369173974,
      "grad_norm": 0.42423558235168457,
      "learning_rate": 2.4400228542438396e-06,
      "loss": 0.8497,
      "step": 24355
    },
    {
      "epoch": 2.810336871250577,
      "grad_norm": 0.45980703830718994,
      "learning_rate": 2.4253083163525038e-06,
      "loss": 0.9032,
      "step": 24360
    },
    {
      "epoch": 2.810913705583756,
      "grad_norm": 0.39328107237815857,
      "learning_rate": 2.4106377360497813e-06,
      "loss": 0.8759,
      "step": 24365
    },
    {
      "epoch": 2.811490539916936,
      "grad_norm": 0.420552134513855,
      "learning_rate": 2.3960111199447854e-06,
      "loss": 0.8785,
      "step": 24370
    },
    {
      "epoch": 2.812067374250115,
      "grad_norm": 0.4153137505054474,
      "learning_rate": 2.3814284746268344e-06,
      "loss": 0.8768,
      "step": 24375
    },
    {
      "epoch": 2.812644208583295,
      "grad_norm": 0.4206068515777588,
      "learning_rate": 2.366889806665451e-06,
      "loss": 0.8547,
      "step": 24380
    },
    {
      "epoch": 2.8132210429164743,
      "grad_norm": 0.42058634757995605,
      "learning_rate": 2.352395122610329e-06,
      "loss": 0.8448,
      "step": 24385
    },
    {
      "epoch": 2.813797877249654,
      "grad_norm": 0.44256195425987244,
      "learning_rate": 2.3379444289913342e-06,
      "loss": 0.8852,
      "step": 24390
    },
    {
      "epoch": 2.8143747115828335,
      "grad_norm": 0.4605026841163635,
      "learning_rate": 2.3235377323185593e-06,
      "loss": 0.8693,
      "step": 24395
    },
    {
      "epoch": 2.814951545916013,
      "grad_norm": 0.4125789403915405,
      "learning_rate": 2.3091750390822232e-06,
      "loss": 0.9181,
      "step": 24400
    },
    {
      "epoch": 2.8155283802491926,
      "grad_norm": 0.429656982421875,
      "learning_rate": 2.2948563557527836e-06,
      "loss": 0.8192,
      "step": 24405
    },
    {
      "epoch": 2.8161052145823717,
      "grad_norm": 0.4208963215351105,
      "learning_rate": 2.280581688780792e-06,
      "loss": 0.9055,
      "step": 24410
    },
    {
      "epoch": 2.8166820489155513,
      "grad_norm": 0.40175744891166687,
      "learning_rate": 2.266351044597037e-06,
      "loss": 0.8848,
      "step": 24415
    },
    {
      "epoch": 2.817258883248731,
      "grad_norm": 0.39711418747901917,
      "learning_rate": 2.2521644296124466e-06,
      "loss": 0.8627,
      "step": 24420
    },
    {
      "epoch": 2.8178357175819104,
      "grad_norm": 0.4246174693107605,
      "learning_rate": 2.2380218502181193e-06,
      "loss": 0.8744,
      "step": 24425
    },
    {
      "epoch": 2.81841255191509,
      "grad_norm": 0.4455760419368744,
      "learning_rate": 2.2239233127853366e-06,
      "loss": 0.8491,
      "step": 24430
    },
    {
      "epoch": 2.8189893862482696,
      "grad_norm": 0.3960672616958618,
      "learning_rate": 2.209868823665473e-06,
      "loss": 0.839,
      "step": 24435
    },
    {
      "epoch": 2.819566220581449,
      "grad_norm": 0.44110697507858276,
      "learning_rate": 2.1958583891901307e-06,
      "loss": 0.832,
      "step": 24440
    },
    {
      "epoch": 2.8201430549146282,
      "grad_norm": 0.43871957063674927,
      "learning_rate": 2.1818920156710387e-06,
      "loss": 0.8863,
      "step": 24445
    },
    {
      "epoch": 2.8207198892478083,
      "grad_norm": 0.4036722481250763,
      "learning_rate": 2.1679697094000638e-06,
      "loss": 0.8492,
      "step": 24450
    },
    {
      "epoch": 2.8212967235809874,
      "grad_norm": 0.4256274402141571,
      "learning_rate": 2.1540914766492336e-06,
      "loss": 0.8343,
      "step": 24455
    },
    {
      "epoch": 2.821873557914167,
      "grad_norm": 0.4566933810710907,
      "learning_rate": 2.1402573236707357e-06,
      "loss": 0.9099,
      "step": 24460
    },
    {
      "epoch": 2.8224503922473465,
      "grad_norm": 0.4147023856639862,
      "learning_rate": 2.1264672566968736e-06,
      "loss": 0.9145,
      "step": 24465
    },
    {
      "epoch": 2.823027226580526,
      "grad_norm": 0.3891587257385254,
      "learning_rate": 2.1127212819400775e-06,
      "loss": 0.8941,
      "step": 24470
    },
    {
      "epoch": 2.8236040609137056,
      "grad_norm": 0.4380452334880829,
      "learning_rate": 2.0990194055929723e-06,
      "loss": 0.8465,
      "step": 24475
    },
    {
      "epoch": 2.824180895246885,
      "grad_norm": 0.4227445721626282,
      "learning_rate": 2.0853616338282644e-06,
      "loss": 0.8529,
      "step": 24480
    },
    {
      "epoch": 2.8247577295800648,
      "grad_norm": 0.46117278933525085,
      "learning_rate": 2.0717479727987876e-06,
      "loss": 0.8607,
      "step": 24485
    },
    {
      "epoch": 2.825334563913244,
      "grad_norm": 0.4497821629047394,
      "learning_rate": 2.0581784286375585e-06,
      "loss": 0.8925,
      "step": 24490
    },
    {
      "epoch": 2.825911398246424,
      "grad_norm": 0.4422582685947418,
      "learning_rate": 2.044653007457653e-06,
      "loss": 0.943,
      "step": 24495
    },
    {
      "epoch": 2.826488232579603,
      "grad_norm": 0.4594055116176605,
      "learning_rate": 2.03117171535232e-06,
      "loss": 0.9053,
      "step": 24500
    },
    {
      "epoch": 2.8270650669127826,
      "grad_norm": 0.4220978319644928,
      "learning_rate": 2.017734558394879e-06,
      "loss": 0.8149,
      "step": 24505
    },
    {
      "epoch": 2.827641901245962,
      "grad_norm": 0.43389609456062317,
      "learning_rate": 2.0043415426388324e-06,
      "loss": 0.8253,
      "step": 24510
    },
    {
      "epoch": 2.8282187355791417,
      "grad_norm": 0.4446219503879547,
      "learning_rate": 1.9909926741177422e-06,
      "loss": 0.8887,
      "step": 24515
    },
    {
      "epoch": 2.8287955699123213,
      "grad_norm": 0.4167942404747009,
      "learning_rate": 1.977687958845298e-06,
      "loss": 0.8468,
      "step": 24520
    },
    {
      "epoch": 2.829372404245501,
      "grad_norm": 0.4484202563762665,
      "learning_rate": 1.964427402815294e-06,
      "loss": 0.8895,
      "step": 24525
    },
    {
      "epoch": 2.8299492385786804,
      "grad_norm": 0.4193936884403229,
      "learning_rate": 1.9512110120016638e-06,
      "loss": 0.8567,
      "step": 24530
    },
    {
      "epoch": 2.8305260729118595,
      "grad_norm": 0.4360736310482025,
      "learning_rate": 1.9380387923583877e-06,
      "loss": 0.8866,
      "step": 24535
    },
    {
      "epoch": 2.831102907245039,
      "grad_norm": 0.4343816936016083,
      "learning_rate": 1.924910749819586e-06,
      "loss": 0.8251,
      "step": 24540
    },
    {
      "epoch": 2.8316797415782187,
      "grad_norm": 0.4194128215312958,
      "learning_rate": 1.9118268902994617e-06,
      "loss": 0.8629,
      "step": 24545
    },
    {
      "epoch": 2.8322565759113982,
      "grad_norm": 0.41894203424453735,
      "learning_rate": 1.898787219692344e-06,
      "loss": 0.9119,
      "step": 24550
    },
    {
      "epoch": 2.832833410244578,
      "grad_norm": 0.4088500738143921,
      "learning_rate": 1.8857917438725892e-06,
      "loss": 0.8716,
      "step": 24555
    },
    {
      "epoch": 2.8334102445777574,
      "grad_norm": 0.42529842257499695,
      "learning_rate": 1.8728404686947253e-06,
      "loss": 0.8761,
      "step": 24560
    },
    {
      "epoch": 2.833987078910937,
      "grad_norm": 0.4301418364048004,
      "learning_rate": 1.8599333999932966e-06,
      "loss": 0.9105,
      "step": 24565
    },
    {
      "epoch": 2.834563913244116,
      "grad_norm": 0.4569443166255951,
      "learning_rate": 1.8470705435829849e-06,
      "loss": 0.8798,
      "step": 24570
    },
    {
      "epoch": 2.835140747577296,
      "grad_norm": 0.4449009597301483,
      "learning_rate": 1.8342519052584995e-06,
      "loss": 0.9007,
      "step": 24575
    },
    {
      "epoch": 2.835717581910475,
      "grad_norm": 0.4420955181121826,
      "learning_rate": 1.8214774907947097e-06,
      "loss": 0.8373,
      "step": 24580
    },
    {
      "epoch": 2.8362944162436547,
      "grad_norm": 0.43387678265571594,
      "learning_rate": 1.8087473059464788e-06,
      "loss": 0.8486,
      "step": 24585
    },
    {
      "epoch": 2.8368712505768343,
      "grad_norm": 0.45262908935546875,
      "learning_rate": 1.796061356448797e-06,
      "loss": 0.8243,
      "step": 24590
    },
    {
      "epoch": 2.837448084910014,
      "grad_norm": 0.4249054789543152,
      "learning_rate": 1.783419648016682e-06,
      "loss": 0.9009,
      "step": 24595
    },
    {
      "epoch": 2.8380249192431934,
      "grad_norm": 0.4239564836025238,
      "learning_rate": 1.770822186345289e-06,
      "loss": 0.8845,
      "step": 24600
    },
    {
      "epoch": 2.838601753576373,
      "grad_norm": 0.4570298492908478,
      "learning_rate": 1.7582689771097672e-06,
      "loss": 0.8624,
      "step": 24605
    },
    {
      "epoch": 2.8391785879095526,
      "grad_norm": 0.49115484952926636,
      "learning_rate": 1.7457600259653707e-06,
      "loss": 0.8467,
      "step": 24610
    },
    {
      "epoch": 2.8397554222427317,
      "grad_norm": 0.375393271446228,
      "learning_rate": 1.7332953385474027e-06,
      "loss": 0.8949,
      "step": 24615
    },
    {
      "epoch": 2.8403322565759113,
      "grad_norm": 0.44062313437461853,
      "learning_rate": 1.7208749204712493e-06,
      "loss": 0.9072,
      "step": 24620
    },
    {
      "epoch": 2.840909090909091,
      "grad_norm": 0.4475950598716736,
      "learning_rate": 1.7084987773323123e-06,
      "loss": 0.8664,
      "step": 24625
    },
    {
      "epoch": 2.8414859252422704,
      "grad_norm": 0.4045661985874176,
      "learning_rate": 1.6961669147060765e-06,
      "loss": 0.8447,
      "step": 24630
    },
    {
      "epoch": 2.84206275957545,
      "grad_norm": 0.40116119384765625,
      "learning_rate": 1.6838793381480644e-06,
      "loss": 0.8533,
      "step": 24635
    },
    {
      "epoch": 2.8426395939086295,
      "grad_norm": 0.46115195751190186,
      "learning_rate": 1.671636053193859e-06,
      "loss": 0.8771,
      "step": 24640
    },
    {
      "epoch": 2.843216428241809,
      "grad_norm": 0.41099631786346436,
      "learning_rate": 1.6594370653590706e-06,
      "loss": 0.8727,
      "step": 24645
    },
    {
      "epoch": 2.843793262574988,
      "grad_norm": 0.42782846093177795,
      "learning_rate": 1.647282380139392e-06,
      "loss": 0.8315,
      "step": 24650
    },
    {
      "epoch": 2.844370096908168,
      "grad_norm": 0.49658653140068054,
      "learning_rate": 1.63517200301051e-06,
      "loss": 0.8544,
      "step": 24655
    },
    {
      "epoch": 2.8449469312413473,
      "grad_norm": 0.4295457899570465,
      "learning_rate": 1.6231059394281934e-06,
      "loss": 0.8673,
      "step": 24660
    },
    {
      "epoch": 2.845523765574527,
      "grad_norm": 0.40046048164367676,
      "learning_rate": 1.611084194828194e-06,
      "loss": 0.8988,
      "step": 24665
    },
    {
      "epoch": 2.8461005999077065,
      "grad_norm": 0.462961345911026,
      "learning_rate": 1.5991067746263799e-06,
      "loss": 0.8601,
      "step": 24670
    },
    {
      "epoch": 2.846677434240886,
      "grad_norm": 0.43810996413230896,
      "learning_rate": 1.587173684218557e-06,
      "loss": 0.9112,
      "step": 24675
    },
    {
      "epoch": 2.8472542685740656,
      "grad_norm": 0.3803338408470154,
      "learning_rate": 1.5752849289806248e-06,
      "loss": 0.821,
      "step": 24680
    },
    {
      "epoch": 2.847831102907245,
      "grad_norm": 0.47152554988861084,
      "learning_rate": 1.5634405142684882e-06,
      "loss": 0.8787,
      "step": 24685
    },
    {
      "epoch": 2.8484079372404247,
      "grad_norm": 0.43199577927589417,
      "learning_rate": 1.55164044541809e-06,
      "loss": 0.8574,
      "step": 24690
    },
    {
      "epoch": 2.848984771573604,
      "grad_norm": 0.43718257546424866,
      "learning_rate": 1.5398847277453776e-06,
      "loss": 0.8367,
      "step": 24695
    },
    {
      "epoch": 2.8495616059067834,
      "grad_norm": 0.484164834022522,
      "learning_rate": 1.5281733665463038e-06,
      "loss": 0.8878,
      "step": 24700
    },
    {
      "epoch": 2.850138440239963,
      "grad_norm": 0.42896854877471924,
      "learning_rate": 1.5165063670968926e-06,
      "loss": 0.9043,
      "step": 24705
    },
    {
      "epoch": 2.8507152745731426,
      "grad_norm": 0.4501945674419403,
      "learning_rate": 1.5048837346531285e-06,
      "loss": 0.872,
      "step": 24710
    },
    {
      "epoch": 2.851292108906322,
      "grad_norm": 0.41754597425460815,
      "learning_rate": 1.4933054744510344e-06,
      "loss": 0.871,
      "step": 24715
    },
    {
      "epoch": 2.8518689432395017,
      "grad_norm": 0.4565035402774811,
      "learning_rate": 1.4817715917066488e-06,
      "loss": 0.8715,
      "step": 24720
    },
    {
      "epoch": 2.8524457775726813,
      "grad_norm": 0.4439191222190857,
      "learning_rate": 1.4702820916159931e-06,
      "loss": 0.8738,
      "step": 24725
    },
    {
      "epoch": 2.8530226119058604,
      "grad_norm": 0.39780697226524353,
      "learning_rate": 1.4588369793551271e-06,
      "loss": 0.9057,
      "step": 24730
    },
    {
      "epoch": 2.8535994462390404,
      "grad_norm": 0.42179131507873535,
      "learning_rate": 1.4474362600800706e-06,
      "loss": 0.881,
      "step": 24735
    },
    {
      "epoch": 2.8541762805722195,
      "grad_norm": 0.468203604221344,
      "learning_rate": 1.436079938926904e-06,
      "loss": 0.8879,
      "step": 24740
    },
    {
      "epoch": 2.854753114905399,
      "grad_norm": 0.44837823510169983,
      "learning_rate": 1.4247680210116465e-06,
      "loss": 0.9177,
      "step": 24745
    },
    {
      "epoch": 2.8553299492385786,
      "grad_norm": 0.42178669571876526,
      "learning_rate": 1.4135005114303435e-06,
      "loss": 0.835,
      "step": 24750
    },
    {
      "epoch": 2.855906783571758,
      "grad_norm": 0.3851020336151123,
      "learning_rate": 1.4022774152590235e-06,
      "loss": 0.8502,
      "step": 24755
    },
    {
      "epoch": 2.8564836179049378,
      "grad_norm": 0.43105438351631165,
      "learning_rate": 1.3910987375537422e-06,
      "loss": 0.8873,
      "step": 24760
    },
    {
      "epoch": 2.8570604522381173,
      "grad_norm": 0.5082978010177612,
      "learning_rate": 1.379964483350482e-06,
      "loss": 0.8742,
      "step": 24765
    },
    {
      "epoch": 2.857637286571297,
      "grad_norm": 0.4297298491001129,
      "learning_rate": 1.3688746576652646e-06,
      "loss": 0.8741,
      "step": 24770
    },
    {
      "epoch": 2.858214120904476,
      "grad_norm": 0.4092559218406677,
      "learning_rate": 1.3578292654940706e-06,
      "loss": 0.8442,
      "step": 24775
    },
    {
      "epoch": 2.8587909552376556,
      "grad_norm": 0.3794494867324829,
      "learning_rate": 1.3468283118128756e-06,
      "loss": 0.8774,
      "step": 24780
    },
    {
      "epoch": 2.859367789570835,
      "grad_norm": 0.4544118642807007,
      "learning_rate": 1.3358718015776262e-06,
      "loss": 0.8916,
      "step": 24785
    },
    {
      "epoch": 2.8599446239040147,
      "grad_norm": 0.43125012516975403,
      "learning_rate": 1.324959739724263e-06,
      "loss": 0.8613,
      "step": 24790
    },
    {
      "epoch": 2.8605214582371943,
      "grad_norm": 0.4157480299472809,
      "learning_rate": 1.314092131168665e-06,
      "loss": 0.9128,
      "step": 24795
    },
    {
      "epoch": 2.861098292570374,
      "grad_norm": 0.4354327619075775,
      "learning_rate": 1.303268980806749e-06,
      "loss": 0.8693,
      "step": 24800
    },
    {
      "epoch": 2.8616751269035534,
      "grad_norm": 0.41278907656669617,
      "learning_rate": 1.2924902935143258e-06,
      "loss": 0.8691,
      "step": 24805
    },
    {
      "epoch": 2.8622519612367325,
      "grad_norm": 0.39481645822525024,
      "learning_rate": 1.2817560741472445e-06,
      "loss": 0.8255,
      "step": 24810
    },
    {
      "epoch": 2.8628287955699125,
      "grad_norm": 0.4812294840812683,
      "learning_rate": 1.2710663275412705e-06,
      "loss": 0.8372,
      "step": 24815
    },
    {
      "epoch": 2.8634056299030917,
      "grad_norm": 0.42143645882606506,
      "learning_rate": 1.2604210585121845e-06,
      "loss": 0.8488,
      "step": 24820
    },
    {
      "epoch": 2.8639824642362712,
      "grad_norm": 0.43948641419410706,
      "learning_rate": 1.2498202718556617e-06,
      "loss": 0.8479,
      "step": 24825
    },
    {
      "epoch": 2.864559298569451,
      "grad_norm": 0.4091070592403412,
      "learning_rate": 1.2392639723474153e-06,
      "loss": 0.8532,
      "step": 24830
    },
    {
      "epoch": 2.8651361329026304,
      "grad_norm": 0.43268927931785583,
      "learning_rate": 1.2287521647430521e-06,
      "loss": 0.8438,
      "step": 24835
    },
    {
      "epoch": 2.86571296723581,
      "grad_norm": 0.41381460428237915,
      "learning_rate": 1.2182848537781622e-06,
      "loss": 0.86,
      "step": 24840
    },
    {
      "epoch": 2.8662898015689895,
      "grad_norm": 0.5061653852462769,
      "learning_rate": 1.2078620441683064e-06,
      "loss": 0.8887,
      "step": 24845
    },
    {
      "epoch": 2.866866635902169,
      "grad_norm": 0.4368427097797394,
      "learning_rate": 1.1974837406089846e-06,
      "loss": 0.8962,
      "step": 24850
    },
    {
      "epoch": 2.867443470235348,
      "grad_norm": 0.43707120418548584,
      "learning_rate": 1.187149947775612e-06,
      "loss": 0.8111,
      "step": 24855
    },
    {
      "epoch": 2.868020304568528,
      "grad_norm": 0.4115869998931885,
      "learning_rate": 1.1768606703236095e-06,
      "loss": 0.8536,
      "step": 24860
    },
    {
      "epoch": 2.8685971389017073,
      "grad_norm": 0.3919137418270111,
      "learning_rate": 1.1666159128883136e-06,
      "loss": 0.893,
      "step": 24865
    },
    {
      "epoch": 2.869173973234887,
      "grad_norm": 0.42175352573394775,
      "learning_rate": 1.1564156800849879e-06,
      "loss": 0.8756,
      "step": 24870
    },
    {
      "epoch": 2.8697508075680664,
      "grad_norm": 0.41766905784606934,
      "learning_rate": 1.1462599765088788e-06,
      "loss": 0.875,
      "step": 24875
    },
    {
      "epoch": 2.870327641901246,
      "grad_norm": 0.43964409828186035,
      "learning_rate": 1.13614880673516e-06,
      "loss": 0.8937,
      "step": 24880
    },
    {
      "epoch": 2.8709044762344256,
      "grad_norm": 0.44723713397979736,
      "learning_rate": 1.1260821753188987e-06,
      "loss": 0.9313,
      "step": 24885
    },
    {
      "epoch": 2.871481310567605,
      "grad_norm": 0.4014928936958313,
      "learning_rate": 1.1160600867951455e-06,
      "loss": 0.8574,
      "step": 24890
    },
    {
      "epoch": 2.8720581449007847,
      "grad_norm": 0.42120859026908875,
      "learning_rate": 1.106082545678877e-06,
      "loss": 0.866,
      "step": 24895
    },
    {
      "epoch": 2.872634979233964,
      "grad_norm": 0.4354383945465088,
      "learning_rate": 1.0961495564650092e-06,
      "loss": 0.8993,
      "step": 24900
    },
    {
      "epoch": 2.8732118135671434,
      "grad_norm": 0.38240766525268555,
      "learning_rate": 1.0862611236283405e-06,
      "loss": 0.8508,
      "step": 24905
    },
    {
      "epoch": 2.873788647900323,
      "grad_norm": 0.41191530227661133,
      "learning_rate": 1.0764172516236515e-06,
      "loss": 0.829,
      "step": 24910
    },
    {
      "epoch": 2.8743654822335025,
      "grad_norm": 0.5137365460395813,
      "learning_rate": 1.0666179448856174e-06,
      "loss": 0.9256,
      "step": 24915
    },
    {
      "epoch": 2.874942316566682,
      "grad_norm": 0.4102938175201416,
      "learning_rate": 1.056863207828851e-06,
      "loss": 0.8725,
      "step": 24920
    },
    {
      "epoch": 2.8755191508998617,
      "grad_norm": 0.3814050257205963,
      "learning_rate": 1.0471530448478705e-06,
      "loss": 0.8422,
      "step": 24925
    },
    {
      "epoch": 2.876095985233041,
      "grad_norm": 0.3723147213459015,
      "learning_rate": 1.0374874603171326e-06,
      "loss": 0.8474,
      "step": 24930
    },
    {
      "epoch": 2.8766728195662203,
      "grad_norm": 0.4165055751800537,
      "learning_rate": 1.027866458590998e-06,
      "loss": 0.8373,
      "step": 24935
    },
    {
      "epoch": 2.8772496538994004,
      "grad_norm": 0.49059098958969116,
      "learning_rate": 1.0182900440037447e-06,
      "loss": 0.8712,
      "step": 24940
    },
    {
      "epoch": 2.8778264882325795,
      "grad_norm": 0.41809362173080444,
      "learning_rate": 1.0087582208695768e-06,
      "loss": 0.9039,
      "step": 24945
    },
    {
      "epoch": 2.878403322565759,
      "grad_norm": 0.44100937247276306,
      "learning_rate": 9.992709934825816e-07,
      "loss": 0.819,
      "step": 24950
    },
    {
      "epoch": 2.8789801568989386,
      "grad_norm": 0.4227614402770996,
      "learning_rate": 9.898283661167851e-07,
      "loss": 0.8547,
      "step": 24955
    },
    {
      "epoch": 2.879556991232118,
      "grad_norm": 0.40128713846206665,
      "learning_rate": 9.804303430261174e-07,
      "loss": 0.8602,
      "step": 24960
    },
    {
      "epoch": 2.8801338255652977,
      "grad_norm": 0.40818148851394653,
      "learning_rate": 9.71076928444381e-07,
      "loss": 0.8955,
      "step": 24965
    },
    {
      "epoch": 2.8807106598984773,
      "grad_norm": 0.4075045883655548,
      "learning_rate": 9.617681265853273e-07,
      "loss": 0.8676,
      "step": 24970
    },
    {
      "epoch": 2.881287494231657,
      "grad_norm": 0.4465732276439667,
      "learning_rate": 9.525039416425907e-07,
      "loss": 0.859,
      "step": 24975
    },
    {
      "epoch": 2.881864328564836,
      "grad_norm": 0.45487141609191895,
      "learning_rate": 9.432843777896993e-07,
      "loss": 0.8775,
      "step": 24980
    },
    {
      "epoch": 2.8824411628980156,
      "grad_norm": 0.40925332903862,
      "learning_rate": 9.341094391800753e-07,
      "loss": 0.8657,
      "step": 24985
    },
    {
      "epoch": 2.883017997231195,
      "grad_norm": 0.4177109897136688,
      "learning_rate": 9.249791299470567e-07,
      "loss": 0.8468,
      "step": 24990
    },
    {
      "epoch": 2.8835948315643747,
      "grad_norm": 0.4083639979362488,
      "learning_rate": 9.158934542038755e-07,
      "loss": 0.8908,
      "step": 24995
    },
    {
      "epoch": 2.8841716658975542,
      "grad_norm": 0.49375760555267334,
      "learning_rate": 9.068524160436242e-07,
      "loss": 0.8653,
      "step": 25000
    },
    {
      "epoch": 2.884748500230734,
      "grad_norm": 0.4585607945919037,
      "learning_rate": 8.978560195393115e-07,
      "loss": 0.9006,
      "step": 25005
    },
    {
      "epoch": 2.8853253345639134,
      "grad_norm": 0.4486338198184967,
      "learning_rate": 8.889042687438509e-07,
      "loss": 0.8807,
      "step": 25010
    },
    {
      "epoch": 2.8859021688970925,
      "grad_norm": 0.4402649700641632,
      "learning_rate": 8.799971676900165e-07,
      "loss": 0.8646,
      "step": 25015
    },
    {
      "epoch": 2.8864790032302725,
      "grad_norm": 0.4237249791622162,
      "learning_rate": 8.711347203904541e-07,
      "loss": 0.9027,
      "step": 25020
    },
    {
      "epoch": 2.8870558375634516,
      "grad_norm": 0.4376821517944336,
      "learning_rate": 8.623169308377365e-07,
      "loss": 0.8866,
      "step": 25025
    },
    {
      "epoch": 2.887632671896631,
      "grad_norm": 0.3880428373813629,
      "learning_rate": 8.535438030042863e-07,
      "loss": 0.8597,
      "step": 25030
    },
    {
      "epoch": 2.8882095062298108,
      "grad_norm": 0.3971506655216217,
      "learning_rate": 8.448153408424087e-07,
      "loss": 0.8553,
      "step": 25035
    },
    {
      "epoch": 2.8887863405629903,
      "grad_norm": 0.4134998321533203,
      "learning_rate": 8.361315482843135e-07,
      "loss": 0.8479,
      "step": 25040
    },
    {
      "epoch": 2.88936317489617,
      "grad_norm": 0.42676177620887756,
      "learning_rate": 8.274924292420494e-07,
      "loss": 0.9141,
      "step": 25045
    },
    {
      "epoch": 2.8899400092293495,
      "grad_norm": 0.43701738119125366,
      "learning_rate": 8.188979876075475e-07,
      "loss": 0.866,
      "step": 25050
    },
    {
      "epoch": 2.890516843562529,
      "grad_norm": 0.4272172749042511,
      "learning_rate": 8.103482272526441e-07,
      "loss": 0.887,
      "step": 25055
    },
    {
      "epoch": 2.891093677895708,
      "grad_norm": 0.4200077950954437,
      "learning_rate": 8.018431520290027e-07,
      "loss": 0.8927,
      "step": 25060
    },
    {
      "epoch": 2.8916705122288877,
      "grad_norm": 0.43398797512054443,
      "learning_rate": 7.933827657682025e-07,
      "loss": 0.8489,
      "step": 25065
    },
    {
      "epoch": 2.8922473465620673,
      "grad_norm": 0.42155030369758606,
      "learning_rate": 7.849670722816283e-07,
      "loss": 0.8899,
      "step": 25070
    },
    {
      "epoch": 2.892824180895247,
      "grad_norm": 0.4187012016773224,
      "learning_rate": 7.765960753605916e-07,
      "loss": 0.8781,
      "step": 25075
    },
    {
      "epoch": 2.8934010152284264,
      "grad_norm": 0.4618048071861267,
      "learning_rate": 7.682697787762317e-07,
      "loss": 0.9056,
      "step": 25080
    },
    {
      "epoch": 2.893977849561606,
      "grad_norm": 0.4267641603946686,
      "learning_rate": 7.599881862795811e-07,
      "loss": 0.8268,
      "step": 25085
    },
    {
      "epoch": 2.8945546838947855,
      "grad_norm": 0.4020291268825531,
      "learning_rate": 7.517513016014777e-07,
      "loss": 0.8911,
      "step": 25090
    },
    {
      "epoch": 2.8951315182279647,
      "grad_norm": 0.44923388957977295,
      "learning_rate": 7.435591284526866e-07,
      "loss": 0.8486,
      "step": 25095
    },
    {
      "epoch": 2.8957083525611447,
      "grad_norm": 0.42874395847320557,
      "learning_rate": 7.354116705237779e-07,
      "loss": 0.893,
      "step": 25100
    },
    {
      "epoch": 2.896285186894324,
      "grad_norm": 0.41941747069358826,
      "learning_rate": 7.273089314852155e-07,
      "loss": 0.8298,
      "step": 25105
    },
    {
      "epoch": 2.8968620212275034,
      "grad_norm": 0.4208540916442871,
      "learning_rate": 7.192509149872684e-07,
      "loss": 0.8906,
      "step": 25110
    },
    {
      "epoch": 2.897438855560683,
      "grad_norm": 0.474488228559494,
      "learning_rate": 7.112376246601215e-07,
      "loss": 0.9308,
      "step": 25115
    },
    {
      "epoch": 2.8980156898938625,
      "grad_norm": 0.4039963185787201,
      "learning_rate": 7.032690641137651e-07,
      "loss": 0.8741,
      "step": 25120
    },
    {
      "epoch": 2.898592524227042,
      "grad_norm": 0.4397977590560913,
      "learning_rate": 6.953452369380497e-07,
      "loss": 0.867,
      "step": 25125
    },
    {
      "epoch": 2.8991693585602216,
      "grad_norm": 0.4029087722301483,
      "learning_rate": 6.874661467026756e-07,
      "loss": 0.8548,
      "step": 25130
    },
    {
      "epoch": 2.899746192893401,
      "grad_norm": 0.41865789890289307,
      "learning_rate": 6.79631796957192e-07,
      "loss": 0.8462,
      "step": 25135
    },
    {
      "epoch": 2.9003230272265803,
      "grad_norm": 0.45501357316970825,
      "learning_rate": 6.718421912309758e-07,
      "loss": 0.8626,
      "step": 25140
    },
    {
      "epoch": 2.90089986155976,
      "grad_norm": 0.41223421692848206,
      "learning_rate": 6.640973330332756e-07,
      "loss": 0.8938,
      "step": 25145
    },
    {
      "epoch": 2.9014766958929394,
      "grad_norm": 0.4089823365211487,
      "learning_rate": 6.563972258531559e-07,
      "loss": 0.8879,
      "step": 25150
    },
    {
      "epoch": 2.902053530226119,
      "grad_norm": 0.40834125876426697,
      "learning_rate": 6.487418731595418e-07,
      "loss": 0.8813,
      "step": 25155
    },
    {
      "epoch": 2.9026303645592986,
      "grad_norm": 0.4374805986881256,
      "learning_rate": 6.411312784011636e-07,
      "loss": 0.9119,
      "step": 25160
    },
    {
      "epoch": 2.903207198892478,
      "grad_norm": 0.41101983189582825,
      "learning_rate": 6.335654450066341e-07,
      "loss": 0.845,
      "step": 25165
    },
    {
      "epoch": 2.9037840332256577,
      "grad_norm": 0.4206681251525879,
      "learning_rate": 6.260443763843493e-07,
      "loss": 0.926,
      "step": 25170
    },
    {
      "epoch": 2.9043608675588373,
      "grad_norm": 0.4462982714176178,
      "learning_rate": 6.185680759225876e-07,
      "loss": 0.8473,
      "step": 25175
    },
    {
      "epoch": 2.904937701892017,
      "grad_norm": 0.47528621554374695,
      "learning_rate": 6.111365469894215e-07,
      "loss": 0.8485,
      "step": 25180
    },
    {
      "epoch": 2.905514536225196,
      "grad_norm": 0.40770596265792847,
      "learning_rate": 6.037497929327839e-07,
      "loss": 0.8505,
      "step": 25185
    },
    {
      "epoch": 2.9060913705583755,
      "grad_norm": 0.46322932839393616,
      "learning_rate": 5.964078170804133e-07,
      "loss": 0.8557,
      "step": 25190
    },
    {
      "epoch": 2.906668204891555,
      "grad_norm": 0.4159286618232727,
      "learning_rate": 5.891106227398857e-07,
      "loss": 0.8949,
      "step": 25195
    },
    {
      "epoch": 2.9072450392247347,
      "grad_norm": 0.41801539063453674,
      "learning_rate": 5.818582131985939e-07,
      "loss": 0.8604,
      "step": 25200
    },
    {
      "epoch": 2.907821873557914,
      "grad_norm": 0.4559653103351593,
      "learning_rate": 5.746505917237688e-07,
      "loss": 0.8639,
      "step": 25205
    },
    {
      "epoch": 2.908398707891094,
      "grad_norm": 0.45020580291748047,
      "learning_rate": 5.674877615624686e-07,
      "loss": 0.8776,
      "step": 25210
    },
    {
      "epoch": 2.9089755422242733,
      "grad_norm": 0.48369133472442627,
      "learning_rate": 5.603697259415341e-07,
      "loss": 0.9128,
      "step": 25215
    },
    {
      "epoch": 2.9095523765574525,
      "grad_norm": 0.44913429021835327,
      "learning_rate": 5.532964880676894e-07,
      "loss": 0.8563,
      "step": 25220
    },
    {
      "epoch": 2.9101292108906325,
      "grad_norm": 0.4256819486618042,
      "learning_rate": 5.462680511274187e-07,
      "loss": 0.8501,
      "step": 25225
    },
    {
      "epoch": 2.9107060452238116,
      "grad_norm": 0.46557503938674927,
      "learning_rate": 5.392844182870449e-07,
      "loss": 0.8828,
      "step": 25230
    },
    {
      "epoch": 2.911282879556991,
      "grad_norm": 0.43622320890426636,
      "learning_rate": 5.323455926927179e-07,
      "loss": 0.8439,
      "step": 25235
    },
    {
      "epoch": 2.9118597138901707,
      "grad_norm": 0.4079887568950653,
      "learning_rate": 5.254515774703927e-07,
      "loss": 0.8835,
      "step": 25240
    },
    {
      "epoch": 2.9124365482233503,
      "grad_norm": 0.43824106454849243,
      "learning_rate": 5.186023757258407e-07,
      "loss": 0.9007,
      "step": 25245
    },
    {
      "epoch": 2.91301338255653,
      "grad_norm": 0.43379032611846924,
      "learning_rate": 5.117979905446269e-07,
      "loss": 0.8738,
      "step": 25250
    },
    {
      "epoch": 2.9135902168897094,
      "grad_norm": 0.43974149227142334,
      "learning_rate": 5.050384249921436e-07,
      "loss": 0.8539,
      "step": 25255
    },
    {
      "epoch": 2.914167051222889,
      "grad_norm": 0.433366596698761,
      "learning_rate": 4.983236821135995e-07,
      "loss": 0.8263,
      "step": 25260
    },
    {
      "epoch": 2.914743885556068,
      "grad_norm": 0.4105539917945862,
      "learning_rate": 4.916537649339858e-07,
      "loss": 0.8926,
      "step": 25265
    },
    {
      "epoch": 2.9153207198892477,
      "grad_norm": 0.41276198625564575,
      "learning_rate": 4.850286764581102e-07,
      "loss": 0.8533,
      "step": 25270
    },
    {
      "epoch": 2.9158975542224272,
      "grad_norm": 0.4376462697982788,
      "learning_rate": 4.784484196706073e-07,
      "loss": 0.8931,
      "step": 25275
    },
    {
      "epoch": 2.916474388555607,
      "grad_norm": 0.46353521943092346,
      "learning_rate": 4.719129975358838e-07,
      "loss": 0.8613,
      "step": 25280
    },
    {
      "epoch": 2.9170512228887864,
      "grad_norm": 0.41219547390937805,
      "learning_rate": 4.6542241299816216e-07,
      "loss": 0.8311,
      "step": 25285
    },
    {
      "epoch": 2.917628057221966,
      "grad_norm": 0.428505539894104,
      "learning_rate": 4.5897666898145896e-07,
      "loss": 0.8886,
      "step": 25290
    },
    {
      "epoch": 2.9182048915551455,
      "grad_norm": 0.4599054753780365,
      "learning_rate": 4.5257576838960704e-07,
      "loss": 0.8833,
      "step": 25295
    },
    {
      "epoch": 2.9187817258883246,
      "grad_norm": 0.428320974111557,
      "learning_rate": 4.4621971410619967e-07,
      "loss": 0.9012,
      "step": 25300
    },
    {
      "epoch": 2.9193585602215046,
      "grad_norm": 0.430586576461792,
      "learning_rate": 4.3990850899467975e-07,
      "loss": 0.8718,
      "step": 25305
    },
    {
      "epoch": 2.9199353945546838,
      "grad_norm": 0.43232461810112,
      "learning_rate": 4.336421558982284e-07,
      "loss": 0.8654,
      "step": 25310
    },
    {
      "epoch": 2.9205122288878633,
      "grad_norm": 0.41640791296958923,
      "learning_rate": 4.274206576398876e-07,
      "loss": 0.8622,
      "step": 25315
    },
    {
      "epoch": 2.921089063221043,
      "grad_norm": 0.4136120080947876,
      "learning_rate": 4.2124401702241524e-07,
      "loss": 0.8064,
      "step": 25320
    },
    {
      "epoch": 2.9216658975542225,
      "grad_norm": 0.44188201427459717,
      "learning_rate": 4.151122368284299e-07,
      "loss": 0.8416,
      "step": 25325
    },
    {
      "epoch": 2.922242731887402,
      "grad_norm": 0.3820507228374481,
      "learning_rate": 4.090253198202887e-07,
      "loss": 0.8859,
      "step": 25330
    },
    {
      "epoch": 2.9228195662205816,
      "grad_norm": 0.3827502429485321,
      "learning_rate": 4.029832687401758e-07,
      "loss": 0.8754,
      "step": 25335
    },
    {
      "epoch": 2.923396400553761,
      "grad_norm": 0.4079684317111969,
      "learning_rate": 3.969860863100472e-07,
      "loss": 0.8505,
      "step": 25340
    },
    {
      "epoch": 2.9239732348869403,
      "grad_norm": 0.40014228224754333,
      "learning_rate": 3.9103377523163065e-07,
      "loss": 0.8446,
      "step": 25345
    },
    {
      "epoch": 2.92455006922012,
      "grad_norm": 0.4371630549430847,
      "learning_rate": 3.851263381864589e-07,
      "loss": 0.8806,
      "step": 25350
    },
    {
      "epoch": 2.9251269035532994,
      "grad_norm": 0.41463762521743774,
      "learning_rate": 3.7926377783585874e-07,
      "loss": 0.9124,
      "step": 25355
    },
    {
      "epoch": 2.925703737886479,
      "grad_norm": 0.44109275937080383,
      "learning_rate": 3.734460968208953e-07,
      "loss": 0.8611,
      "step": 25360
    },
    {
      "epoch": 2.9262805722196585,
      "grad_norm": 0.42749449610710144,
      "learning_rate": 3.67673297762483e-07,
      "loss": 0.8623,
      "step": 25365
    },
    {
      "epoch": 2.926857406552838,
      "grad_norm": 0.4444100558757782,
      "learning_rate": 3.619453832612418e-07,
      "loss": 0.8512,
      "step": 25370
    },
    {
      "epoch": 2.9274342408860177,
      "grad_norm": 0.4210651218891144,
      "learning_rate": 3.562623558976408e-07,
      "loss": 0.8563,
      "step": 25375
    },
    {
      "epoch": 2.928011075219197,
      "grad_norm": 0.4154101014137268,
      "learning_rate": 3.506242182318653e-07,
      "loss": 0.866,
      "step": 25380
    },
    {
      "epoch": 2.928587909552377,
      "grad_norm": 0.40918856859207153,
      "learning_rate": 3.4503097280392807e-07,
      "loss": 0.898,
      "step": 25385
    },
    {
      "epoch": 2.929164743885556,
      "grad_norm": 0.40677812695503235,
      "learning_rate": 3.394826221335912e-07,
      "loss": 0.9173,
      "step": 25390
    },
    {
      "epoch": 2.9297415782187355,
      "grad_norm": 0.3963682949542999,
      "learning_rate": 3.339791687203997e-07,
      "loss": 0.806,
      "step": 25395
    },
    {
      "epoch": 2.930318412551915,
      "grad_norm": 0.49158430099487305,
      "learning_rate": 3.285206150436593e-07,
      "loss": 0.8529,
      "step": 25400
    },
    {
      "epoch": 2.9308952468850946,
      "grad_norm": 0.46478375792503357,
      "learning_rate": 3.2310696356248063e-07,
      "loss": 0.8591,
      "step": 25405
    },
    {
      "epoch": 2.931472081218274,
      "grad_norm": 0.4427299201488495,
      "learning_rate": 3.177382167156906e-07,
      "loss": 0.8901,
      "step": 25410
    },
    {
      "epoch": 2.9320489155514537,
      "grad_norm": 0.37393638491630554,
      "learning_rate": 3.1241437692196563e-07,
      "loss": 0.843,
      "step": 25415
    },
    {
      "epoch": 2.9326257498846333,
      "grad_norm": 0.4250434339046478,
      "learning_rate": 3.0713544657966497e-07,
      "loss": 0.8659,
      "step": 25420
    },
    {
      "epoch": 2.9332025842178124,
      "grad_norm": 0.4124998152256012,
      "learning_rate": 3.019014280669641e-07,
      "loss": 0.8527,
      "step": 25425
    },
    {
      "epoch": 2.933779418550992,
      "grad_norm": 0.5417208075523376,
      "learning_rate": 2.967123237418212e-07,
      "loss": 0.8771,
      "step": 25430
    },
    {
      "epoch": 2.9343562528841716,
      "grad_norm": 0.4095006287097931,
      "learning_rate": 2.91568135941922e-07,
      "loss": 0.8655,
      "step": 25435
    },
    {
      "epoch": 2.934933087217351,
      "grad_norm": 0.43682244420051575,
      "learning_rate": 2.8646886698473484e-07,
      "loss": 0.8607,
      "step": 25440
    },
    {
      "epoch": 2.9355099215505307,
      "grad_norm": 0.4769591987133026,
      "learning_rate": 2.8141451916748887e-07,
      "loss": 0.9018,
      "step": 25445
    },
    {
      "epoch": 2.9360867558837103,
      "grad_norm": 0.42697569727897644,
      "learning_rate": 2.764050947671737e-07,
      "loss": 0.8414,
      "step": 25450
    },
    {
      "epoch": 2.93666359021689,
      "grad_norm": 0.44473573565483093,
      "learning_rate": 2.7144059604055085e-07,
      "loss": 0.8637,
      "step": 25455
    },
    {
      "epoch": 2.937240424550069,
      "grad_norm": 0.40624740719795227,
      "learning_rate": 2.6652102522414233e-07,
      "loss": 0.8681,
      "step": 25460
    },
    {
      "epoch": 2.937817258883249,
      "grad_norm": 0.45977187156677246,
      "learning_rate": 2.6164638453421984e-07,
      "loss": 0.8526,
      "step": 25465
    },
    {
      "epoch": 2.938394093216428,
      "grad_norm": 0.42826855182647705,
      "learning_rate": 2.568166761668156e-07,
      "loss": 0.8874,
      "step": 25470
    },
    {
      "epoch": 2.9389709275496076,
      "grad_norm": 0.450840026140213,
      "learning_rate": 2.5203190229771136e-07,
      "loss": 0.8652,
      "step": 25475
    },
    {
      "epoch": 2.939547761882787,
      "grad_norm": 0.49307262897491455,
      "learning_rate": 2.472920650824828e-07,
      "loss": 0.8745,
      "step": 25480
    },
    {
      "epoch": 2.940124596215967,
      "grad_norm": 0.4430190622806549,
      "learning_rate": 2.4259716665641083e-07,
      "loss": 0.8579,
      "step": 25485
    },
    {
      "epoch": 2.9407014305491463,
      "grad_norm": 0.40895533561706543,
      "learning_rate": 2.3794720913458136e-07,
      "loss": 0.8497,
      "step": 25490
    },
    {
      "epoch": 2.941278264882326,
      "grad_norm": 0.39817044138908386,
      "learning_rate": 2.333421946117853e-07,
      "loss": 0.806,
      "step": 25495
    },
    {
      "epoch": 2.9418550992155055,
      "grad_norm": 0.43439817428588867,
      "learning_rate": 2.2878212516260766e-07,
      "loss": 0.8963,
      "step": 25500
    },
    {
      "epoch": 2.9424319335486846,
      "grad_norm": 0.4119809567928314,
      "learning_rate": 2.242670028413607e-07,
      "loss": 0.8188,
      "step": 25505
    },
    {
      "epoch": 2.943008767881864,
      "grad_norm": 0.4609575569629669,
      "learning_rate": 2.1979682968211733e-07,
      "loss": 0.8902,
      "step": 25510
    },
    {
      "epoch": 2.9435856022150437,
      "grad_norm": 0.47616538405418396,
      "learning_rate": 2.1537160769870002e-07,
      "loss": 0.8088,
      "step": 25515
    },
    {
      "epoch": 2.9441624365482233,
      "grad_norm": 0.4469929039478302,
      "learning_rate": 2.109913388846807e-07,
      "loss": 0.8527,
      "step": 25520
    },
    {
      "epoch": 2.944739270881403,
      "grad_norm": 0.42096269130706787,
      "learning_rate": 2.066560252133698e-07,
      "loss": 0.8832,
      "step": 25525
    },
    {
      "epoch": 2.9453161052145824,
      "grad_norm": 0.5134122371673584,
      "learning_rate": 2.0236566863784944e-07,
      "loss": 0.867,
      "step": 25530
    },
    {
      "epoch": 2.945892939547762,
      "grad_norm": 0.43390166759490967,
      "learning_rate": 1.98120271090918e-07,
      "loss": 0.8658,
      "step": 25535
    },
    {
      "epoch": 2.9464697738809416,
      "grad_norm": 0.47963863611221313,
      "learning_rate": 1.9391983448514562e-07,
      "loss": 0.8758,
      "step": 25540
    },
    {
      "epoch": 2.947046608214121,
      "grad_norm": 0.4790579378604889,
      "learning_rate": 1.8976436071284076e-07,
      "loss": 0.845,
      "step": 25545
    },
    {
      "epoch": 2.9476234425473002,
      "grad_norm": 0.534599244594574,
      "learning_rate": 1.8565385164605042e-07,
      "loss": 0.8906,
      "step": 25550
    },
    {
      "epoch": 2.94820027688048,
      "grad_norm": 0.3933981657028198,
      "learning_rate": 1.815883091365489e-07,
      "loss": 0.8556,
      "step": 25555
    },
    {
      "epoch": 2.9487771112136594,
      "grad_norm": 0.481326162815094,
      "learning_rate": 1.775677350159044e-07,
      "loss": 0.8605,
      "step": 25560
    },
    {
      "epoch": 2.949353945546839,
      "grad_norm": 0.41006627678871155,
      "learning_rate": 1.73592131095357e-07,
      "loss": 0.8734,
      "step": 25565
    },
    {
      "epoch": 2.9499307798800185,
      "grad_norm": 0.40375635027885437,
      "learning_rate": 1.6966149916595176e-07,
      "loss": 0.8292,
      "step": 25570
    },
    {
      "epoch": 2.950507614213198,
      "grad_norm": 0.4399477541446686,
      "learning_rate": 1.657758409984278e-07,
      "loss": 0.8711,
      "step": 25575
    },
    {
      "epoch": 2.9510844485463776,
      "grad_norm": 0.4203029274940491,
      "learning_rate": 1.6193515834329599e-07,
      "loss": 0.8731,
      "step": 25580
    },
    {
      "epoch": 2.9516612828795568,
      "grad_norm": 0.4194481372833252,
      "learning_rate": 1.5813945293078337e-07,
      "loss": 0.8465,
      "step": 25585
    },
    {
      "epoch": 2.9522381172127368,
      "grad_norm": 0.3688070774078369,
      "learning_rate": 1.5438872647086655e-07,
      "loss": 0.844,
      "step": 25590
    },
    {
      "epoch": 2.952814951545916,
      "grad_norm": 0.44683653116226196,
      "learning_rate": 1.5068298065324947e-07,
      "loss": 0.8533,
      "step": 25595
    },
    {
      "epoch": 2.9533917858790955,
      "grad_norm": 0.44640904664993286,
      "learning_rate": 1.470222171473856e-07,
      "loss": 0.8771,
      "step": 25600
    },
    {
      "epoch": 2.953968620212275,
      "grad_norm": 0.42615175247192383,
      "learning_rate": 1.4340643760244464e-07,
      "loss": 0.8491,
      "step": 25605
    },
    {
      "epoch": 2.9545454545454546,
      "grad_norm": 0.44119492173194885,
      "learning_rate": 1.398356436473569e-07,
      "loss": 0.9057,
      "step": 25610
    },
    {
      "epoch": 2.955122288878634,
      "grad_norm": 0.4439990818500519,
      "learning_rate": 1.3630983689075782e-07,
      "loss": 0.8654,
      "step": 25615
    },
    {
      "epoch": 2.9556991232118137,
      "grad_norm": 0.41365984082221985,
      "learning_rate": 1.328290189210435e-07,
      "loss": 0.861,
      "step": 25620
    },
    {
      "epoch": 2.9562759575449933,
      "grad_norm": 0.3996998071670532,
      "learning_rate": 1.293931913063151e-07,
      "loss": 0.9193,
      "step": 25625
    },
    {
      "epoch": 2.9568527918781724,
      "grad_norm": 0.4594114422798157,
      "learning_rate": 1.2600235559443452e-07,
      "loss": 0.8558,
      "step": 25630
    },
    {
      "epoch": 2.957429626211352,
      "grad_norm": 0.4400221109390259,
      "learning_rate": 1.2265651331296869e-07,
      "loss": 0.9092,
      "step": 25635
    },
    {
      "epoch": 2.9580064605445315,
      "grad_norm": 0.4317476749420166,
      "learning_rate": 1.1935566596923408e-07,
      "loss": 0.8826,
      "step": 25640
    },
    {
      "epoch": 2.958583294877711,
      "grad_norm": 0.46063005924224854,
      "learning_rate": 1.1609981505025236e-07,
      "loss": 0.8511,
      "step": 25645
    },
    {
      "epoch": 2.9591601292108907,
      "grad_norm": 0.48726972937583923,
      "learning_rate": 1.1288896202281685e-07,
      "loss": 0.8762,
      "step": 25650
    },
    {
      "epoch": 2.9597369635440702,
      "grad_norm": 0.4288542866706848,
      "learning_rate": 1.0972310833340382e-07,
      "loss": 0.8795,
      "step": 25655
    },
    {
      "epoch": 2.96031379787725,
      "grad_norm": 0.42209890484809875,
      "learning_rate": 1.066022554082391e-07,
      "loss": 0.8819,
      "step": 25660
    },
    {
      "epoch": 2.960890632210429,
      "grad_norm": 0.45075806975364685,
      "learning_rate": 1.0352640465327578e-07,
      "loss": 0.8137,
      "step": 25665
    },
    {
      "epoch": 2.961467466543609,
      "grad_norm": 0.4366128146648407,
      "learning_rate": 1.0049555745419436e-07,
      "loss": 0.8849,
      "step": 25670
    },
    {
      "epoch": 2.962044300876788,
      "grad_norm": 0.40258607268333435,
      "learning_rate": 9.750971517639152e-08,
      "loss": 0.838,
      "step": 25675
    },
    {
      "epoch": 2.9626211352099676,
      "grad_norm": 0.39733338356018066,
      "learning_rate": 9.456887916499125e-08,
      "loss": 0.8313,
      "step": 25680
    },
    {
      "epoch": 2.963197969543147,
      "grad_norm": 0.4235396683216095,
      "learning_rate": 9.16730507448671e-08,
      "loss": 0.8569,
      "step": 25685
    },
    {
      "epoch": 2.9637748038763267,
      "grad_norm": 0.41825294494628906,
      "learning_rate": 8.882223122056443e-08,
      "loss": 0.8876,
      "step": 25690
    },
    {
      "epoch": 2.9643516382095063,
      "grad_norm": 0.4579850137233734,
      "learning_rate": 8.601642187640036e-08,
      "loss": 0.8732,
      "step": 25695
    },
    {
      "epoch": 2.964928472542686,
      "grad_norm": 0.43321606516838074,
      "learning_rate": 8.325562397640819e-08,
      "loss": 0.8948,
      "step": 25700
    },
    {
      "epoch": 2.9655053068758654,
      "grad_norm": 0.4565126895904541,
      "learning_rate": 8.053983876431526e-08,
      "loss": 0.8532,
      "step": 25705
    },
    {
      "epoch": 2.9660821412090446,
      "grad_norm": 0.3763374984264374,
      "learning_rate": 7.786906746358735e-08,
      "loss": 0.858,
      "step": 25710
    },
    {
      "epoch": 2.966658975542224,
      "grad_norm": 0.40120729804039,
      "learning_rate": 7.524331127741757e-08,
      "loss": 0.8648,
      "step": 25715
    },
    {
      "epoch": 2.9672358098754037,
      "grad_norm": 0.42425310611724854,
      "learning_rate": 7.266257138872634e-08,
      "loss": 0.8882,
      "step": 25720
    },
    {
      "epoch": 2.9678126442085833,
      "grad_norm": 0.4503181576728821,
      "learning_rate": 7.012684896011702e-08,
      "loss": 0.7968,
      "step": 25725
    },
    {
      "epoch": 2.968389478541763,
      "grad_norm": 0.4242125451564789,
      "learning_rate": 6.763614513395356e-08,
      "loss": 0.8441,
      "step": 25730
    },
    {
      "epoch": 2.9689663128749424,
      "grad_norm": 0.4024699628353119,
      "learning_rate": 6.519046103230508e-08,
      "loss": 0.9132,
      "step": 25735
    },
    {
      "epoch": 2.969543147208122,
      "grad_norm": 0.4432372748851776,
      "learning_rate": 6.278979775694582e-08,
      "loss": 0.8438,
      "step": 25740
    },
    {
      "epoch": 2.970119981541301,
      "grad_norm": 0.498928040266037,
      "learning_rate": 6.043415638938842e-08,
      "loss": 0.8593,
      "step": 25745
    },
    {
      "epoch": 2.970696815874481,
      "grad_norm": 0.4619358777999878,
      "learning_rate": 5.8123537990850684e-08,
      "loss": 0.8345,
      "step": 25750
    },
    {
      "epoch": 2.97127365020766,
      "grad_norm": 0.49964797496795654,
      "learning_rate": 5.585794360226659e-08,
      "loss": 0.9009,
      "step": 25755
    },
    {
      "epoch": 2.9718504845408398,
      "grad_norm": 0.3956110179424286,
      "learning_rate": 5.3637374244308594e-08,
      "loss": 0.8223,
      "step": 25760
    },
    {
      "epoch": 2.9724273188740193,
      "grad_norm": 0.4627871513366699,
      "learning_rate": 5.146183091732093e-08,
      "loss": 0.8327,
      "step": 25765
    },
    {
      "epoch": 2.973004153207199,
      "grad_norm": 0.4105352461338043,
      "learning_rate": 4.9331314601408495e-08,
      "loss": 0.8556,
      "step": 25770
    },
    {
      "epoch": 2.9735809875403785,
      "grad_norm": 0.4358341693878174,
      "learning_rate": 4.7245826256370194e-08,
      "loss": 0.8812,
      "step": 25775
    },
    {
      "epoch": 2.974157821873558,
      "grad_norm": 0.4529142677783966,
      "learning_rate": 4.520536682171006e-08,
      "loss": 0.8748,
      "step": 25780
    },
    {
      "epoch": 2.9747346562067376,
      "grad_norm": 0.4764014780521393,
      "learning_rate": 4.320993721668165e-08,
      "loss": 0.876,
      "step": 25785
    },
    {
      "epoch": 2.9753114905399167,
      "grad_norm": 0.47748324275016785,
      "learning_rate": 4.1259538340210345e-08,
      "loss": 0.9133,
      "step": 25790
    },
    {
      "epoch": 2.9758883248730963,
      "grad_norm": 0.4135587513446808,
      "learning_rate": 3.9354171070959955e-08,
      "loss": 0.8732,
      "step": 25795
    },
    {
      "epoch": 2.976465159206276,
      "grad_norm": 0.4119958281517029,
      "learning_rate": 3.7493836267310514e-08,
      "loss": 0.8257,
      "step": 25800
    },
    {
      "epoch": 2.9770419935394554,
      "grad_norm": 0.44728052616119385,
      "learning_rate": 3.567853476733607e-08,
      "loss": 0.8718,
      "step": 25805
    },
    {
      "epoch": 2.977618827872635,
      "grad_norm": 0.4038022756576538,
      "learning_rate": 3.390826738883801e-08,
      "loss": 0.8482,
      "step": 25810
    },
    {
      "epoch": 2.9781956622058146,
      "grad_norm": 0.461870938539505,
      "learning_rate": 3.218303492932284e-08,
      "loss": 0.8751,
      "step": 25815
    },
    {
      "epoch": 2.978772496538994,
      "grad_norm": 0.4327203035354614,
      "learning_rate": 3.050283816601329e-08,
      "loss": 0.8683,
      "step": 25820
    },
    {
      "epoch": 2.9793493308721732,
      "grad_norm": 0.4000185430049896,
      "learning_rate": 2.8867677855837217e-08,
      "loss": 0.8966,
      "step": 25825
    },
    {
      "epoch": 2.9799261652053533,
      "grad_norm": 0.4935370087623596,
      "learning_rate": 2.7277554735449794e-08,
      "loss": 0.8866,
      "step": 25830
    },
    {
      "epoch": 2.9805029995385324,
      "grad_norm": 0.40175366401672363,
      "learning_rate": 2.573246952118913e-08,
      "loss": 0.89,
      "step": 25835
    },
    {
      "epoch": 2.981079833871712,
      "grad_norm": 0.4187186360359192,
      "learning_rate": 2.4232422909131745e-08,
      "loss": 0.8667,
      "step": 25840
    },
    {
      "epoch": 2.9816566682048915,
      "grad_norm": 0.46566322445869446,
      "learning_rate": 2.2777415575037098e-08,
      "loss": 0.847,
      "step": 25845
    },
    {
      "epoch": 2.982233502538071,
      "grad_norm": 0.40721821784973145,
      "learning_rate": 2.136744817440306e-08,
      "loss": 0.9071,
      "step": 25850
    },
    {
      "epoch": 2.9828103368712506,
      "grad_norm": 0.3912089765071869,
      "learning_rate": 2.000252134241043e-08,
      "loss": 0.8563,
      "step": 25855
    },
    {
      "epoch": 2.98338717120443,
      "grad_norm": 0.4236389696598053,
      "learning_rate": 1.8682635693978433e-08,
      "loss": 0.8758,
      "step": 25860
    },
    {
      "epoch": 2.9839640055376098,
      "grad_norm": 0.4218614101409912,
      "learning_rate": 1.7407791823698115e-08,
      "loss": 0.8246,
      "step": 25865
    },
    {
      "epoch": 2.984540839870789,
      "grad_norm": 0.47850501537323,
      "learning_rate": 1.6177990305910053e-08,
      "loss": 0.8596,
      "step": 25870
    },
    {
      "epoch": 2.985117674203969,
      "grad_norm": 0.440022736787796,
      "learning_rate": 1.499323169462663e-08,
      "loss": 0.8545,
      "step": 25875
    },
    {
      "epoch": 2.985694508537148,
      "grad_norm": 0.4404289126396179,
      "learning_rate": 1.3853516523587572e-08,
      "loss": 0.8757,
      "step": 25880
    },
    {
      "epoch": 2.9862713428703276,
      "grad_norm": 0.4234904646873474,
      "learning_rate": 1.275884530622662e-08,
      "loss": 0.8972,
      "step": 25885
    },
    {
      "epoch": 2.986848177203507,
      "grad_norm": 0.416761577129364,
      "learning_rate": 1.1709218535715938e-08,
      "loss": 0.8496,
      "step": 25890
    },
    {
      "epoch": 2.9874250115366867,
      "grad_norm": 0.3803170621395111,
      "learning_rate": 1.0704636684910618e-08,
      "loss": 0.9054,
      "step": 25895
    },
    {
      "epoch": 2.9880018458698663,
      "grad_norm": 0.47127583622932434,
      "learning_rate": 9.74510020635977e-09,
      "loss": 0.8522,
      "step": 25900
    },
    {
      "epoch": 2.988578680203046,
      "grad_norm": 0.4301356375217438,
      "learning_rate": 8.83060953235093e-09,
      "loss": 0.8353,
      "step": 25905
    },
    {
      "epoch": 2.9891555145362254,
      "grad_norm": 0.5385040044784546,
      "learning_rate": 7.96116507485456e-09,
      "loss": 0.9251,
      "step": 25910
    },
    {
      "epoch": 2.9897323488694045,
      "grad_norm": 0.5166802406311035,
      "learning_rate": 7.136767225568441e-09,
      "loss": 0.8873,
      "step": 25915
    },
    {
      "epoch": 2.990309183202584,
      "grad_norm": 0.421013742685318,
      "learning_rate": 6.357416355884382e-09,
      "loss": 0.8365,
      "step": 25920
    },
    {
      "epoch": 2.9908860175357637,
      "grad_norm": 0.4109059274196625,
      "learning_rate": 5.62311281688821e-09,
      "loss": 0.8723,
      "step": 25925
    },
    {
      "epoch": 2.9914628518689432,
      "grad_norm": 0.5444170236587524,
      "learning_rate": 4.93385693940418e-09,
      "loss": 0.8993,
      "step": 25930
    },
    {
      "epoch": 2.992039686202123,
      "grad_norm": 0.4387677013874054,
      "learning_rate": 4.289649033928367e-09,
      "loss": 0.8556,
      "step": 25935
    },
    {
      "epoch": 2.9926165205353024,
      "grad_norm": 0.4136563539505005,
      "learning_rate": 3.6904893906730687e-09,
      "loss": 0.8288,
      "step": 25940
    },
    {
      "epoch": 2.993193354868482,
      "grad_norm": 0.4198967218399048,
      "learning_rate": 3.1363782795779117e-09,
      "loss": 0.8665,
      "step": 25945
    },
    {
      "epoch": 2.993770189201661,
      "grad_norm": 0.4858287572860718,
      "learning_rate": 2.627315950265441e-09,
      "loss": 0.9142,
      "step": 25950
    },
    {
      "epoch": 2.994347023534841,
      "grad_norm": 0.43194618821144104,
      "learning_rate": 2.1633026320633244e-09,
      "loss": 0.8462,
      "step": 25955
    },
    {
      "epoch": 2.99492385786802,
      "grad_norm": 0.5099018812179565,
      "learning_rate": 1.744338534015455e-09,
      "loss": 0.8614,
      "step": 25960
    },
    {
      "epoch": 2.9955006922011997,
      "grad_norm": 0.409798800945282,
      "learning_rate": 1.3704238448708496e-09,
      "loss": 0.8789,
      "step": 25965
    },
    {
      "epoch": 2.9960775265343793,
      "grad_norm": 0.38359326124191284,
      "learning_rate": 1.041558733061443e-09,
      "loss": 0.88,
      "step": 25970
    },
    {
      "epoch": 2.996654360867559,
      "grad_norm": 0.44114139676094055,
      "learning_rate": 7.577433467576001e-10,
      "loss": 0.8549,
      "step": 25975
    },
    {
      "epoch": 2.9972311952007384,
      "grad_norm": 0.4402305781841278,
      "learning_rate": 5.189778138237067e-10,
      "loss": 0.8814,
      "step": 25980
    },
    {
      "epoch": 2.997808029533918,
      "grad_norm": 0.43162301182746887,
      "learning_rate": 3.25262241795965e-10,
      "loss": 0.902,
      "step": 25985
    },
    {
      "epoch": 2.9983848638670976,
      "grad_norm": 0.43010595440864563,
      "learning_rate": 1.7659671797121134e-10,
      "loss": 0.8726,
      "step": 25990
    },
    {
      "epoch": 2.9989616982002767,
      "grad_norm": 0.47699853777885437,
      "learning_rate": 7.298130931809865e-11,
      "loss": 0.8768,
      "step": 25995
    },
    {
      "epoch": 2.9995385325334563,
      "grad_norm": 0.4188362956047058,
      "learning_rate": 1.4416062510402839e-11,
      "loss": 0.8605,
      "step": 26000
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.9587316513061523,
      "eval_runtime": 959.8828,
      "eval_samples_per_second": 15.992,
      "eval_steps_per_second": 1.0,
      "step": 26004
    },
    {
      "epoch": 3.0,
      "step": 26004,
      "total_flos": 3.7211673621310734e+19,
      "train_loss": 0.9273421984235244,
      "train_runtime": 99241.3322,
      "train_samples_per_second": 4.192,
      "train_steps_per_second": 0.262
    }
  ],
  "logging_steps": 5,
  "max_steps": 26004,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.7211673621310734e+19,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}