{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 26004, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00011536686663590217, "grad_norm": 0.36804234981536865, "learning_rate": 7.689350249903884e-08, "loss": 1.1237, "step": 1 }, { "epoch": 0.0005768343331795108, "grad_norm": 0.3614605665206909, "learning_rate": 3.844675124951942e-07, "loss": 1.1698, "step": 5 }, { "epoch": 0.0011536686663590216, "grad_norm": 0.4128214120864868, "learning_rate": 7.689350249903884e-07, "loss": 1.2018, "step": 10 }, { "epoch": 0.0017305029995385325, "grad_norm": 0.38955822587013245, "learning_rate": 1.1534025374855826e-06, "loss": 1.1142, "step": 15 }, { "epoch": 0.0023073373327180432, "grad_norm": 0.3124810457229614, "learning_rate": 1.5378700499807767e-06, "loss": 1.1988, "step": 20 }, { "epoch": 0.002884171665897554, "grad_norm": 0.37434640526771545, "learning_rate": 1.9223375624759706e-06, "loss": 1.1647, "step": 25 }, { "epoch": 0.003461005999077065, "grad_norm": 0.3723091781139374, "learning_rate": 2.3068050749711653e-06, "loss": 1.153, "step": 30 }, { "epoch": 0.0040378403322565756, "grad_norm": 0.29967185854911804, "learning_rate": 2.691272587466359e-06, "loss": 1.1444, "step": 35 }, { "epoch": 0.0046146746654360865, "grad_norm": 0.3371390402317047, "learning_rate": 3.0757400999615534e-06, "loss": 1.1453, "step": 40 }, { "epoch": 0.005191508998615597, "grad_norm": 0.3575346767902374, "learning_rate": 3.4602076124567477e-06, "loss": 1.1349, "step": 45 }, { "epoch": 0.005768343331795108, "grad_norm": 0.3574388027191162, "learning_rate": 3.844675124951941e-06, "loss": 1.11, "step": 50 }, { "epoch": 0.006345177664974619, "grad_norm": 0.28124046325683594, "learning_rate": 4.229142637447136e-06, "loss": 1.0939, "step": 55 }, { "epoch": 0.00692201199815413, "grad_norm": 0.348366379737854, "learning_rate": 4.6136101499423305e-06, "loss": 1.0845, "step": 60 }, { "epoch": 0.007498846331333641, "grad_norm": 0.28888341784477234, "learning_rate": 4.998077662437525e-06, "loss": 1.1139, "step": 65 }, { "epoch": 0.008075680664513151, "grad_norm": 0.3280262351036072, "learning_rate": 5.382545174932718e-06, "loss": 1.124, "step": 70 }, { "epoch": 0.008652514997692663, "grad_norm": 0.3112921118736267, "learning_rate": 5.7670126874279126e-06, "loss": 1.1424, "step": 75 }, { "epoch": 0.009229349330872173, "grad_norm": 0.31750813126564026, "learning_rate": 6.151480199923107e-06, "loss": 1.0663, "step": 80 }, { "epoch": 0.009806183664051685, "grad_norm": 0.2857876718044281, "learning_rate": 6.535947712418301e-06, "loss": 1.0595, "step": 85 }, { "epoch": 0.010383017997231195, "grad_norm": 0.2947840094566345, "learning_rate": 6.920415224913495e-06, "loss": 1.1298, "step": 90 }, { "epoch": 0.010959852330410707, "grad_norm": 0.3226536810398102, "learning_rate": 7.304882737408689e-06, "loss": 1.0671, "step": 95 }, { "epoch": 0.011536686663590217, "grad_norm": 0.2899499237537384, "learning_rate": 7.689350249903882e-06, "loss": 1.0859, "step": 100 }, { "epoch": 0.012113520996769728, "grad_norm": 0.2774418294429779, "learning_rate": 8.073817762399077e-06, "loss": 1.0811, "step": 105 }, { "epoch": 0.012690355329949238, "grad_norm": 0.2815534174442291, "learning_rate": 8.458285274894273e-06, "loss": 1.0238, "step": 110 }, { "epoch": 0.01326718966312875, "grad_norm": 0.28051742911338806, "learning_rate": 8.842752787389466e-06, "loss": 1.0586, "step": 115 }, { "epoch": 0.01384402399630826, "grad_norm": 0.263121098279953, "learning_rate": 9.227220299884661e-06, "loss": 1.026, "step": 120 }, { "epoch": 0.01442085832948777, "grad_norm": 0.3776288330554962, "learning_rate": 9.611687812379855e-06, "loss": 1.0285, "step": 125 }, { "epoch": 0.014997692662667282, "grad_norm": 0.2619612514972687, "learning_rate": 9.99615532487505e-06, "loss": 1.0555, "step": 130 }, { "epoch": 0.015574526995846792, "grad_norm": 0.2866584062576294, "learning_rate": 1.0380622837370241e-05, "loss": 1.029, "step": 135 }, { "epoch": 0.016151361329026302, "grad_norm": 0.27494966983795166, "learning_rate": 1.0765090349865437e-05, "loss": 0.9903, "step": 140 }, { "epoch": 0.016728195662205816, "grad_norm": 0.2557440996170044, "learning_rate": 1.1149557862360632e-05, "loss": 0.9651, "step": 145 }, { "epoch": 0.017305029995385326, "grad_norm": 0.27979356050491333, "learning_rate": 1.1534025374855825e-05, "loss": 0.9943, "step": 150 }, { "epoch": 0.017881864328564836, "grad_norm": 0.2777612507343292, "learning_rate": 1.191849288735102e-05, "loss": 1.0984, "step": 155 }, { "epoch": 0.018458698661744346, "grad_norm": 0.28315240144729614, "learning_rate": 1.2302960399846214e-05, "loss": 1.049, "step": 160 }, { "epoch": 0.01903553299492386, "grad_norm": 0.2681189477443695, "learning_rate": 1.2687427912341407e-05, "loss": 1.0103, "step": 165 }, { "epoch": 0.01961236732810337, "grad_norm": 0.2712986171245575, "learning_rate": 1.3071895424836602e-05, "loss": 1.0019, "step": 170 }, { "epoch": 0.02018920166128288, "grad_norm": 0.3040061295032501, "learning_rate": 1.3456362937331796e-05, "loss": 0.9938, "step": 175 }, { "epoch": 0.02076603599446239, "grad_norm": 0.2698034346103668, "learning_rate": 1.384083044982699e-05, "loss": 1.0235, "step": 180 }, { "epoch": 0.0213428703276419, "grad_norm": 0.2864539921283722, "learning_rate": 1.4225297962322184e-05, "loss": 1.0175, "step": 185 }, { "epoch": 0.021919704660821413, "grad_norm": 0.2855101227760315, "learning_rate": 1.4609765474817378e-05, "loss": 1.0239, "step": 190 }, { "epoch": 0.022496538994000923, "grad_norm": 0.30460137128829956, "learning_rate": 1.4994232987312573e-05, "loss": 0.9947, "step": 195 }, { "epoch": 0.023073373327180433, "grad_norm": 0.3101496398448944, "learning_rate": 1.5378700499807765e-05, "loss": 1.0407, "step": 200 }, { "epoch": 0.023650207660359943, "grad_norm": 0.29148516058921814, "learning_rate": 1.576316801230296e-05, "loss": 1.0206, "step": 205 }, { "epoch": 0.024227041993539457, "grad_norm": 0.3594810664653778, "learning_rate": 1.6147635524798155e-05, "loss": 1.0045, "step": 210 }, { "epoch": 0.024803876326718967, "grad_norm": 0.28806623816490173, "learning_rate": 1.653210303729335e-05, "loss": 1.0107, "step": 215 }, { "epoch": 0.025380710659898477, "grad_norm": 0.2913208603858948, "learning_rate": 1.6916570549788545e-05, "loss": 0.9894, "step": 220 }, { "epoch": 0.025957544993077987, "grad_norm": 0.30780768394470215, "learning_rate": 1.7301038062283735e-05, "loss": 1.0328, "step": 225 }, { "epoch": 0.0265343793262575, "grad_norm": 0.3564111292362213, "learning_rate": 1.7685505574778932e-05, "loss": 1.0326, "step": 230 }, { "epoch": 0.02711121365943701, "grad_norm": 0.3441677689552307, "learning_rate": 1.8069973087274125e-05, "loss": 0.9712, "step": 235 }, { "epoch": 0.02768804799261652, "grad_norm": 0.3250250518321991, "learning_rate": 1.8454440599769322e-05, "loss": 0.9905, "step": 240 }, { "epoch": 0.02826488232579603, "grad_norm": 0.30477702617645264, "learning_rate": 1.8838908112264512e-05, "loss": 0.9891, "step": 245 }, { "epoch": 0.02884171665897554, "grad_norm": 0.3071553707122803, "learning_rate": 1.922337562475971e-05, "loss": 1.0037, "step": 250 }, { "epoch": 0.029418550992155054, "grad_norm": 0.304791659116745, "learning_rate": 1.9607843137254903e-05, "loss": 0.9961, "step": 255 }, { "epoch": 0.029995385325334564, "grad_norm": 0.4236905872821808, "learning_rate": 1.99923106497501e-05, "loss": 0.9813, "step": 260 }, { "epoch": 0.030572219658514074, "grad_norm": 0.30435487627983093, "learning_rate": 2.0376778162245293e-05, "loss": 1.0412, "step": 265 }, { "epoch": 0.031149053991693584, "grad_norm": 0.30698785185813904, "learning_rate": 2.0761245674740483e-05, "loss": 1.0284, "step": 270 }, { "epoch": 0.031725888324873094, "grad_norm": 0.289407879114151, "learning_rate": 2.114571318723568e-05, "loss": 0.9807, "step": 275 }, { "epoch": 0.032302722658052604, "grad_norm": 0.3242761492729187, "learning_rate": 2.1530180699730873e-05, "loss": 1.0088, "step": 280 }, { "epoch": 0.03287955699123212, "grad_norm": 0.2820676863193512, "learning_rate": 2.191464821222607e-05, "loss": 0.9437, "step": 285 }, { "epoch": 0.03345639132441163, "grad_norm": 0.30823391675949097, "learning_rate": 2.2299115724721263e-05, "loss": 0.9973, "step": 290 }, { "epoch": 0.03403322565759114, "grad_norm": 0.32123836874961853, "learning_rate": 2.2683583237216457e-05, "loss": 0.9786, "step": 295 }, { "epoch": 0.03461005999077065, "grad_norm": 0.30905139446258545, "learning_rate": 2.306805074971165e-05, "loss": 1.0316, "step": 300 }, { "epoch": 0.03518689432395016, "grad_norm": 0.36113137006759644, "learning_rate": 2.3452518262206844e-05, "loss": 1.0258, "step": 305 }, { "epoch": 0.03576372865712967, "grad_norm": 0.3342955708503723, "learning_rate": 2.383698577470204e-05, "loss": 1.0542, "step": 310 }, { "epoch": 0.03634056299030918, "grad_norm": 0.3617742359638214, "learning_rate": 2.422145328719723e-05, "loss": 0.9642, "step": 315 }, { "epoch": 0.03691739732348869, "grad_norm": 0.3330320417881012, "learning_rate": 2.4605920799692427e-05, "loss": 0.9997, "step": 320 }, { "epoch": 0.0374942316566682, "grad_norm": 0.3109203279018402, "learning_rate": 2.499038831218762e-05, "loss": 1.0317, "step": 325 }, { "epoch": 0.03807106598984772, "grad_norm": 0.3200637400150299, "learning_rate": 2.5374855824682814e-05, "loss": 1.0115, "step": 330 }, { "epoch": 0.03864790032302723, "grad_norm": 0.3082106411457062, "learning_rate": 2.575932333717801e-05, "loss": 0.9983, "step": 335 }, { "epoch": 0.03922473465620674, "grad_norm": 0.31465351581573486, "learning_rate": 2.6143790849673204e-05, "loss": 0.9985, "step": 340 }, { "epoch": 0.03980156898938625, "grad_norm": 0.2957000434398651, "learning_rate": 2.6528258362168395e-05, "loss": 0.9736, "step": 345 }, { "epoch": 0.04037840332256576, "grad_norm": 0.32760462164878845, "learning_rate": 2.691272587466359e-05, "loss": 0.9602, "step": 350 }, { "epoch": 0.04095523765574527, "grad_norm": 0.3450353145599365, "learning_rate": 2.7297193387158788e-05, "loss": 1.049, "step": 355 }, { "epoch": 0.04153207198892478, "grad_norm": 1.0949097871780396, "learning_rate": 2.768166089965398e-05, "loss": 0.9967, "step": 360 }, { "epoch": 0.04210890632210429, "grad_norm": 0.3299037218093872, "learning_rate": 2.806612841214917e-05, "loss": 1.0066, "step": 365 }, { "epoch": 0.0426857406552838, "grad_norm": 0.32893791794776917, "learning_rate": 2.845059592464437e-05, "loss": 0.9696, "step": 370 }, { "epoch": 0.043262574988463316, "grad_norm": 0.3077922463417053, "learning_rate": 2.8835063437139565e-05, "loss": 1.0444, "step": 375 }, { "epoch": 0.043839409321642826, "grad_norm": 0.33225706219673157, "learning_rate": 2.9219530949634755e-05, "loss": 1.0146, "step": 380 }, { "epoch": 0.044416243654822336, "grad_norm": 0.3136041462421417, "learning_rate": 2.960399846212995e-05, "loss": 0.9937, "step": 385 }, { "epoch": 0.044993077988001846, "grad_norm": 0.3218904137611389, "learning_rate": 2.9988465974625146e-05, "loss": 0.9399, "step": 390 }, { "epoch": 0.045569912321181356, "grad_norm": 0.32936355471611023, "learning_rate": 3.0372933487120342e-05, "loss": 1.0736, "step": 395 }, { "epoch": 0.046146746654360866, "grad_norm": 0.29534292221069336, "learning_rate": 3.075740099961553e-05, "loss": 1.0335, "step": 400 }, { "epoch": 0.046723580987540377, "grad_norm": 0.33607929944992065, "learning_rate": 3.1141868512110726e-05, "loss": 0.9879, "step": 405 }, { "epoch": 0.04730041532071989, "grad_norm": 0.31074193120002747, "learning_rate": 3.152633602460592e-05, "loss": 1.0075, "step": 410 }, { "epoch": 0.0478772496538994, "grad_norm": 0.2880527675151825, "learning_rate": 3.191080353710111e-05, "loss": 1.0603, "step": 415 }, { "epoch": 0.048454083987078914, "grad_norm": 0.36315208673477173, "learning_rate": 3.229527104959631e-05, "loss": 1.0167, "step": 420 }, { "epoch": 0.049030918320258424, "grad_norm": 0.3171013295650482, "learning_rate": 3.2679738562091506e-05, "loss": 0.9625, "step": 425 }, { "epoch": 0.049607752653437934, "grad_norm": 0.32077813148498535, "learning_rate": 3.30642060745867e-05, "loss": 0.9845, "step": 430 }, { "epoch": 0.050184586986617444, "grad_norm": 0.3356740176677704, "learning_rate": 3.344867358708189e-05, "loss": 0.9825, "step": 435 }, { "epoch": 0.050761421319796954, "grad_norm": 0.2968508005142212, "learning_rate": 3.383314109957709e-05, "loss": 0.9812, "step": 440 }, { "epoch": 0.051338255652976464, "grad_norm": 0.30198368430137634, "learning_rate": 3.421760861207228e-05, "loss": 1.034, "step": 445 }, { "epoch": 0.051915089986155974, "grad_norm": 0.2889617681503296, "learning_rate": 3.460207612456747e-05, "loss": 1.0387, "step": 450 }, { "epoch": 0.052491924319335484, "grad_norm": 0.2965385615825653, "learning_rate": 3.498654363706267e-05, "loss": 0.9649, "step": 455 }, { "epoch": 0.053068758652515, "grad_norm": 0.2926345765590668, "learning_rate": 3.5371011149557864e-05, "loss": 0.981, "step": 460 }, { "epoch": 0.05364559298569451, "grad_norm": 0.2872851490974426, "learning_rate": 3.575547866205306e-05, "loss": 0.9766, "step": 465 }, { "epoch": 0.05422242731887402, "grad_norm": 0.2824111878871918, "learning_rate": 3.613994617454825e-05, "loss": 0.9689, "step": 470 }, { "epoch": 0.05479926165205353, "grad_norm": 0.29697051644325256, "learning_rate": 3.652441368704345e-05, "loss": 1.02, "step": 475 }, { "epoch": 0.05537609598523304, "grad_norm": 0.31594452261924744, "learning_rate": 3.6908881199538644e-05, "loss": 0.9976, "step": 480 }, { "epoch": 0.05595293031841255, "grad_norm": 0.3125144839286804, "learning_rate": 3.7293348712033834e-05, "loss": 0.9638, "step": 485 }, { "epoch": 0.05652976465159206, "grad_norm": 0.3013385832309723, "learning_rate": 3.7677816224529024e-05, "loss": 0.9993, "step": 490 }, { "epoch": 0.05710659898477157, "grad_norm": 0.2799026370048523, "learning_rate": 3.806228373702422e-05, "loss": 1.0197, "step": 495 }, { "epoch": 0.05768343331795108, "grad_norm": 0.3162376880645752, "learning_rate": 3.844675124951942e-05, "loss": 0.9985, "step": 500 }, { "epoch": 0.0582602676511306, "grad_norm": 0.30490589141845703, "learning_rate": 3.883121876201461e-05, "loss": 0.9825, "step": 505 }, { "epoch": 0.05883710198431011, "grad_norm": 0.2959028482437134, "learning_rate": 3.9215686274509805e-05, "loss": 1.0566, "step": 510 }, { "epoch": 0.05941393631748962, "grad_norm": 0.29472246766090393, "learning_rate": 3.9600153787005e-05, "loss": 0.9864, "step": 515 }, { "epoch": 0.05999077065066913, "grad_norm": 0.28552672266960144, "learning_rate": 3.99846212995002e-05, "loss": 1.0143, "step": 520 }, { "epoch": 0.06056760498384864, "grad_norm": 0.27690058946609497, "learning_rate": 4.036908881199539e-05, "loss": 0.9875, "step": 525 }, { "epoch": 0.06114443931702815, "grad_norm": 0.269163578748703, "learning_rate": 4.0753556324490586e-05, "loss": 0.9781, "step": 530 }, { "epoch": 0.06172127365020766, "grad_norm": 0.27882060408592224, "learning_rate": 4.1138023836985776e-05, "loss": 1.0087, "step": 535 }, { "epoch": 0.06229810798338717, "grad_norm": 0.28865769505500793, "learning_rate": 4.1522491349480966e-05, "loss": 0.9878, "step": 540 }, { "epoch": 0.06287494231656668, "grad_norm": 0.29274407029151917, "learning_rate": 4.190695886197616e-05, "loss": 0.9748, "step": 545 }, { "epoch": 0.06345177664974619, "grad_norm": 0.2818305194377899, "learning_rate": 4.229142637447136e-05, "loss": 0.9791, "step": 550 }, { "epoch": 0.0640286109829257, "grad_norm": 0.29222431778907776, "learning_rate": 4.2675893886966556e-05, "loss": 0.9484, "step": 555 }, { "epoch": 0.06460544531610521, "grad_norm": 0.301176518201828, "learning_rate": 4.3060361399461746e-05, "loss": 0.9872, "step": 560 }, { "epoch": 0.06518227964928472, "grad_norm": 0.2671689987182617, "learning_rate": 4.344482891195694e-05, "loss": 0.9763, "step": 565 }, { "epoch": 0.06575911398246424, "grad_norm": 0.2748938202857971, "learning_rate": 4.382929642445214e-05, "loss": 1.0321, "step": 570 }, { "epoch": 0.06633594831564375, "grad_norm": 0.2703133821487427, "learning_rate": 4.421376393694733e-05, "loss": 1.0098, "step": 575 }, { "epoch": 0.06691278264882326, "grad_norm": 0.4125090539455414, "learning_rate": 4.459823144944253e-05, "loss": 0.9726, "step": 580 }, { "epoch": 0.06748961698200277, "grad_norm": 0.30746424198150635, "learning_rate": 4.498269896193772e-05, "loss": 0.9914, "step": 585 }, { "epoch": 0.06806645131518228, "grad_norm": 0.2738732695579529, "learning_rate": 4.5367166474432914e-05, "loss": 0.9805, "step": 590 }, { "epoch": 0.0686432856483618, "grad_norm": 0.2670000195503235, "learning_rate": 4.5751633986928104e-05, "loss": 1.0024, "step": 595 }, { "epoch": 0.0692201199815413, "grad_norm": 0.27094706892967224, "learning_rate": 4.61361014994233e-05, "loss": 1.0052, "step": 600 }, { "epoch": 0.06979695431472081, "grad_norm": 0.2630774974822998, "learning_rate": 4.65205690119185e-05, "loss": 1.0215, "step": 605 }, { "epoch": 0.07037378864790032, "grad_norm": 0.70416659116745, "learning_rate": 4.690503652441369e-05, "loss": 1.0201, "step": 610 }, { "epoch": 0.07095062298107983, "grad_norm": 0.2626412808895111, "learning_rate": 4.7289504036908884e-05, "loss": 0.9938, "step": 615 }, { "epoch": 0.07152745731425934, "grad_norm": 0.3165227472782135, "learning_rate": 4.767397154940408e-05, "loss": 1.0007, "step": 620 }, { "epoch": 0.07210429164743885, "grad_norm": 0.26682278513908386, "learning_rate": 4.805843906189927e-05, "loss": 0.9994, "step": 625 }, { "epoch": 0.07268112598061836, "grad_norm": 0.26086610555648804, "learning_rate": 4.844290657439446e-05, "loss": 1.0282, "step": 630 }, { "epoch": 0.07325796031379787, "grad_norm": 0.2658156454563141, "learning_rate": 4.882737408688966e-05, "loss": 0.9934, "step": 635 }, { "epoch": 0.07383479464697738, "grad_norm": 0.2602865695953369, "learning_rate": 4.9211841599384855e-05, "loss": 0.992, "step": 640 }, { "epoch": 0.0744116289801569, "grad_norm": 0.2830445170402527, "learning_rate": 4.9596309111880045e-05, "loss": 0.9853, "step": 645 }, { "epoch": 0.0749884633133364, "grad_norm": 0.27104446291923523, "learning_rate": 4.998077662437524e-05, "loss": 1.0037, "step": 650 }, { "epoch": 0.07556529764651591, "grad_norm": 0.28188735246658325, "learning_rate": 5.036524413687044e-05, "loss": 0.9755, "step": 655 }, { "epoch": 0.07614213197969544, "grad_norm": 0.2641187012195587, "learning_rate": 5.074971164936563e-05, "loss": 0.963, "step": 660 }, { "epoch": 0.07671896631287495, "grad_norm": 0.2338995337486267, "learning_rate": 5.113417916186083e-05, "loss": 0.986, "step": 665 }, { "epoch": 0.07729580064605446, "grad_norm": 0.2605430483818054, "learning_rate": 5.151864667435602e-05, "loss": 0.9924, "step": 670 }, { "epoch": 0.07787263497923397, "grad_norm": 0.252963662147522, "learning_rate": 5.190311418685121e-05, "loss": 0.9906, "step": 675 }, { "epoch": 0.07844946931241348, "grad_norm": 0.2503387928009033, "learning_rate": 5.228758169934641e-05, "loss": 1.0003, "step": 680 }, { "epoch": 0.07902630364559299, "grad_norm": 0.26554322242736816, "learning_rate": 5.26720492118416e-05, "loss": 0.9846, "step": 685 }, { "epoch": 0.0796031379787725, "grad_norm": 0.28086698055267334, "learning_rate": 5.305651672433679e-05, "loss": 1.0031, "step": 690 }, { "epoch": 0.08017997231195201, "grad_norm": 0.25612834095954895, "learning_rate": 5.344098423683199e-05, "loss": 0.9386, "step": 695 }, { "epoch": 0.08075680664513152, "grad_norm": 0.2532890737056732, "learning_rate": 5.382545174932718e-05, "loss": 0.9487, "step": 700 }, { "epoch": 0.08133364097831103, "grad_norm": 0.2668962776660919, "learning_rate": 5.4209919261822386e-05, "loss": 0.9955, "step": 705 }, { "epoch": 0.08191047531149054, "grad_norm": 0.2815825939178467, "learning_rate": 5.4594386774317576e-05, "loss": 0.941, "step": 710 }, { "epoch": 0.08248730964467005, "grad_norm": 0.25937941670417786, "learning_rate": 5.4978854286812766e-05, "loss": 0.9959, "step": 715 }, { "epoch": 0.08306414397784956, "grad_norm": 0.26474928855895996, "learning_rate": 5.536332179930796e-05, "loss": 1.0061, "step": 720 }, { "epoch": 0.08364097831102907, "grad_norm": 0.24899785220623016, "learning_rate": 5.574778931180315e-05, "loss": 0.9811, "step": 725 }, { "epoch": 0.08421781264420858, "grad_norm": 0.2696942389011383, "learning_rate": 5.613225682429834e-05, "loss": 0.9386, "step": 730 }, { "epoch": 0.08479464697738809, "grad_norm": 0.25111255049705505, "learning_rate": 5.651672433679355e-05, "loss": 0.9834, "step": 735 }, { "epoch": 0.0853714813105676, "grad_norm": 0.25918304920196533, "learning_rate": 5.690119184928874e-05, "loss": 0.952, "step": 740 }, { "epoch": 0.08594831564374712, "grad_norm": 0.2587113380432129, "learning_rate": 5.728565936178393e-05, "loss": 0.9776, "step": 745 }, { "epoch": 0.08652514997692663, "grad_norm": 0.23401151597499847, "learning_rate": 5.767012687427913e-05, "loss": 0.9666, "step": 750 }, { "epoch": 0.08710198431010614, "grad_norm": 0.23831018805503845, "learning_rate": 5.805459438677432e-05, "loss": 0.9519, "step": 755 }, { "epoch": 0.08767881864328565, "grad_norm": 0.25140026211738586, "learning_rate": 5.843906189926951e-05, "loss": 0.949, "step": 760 }, { "epoch": 0.08825565297646516, "grad_norm": 0.23894332349300385, "learning_rate": 5.882352941176471e-05, "loss": 1.0335, "step": 765 }, { "epoch": 0.08883248730964467, "grad_norm": 0.2703632414340973, "learning_rate": 5.92079969242599e-05, "loss": 1.0152, "step": 770 }, { "epoch": 0.08940932164282418, "grad_norm": 0.2679862380027771, "learning_rate": 5.95924644367551e-05, "loss": 1.0539, "step": 775 }, { "epoch": 0.08998615597600369, "grad_norm": 0.24022874236106873, "learning_rate": 5.997693194925029e-05, "loss": 0.9764, "step": 780 }, { "epoch": 0.0905629903091832, "grad_norm": 0.25160306692123413, "learning_rate": 6.036139946174548e-05, "loss": 1.0094, "step": 785 }, { "epoch": 0.09113982464236271, "grad_norm": 0.25750991702079773, "learning_rate": 6.0745866974240685e-05, "loss": 1.0163, "step": 790 }, { "epoch": 0.09171665897554222, "grad_norm": 0.24511879682540894, "learning_rate": 6.113033448673587e-05, "loss": 0.9777, "step": 795 }, { "epoch": 0.09229349330872173, "grad_norm": 0.23882803320884705, "learning_rate": 6.151480199923106e-05, "loss": 0.9944, "step": 800 }, { "epoch": 0.09287032764190124, "grad_norm": 0.235401913523674, "learning_rate": 6.189926951172626e-05, "loss": 0.9443, "step": 805 }, { "epoch": 0.09344716197508075, "grad_norm": 0.22409269213676453, "learning_rate": 6.228373702422145e-05, "loss": 0.9946, "step": 810 }, { "epoch": 0.09402399630826026, "grad_norm": 0.26367950439453125, "learning_rate": 6.266820453671664e-05, "loss": 1.0017, "step": 815 }, { "epoch": 0.09460083064143977, "grad_norm": 0.24628588557243347, "learning_rate": 6.305267204921185e-05, "loss": 0.9653, "step": 820 }, { "epoch": 0.09517766497461928, "grad_norm": 0.24282719194889069, "learning_rate": 6.343713956170704e-05, "loss": 0.9741, "step": 825 }, { "epoch": 0.0957544993077988, "grad_norm": 0.22998395562171936, "learning_rate": 6.382160707420223e-05, "loss": 0.9731, "step": 830 }, { "epoch": 0.09633133364097832, "grad_norm": 0.23305819928646088, "learning_rate": 6.420607458669743e-05, "loss": 0.9533, "step": 835 }, { "epoch": 0.09690816797415783, "grad_norm": 0.2432815134525299, "learning_rate": 6.459054209919262e-05, "loss": 1.0331, "step": 840 }, { "epoch": 0.09748500230733734, "grad_norm": 0.25727975368499756, "learning_rate": 6.497500961168782e-05, "loss": 0.9055, "step": 845 }, { "epoch": 0.09806183664051685, "grad_norm": 0.2272929549217224, "learning_rate": 6.535947712418301e-05, "loss": 1.004, "step": 850 }, { "epoch": 0.09863867097369636, "grad_norm": 0.24411216378211975, "learning_rate": 6.57439446366782e-05, "loss": 1.0313, "step": 855 }, { "epoch": 0.09921550530687587, "grad_norm": 0.23192369937896729, "learning_rate": 6.61284121491734e-05, "loss": 0.9813, "step": 860 }, { "epoch": 0.09979233964005538, "grad_norm": 0.2519132196903229, "learning_rate": 6.65128796616686e-05, "loss": 0.9928, "step": 865 }, { "epoch": 0.10036917397323489, "grad_norm": 0.2485574334859848, "learning_rate": 6.689734717416379e-05, "loss": 0.9619, "step": 870 }, { "epoch": 0.1009460083064144, "grad_norm": 0.21956461668014526, "learning_rate": 6.728181468665899e-05, "loss": 0.9608, "step": 875 }, { "epoch": 0.10152284263959391, "grad_norm": 0.2283376157283783, "learning_rate": 6.766628219915418e-05, "loss": 0.9964, "step": 880 }, { "epoch": 0.10209967697277342, "grad_norm": 0.25438618659973145, "learning_rate": 6.805074971164937e-05, "loss": 0.9941, "step": 885 }, { "epoch": 0.10267651130595293, "grad_norm": 0.2448102980852127, "learning_rate": 6.843521722414456e-05, "loss": 0.9948, "step": 890 }, { "epoch": 0.10325334563913244, "grad_norm": 0.22878648340702057, "learning_rate": 6.881968473663975e-05, "loss": 0.941, "step": 895 }, { "epoch": 0.10383017997231195, "grad_norm": 0.2290370762348175, "learning_rate": 6.920415224913494e-05, "loss": 0.9948, "step": 900 }, { "epoch": 0.10440701430549146, "grad_norm": 0.2316291630268097, "learning_rate": 6.958861976163014e-05, "loss": 1.0563, "step": 905 }, { "epoch": 0.10498384863867097, "grad_norm": 0.2231028527021408, "learning_rate": 6.997308727412533e-05, "loss": 0.9321, "step": 910 }, { "epoch": 0.10556068297185048, "grad_norm": 0.24470089375972748, "learning_rate": 7.035755478662054e-05, "loss": 0.988, "step": 915 }, { "epoch": 0.10613751730503, "grad_norm": 0.23039510846138, "learning_rate": 7.074202229911573e-05, "loss": 0.9818, "step": 920 }, { "epoch": 0.10671435163820951, "grad_norm": 0.23940788209438324, "learning_rate": 7.112648981161092e-05, "loss": 0.9494, "step": 925 }, { "epoch": 0.10729118597138902, "grad_norm": 0.24120782315731049, "learning_rate": 7.151095732410612e-05, "loss": 1.0259, "step": 930 }, { "epoch": 0.10786802030456853, "grad_norm": 0.24399179220199585, "learning_rate": 7.189542483660131e-05, "loss": 0.9675, "step": 935 }, { "epoch": 0.10844485463774804, "grad_norm": 0.23285731673240662, "learning_rate": 7.22798923490965e-05, "loss": 0.9579, "step": 940 }, { "epoch": 0.10902168897092755, "grad_norm": 0.22285297513008118, "learning_rate": 7.26643598615917e-05, "loss": 0.9986, "step": 945 }, { "epoch": 0.10959852330410706, "grad_norm": 0.24459075927734375, "learning_rate": 7.30488273740869e-05, "loss": 0.9707, "step": 950 }, { "epoch": 0.11017535763728657, "grad_norm": 0.23282331228256226, "learning_rate": 7.343329488658209e-05, "loss": 0.9682, "step": 955 }, { "epoch": 0.11075219197046608, "grad_norm": 0.21960249543190002, "learning_rate": 7.381776239907729e-05, "loss": 0.9469, "step": 960 }, { "epoch": 0.11132902630364559, "grad_norm": 0.2371433526277542, "learning_rate": 7.420222991157248e-05, "loss": 0.9583, "step": 965 }, { "epoch": 0.1119058606368251, "grad_norm": 0.21388335525989532, "learning_rate": 7.458669742406767e-05, "loss": 0.9888, "step": 970 }, { "epoch": 0.11248269497000461, "grad_norm": 0.23984219133853912, "learning_rate": 7.497116493656286e-05, "loss": 0.9845, "step": 975 }, { "epoch": 0.11305952930318412, "grad_norm": 0.2341049760580063, "learning_rate": 7.535563244905805e-05, "loss": 0.9941, "step": 980 }, { "epoch": 0.11363636363636363, "grad_norm": 0.23360998928546906, "learning_rate": 7.574009996155325e-05, "loss": 1.0182, "step": 985 }, { "epoch": 0.11421319796954314, "grad_norm": 0.216692715883255, "learning_rate": 7.612456747404844e-05, "loss": 0.9683, "step": 990 }, { "epoch": 0.11479003230272265, "grad_norm": 0.2707868814468384, "learning_rate": 7.650903498654363e-05, "loss": 0.9892, "step": 995 }, { "epoch": 0.11536686663590216, "grad_norm": 0.2180621176958084, "learning_rate": 7.689350249903884e-05, "loss": 1.0335, "step": 1000 }, { "epoch": 0.11594370096908169, "grad_norm": 0.22857269644737244, "learning_rate": 7.727797001153403e-05, "loss": 1.003, "step": 1005 }, { "epoch": 0.1165205353022612, "grad_norm": 0.2268066108226776, "learning_rate": 7.766243752402922e-05, "loss": 1.0238, "step": 1010 }, { "epoch": 0.1170973696354407, "grad_norm": 0.2323949635028839, "learning_rate": 7.804690503652442e-05, "loss": 0.9725, "step": 1015 }, { "epoch": 0.11767420396862022, "grad_norm": 0.229869082570076, "learning_rate": 7.843137254901961e-05, "loss": 0.9143, "step": 1020 }, { "epoch": 0.11825103830179973, "grad_norm": 0.22364865243434906, "learning_rate": 7.88158400615148e-05, "loss": 0.9522, "step": 1025 }, { "epoch": 0.11882787263497924, "grad_norm": 0.2270103394985199, "learning_rate": 7.920030757401e-05, "loss": 1.0134, "step": 1030 }, { "epoch": 0.11940470696815875, "grad_norm": 0.22710810601711273, "learning_rate": 7.95847750865052e-05, "loss": 0.9782, "step": 1035 }, { "epoch": 0.11998154130133826, "grad_norm": 0.21234725415706635, "learning_rate": 7.99692425990004e-05, "loss": 0.9913, "step": 1040 }, { "epoch": 0.12055837563451777, "grad_norm": 0.22006261348724365, "learning_rate": 8.035371011149559e-05, "loss": 0.952, "step": 1045 }, { "epoch": 0.12113520996769728, "grad_norm": 0.21777155995368958, "learning_rate": 8.073817762399078e-05, "loss": 1.0183, "step": 1050 }, { "epoch": 0.12171204430087679, "grad_norm": 0.24173501133918762, "learning_rate": 8.112264513648598e-05, "loss": 1.0323, "step": 1055 }, { "epoch": 0.1222888786340563, "grad_norm": 0.21458600461483002, "learning_rate": 8.150711264898117e-05, "loss": 0.9759, "step": 1060 }, { "epoch": 0.12286571296723581, "grad_norm": 0.24267593026161194, "learning_rate": 8.189158016147636e-05, "loss": 0.9292, "step": 1065 }, { "epoch": 0.12344254730041532, "grad_norm": 0.23081839084625244, "learning_rate": 8.227604767397155e-05, "loss": 0.9736, "step": 1070 }, { "epoch": 0.12401938163359483, "grad_norm": 0.21484199166297913, "learning_rate": 8.266051518646674e-05, "loss": 1.005, "step": 1075 }, { "epoch": 0.12459621596677434, "grad_norm": 0.22842317819595337, "learning_rate": 8.304498269896193e-05, "loss": 0.9796, "step": 1080 }, { "epoch": 0.12517305029995385, "grad_norm": 0.21950694918632507, "learning_rate": 8.342945021145713e-05, "loss": 0.9551, "step": 1085 }, { "epoch": 0.12574988463313336, "grad_norm": 0.22929993271827698, "learning_rate": 8.381391772395232e-05, "loss": 1.016, "step": 1090 }, { "epoch": 0.12632671896631287, "grad_norm": 0.25066328048706055, "learning_rate": 8.419838523644751e-05, "loss": 0.9864, "step": 1095 }, { "epoch": 0.12690355329949238, "grad_norm": 0.22298945486545563, "learning_rate": 8.458285274894272e-05, "loss": 1.0026, "step": 1100 }, { "epoch": 0.1274803876326719, "grad_norm": 0.22116802632808685, "learning_rate": 8.496732026143791e-05, "loss": 0.9444, "step": 1105 }, { "epoch": 0.1280572219658514, "grad_norm": 0.22058875858783722, "learning_rate": 8.535178777393311e-05, "loss": 0.9712, "step": 1110 }, { "epoch": 0.1286340562990309, "grad_norm": 0.22648410499095917, "learning_rate": 8.57362552864283e-05, "loss": 0.9756, "step": 1115 }, { "epoch": 0.12921089063221042, "grad_norm": 0.21567432582378387, "learning_rate": 8.612072279892349e-05, "loss": 0.9689, "step": 1120 }, { "epoch": 0.12978772496538993, "grad_norm": 0.23186016082763672, "learning_rate": 8.65051903114187e-05, "loss": 0.9337, "step": 1125 }, { "epoch": 0.13036455929856944, "grad_norm": 0.22326567769050598, "learning_rate": 8.688965782391389e-05, "loss": 1.0075, "step": 1130 }, { "epoch": 0.13094139363174895, "grad_norm": 0.22077631950378418, "learning_rate": 8.727412533640908e-05, "loss": 0.9971, "step": 1135 }, { "epoch": 0.13151822796492849, "grad_norm": 0.23452219367027283, "learning_rate": 8.765859284890428e-05, "loss": 0.9606, "step": 1140 }, { "epoch": 0.132095062298108, "grad_norm": 0.22227661311626434, "learning_rate": 8.804306036139947e-05, "loss": 1.0054, "step": 1145 }, { "epoch": 0.1326718966312875, "grad_norm": 0.2087729573249817, "learning_rate": 8.842752787389466e-05, "loss": 0.9683, "step": 1150 }, { "epoch": 0.13324873096446702, "grad_norm": 0.2098720818758011, "learning_rate": 8.881199538638986e-05, "loss": 0.9824, "step": 1155 }, { "epoch": 0.13382556529764653, "grad_norm": 0.2236039638519287, "learning_rate": 8.919646289888505e-05, "loss": 0.9713, "step": 1160 }, { "epoch": 0.13440239963082604, "grad_norm": 0.2002977877855301, "learning_rate": 8.958093041138024e-05, "loss": 0.985, "step": 1165 }, { "epoch": 0.13497923396400555, "grad_norm": 0.2261250764131546, "learning_rate": 8.996539792387543e-05, "loss": 0.983, "step": 1170 }, { "epoch": 0.13555606829718506, "grad_norm": 0.21874304115772247, "learning_rate": 9.034986543637062e-05, "loss": 0.9592, "step": 1175 }, { "epoch": 0.13613290263036457, "grad_norm": 0.20477834343910217, "learning_rate": 9.073433294886583e-05, "loss": 0.939, "step": 1180 }, { "epoch": 0.13670973696354408, "grad_norm": 0.2184436321258545, "learning_rate": 9.111880046136102e-05, "loss": 1.0011, "step": 1185 }, { "epoch": 0.1372865712967236, "grad_norm": 0.2131531685590744, "learning_rate": 9.150326797385621e-05, "loss": 0.9775, "step": 1190 }, { "epoch": 0.1378634056299031, "grad_norm": 0.23714293539524078, "learning_rate": 9.188773548635141e-05, "loss": 1.0107, "step": 1195 }, { "epoch": 0.1384402399630826, "grad_norm": 0.21136564016342163, "learning_rate": 9.22722029988466e-05, "loss": 1.0216, "step": 1200 }, { "epoch": 0.13901707429626212, "grad_norm": 0.20974573493003845, "learning_rate": 9.265667051134179e-05, "loss": 0.9754, "step": 1205 }, { "epoch": 0.13959390862944163, "grad_norm": 0.21065163612365723, "learning_rate": 9.3041138023837e-05, "loss": 0.9459, "step": 1210 }, { "epoch": 0.14017074296262114, "grad_norm": 0.22418978810310364, "learning_rate": 9.342560553633218e-05, "loss": 0.998, "step": 1215 }, { "epoch": 0.14074757729580065, "grad_norm": 0.19713211059570312, "learning_rate": 9.381007304882737e-05, "loss": 1.0003, "step": 1220 }, { "epoch": 0.14132441162898016, "grad_norm": 0.21748420596122742, "learning_rate": 9.419454056132258e-05, "loss": 0.9521, "step": 1225 }, { "epoch": 0.14190124596215967, "grad_norm": 0.23637205362319946, "learning_rate": 9.457900807381777e-05, "loss": 0.9304, "step": 1230 }, { "epoch": 0.14247808029533918, "grad_norm": 0.2121867686510086, "learning_rate": 9.496347558631297e-05, "loss": 0.964, "step": 1235 }, { "epoch": 0.1430549146285187, "grad_norm": 0.22569303214550018, "learning_rate": 9.534794309880816e-05, "loss": 0.9457, "step": 1240 }, { "epoch": 0.1436317489616982, "grad_norm": 0.21256321668624878, "learning_rate": 9.573241061130335e-05, "loss": 0.9737, "step": 1245 }, { "epoch": 0.1442085832948777, "grad_norm": 0.21022173762321472, "learning_rate": 9.611687812379854e-05, "loss": 0.9996, "step": 1250 }, { "epoch": 0.14478541762805722, "grad_norm": 0.20462080836296082, "learning_rate": 9.650134563629373e-05, "loss": 0.9736, "step": 1255 }, { "epoch": 0.14536225196123673, "grad_norm": 0.21609050035476685, "learning_rate": 9.688581314878892e-05, "loss": 0.9608, "step": 1260 }, { "epoch": 0.14593908629441624, "grad_norm": 0.2156708985567093, "learning_rate": 9.727028066128413e-05, "loss": 0.9268, "step": 1265 }, { "epoch": 0.14651592062759575, "grad_norm": 0.21314367651939392, "learning_rate": 9.765474817377932e-05, "loss": 1.035, "step": 1270 }, { "epoch": 0.14709275496077526, "grad_norm": 0.23611678183078766, "learning_rate": 9.80392156862745e-05, "loss": 0.9169, "step": 1275 }, { "epoch": 0.14766958929395477, "grad_norm": 0.20264959335327148, "learning_rate": 9.842368319876971e-05, "loss": 0.9841, "step": 1280 }, { "epoch": 0.14824642362713428, "grad_norm": 0.21544356644153595, "learning_rate": 9.88081507112649e-05, "loss": 0.9589, "step": 1285 }, { "epoch": 0.1488232579603138, "grad_norm": 0.2142428606748581, "learning_rate": 9.919261822376009e-05, "loss": 0.9514, "step": 1290 }, { "epoch": 0.1494000922934933, "grad_norm": 0.22194676101207733, "learning_rate": 9.957708573625529e-05, "loss": 0.9047, "step": 1295 }, { "epoch": 0.1499769266266728, "grad_norm": 0.2201799899339676, "learning_rate": 9.996155324875048e-05, "loss": 1.0036, "step": 1300 }, { "epoch": 0.15055376095985232, "grad_norm": 0.2207772582769394, "learning_rate": 0.00010034602076124569, "loss": 1.025, "step": 1305 }, { "epoch": 0.15113059529303183, "grad_norm": 0.20949603617191315, "learning_rate": 0.00010073048827374088, "loss": 0.9697, "step": 1310 }, { "epoch": 0.15170742962621137, "grad_norm": 0.20334427058696747, "learning_rate": 0.00010111495578623607, "loss": 0.9643, "step": 1315 }, { "epoch": 0.15228426395939088, "grad_norm": 0.20708300173282623, "learning_rate": 0.00010149942329873126, "loss": 0.9753, "step": 1320 }, { "epoch": 0.15286109829257039, "grad_norm": 0.22487087547779083, "learning_rate": 0.00010188389081122645, "loss": 1.0286, "step": 1325 }, { "epoch": 0.1534379326257499, "grad_norm": 0.2103419154882431, "learning_rate": 0.00010226835832372166, "loss": 0.9356, "step": 1330 }, { "epoch": 0.1540147669589294, "grad_norm": 0.22619712352752686, "learning_rate": 0.00010265282583621685, "loss": 0.9911, "step": 1335 }, { "epoch": 0.15459160129210892, "grad_norm": 0.2243778258562088, "learning_rate": 0.00010303729334871204, "loss": 1.0075, "step": 1340 }, { "epoch": 0.15516843562528843, "grad_norm": 0.20728860795497894, "learning_rate": 0.00010342176086120723, "loss": 0.9891, "step": 1345 }, { "epoch": 0.15574526995846794, "grad_norm": 0.21606509387493134, "learning_rate": 0.00010380622837370242, "loss": 0.9393, "step": 1350 }, { "epoch": 0.15632210429164745, "grad_norm": 0.21745528280735016, "learning_rate": 0.00010419069588619763, "loss": 0.9348, "step": 1355 }, { "epoch": 0.15689893862482696, "grad_norm": 0.21311575174331665, "learning_rate": 0.00010457516339869282, "loss": 0.9448, "step": 1360 }, { "epoch": 0.15747577295800647, "grad_norm": 0.2084248661994934, "learning_rate": 0.00010495963091118801, "loss": 0.9728, "step": 1365 }, { "epoch": 0.15805260729118598, "grad_norm": 0.21669849753379822, "learning_rate": 0.0001053440984236832, "loss": 0.9356, "step": 1370 }, { "epoch": 0.15862944162436549, "grad_norm": 0.22640497982501984, "learning_rate": 0.00010572856593617839, "loss": 0.9547, "step": 1375 }, { "epoch": 0.159206275957545, "grad_norm": 0.2224353551864624, "learning_rate": 0.00010611303344867358, "loss": 1.0064, "step": 1380 }, { "epoch": 0.1597831102907245, "grad_norm": 0.22003819048404694, "learning_rate": 0.0001064975009611688, "loss": 0.9927, "step": 1385 }, { "epoch": 0.16035994462390402, "grad_norm": 0.23084236681461334, "learning_rate": 0.00010688196847366399, "loss": 0.9538, "step": 1390 }, { "epoch": 0.16093677895708353, "grad_norm": 0.20851434767246246, "learning_rate": 0.00010726643598615918, "loss": 0.9822, "step": 1395 }, { "epoch": 0.16151361329026304, "grad_norm": 0.21839269995689392, "learning_rate": 0.00010765090349865437, "loss": 1.0137, "step": 1400 }, { "epoch": 0.16209044762344255, "grad_norm": 0.2095949798822403, "learning_rate": 0.00010803537101114956, "loss": 0.959, "step": 1405 }, { "epoch": 0.16266728195662206, "grad_norm": 0.20211191475391388, "learning_rate": 0.00010841983852364477, "loss": 0.9657, "step": 1410 }, { "epoch": 0.16324411628980157, "grad_norm": 0.22096802294254303, "learning_rate": 0.00010880430603613996, "loss": 0.9896, "step": 1415 }, { "epoch": 0.16382095062298108, "grad_norm": 0.30525097250938416, "learning_rate": 0.00010918877354863515, "loss": 0.956, "step": 1420 }, { "epoch": 0.1643977849561606, "grad_norm": 0.22134092450141907, "learning_rate": 0.00010957324106113034, "loss": 1.0124, "step": 1425 }, { "epoch": 0.1649746192893401, "grad_norm": 0.270163357257843, "learning_rate": 0.00010995770857362553, "loss": 0.9718, "step": 1430 }, { "epoch": 0.1655514536225196, "grad_norm": 0.21976105868816376, "learning_rate": 0.00011034217608612072, "loss": 0.9658, "step": 1435 }, { "epoch": 0.16612828795569912, "grad_norm": 0.20475271344184875, "learning_rate": 0.00011072664359861593, "loss": 1.024, "step": 1440 }, { "epoch": 0.16670512228887863, "grad_norm": 0.22362278401851654, "learning_rate": 0.00011111111111111112, "loss": 1.0096, "step": 1445 }, { "epoch": 0.16728195662205814, "grad_norm": 0.2165699005126953, "learning_rate": 0.0001114955786236063, "loss": 0.9666, "step": 1450 }, { "epoch": 0.16785879095523765, "grad_norm": 0.2233060896396637, "learning_rate": 0.0001118800461361015, "loss": 0.9615, "step": 1455 }, { "epoch": 0.16843562528841716, "grad_norm": 0.22626008093357086, "learning_rate": 0.00011226451364859669, "loss": 0.9821, "step": 1460 }, { "epoch": 0.16901245962159667, "grad_norm": 0.21532657742500305, "learning_rate": 0.00011264898116109188, "loss": 0.9635, "step": 1465 }, { "epoch": 0.16958929395477618, "grad_norm": 0.2170170545578003, "learning_rate": 0.0001130334486735871, "loss": 0.9817, "step": 1470 }, { "epoch": 0.1701661282879557, "grad_norm": 0.2258489727973938, "learning_rate": 0.00011341791618608228, "loss": 0.9803, "step": 1475 }, { "epoch": 0.1707429626211352, "grad_norm": 0.2402639538049698, "learning_rate": 0.00011380238369857747, "loss": 1.0435, "step": 1480 }, { "epoch": 0.1713197969543147, "grad_norm": 0.22203443944454193, "learning_rate": 0.00011418685121107266, "loss": 0.9883, "step": 1485 }, { "epoch": 0.17189663128749424, "grad_norm": 0.22157324850559235, "learning_rate": 0.00011457131872356785, "loss": 0.975, "step": 1490 }, { "epoch": 0.17247346562067375, "grad_norm": 0.2377525120973587, "learning_rate": 0.00011495578623606307, "loss": 0.9843, "step": 1495 }, { "epoch": 0.17305029995385326, "grad_norm": 0.29224807024002075, "learning_rate": 0.00011534025374855826, "loss": 0.974, "step": 1500 }, { "epoch": 0.17362713428703277, "grad_norm": 0.21139585971832275, "learning_rate": 0.00011572472126105345, "loss": 0.9682, "step": 1505 }, { "epoch": 0.17420396862021229, "grad_norm": 0.24163000285625458, "learning_rate": 0.00011610918877354864, "loss": 0.9543, "step": 1510 }, { "epoch": 0.1747808029533918, "grad_norm": 0.2104686051607132, "learning_rate": 0.00011649365628604383, "loss": 0.9524, "step": 1515 }, { "epoch": 0.1753576372865713, "grad_norm": 0.22780703008174896, "learning_rate": 0.00011687812379853902, "loss": 0.969, "step": 1520 }, { "epoch": 0.17593447161975082, "grad_norm": 0.22077947854995728, "learning_rate": 0.00011726259131103422, "loss": 0.9561, "step": 1525 }, { "epoch": 0.17651130595293033, "grad_norm": 0.21336746215820312, "learning_rate": 0.00011764705882352942, "loss": 0.9994, "step": 1530 }, { "epoch": 0.17708814028610984, "grad_norm": 0.2229880541563034, "learning_rate": 0.0001180315263360246, "loss": 0.9907, "step": 1535 }, { "epoch": 0.17766497461928935, "grad_norm": 0.21756049990653992, "learning_rate": 0.0001184159938485198, "loss": 1.006, "step": 1540 }, { "epoch": 0.17824180895246886, "grad_norm": 0.2166442722082138, "learning_rate": 0.00011880046136101499, "loss": 0.9598, "step": 1545 }, { "epoch": 0.17881864328564837, "grad_norm": 0.21556849777698517, "learning_rate": 0.0001191849288735102, "loss": 1.0291, "step": 1550 }, { "epoch": 0.17939547761882788, "grad_norm": 0.24204514920711517, "learning_rate": 0.00011956939638600539, "loss": 0.9589, "step": 1555 }, { "epoch": 0.17997231195200739, "grad_norm": 0.21335336565971375, "learning_rate": 0.00011995386389850058, "loss": 1.0487, "step": 1560 }, { "epoch": 0.1805491462851869, "grad_norm": 0.2127862423658371, "learning_rate": 0.00012033833141099577, "loss": 0.9542, "step": 1565 }, { "epoch": 0.1811259806183664, "grad_norm": 0.28124332427978516, "learning_rate": 0.00012072279892349096, "loss": 0.966, "step": 1570 }, { "epoch": 0.18170281495154592, "grad_norm": 0.2275981307029724, "learning_rate": 0.00012110726643598615, "loss": 0.8945, "step": 1575 }, { "epoch": 0.18227964928472543, "grad_norm": 0.20791442692279816, "learning_rate": 0.00012149173394848137, "loss": 1.0404, "step": 1580 }, { "epoch": 0.18285648361790494, "grad_norm": 0.21446120738983154, "learning_rate": 0.00012187620146097656, "loss": 0.9633, "step": 1585 }, { "epoch": 0.18343331795108445, "grad_norm": 0.2326110452413559, "learning_rate": 0.00012226066897347174, "loss": 1.0091, "step": 1590 }, { "epoch": 0.18401015228426396, "grad_norm": 0.2202981859445572, "learning_rate": 0.00012264513648596693, "loss": 0.9841, "step": 1595 }, { "epoch": 0.18458698661744347, "grad_norm": 0.22043539583683014, "learning_rate": 0.00012302960399846212, "loss": 1.0092, "step": 1600 }, { "epoch": 0.18516382095062298, "grad_norm": 0.23187553882598877, "learning_rate": 0.00012341407151095733, "loss": 0.999, "step": 1605 }, { "epoch": 0.18574065528380249, "grad_norm": 0.210253044962883, "learning_rate": 0.00012379853902345252, "loss": 0.9328, "step": 1610 }, { "epoch": 0.186317489616982, "grad_norm": 0.21368882060050964, "learning_rate": 0.00012418300653594771, "loss": 0.9898, "step": 1615 }, { "epoch": 0.1868943239501615, "grad_norm": 0.20804187655448914, "learning_rate": 0.0001245674740484429, "loss": 0.9704, "step": 1620 }, { "epoch": 0.18747115828334102, "grad_norm": 0.2148929387331009, "learning_rate": 0.0001249519415609381, "loss": 0.9855, "step": 1625 }, { "epoch": 0.18804799261652053, "grad_norm": 0.21504488587379456, "learning_rate": 0.00012533640907343328, "loss": 0.957, "step": 1630 }, { "epoch": 0.18862482694970004, "grad_norm": 0.21804533898830414, "learning_rate": 0.0001257208765859285, "loss": 1.0268, "step": 1635 }, { "epoch": 0.18920166128287955, "grad_norm": 0.2117997258901596, "learning_rate": 0.0001261053440984237, "loss": 1.0142, "step": 1640 }, { "epoch": 0.18977849561605906, "grad_norm": 0.2191164642572403, "learning_rate": 0.00012648981161091888, "loss": 0.9501, "step": 1645 }, { "epoch": 0.19035532994923857, "grad_norm": 0.21926717460155487, "learning_rate": 0.00012687427912341407, "loss": 0.9199, "step": 1650 }, { "epoch": 0.19093216428241808, "grad_norm": 0.22619302570819855, "learning_rate": 0.00012725874663590926, "loss": 0.9813, "step": 1655 }, { "epoch": 0.1915089986155976, "grad_norm": 0.2379409521818161, "learning_rate": 0.00012764321414840445, "loss": 0.9914, "step": 1660 }, { "epoch": 0.19208583294877712, "grad_norm": 0.20328597724437714, "learning_rate": 0.00012802768166089967, "loss": 0.9643, "step": 1665 }, { "epoch": 0.19266266728195663, "grad_norm": 0.22967956960201263, "learning_rate": 0.00012841214917339486, "loss": 0.9425, "step": 1670 }, { "epoch": 0.19323950161513614, "grad_norm": 0.22374680638313293, "learning_rate": 0.00012879661668589005, "loss": 0.9906, "step": 1675 }, { "epoch": 0.19381633594831565, "grad_norm": 0.20658764243125916, "learning_rate": 0.00012918108419838524, "loss": 0.9612, "step": 1680 }, { "epoch": 0.19439317028149516, "grad_norm": 0.22407005727291107, "learning_rate": 0.00012956555171088043, "loss": 1.0238, "step": 1685 }, { "epoch": 0.19497000461467467, "grad_norm": 0.2127981185913086, "learning_rate": 0.00012995001922337565, "loss": 0.9385, "step": 1690 }, { "epoch": 0.19554683894785418, "grad_norm": 0.23836196959018707, "learning_rate": 0.00013033448673587084, "loss": 0.9823, "step": 1695 }, { "epoch": 0.1961236732810337, "grad_norm": 0.21363188326358795, "learning_rate": 0.00013071895424836603, "loss": 0.9546, "step": 1700 }, { "epoch": 0.1967005076142132, "grad_norm": 0.22047366201877594, "learning_rate": 0.00013110342176086122, "loss": 0.9882, "step": 1705 }, { "epoch": 0.19727734194739271, "grad_norm": 0.2106313556432724, "learning_rate": 0.0001314878892733564, "loss": 0.9714, "step": 1710 }, { "epoch": 0.19785417628057222, "grad_norm": 0.2216782122850418, "learning_rate": 0.0001318723567858516, "loss": 0.9775, "step": 1715 }, { "epoch": 0.19843101061375173, "grad_norm": 0.2969755232334137, "learning_rate": 0.0001322568242983468, "loss": 0.9424, "step": 1720 }, { "epoch": 0.19900784494693124, "grad_norm": 0.218833327293396, "learning_rate": 0.000132641291810842, "loss": 0.9197, "step": 1725 }, { "epoch": 0.19958467928011075, "grad_norm": 0.2189982384443283, "learning_rate": 0.0001330257593233372, "loss": 1.0039, "step": 1730 }, { "epoch": 0.20016151361329027, "grad_norm": 0.22366592288017273, "learning_rate": 0.00013341022683583238, "loss": 1.0028, "step": 1735 }, { "epoch": 0.20073834794646978, "grad_norm": 0.22287939488887787, "learning_rate": 0.00013379469434832757, "loss": 0.9895, "step": 1740 }, { "epoch": 0.20131518227964929, "grad_norm": 0.22727978229522705, "learning_rate": 0.0001341791618608228, "loss": 0.99, "step": 1745 }, { "epoch": 0.2018920166128288, "grad_norm": 0.2164393663406372, "learning_rate": 0.00013456362937331798, "loss": 0.9771, "step": 1750 }, { "epoch": 0.2024688509460083, "grad_norm": 0.20931628346443176, "learning_rate": 0.00013494809688581317, "loss": 0.975, "step": 1755 }, { "epoch": 0.20304568527918782, "grad_norm": 0.22553858160972595, "learning_rate": 0.00013533256439830836, "loss": 0.9175, "step": 1760 }, { "epoch": 0.20362251961236733, "grad_norm": 0.21317711472511292, "learning_rate": 0.00013571703191080355, "loss": 0.9971, "step": 1765 }, { "epoch": 0.20419935394554684, "grad_norm": 0.21764598786830902, "learning_rate": 0.00013610149942329874, "loss": 1.0036, "step": 1770 }, { "epoch": 0.20477618827872635, "grad_norm": 0.21724803745746613, "learning_rate": 0.00013648596693579393, "loss": 0.9264, "step": 1775 }, { "epoch": 0.20535302261190586, "grad_norm": 0.21628911793231964, "learning_rate": 0.00013687043444828912, "loss": 0.9324, "step": 1780 }, { "epoch": 0.20592985694508537, "grad_norm": 0.23706841468811035, "learning_rate": 0.0001372549019607843, "loss": 1.0163, "step": 1785 }, { "epoch": 0.20650669127826488, "grad_norm": 0.2230122983455658, "learning_rate": 0.0001376393694732795, "loss": 0.9531, "step": 1790 }, { "epoch": 0.20708352561144439, "grad_norm": 0.2194584757089615, "learning_rate": 0.0001380238369857747, "loss": 0.9271, "step": 1795 }, { "epoch": 0.2076603599446239, "grad_norm": 0.22293421626091003, "learning_rate": 0.00013840830449826988, "loss": 0.983, "step": 1800 }, { "epoch": 0.2082371942778034, "grad_norm": 0.23112259805202484, "learning_rate": 0.0001387927720107651, "loss": 0.9551, "step": 1805 }, { "epoch": 0.20881402861098292, "grad_norm": 0.2262236773967743, "learning_rate": 0.0001391772395232603, "loss": 0.9971, "step": 1810 }, { "epoch": 0.20939086294416243, "grad_norm": 0.23392175137996674, "learning_rate": 0.00013956170703575548, "loss": 0.9363, "step": 1815 }, { "epoch": 0.20996769727734194, "grad_norm": 0.2209835797548294, "learning_rate": 0.00013994617454825067, "loss": 0.9597, "step": 1820 }, { "epoch": 0.21054453161052145, "grad_norm": 0.21317939460277557, "learning_rate": 0.00014033064206074586, "loss": 0.9425, "step": 1825 }, { "epoch": 0.21112136594370096, "grad_norm": 0.23041388392448425, "learning_rate": 0.00014071510957324108, "loss": 0.9675, "step": 1830 }, { "epoch": 0.2116982002768805, "grad_norm": 0.2254861295223236, "learning_rate": 0.00014109957708573627, "loss": 0.994, "step": 1835 }, { "epoch": 0.21227503461006, "grad_norm": 0.21605949103832245, "learning_rate": 0.00014148404459823146, "loss": 0.9866, "step": 1840 }, { "epoch": 0.21285186894323951, "grad_norm": 0.22147153317928314, "learning_rate": 0.00014186851211072665, "loss": 1.0008, "step": 1845 }, { "epoch": 0.21342870327641902, "grad_norm": 0.2367514967918396, "learning_rate": 0.00014225297962322184, "loss": 0.9128, "step": 1850 }, { "epoch": 0.21400553760959853, "grad_norm": 0.23112766444683075, "learning_rate": 0.00014263744713571703, "loss": 1.0061, "step": 1855 }, { "epoch": 0.21458237194277804, "grad_norm": 0.21282008290290833, "learning_rate": 0.00014302191464821224, "loss": 0.9833, "step": 1860 }, { "epoch": 0.21515920627595755, "grad_norm": 0.22202390432357788, "learning_rate": 0.00014340638216070743, "loss": 0.9929, "step": 1865 }, { "epoch": 0.21573604060913706, "grad_norm": 0.21571186184883118, "learning_rate": 0.00014379084967320262, "loss": 0.9666, "step": 1870 }, { "epoch": 0.21631287494231657, "grad_norm": 0.24291980266571045, "learning_rate": 0.0001441753171856978, "loss": 0.9818, "step": 1875 }, { "epoch": 0.21688970927549608, "grad_norm": 0.21617773175239563, "learning_rate": 0.000144559784698193, "loss": 0.9596, "step": 1880 }, { "epoch": 0.2174665436086756, "grad_norm": 0.22341042757034302, "learning_rate": 0.00014494425221068822, "loss": 1.0061, "step": 1885 }, { "epoch": 0.2180433779418551, "grad_norm": 0.20930033922195435, "learning_rate": 0.0001453287197231834, "loss": 0.9514, "step": 1890 }, { "epoch": 0.21862021227503461, "grad_norm": 0.26263025403022766, "learning_rate": 0.0001457131872356786, "loss": 0.9916, "step": 1895 }, { "epoch": 0.21919704660821412, "grad_norm": 0.21907363831996918, "learning_rate": 0.0001460976547481738, "loss": 0.9689, "step": 1900 }, { "epoch": 0.21977388094139363, "grad_norm": 0.20985926687717438, "learning_rate": 0.00014648212226066898, "loss": 0.932, "step": 1905 }, { "epoch": 0.22035071527457314, "grad_norm": 0.2179040014743805, "learning_rate": 0.00014686658977316417, "loss": 0.9683, "step": 1910 }, { "epoch": 0.22092754960775265, "grad_norm": 0.22273583710193634, "learning_rate": 0.0001472510572856594, "loss": 1.0013, "step": 1915 }, { "epoch": 0.22150438394093216, "grad_norm": 0.2257058471441269, "learning_rate": 0.00014763552479815458, "loss": 0.9805, "step": 1920 }, { "epoch": 0.22208121827411167, "grad_norm": 0.230184406042099, "learning_rate": 0.00014801999231064977, "loss": 0.9983, "step": 1925 }, { "epoch": 0.22265805260729118, "grad_norm": 0.22738578915596008, "learning_rate": 0.00014840445982314496, "loss": 0.9552, "step": 1930 }, { "epoch": 0.2232348869404707, "grad_norm": 0.2206430733203888, "learning_rate": 0.00014878892733564015, "loss": 0.9695, "step": 1935 }, { "epoch": 0.2238117212736502, "grad_norm": 0.22058050334453583, "learning_rate": 0.00014917339484813534, "loss": 0.9474, "step": 1940 }, { "epoch": 0.22438855560682971, "grad_norm": 0.20870976150035858, "learning_rate": 0.00014955786236063053, "loss": 0.9606, "step": 1945 }, { "epoch": 0.22496538994000922, "grad_norm": 0.21419864892959595, "learning_rate": 0.00014994232987312572, "loss": 0.9374, "step": 1950 }, { "epoch": 0.22554222427318874, "grad_norm": 0.24207691848278046, "learning_rate": 0.0001503267973856209, "loss": 1.0411, "step": 1955 }, { "epoch": 0.22611905860636825, "grad_norm": 0.2237478345632553, "learning_rate": 0.0001507112648981161, "loss": 0.9959, "step": 1960 }, { "epoch": 0.22669589293954776, "grad_norm": 0.23635807633399963, "learning_rate": 0.0001510957324106113, "loss": 0.9434, "step": 1965 }, { "epoch": 0.22727272727272727, "grad_norm": 0.245008185505867, "learning_rate": 0.0001514801999231065, "loss": 0.963, "step": 1970 }, { "epoch": 0.22784956160590678, "grad_norm": 0.2118360847234726, "learning_rate": 0.0001518646674356017, "loss": 0.9645, "step": 1975 }, { "epoch": 0.22842639593908629, "grad_norm": 0.22724303603172302, "learning_rate": 0.00015224913494809689, "loss": 0.9884, "step": 1980 }, { "epoch": 0.2290032302722658, "grad_norm": 0.24408259987831116, "learning_rate": 0.00015263360246059208, "loss": 1.008, "step": 1985 }, { "epoch": 0.2295800646054453, "grad_norm": 0.22420641779899597, "learning_rate": 0.00015301806997308727, "loss": 0.9636, "step": 1990 }, { "epoch": 0.23015689893862482, "grad_norm": 0.20731568336486816, "learning_rate": 0.00015340253748558246, "loss": 0.965, "step": 1995 }, { "epoch": 0.23073373327180433, "grad_norm": 0.2164364755153656, "learning_rate": 0.00015378700499807767, "loss": 0.9844, "step": 2000 }, { "epoch": 0.23131056760498384, "grad_norm": 0.22473865747451782, "learning_rate": 0.00015417147251057286, "loss": 1.019, "step": 2005 }, { "epoch": 0.23188740193816337, "grad_norm": 0.2263941615819931, "learning_rate": 0.00015455594002306805, "loss": 1.0051, "step": 2010 }, { "epoch": 0.23246423627134288, "grad_norm": 0.2233802229166031, "learning_rate": 0.00015494040753556324, "loss": 1.0199, "step": 2015 }, { "epoch": 0.2330410706045224, "grad_norm": 0.22767069935798645, "learning_rate": 0.00015532487504805843, "loss": 0.9763, "step": 2020 }, { "epoch": 0.2336179049377019, "grad_norm": 0.2226293981075287, "learning_rate": 0.00015570934256055365, "loss": 0.9811, "step": 2025 }, { "epoch": 0.2341947392708814, "grad_norm": 0.24052846431732178, "learning_rate": 0.00015609381007304884, "loss": 1.0216, "step": 2030 }, { "epoch": 0.23477157360406092, "grad_norm": 0.2248363345861435, "learning_rate": 0.00015647827758554403, "loss": 0.9654, "step": 2035 }, { "epoch": 0.23534840793724043, "grad_norm": 0.2263503223657608, "learning_rate": 0.00015686274509803922, "loss": 0.9874, "step": 2040 }, { "epoch": 0.23592524227041994, "grad_norm": 0.23036567866802216, "learning_rate": 0.0001572472126105344, "loss": 0.9362, "step": 2045 }, { "epoch": 0.23650207660359945, "grad_norm": 0.2279501110315323, "learning_rate": 0.0001576316801230296, "loss": 0.9495, "step": 2050 }, { "epoch": 0.23707891093677896, "grad_norm": 0.23070128262043, "learning_rate": 0.00015801614763552482, "loss": 0.975, "step": 2055 }, { "epoch": 0.23765574526995847, "grad_norm": 0.22231556475162506, "learning_rate": 0.00015840061514802, "loss": 1.0113, "step": 2060 }, { "epoch": 0.23823257960313798, "grad_norm": 0.2362755835056305, "learning_rate": 0.0001587850826605152, "loss": 0.977, "step": 2065 }, { "epoch": 0.2388094139363175, "grad_norm": 0.21221700310707092, "learning_rate": 0.0001591695501730104, "loss": 0.8838, "step": 2070 }, { "epoch": 0.239386248269497, "grad_norm": 0.23924385011196136, "learning_rate": 0.00015955401768550558, "loss": 0.9988, "step": 2075 }, { "epoch": 0.23996308260267651, "grad_norm": 0.21702693402767181, "learning_rate": 0.0001599384851980008, "loss": 0.9345, "step": 2080 }, { "epoch": 0.24053991693585602, "grad_norm": 0.2238760143518448, "learning_rate": 0.00016032295271049598, "loss": 0.9751, "step": 2085 }, { "epoch": 0.24111675126903553, "grad_norm": 0.2292429357767105, "learning_rate": 0.00016070742022299117, "loss": 0.9696, "step": 2090 }, { "epoch": 0.24169358560221504, "grad_norm": 0.33746978640556335, "learning_rate": 0.00016109188773548636, "loss": 0.9767, "step": 2095 }, { "epoch": 0.24227041993539455, "grad_norm": 0.22750449180603027, "learning_rate": 0.00016147635524798155, "loss": 0.9648, "step": 2100 }, { "epoch": 0.24284725426857406, "grad_norm": 0.2371540665626526, "learning_rate": 0.00016186082276047674, "loss": 0.9723, "step": 2105 }, { "epoch": 0.24342408860175357, "grad_norm": 0.22886401414871216, "learning_rate": 0.00016224529027297196, "loss": 0.9615, "step": 2110 }, { "epoch": 0.24400092293493308, "grad_norm": 0.23935814201831818, "learning_rate": 0.00016262975778546715, "loss": 0.9645, "step": 2115 }, { "epoch": 0.2445777572681126, "grad_norm": 0.23147407174110413, "learning_rate": 0.00016301422529796234, "loss": 0.9454, "step": 2120 }, { "epoch": 0.2451545916012921, "grad_norm": 0.23258179426193237, "learning_rate": 0.00016339869281045753, "loss": 0.947, "step": 2125 }, { "epoch": 0.24573142593447161, "grad_norm": 0.23673345148563385, "learning_rate": 0.00016378316032295272, "loss": 0.9649, "step": 2130 }, { "epoch": 0.24630826026765112, "grad_norm": 0.2366418093442917, "learning_rate": 0.0001641676278354479, "loss": 0.995, "step": 2135 }, { "epoch": 0.24688509460083063, "grad_norm": 0.23460763692855835, "learning_rate": 0.0001645520953479431, "loss": 1.0027, "step": 2140 }, { "epoch": 0.24746192893401014, "grad_norm": 0.2338368147611618, "learning_rate": 0.0001649365628604383, "loss": 0.9455, "step": 2145 }, { "epoch": 0.24803876326718965, "grad_norm": 0.23727886378765106, "learning_rate": 0.00016532103037293348, "loss": 0.9894, "step": 2150 }, { "epoch": 0.24861559760036916, "grad_norm": 0.23833225667476654, "learning_rate": 0.00016570549788542867, "loss": 1.0365, "step": 2155 }, { "epoch": 0.24919243193354867, "grad_norm": 0.21977971494197845, "learning_rate": 0.00016608996539792386, "loss": 0.9554, "step": 2160 }, { "epoch": 0.24976926626672818, "grad_norm": 0.22417233884334564, "learning_rate": 0.00016647443291041908, "loss": 0.9308, "step": 2165 }, { "epoch": 0.2503461005999077, "grad_norm": 0.2396731674671173, "learning_rate": 0.00016685890042291427, "loss": 0.9998, "step": 2170 }, { "epoch": 0.2509229349330872, "grad_norm": 0.25424474477767944, "learning_rate": 0.00016724336793540946, "loss": 1.0282, "step": 2175 }, { "epoch": 0.2514997692662667, "grad_norm": 0.24737310409545898, "learning_rate": 0.00016762783544790465, "loss": 0.9514, "step": 2180 }, { "epoch": 0.2520766035994462, "grad_norm": 0.22595183551311493, "learning_rate": 0.00016801230296039984, "loss": 0.9732, "step": 2185 }, { "epoch": 0.25265343793262574, "grad_norm": 0.22247906029224396, "learning_rate": 0.00016839677047289503, "loss": 0.9478, "step": 2190 }, { "epoch": 0.25323027226580525, "grad_norm": 0.23501867055892944, "learning_rate": 0.00016878123798539025, "loss": 0.961, "step": 2195 }, { "epoch": 0.25380710659898476, "grad_norm": 0.22328057885169983, "learning_rate": 0.00016916570549788544, "loss": 0.9561, "step": 2200 }, { "epoch": 0.25438394093216427, "grad_norm": 0.23454934358596802, "learning_rate": 0.00016955017301038063, "loss": 0.8989, "step": 2205 }, { "epoch": 0.2549607752653438, "grad_norm": 0.2128182202577591, "learning_rate": 0.00016993464052287582, "loss": 0.9227, "step": 2210 }, { "epoch": 0.2555376095985233, "grad_norm": 0.22926746308803558, "learning_rate": 0.000170319108035371, "loss": 0.9504, "step": 2215 }, { "epoch": 0.2561144439317028, "grad_norm": 0.2354438304901123, "learning_rate": 0.00017070357554786622, "loss": 0.9596, "step": 2220 }, { "epoch": 0.2566912782648823, "grad_norm": 0.22310538589954376, "learning_rate": 0.00017108804306036141, "loss": 0.9727, "step": 2225 }, { "epoch": 0.2572681125980618, "grad_norm": 0.24562855064868927, "learning_rate": 0.0001714725105728566, "loss": 1.0191, "step": 2230 }, { "epoch": 0.2578449469312413, "grad_norm": 0.23208841681480408, "learning_rate": 0.0001718569780853518, "loss": 1.0146, "step": 2235 }, { "epoch": 0.25842178126442084, "grad_norm": 0.2338448017835617, "learning_rate": 0.00017224144559784698, "loss": 0.9586, "step": 2240 }, { "epoch": 0.25899861559760035, "grad_norm": 0.22157028317451477, "learning_rate": 0.00017262591311034217, "loss": 1.0005, "step": 2245 }, { "epoch": 0.25957544993077986, "grad_norm": 0.23284341394901276, "learning_rate": 0.0001730103806228374, "loss": 0.9742, "step": 2250 }, { "epoch": 0.26015228426395937, "grad_norm": 0.2183004915714264, "learning_rate": 0.00017339484813533258, "loss": 0.9649, "step": 2255 }, { "epoch": 0.2607291185971389, "grad_norm": 0.2606106996536255, "learning_rate": 0.00017377931564782777, "loss": 1.0056, "step": 2260 }, { "epoch": 0.2613059529303184, "grad_norm": 0.2305658459663391, "learning_rate": 0.00017416378316032296, "loss": 0.958, "step": 2265 }, { "epoch": 0.2618827872634979, "grad_norm": 0.23455555737018585, "learning_rate": 0.00017454825067281815, "loss": 0.966, "step": 2270 }, { "epoch": 0.26245962159667746, "grad_norm": 0.22993336617946625, "learning_rate": 0.00017493271818531337, "loss": 0.955, "step": 2275 }, { "epoch": 0.26303645592985697, "grad_norm": 0.22990167140960693, "learning_rate": 0.00017531718569780856, "loss": 1.0025, "step": 2280 }, { "epoch": 0.2636132902630365, "grad_norm": 0.23351961374282837, "learning_rate": 0.00017570165321030375, "loss": 1.0094, "step": 2285 }, { "epoch": 0.264190124596216, "grad_norm": 0.24218833446502686, "learning_rate": 0.00017608612072279894, "loss": 0.9458, "step": 2290 }, { "epoch": 0.2647669589293955, "grad_norm": 0.23496471345424652, "learning_rate": 0.00017647058823529413, "loss": 0.999, "step": 2295 }, { "epoch": 0.265343793262575, "grad_norm": 0.24932752549648285, "learning_rate": 0.00017685505574778932, "loss": 1.0319, "step": 2300 }, { "epoch": 0.2659206275957545, "grad_norm": 0.2334304302930832, "learning_rate": 0.00017723952326028454, "loss": 1.0447, "step": 2305 }, { "epoch": 0.26649746192893403, "grad_norm": 0.23765085637569427, "learning_rate": 0.00017762399077277973, "loss": 0.9873, "step": 2310 }, { "epoch": 0.26707429626211354, "grad_norm": 0.22014391422271729, "learning_rate": 0.00017800845828527492, "loss": 0.9984, "step": 2315 }, { "epoch": 0.26765113059529305, "grad_norm": 0.23627543449401855, "learning_rate": 0.0001783929257977701, "loss": 0.9945, "step": 2320 }, { "epoch": 0.26822796492847256, "grad_norm": 0.24393457174301147, "learning_rate": 0.0001787773933102653, "loss": 1.002, "step": 2325 }, { "epoch": 0.26880479926165207, "grad_norm": 0.2255178838968277, "learning_rate": 0.0001791618608227605, "loss": 0.9723, "step": 2330 }, { "epoch": 0.2693816335948316, "grad_norm": 0.2558629512786865, "learning_rate": 0.00017954632833525568, "loss": 1.0223, "step": 2335 }, { "epoch": 0.2699584679280111, "grad_norm": 0.23998694121837616, "learning_rate": 0.00017993079584775087, "loss": 0.9723, "step": 2340 }, { "epoch": 0.2705353022611906, "grad_norm": 0.24648216366767883, "learning_rate": 0.00018031526336024606, "loss": 0.9943, "step": 2345 }, { "epoch": 0.2711121365943701, "grad_norm": 0.26392728090286255, "learning_rate": 0.00018069973087274125, "loss": 0.9565, "step": 2350 }, { "epoch": 0.2716889709275496, "grad_norm": 0.24391917884349823, "learning_rate": 0.00018108419838523644, "loss": 1.0053, "step": 2355 }, { "epoch": 0.27226580526072913, "grad_norm": 0.23389026522636414, "learning_rate": 0.00018146866589773165, "loss": 0.9856, "step": 2360 }, { "epoch": 0.27284263959390864, "grad_norm": 0.23027820885181427, "learning_rate": 0.00018185313341022684, "loss": 0.9421, "step": 2365 }, { "epoch": 0.27341947392708815, "grad_norm": 0.21990667283535004, "learning_rate": 0.00018223760092272203, "loss": 0.9932, "step": 2370 }, { "epoch": 0.27399630826026766, "grad_norm": 0.23242932558059692, "learning_rate": 0.00018262206843521722, "loss": 0.9354, "step": 2375 }, { "epoch": 0.2745731425934472, "grad_norm": 0.23268820345401764, "learning_rate": 0.00018300653594771241, "loss": 0.9453, "step": 2380 }, { "epoch": 0.2751499769266267, "grad_norm": 0.2401697039604187, "learning_rate": 0.0001833910034602076, "loss": 0.9552, "step": 2385 }, { "epoch": 0.2757268112598062, "grad_norm": 0.22992229461669922, "learning_rate": 0.00018377547097270282, "loss": 0.9592, "step": 2390 }, { "epoch": 0.2763036455929857, "grad_norm": 0.2419811487197876, "learning_rate": 0.000184159938485198, "loss": 0.9724, "step": 2395 }, { "epoch": 0.2768804799261652, "grad_norm": 0.23714175820350647, "learning_rate": 0.0001845444059976932, "loss": 0.9591, "step": 2400 }, { "epoch": 0.2774573142593447, "grad_norm": 0.23658522963523865, "learning_rate": 0.0001849288735101884, "loss": 0.9249, "step": 2405 }, { "epoch": 0.27803414859252423, "grad_norm": 0.25390779972076416, "learning_rate": 0.00018531334102268358, "loss": 0.9518, "step": 2410 }, { "epoch": 0.27861098292570374, "grad_norm": 0.24043525755405426, "learning_rate": 0.0001856978085351788, "loss": 0.9804, "step": 2415 }, { "epoch": 0.27918781725888325, "grad_norm": 0.24651384353637695, "learning_rate": 0.000186082276047674, "loss": 0.9305, "step": 2420 }, { "epoch": 0.27976465159206276, "grad_norm": 0.2362329661846161, "learning_rate": 0.00018646674356016918, "loss": 1.0093, "step": 2425 }, { "epoch": 0.2803414859252423, "grad_norm": 0.23057129979133606, "learning_rate": 0.00018685121107266437, "loss": 0.9448, "step": 2430 }, { "epoch": 0.2809183202584218, "grad_norm": 0.24105559289455414, "learning_rate": 0.00018723567858515956, "loss": 0.9549, "step": 2435 }, { "epoch": 0.2814951545916013, "grad_norm": 0.2542194128036499, "learning_rate": 0.00018762014609765475, "loss": 0.9946, "step": 2440 }, { "epoch": 0.2820719889247808, "grad_norm": 0.25193148851394653, "learning_rate": 0.00018800461361014997, "loss": 0.9697, "step": 2445 }, { "epoch": 0.2826488232579603, "grad_norm": 0.25175783038139343, "learning_rate": 0.00018838908112264516, "loss": 0.947, "step": 2450 }, { "epoch": 0.2832256575911398, "grad_norm": 0.24037009477615356, "learning_rate": 0.00018877354863514035, "loss": 0.9593, "step": 2455 }, { "epoch": 0.28380249192431933, "grad_norm": 0.2611483633518219, "learning_rate": 0.00018915801614763554, "loss": 0.9462, "step": 2460 }, { "epoch": 0.28437932625749884, "grad_norm": 0.23972219228744507, "learning_rate": 0.00018954248366013073, "loss": 0.9776, "step": 2465 }, { "epoch": 0.28495616059067835, "grad_norm": 0.24529783427715302, "learning_rate": 0.00018992695117262594, "loss": 0.9445, "step": 2470 }, { "epoch": 0.28553299492385786, "grad_norm": 0.2517401874065399, "learning_rate": 0.00019031141868512113, "loss": 0.9944, "step": 2475 }, { "epoch": 0.2861098292570374, "grad_norm": 0.25316643714904785, "learning_rate": 0.00019069588619761632, "loss": 1.0143, "step": 2480 }, { "epoch": 0.2866866635902169, "grad_norm": 0.23407518863677979, "learning_rate": 0.00019108035371011151, "loss": 1.0202, "step": 2485 }, { "epoch": 0.2872634979233964, "grad_norm": 0.251371830701828, "learning_rate": 0.0001914648212226067, "loss": 1.0087, "step": 2490 }, { "epoch": 0.2878403322565759, "grad_norm": 0.2522146701812744, "learning_rate": 0.0001918492887351019, "loss": 0.9761, "step": 2495 }, { "epoch": 0.2884171665897554, "grad_norm": 0.24245892465114594, "learning_rate": 0.00019223375624759708, "loss": 0.9882, "step": 2500 }, { "epoch": 0.2889940009229349, "grad_norm": 0.23890967667102814, "learning_rate": 0.00019261822376009227, "loss": 0.9504, "step": 2505 }, { "epoch": 0.28957083525611443, "grad_norm": 0.2405172884464264, "learning_rate": 0.00019300269127258746, "loss": 1.014, "step": 2510 }, { "epoch": 0.29014766958929394, "grad_norm": 0.2515077590942383, "learning_rate": 0.00019338715878508265, "loss": 0.9936, "step": 2515 }, { "epoch": 0.29072450392247345, "grad_norm": 0.23620381951332092, "learning_rate": 0.00019377162629757784, "loss": 0.9828, "step": 2520 }, { "epoch": 0.29130133825565296, "grad_norm": 0.25077781081199646, "learning_rate": 0.00019415609381007303, "loss": 0.9711, "step": 2525 }, { "epoch": 0.2918781725888325, "grad_norm": 0.23658686876296997, "learning_rate": 0.00019454056132256825, "loss": 1.0081, "step": 2530 }, { "epoch": 0.292455006922012, "grad_norm": 0.24098335206508636, "learning_rate": 0.00019492502883506344, "loss": 0.9102, "step": 2535 }, { "epoch": 0.2930318412551915, "grad_norm": 0.2481927126646042, "learning_rate": 0.00019530949634755863, "loss": 0.9652, "step": 2540 }, { "epoch": 0.293608675588371, "grad_norm": 0.23471227288246155, "learning_rate": 0.00019569396386005382, "loss": 0.9101, "step": 2545 }, { "epoch": 0.2941855099215505, "grad_norm": 0.24416758120059967, "learning_rate": 0.000196078431372549, "loss": 0.975, "step": 2550 }, { "epoch": 0.29476234425473, "grad_norm": 0.2367262840270996, "learning_rate": 0.00019646289888504423, "loss": 0.9632, "step": 2555 }, { "epoch": 0.29533917858790953, "grad_norm": 0.24644485116004944, "learning_rate": 0.00019684736639753942, "loss": 0.9472, "step": 2560 }, { "epoch": 0.29591601292108904, "grad_norm": 0.26747334003448486, "learning_rate": 0.0001972318339100346, "loss": 0.93, "step": 2565 }, { "epoch": 0.29649284725426855, "grad_norm": 0.24286748468875885, "learning_rate": 0.0001976163014225298, "loss": 0.9563, "step": 2570 }, { "epoch": 0.29706968158744806, "grad_norm": 0.24571438133716583, "learning_rate": 0.000198000768935025, "loss": 0.9762, "step": 2575 }, { "epoch": 0.2976465159206276, "grad_norm": 0.2596196234226227, "learning_rate": 0.00019838523644752018, "loss": 1.0187, "step": 2580 }, { "epoch": 0.2982233502538071, "grad_norm": 0.2407630831003189, "learning_rate": 0.0001987697039600154, "loss": 0.9882, "step": 2585 }, { "epoch": 0.2988001845869866, "grad_norm": 0.24728403985500336, "learning_rate": 0.00019915417147251059, "loss": 0.9734, "step": 2590 }, { "epoch": 0.2993770189201661, "grad_norm": 0.2402627021074295, "learning_rate": 0.00019953863898500578, "loss": 1.0186, "step": 2595 }, { "epoch": 0.2999538532533456, "grad_norm": 0.24274969100952148, "learning_rate": 0.00019992310649750097, "loss": 0.9835, "step": 2600 }, { "epoch": 0.3005306875865251, "grad_norm": 0.26291441917419434, "learning_rate": 0.00019999998558393748, "loss": 0.997, "step": 2605 }, { "epoch": 0.30110752191970463, "grad_norm": 0.2553878724575043, "learning_rate": 0.0001999999270186907, "loss": 1.0249, "step": 2610 }, { "epoch": 0.30168435625288414, "grad_norm": 0.24096915125846863, "learning_rate": 0.00019999982340328205, "loss": 1.0262, "step": 2615 }, { "epoch": 0.30226119058606365, "grad_norm": 0.2416466474533081, "learning_rate": 0.0001999996747377582, "loss": 1.0066, "step": 2620 }, { "epoch": 0.3028380249192432, "grad_norm": 0.24086306989192963, "learning_rate": 0.0001999994810221862, "loss": 0.9956, "step": 2625 }, { "epoch": 0.30341485925242273, "grad_norm": 0.27510523796081543, "learning_rate": 0.00019999924225665326, "loss": 1.0013, "step": 2630 }, { "epoch": 0.30399169358560224, "grad_norm": 0.24594350159168243, "learning_rate": 0.00019999895844126695, "loss": 0.9678, "step": 2635 }, { "epoch": 0.30456852791878175, "grad_norm": 0.2500142753124237, "learning_rate": 0.00019999862957615513, "loss": 1.0076, "step": 2640 }, { "epoch": 0.30514536225196126, "grad_norm": 0.2603604197502136, "learning_rate": 0.000199998255661466, "loss": 1.0059, "step": 2645 }, { "epoch": 0.30572219658514077, "grad_norm": 0.24498620629310608, "learning_rate": 0.00019999783669736795, "loss": 0.9441, "step": 2650 }, { "epoch": 0.3062990309183203, "grad_norm": 0.2519979774951935, "learning_rate": 0.00019999737268404973, "loss": 0.9315, "step": 2655 }, { "epoch": 0.3068758652514998, "grad_norm": 0.24573497474193573, "learning_rate": 0.00019999686362172044, "loss": 0.9806, "step": 2660 }, { "epoch": 0.3074526995846793, "grad_norm": 0.24200966954231262, "learning_rate": 0.00019999630951060934, "loss": 1.0077, "step": 2665 }, { "epoch": 0.3080295339178588, "grad_norm": 0.26442426443099976, "learning_rate": 0.00019999571035096608, "loss": 0.975, "step": 2670 }, { "epoch": 0.3086063682510383, "grad_norm": 0.2420646846294403, "learning_rate": 0.0001999950661430606, "loss": 0.9541, "step": 2675 }, { "epoch": 0.30918320258421783, "grad_norm": 0.2571803331375122, "learning_rate": 0.00019999437688718313, "loss": 1.0074, "step": 2680 }, { "epoch": 0.30976003691739734, "grad_norm": 0.2612910568714142, "learning_rate": 0.00019999364258364413, "loss": 1.0123, "step": 2685 }, { "epoch": 0.31033687125057685, "grad_norm": 0.2611737549304962, "learning_rate": 0.00019999286323277445, "loss": 1.0179, "step": 2690 }, { "epoch": 0.31091370558375636, "grad_norm": 0.24607665836811066, "learning_rate": 0.00019999203883492515, "loss": 0.9675, "step": 2695 }, { "epoch": 0.31149053991693587, "grad_norm": 0.24605637788772583, "learning_rate": 0.00019999116939046764, "loss": 0.9357, "step": 2700 }, { "epoch": 0.3120673742501154, "grad_norm": 0.24667702615261078, "learning_rate": 0.00019999025489979367, "loss": 0.9726, "step": 2705 }, { "epoch": 0.3126442085832949, "grad_norm": 0.24471449851989746, "learning_rate": 0.0001999892953633151, "loss": 0.9708, "step": 2710 }, { "epoch": 0.3132210429164744, "grad_norm": 0.2780803442001343, "learning_rate": 0.0001999882907814643, "loss": 0.9675, "step": 2715 }, { "epoch": 0.3137978772496539, "grad_norm": 0.264237642288208, "learning_rate": 0.00019998724115469378, "loss": 1.0132, "step": 2720 }, { "epoch": 0.3143747115828334, "grad_norm": 0.2395864874124527, "learning_rate": 0.00019998614648347642, "loss": 1.0061, "step": 2725 }, { "epoch": 0.31495154591601293, "grad_norm": 0.2584296762943268, "learning_rate": 0.0001999850067683054, "loss": 0.9822, "step": 2730 }, { "epoch": 0.31552838024919244, "grad_norm": 0.24742144346237183, "learning_rate": 0.0001999838220096941, "loss": 0.9794, "step": 2735 }, { "epoch": 0.31610521458237195, "grad_norm": 41.201873779296875, "learning_rate": 0.0001999825922081763, "loss": 1.0792, "step": 2740 }, { "epoch": 0.31668204891555146, "grad_norm": 0.2647322714328766, "learning_rate": 0.00019998131736430604, "loss": 0.9826, "step": 2745 }, { "epoch": 0.31725888324873097, "grad_norm": 0.3127318322658539, "learning_rate": 0.0001999799974786576, "loss": 1.0236, "step": 2750 }, { "epoch": 0.3178357175819105, "grad_norm": 0.23633776605129242, "learning_rate": 0.0001999786325518256, "loss": 0.9661, "step": 2755 }, { "epoch": 0.31841255191509, "grad_norm": 0.25068148970603943, "learning_rate": 0.00019997722258442499, "loss": 1.0099, "step": 2760 }, { "epoch": 0.3189893862482695, "grad_norm": 0.26392409205436707, "learning_rate": 0.00019997576757709089, "loss": 0.9987, "step": 2765 }, { "epoch": 0.319566220581449, "grad_norm": 8.13176155090332, "learning_rate": 0.00019997426753047882, "loss": 1.0128, "step": 2770 }, { "epoch": 0.3201430549146285, "grad_norm": 2.1088223457336426, "learning_rate": 0.00019997272244526456, "loss": 0.9769, "step": 2775 }, { "epoch": 0.32071988924780803, "grad_norm": 0.7730352878570557, "learning_rate": 0.00019997113232214417, "loss": 1.0047, "step": 2780 }, { "epoch": 0.32129672358098754, "grad_norm": 0.272935688495636, "learning_rate": 0.000199969497161834, "loss": 0.9996, "step": 2785 }, { "epoch": 0.32187355791416705, "grad_norm": 5.394055366516113, "learning_rate": 0.00019996781696507069, "loss": 1.0325, "step": 2790 }, { "epoch": 0.32245039224734656, "grad_norm": 0.26502957940101624, "learning_rate": 0.00019996609173261116, "loss": 0.9684, "step": 2795 }, { "epoch": 0.3230272265805261, "grad_norm": 0.25611191987991333, "learning_rate": 0.00019996432146523267, "loss": 0.9542, "step": 2800 }, { "epoch": 0.3236040609137056, "grad_norm": 0.2904262840747833, "learning_rate": 0.00019996250616373268, "loss": 0.9687, "step": 2805 }, { "epoch": 0.3241808952468851, "grad_norm": 0.33797159790992737, "learning_rate": 0.00019996064582892905, "loss": 0.9814, "step": 2810 }, { "epoch": 0.3247577295800646, "grad_norm": 0.2576303482055664, "learning_rate": 0.00019995874046165981, "loss": 0.9391, "step": 2815 }, { "epoch": 0.3253345639132441, "grad_norm": 0.2760027050971985, "learning_rate": 0.0001999567900627833, "loss": 0.9729, "step": 2820 }, { "epoch": 0.3259113982464236, "grad_norm": 0.2771868109703064, "learning_rate": 0.0001999547946331783, "loss": 1.0334, "step": 2825 }, { "epoch": 0.32648823257960313, "grad_norm": 0.28071510791778564, "learning_rate": 0.00019995275417374365, "loss": 0.9437, "step": 2830 }, { "epoch": 0.32706506691278264, "grad_norm": 0.25998392701148987, "learning_rate": 0.0001999506686853986, "loss": 1.0159, "step": 2835 }, { "epoch": 0.32764190124596215, "grad_norm": 0.2917766273021698, "learning_rate": 0.0001999485381690827, "loss": 1.0045, "step": 2840 }, { "epoch": 0.32821873557914166, "grad_norm": 0.24891650676727295, "learning_rate": 0.0001999463626257557, "loss": 0.9768, "step": 2845 }, { "epoch": 0.3287955699123212, "grad_norm": 0.26400068402290344, "learning_rate": 0.00019994414205639775, "loss": 1.002, "step": 2850 }, { "epoch": 0.3293724042455007, "grad_norm": 0.266400545835495, "learning_rate": 0.00019994187646200917, "loss": 1.0267, "step": 2855 }, { "epoch": 0.3299492385786802, "grad_norm": 0.25532013177871704, "learning_rate": 0.00019993956584361063, "loss": 0.9929, "step": 2860 }, { "epoch": 0.3305260729118597, "grad_norm": 0.24749302864074707, "learning_rate": 0.00019993721020224308, "loss": 0.9693, "step": 2865 }, { "epoch": 0.3311029072450392, "grad_norm": 0.2967720925807953, "learning_rate": 0.0001999348095389677, "loss": 0.9674, "step": 2870 }, { "epoch": 0.3316797415782187, "grad_norm": 0.25716492533683777, "learning_rate": 0.00019993236385486607, "loss": 0.9448, "step": 2875 }, { "epoch": 0.33225657591139823, "grad_norm": 0.265095978975296, "learning_rate": 0.0001999298731510399, "loss": 0.9434, "step": 2880 }, { "epoch": 0.33283341024457774, "grad_norm": 0.25619763135910034, "learning_rate": 0.00019992733742861128, "loss": 0.9688, "step": 2885 }, { "epoch": 0.33341024457775725, "grad_norm": 0.25639405846595764, "learning_rate": 0.0001999247566887226, "loss": 0.9769, "step": 2890 }, { "epoch": 0.33398707891093676, "grad_norm": 0.29292336106300354, "learning_rate": 0.00019992213093253643, "loss": 0.9579, "step": 2895 }, { "epoch": 0.3345639132441163, "grad_norm": 0.3063664436340332, "learning_rate": 0.0001999194601612357, "loss": 1.0057, "step": 2900 }, { "epoch": 0.3351407475772958, "grad_norm": 0.26349523663520813, "learning_rate": 0.00019991674437602362, "loss": 1.0265, "step": 2905 }, { "epoch": 0.3357175819104753, "grad_norm": 0.26573020219802856, "learning_rate": 0.0001999139835781236, "loss": 1.0405, "step": 2910 }, { "epoch": 0.3362944162436548, "grad_norm": 0.2737710177898407, "learning_rate": 0.00019991117776877942, "loss": 0.9219, "step": 2915 }, { "epoch": 0.3368712505768343, "grad_norm": 0.26778504252433777, "learning_rate": 0.00019990832694925513, "loss": 1.0212, "step": 2920 }, { "epoch": 0.3374480849100138, "grad_norm": 0.26119446754455566, "learning_rate": 0.00019990543112083503, "loss": 0.9696, "step": 2925 }, { "epoch": 0.33802491924319333, "grad_norm": 0.256541907787323, "learning_rate": 0.00019990249028482363, "loss": 1.0118, "step": 2930 }, { "epoch": 0.33860175357637284, "grad_norm": 0.2648811936378479, "learning_rate": 0.0001998995044425458, "loss": 0.9676, "step": 2935 }, { "epoch": 0.33917858790955235, "grad_norm": 0.254853755235672, "learning_rate": 0.00019989647359534672, "loss": 0.9455, "step": 2940 }, { "epoch": 0.33975542224273186, "grad_norm": 0.26572519540786743, "learning_rate": 0.00019989339774459177, "loss": 0.9875, "step": 2945 }, { "epoch": 0.3403322565759114, "grad_norm": 0.27205315232276917, "learning_rate": 0.00019989027689166662, "loss": 0.9661, "step": 2950 }, { "epoch": 0.3409090909090909, "grad_norm": 0.2578754723072052, "learning_rate": 0.0001998871110379772, "loss": 0.976, "step": 2955 }, { "epoch": 0.3414859252422704, "grad_norm": 0.2672814130783081, "learning_rate": 0.00019988390018494976, "loss": 0.9705, "step": 2960 }, { "epoch": 0.3420627595754499, "grad_norm": 0.2663863003253937, "learning_rate": 0.00019988064433403078, "loss": 0.976, "step": 2965 }, { "epoch": 0.3426395939086294, "grad_norm": 0.2686617970466614, "learning_rate": 0.00019987734348668706, "loss": 0.9612, "step": 2970 }, { "epoch": 0.343216428241809, "grad_norm": 0.24667376279830933, "learning_rate": 0.00019987399764440558, "loss": 0.9562, "step": 2975 }, { "epoch": 0.3437932625749885, "grad_norm": 0.2621404528617859, "learning_rate": 0.0001998706068086937, "loss": 1.0241, "step": 2980 }, { "epoch": 0.344370096908168, "grad_norm": 0.2822721600532532, "learning_rate": 0.00019986717098107896, "loss": 0.993, "step": 2985 }, { "epoch": 0.3449469312413475, "grad_norm": 0.26043906807899475, "learning_rate": 0.00019986369016310925, "loss": 0.9727, "step": 2990 }, { "epoch": 0.345523765574527, "grad_norm": 0.27216097712516785, "learning_rate": 0.00019986016435635264, "loss": 0.9866, "step": 2995 }, { "epoch": 0.34610059990770653, "grad_norm": 0.26094967126846313, "learning_rate": 0.00019985659356239758, "loss": 0.925, "step": 3000 }, { "epoch": 0.34667743424088604, "grad_norm": 0.28546351194381714, "learning_rate": 0.0001998529777828526, "loss": 0.9614, "step": 3005 }, { "epoch": 0.34725426857406555, "grad_norm": 0.2722310423851013, "learning_rate": 0.00019984931701934677, "loss": 0.9827, "step": 3010 }, { "epoch": 0.34783110290724506, "grad_norm": 0.2718545198440552, "learning_rate": 0.00019984561127352914, "loss": 0.9642, "step": 3015 }, { "epoch": 0.34840793724042457, "grad_norm": 0.2546035349369049, "learning_rate": 0.00019984186054706923, "loss": 0.9957, "step": 3020 }, { "epoch": 0.3489847715736041, "grad_norm": 0.2482123076915741, "learning_rate": 0.00019983806484165674, "loss": 0.9924, "step": 3025 }, { "epoch": 0.3495616059067836, "grad_norm": 0.2893499732017517, "learning_rate": 0.00019983422415900158, "loss": 1.0, "step": 3030 }, { "epoch": 0.3501384402399631, "grad_norm": 0.2613871693611145, "learning_rate": 0.00019983033850083407, "loss": 1.0196, "step": 3035 }, { "epoch": 0.3507152745731426, "grad_norm": 0.27375757694244385, "learning_rate": 0.00019982640786890465, "loss": 0.9422, "step": 3040 }, { "epoch": 0.3512921089063221, "grad_norm": 0.2552187442779541, "learning_rate": 0.00019982243226498411, "loss": 0.9861, "step": 3045 }, { "epoch": 0.35186894323950163, "grad_norm": 0.2758137583732605, "learning_rate": 0.00019981841169086346, "loss": 0.9763, "step": 3050 }, { "epoch": 0.35244577757268114, "grad_norm": 0.26991990208625793, "learning_rate": 0.00019981434614835397, "loss": 0.9611, "step": 3055 }, { "epoch": 0.35302261190586065, "grad_norm": 0.27569013833999634, "learning_rate": 0.00019981023563928716, "loss": 0.9829, "step": 3060 }, { "epoch": 0.35359944623904016, "grad_norm": 0.27400723099708557, "learning_rate": 0.00019980608016551487, "loss": 0.9673, "step": 3065 }, { "epoch": 0.35417628057221967, "grad_norm": 0.2885724902153015, "learning_rate": 0.0001998018797289091, "loss": 0.9851, "step": 3070 }, { "epoch": 0.3547531149053992, "grad_norm": 0.2800985276699066, "learning_rate": 0.00019979763433136216, "loss": 0.9221, "step": 3075 }, { "epoch": 0.3553299492385787, "grad_norm": 0.28481200337409973, "learning_rate": 0.00019979334397478665, "loss": 0.9812, "step": 3080 }, { "epoch": 0.3559067835717582, "grad_norm": 0.2865158021450043, "learning_rate": 0.00019978900866111533, "loss": 1.0323, "step": 3085 }, { "epoch": 0.3564836179049377, "grad_norm": 0.2730359733104706, "learning_rate": 0.00019978462839230133, "loss": 0.9722, "step": 3090 }, { "epoch": 0.3570604522381172, "grad_norm": 0.2704940736293793, "learning_rate": 0.0001997802031703179, "loss": 1.0168, "step": 3095 }, { "epoch": 0.35763728657129673, "grad_norm": 0.26255276799201965, "learning_rate": 0.00019977573299715865, "loss": 0.9872, "step": 3100 }, { "epoch": 0.35821412090447624, "grad_norm": 0.275738924741745, "learning_rate": 0.0001997712178748374, "loss": 0.9876, "step": 3105 }, { "epoch": 0.35879095523765575, "grad_norm": 0.24914264678955078, "learning_rate": 0.00019976665780538824, "loss": 0.8764, "step": 3110 }, { "epoch": 0.35936778957083526, "grad_norm": 0.26297125220298767, "learning_rate": 0.0001997620527908654, "loss": 1.0151, "step": 3115 }, { "epoch": 0.35994462390401477, "grad_norm": 0.2562413811683655, "learning_rate": 0.0001997574028333436, "loss": 0.9859, "step": 3120 }, { "epoch": 0.3605214582371943, "grad_norm": 0.2676263451576233, "learning_rate": 0.0001997527079349175, "loss": 0.9625, "step": 3125 }, { "epoch": 0.3610982925703738, "grad_norm": 0.2798160910606384, "learning_rate": 0.0001997479680977023, "loss": 1.0202, "step": 3130 }, { "epoch": 0.3616751269035533, "grad_norm": 0.3188830018043518, "learning_rate": 0.0001997431833238332, "loss": 0.9762, "step": 3135 }, { "epoch": 0.3622519612367328, "grad_norm": 0.2774507403373718, "learning_rate": 0.00019973835361546577, "loss": 0.9795, "step": 3140 }, { "epoch": 0.3628287955699123, "grad_norm": 0.2787860631942749, "learning_rate": 0.0001997334789747759, "loss": 0.9737, "step": 3145 }, { "epoch": 0.36340562990309183, "grad_norm": 0.3146364390850067, "learning_rate": 0.00019972855940395947, "loss": 0.935, "step": 3150 }, { "epoch": 0.36398246423627134, "grad_norm": 0.26312851905822754, "learning_rate": 0.00019972359490523284, "loss": 0.9587, "step": 3155 }, { "epoch": 0.36455929856945085, "grad_norm": 0.2730996906757355, "learning_rate": 0.0001997185854808325, "loss": 0.9661, "step": 3160 }, { "epoch": 0.36513613290263036, "grad_norm": 0.28980138897895813, "learning_rate": 0.00019971353113301527, "loss": 0.9461, "step": 3165 }, { "epoch": 0.36571296723580987, "grad_norm": 0.2760608494281769, "learning_rate": 0.00019970843186405807, "loss": 1.0146, "step": 3170 }, { "epoch": 0.3662898015689894, "grad_norm": 0.27623140811920166, "learning_rate": 0.0001997032876762582, "loss": 0.9888, "step": 3175 }, { "epoch": 0.3668666359021689, "grad_norm": 0.2842963635921478, "learning_rate": 0.00019969809857193306, "loss": 0.9676, "step": 3180 }, { "epoch": 0.3674434702353484, "grad_norm": 0.29437965154647827, "learning_rate": 0.00019969286455342035, "loss": 1.0161, "step": 3185 }, { "epoch": 0.3680203045685279, "grad_norm": 0.2872377336025238, "learning_rate": 0.00019968758562307807, "loss": 0.9173, "step": 3190 }, { "epoch": 0.3685971389017074, "grad_norm": 0.2813303768634796, "learning_rate": 0.0001996822617832843, "loss": 0.9569, "step": 3195 }, { "epoch": 0.36917397323488693, "grad_norm": 0.2630009353160858, "learning_rate": 0.00019967689303643753, "loss": 0.919, "step": 3200 }, { "epoch": 0.36975080756806644, "grad_norm": 0.28360888361930847, "learning_rate": 0.00019967147938495635, "loss": 0.9788, "step": 3205 }, { "epoch": 0.37032764190124595, "grad_norm": 0.2732391655445099, "learning_rate": 0.0001996660208312796, "loss": 1.0405, "step": 3210 }, { "epoch": 0.37090447623442546, "grad_norm": 0.28286460041999817, "learning_rate": 0.00019966051737786643, "loss": 1.0152, "step": 3215 }, { "epoch": 0.37148131056760497, "grad_norm": 0.31790444254875183, "learning_rate": 0.0001996549690271961, "loss": 1.0018, "step": 3220 }, { "epoch": 0.3720581449007845, "grad_norm": 0.28235310316085815, "learning_rate": 0.00019964937578176816, "loss": 1.0148, "step": 3225 }, { "epoch": 0.372634979233964, "grad_norm": 0.29751458764076233, "learning_rate": 0.00019964373764410237, "loss": 0.9432, "step": 3230 }, { "epoch": 0.3732118135671435, "grad_norm": 0.2763459086418152, "learning_rate": 0.00019963805461673876, "loss": 0.9836, "step": 3235 }, { "epoch": 0.373788647900323, "grad_norm": 0.2639414966106415, "learning_rate": 0.00019963232670223752, "loss": 0.9687, "step": 3240 }, { "epoch": 0.3743654822335025, "grad_norm": 0.25585541129112244, "learning_rate": 0.0001996265539031791, "loss": 0.9544, "step": 3245 }, { "epoch": 0.37494231656668203, "grad_norm": 0.30802932381629944, "learning_rate": 0.00019962073622216417, "loss": 1.0018, "step": 3250 }, { "epoch": 0.37551915089986154, "grad_norm": 0.27707305550575256, "learning_rate": 0.00019961487366181355, "loss": 0.948, "step": 3255 }, { "epoch": 0.37609598523304105, "grad_norm": 0.26873978972435, "learning_rate": 0.0001996089662247684, "loss": 0.9899, "step": 3260 }, { "epoch": 0.37667281956622056, "grad_norm": 0.2803768217563629, "learning_rate": 0.00019960301391368996, "loss": 0.9381, "step": 3265 }, { "epoch": 0.3772496538994001, "grad_norm": 0.2769775688648224, "learning_rate": 0.00019959701673125983, "loss": 0.9207, "step": 3270 }, { "epoch": 0.3778264882325796, "grad_norm": 0.27256807684898376, "learning_rate": 0.0001995909746801797, "loss": 1.0029, "step": 3275 }, { "epoch": 0.3784033225657591, "grad_norm": 0.2660912573337555, "learning_rate": 0.0001995848877631716, "loss": 0.9918, "step": 3280 }, { "epoch": 0.3789801568989386, "grad_norm": 0.28680184483528137, "learning_rate": 0.00019957875598297759, "loss": 0.9344, "step": 3285 }, { "epoch": 0.3795569912321181, "grad_norm": 0.2940070331096649, "learning_rate": 0.00019957257934236013, "loss": 0.9575, "step": 3290 }, { "epoch": 0.3801338255652976, "grad_norm": 0.25873056054115295, "learning_rate": 0.00019956635784410177, "loss": 0.981, "step": 3295 }, { "epoch": 0.38071065989847713, "grad_norm": 0.2866104245185852, "learning_rate": 0.00019956009149100533, "loss": 0.9889, "step": 3300 }, { "epoch": 0.38128749423165664, "grad_norm": 0.2744695246219635, "learning_rate": 0.00019955378028589383, "loss": 0.9321, "step": 3305 }, { "epoch": 0.38186432856483615, "grad_norm": 0.2819940745830536, "learning_rate": 0.0001995474242316104, "loss": 0.9603, "step": 3310 }, { "epoch": 0.38244116289801566, "grad_norm": 0.2561120390892029, "learning_rate": 0.00019954102333101856, "loss": 0.9994, "step": 3315 }, { "epoch": 0.3830179972311952, "grad_norm": 0.2975214123725891, "learning_rate": 0.00019953457758700184, "loss": 1.0012, "step": 3320 }, { "epoch": 0.38359483156437474, "grad_norm": 0.26979345083236694, "learning_rate": 0.00019952808700246413, "loss": 0.9865, "step": 3325 }, { "epoch": 0.38417166589755425, "grad_norm": 0.2876468002796173, "learning_rate": 0.0001995215515803294, "loss": 0.973, "step": 3330 }, { "epoch": 0.38474850023073376, "grad_norm": 0.2840271592140198, "learning_rate": 0.0001995149713235419, "loss": 1.0173, "step": 3335 }, { "epoch": 0.38532533456391327, "grad_norm": 0.27148565649986267, "learning_rate": 0.00019950834623506602, "loss": 0.9508, "step": 3340 }, { "epoch": 0.3859021688970928, "grad_norm": 0.38565149903297424, "learning_rate": 0.00019950167631788642, "loss": 1.0152, "step": 3345 }, { "epoch": 0.3864790032302723, "grad_norm": 0.2814615070819855, "learning_rate": 0.00019949496157500786, "loss": 0.9375, "step": 3350 }, { "epoch": 0.3870558375634518, "grad_norm": 0.29191091656684875, "learning_rate": 0.00019948820200945536, "loss": 1.0029, "step": 3355 }, { "epoch": 0.3876326718966313, "grad_norm": 0.29751864075660706, "learning_rate": 0.00019948139762427416, "loss": 1.0162, "step": 3360 }, { "epoch": 0.3882095062298108, "grad_norm": 0.2734597623348236, "learning_rate": 0.0001994745484225296, "loss": 0.9674, "step": 3365 }, { "epoch": 0.38878634056299033, "grad_norm": 0.2579993009567261, "learning_rate": 0.0001994676544073073, "loss": 0.9396, "step": 3370 }, { "epoch": 0.38936317489616984, "grad_norm": 0.2908860743045807, "learning_rate": 0.000199460715581713, "loss": 1.0213, "step": 3375 }, { "epoch": 0.38994000922934935, "grad_norm": 0.3159235417842865, "learning_rate": 0.0001994537319488726, "loss": 0.9886, "step": 3380 }, { "epoch": 0.39051684356252886, "grad_norm": 0.27023908495903015, "learning_rate": 0.00019944670351193232, "loss": 0.9804, "step": 3385 }, { "epoch": 0.39109367789570837, "grad_norm": 0.2643144428730011, "learning_rate": 0.0001994396302740585, "loss": 0.9912, "step": 3390 }, { "epoch": 0.3916705122288879, "grad_norm": 0.3253761827945709, "learning_rate": 0.00019943251223843755, "loss": 1.0102, "step": 3395 }, { "epoch": 0.3922473465620674, "grad_norm": 0.2623632848262787, "learning_rate": 0.00019942534940827625, "loss": 0.9324, "step": 3400 }, { "epoch": 0.3928241808952469, "grad_norm": 0.27166181802749634, "learning_rate": 0.00019941814178680144, "loss": 0.9578, "step": 3405 }, { "epoch": 0.3934010152284264, "grad_norm": 0.28351280093193054, "learning_rate": 0.00019941088937726011, "loss": 0.9684, "step": 3410 }, { "epoch": 0.3939778495616059, "grad_norm": 0.2918001413345337, "learning_rate": 0.0001994035921829196, "loss": 0.9833, "step": 3415 }, { "epoch": 0.39455468389478543, "grad_norm": 0.2910713255405426, "learning_rate": 0.00019939625020706724, "loss": 0.9937, "step": 3420 }, { "epoch": 0.39513151822796494, "grad_norm": 0.27372169494628906, "learning_rate": 0.0001993888634530106, "loss": 0.9769, "step": 3425 }, { "epoch": 0.39570835256114445, "grad_norm": 0.298957884311676, "learning_rate": 0.00019938143192407744, "loss": 0.991, "step": 3430 }, { "epoch": 0.39628518689432396, "grad_norm": 0.2799958884716034, "learning_rate": 0.00019937395562361564, "loss": 1.0079, "step": 3435 }, { "epoch": 0.39686202122750347, "grad_norm": 0.2845383882522583, "learning_rate": 0.00019936643455499336, "loss": 1.0427, "step": 3440 }, { "epoch": 0.397438855560683, "grad_norm": 0.2783012390136719, "learning_rate": 0.00019935886872159885, "loss": 0.9672, "step": 3445 }, { "epoch": 0.3980156898938625, "grad_norm": 0.27244484424591064, "learning_rate": 0.00019935125812684047, "loss": 0.9309, "step": 3450 }, { "epoch": 0.398592524227042, "grad_norm": 0.3226234018802643, "learning_rate": 0.00019934360277414686, "loss": 0.9441, "step": 3455 }, { "epoch": 0.3991693585602215, "grad_norm": 0.2866813540458679, "learning_rate": 0.00019933590266696673, "loss": 1.0051, "step": 3460 }, { "epoch": 0.399746192893401, "grad_norm": 0.2691948413848877, "learning_rate": 0.00019932815780876904, "loss": 0.9913, "step": 3465 }, { "epoch": 0.40032302722658053, "grad_norm": 0.2774306833744049, "learning_rate": 0.0001993203682030428, "loss": 0.9852, "step": 3470 }, { "epoch": 0.40089986155976004, "grad_norm": 0.2922816276550293, "learning_rate": 0.00019931253385329734, "loss": 0.9824, "step": 3475 }, { "epoch": 0.40147669589293955, "grad_norm": 0.26545560359954834, "learning_rate": 0.00019930465476306197, "loss": 0.9951, "step": 3480 }, { "epoch": 0.40205353022611906, "grad_norm": 0.27202633023262024, "learning_rate": 0.00019929673093588624, "loss": 0.9971, "step": 3485 }, { "epoch": 0.40263036455929857, "grad_norm": 0.29674404859542847, "learning_rate": 0.00019928876237533988, "loss": 1.0056, "step": 3490 }, { "epoch": 0.4032071988924781, "grad_norm": 0.276904821395874, "learning_rate": 0.00019928074908501272, "loss": 0.976, "step": 3495 }, { "epoch": 0.4037840332256576, "grad_norm": 0.26667892932891846, "learning_rate": 0.00019927269106851482, "loss": 0.9564, "step": 3500 }, { "epoch": 0.4043608675588371, "grad_norm": 0.31080129742622375, "learning_rate": 0.00019926458832947622, "loss": 1.0365, "step": 3505 }, { "epoch": 0.4049377018920166, "grad_norm": 0.278057724237442, "learning_rate": 0.00019925644087154734, "loss": 0.9868, "step": 3510 }, { "epoch": 0.4055145362251961, "grad_norm": 0.2963494658470154, "learning_rate": 0.00019924824869839853, "loss": 0.9729, "step": 3515 }, { "epoch": 0.40609137055837563, "grad_norm": 0.27625954151153564, "learning_rate": 0.00019924001181372046, "loss": 0.9466, "step": 3520 }, { "epoch": 0.40666820489155514, "grad_norm": 0.31308531761169434, "learning_rate": 0.00019923173022122378, "loss": 1.0257, "step": 3525 }, { "epoch": 0.40724503922473465, "grad_norm": 0.32162705063819885, "learning_rate": 0.0001992234039246394, "loss": 0.9872, "step": 3530 }, { "epoch": 0.40782187355791416, "grad_norm": 0.281136691570282, "learning_rate": 0.0001992150329277184, "loss": 1.0024, "step": 3535 }, { "epoch": 0.40839870789109367, "grad_norm": 0.27098140120506287, "learning_rate": 0.00019920661723423183, "loss": 0.9851, "step": 3540 }, { "epoch": 0.4089755422242732, "grad_norm": 0.28920409083366394, "learning_rate": 0.000199198156847971, "loss": 1.0035, "step": 3545 }, { "epoch": 0.4095523765574527, "grad_norm": 0.2802961766719818, "learning_rate": 0.00019918965177274735, "loss": 1.016, "step": 3550 }, { "epoch": 0.4101292108906322, "grad_norm": 0.2619479298591614, "learning_rate": 0.00019918110201239247, "loss": 0.9942, "step": 3555 }, { "epoch": 0.4107060452238117, "grad_norm": 0.26170656085014343, "learning_rate": 0.00019917250757075795, "loss": 0.9494, "step": 3560 }, { "epoch": 0.4112828795569912, "grad_norm": 0.2886107265949249, "learning_rate": 0.00019916386845171568, "loss": 0.9857, "step": 3565 }, { "epoch": 0.41185971389017073, "grad_norm": 0.26587212085723877, "learning_rate": 0.00019915518465915758, "loss": 0.9708, "step": 3570 }, { "epoch": 0.41243654822335024, "grad_norm": 0.2751521170139313, "learning_rate": 0.00019914645619699571, "loss": 0.9622, "step": 3575 }, { "epoch": 0.41301338255652975, "grad_norm": 0.28140199184417725, "learning_rate": 0.00019913768306916227, "loss": 1.0042, "step": 3580 }, { "epoch": 0.41359021688970926, "grad_norm": 0.2568947374820709, "learning_rate": 0.00019912886527960954, "loss": 0.9312, "step": 3585 }, { "epoch": 0.41416705122288877, "grad_norm": 0.27960750460624695, "learning_rate": 0.00019912000283231, "loss": 0.9689, "step": 3590 }, { "epoch": 0.4147438855560683, "grad_norm": 0.30046120285987854, "learning_rate": 0.00019911109573125617, "loss": 1.0254, "step": 3595 }, { "epoch": 0.4153207198892478, "grad_norm": 0.2881747782230377, "learning_rate": 0.0001991021439804607, "loss": 1.0188, "step": 3600 }, { "epoch": 0.4158975542224273, "grad_norm": 0.28633803129196167, "learning_rate": 0.00019909314758395638, "loss": 0.9999, "step": 3605 }, { "epoch": 0.4164743885556068, "grad_norm": 0.27391380071640015, "learning_rate": 0.00019908410654579615, "loss": 0.9707, "step": 3610 }, { "epoch": 0.4170512228887863, "grad_norm": 0.2890860438346863, "learning_rate": 0.00019907502087005297, "loss": 0.998, "step": 3615 }, { "epoch": 0.41762805722196583, "grad_norm": 0.28422412276268005, "learning_rate": 0.00019906589056081995, "loss": 0.9878, "step": 3620 }, { "epoch": 0.41820489155514534, "grad_norm": 0.26787862181663513, "learning_rate": 0.0001990567156222103, "loss": 0.9707, "step": 3625 }, { "epoch": 0.41878172588832485, "grad_norm": 0.28404876589775085, "learning_rate": 0.00019904749605835742, "loss": 0.9737, "step": 3630 }, { "epoch": 0.41935856022150436, "grad_norm": 0.2951257824897766, "learning_rate": 0.0001990382318734147, "loss": 0.9745, "step": 3635 }, { "epoch": 0.41993539455468387, "grad_norm": 0.28763440251350403, "learning_rate": 0.00019902892307155563, "loss": 0.946, "step": 3640 }, { "epoch": 0.4205122288878634, "grad_norm": 0.3106132447719574, "learning_rate": 0.00019901956965697387, "loss": 0.9918, "step": 3645 }, { "epoch": 0.4210890632210429, "grad_norm": 0.29146450757980347, "learning_rate": 0.00019901017163388322, "loss": 0.9604, "step": 3650 }, { "epoch": 0.4216658975542224, "grad_norm": 0.2769738733768463, "learning_rate": 0.00019900072900651744, "loss": 0.9916, "step": 3655 }, { "epoch": 0.4222427318874019, "grad_norm": 0.5984246730804443, "learning_rate": 0.00019899124177913041, "loss": 0.9699, "step": 3660 }, { "epoch": 0.4228195662205814, "grad_norm": 0.2717759907245636, "learning_rate": 0.00019898170995599627, "loss": 0.9808, "step": 3665 }, { "epoch": 0.423396400553761, "grad_norm": 0.292111337184906, "learning_rate": 0.00019897213354140903, "loss": 0.9985, "step": 3670 }, { "epoch": 0.4239732348869405, "grad_norm": 0.2680363655090332, "learning_rate": 0.00019896251253968288, "loss": 1.0368, "step": 3675 }, { "epoch": 0.42455006922012, "grad_norm": 0.2782565653324127, "learning_rate": 0.00019895284695515213, "loss": 0.9531, "step": 3680 }, { "epoch": 0.4251269035532995, "grad_norm": 0.290493369102478, "learning_rate": 0.00019894313679217116, "loss": 0.948, "step": 3685 }, { "epoch": 0.42570373788647903, "grad_norm": 0.2900492250919342, "learning_rate": 0.0001989333820551144, "loss": 1.0143, "step": 3690 }, { "epoch": 0.42628057221965854, "grad_norm": 0.2770122289657593, "learning_rate": 0.00019892358274837638, "loss": 0.9729, "step": 3695 }, { "epoch": 0.42685740655283805, "grad_norm": 0.28434714674949646, "learning_rate": 0.00019891373887637168, "loss": 0.9791, "step": 3700 }, { "epoch": 0.42743424088601756, "grad_norm": 0.28020331263542175, "learning_rate": 0.00019890385044353501, "loss": 0.9201, "step": 3705 }, { "epoch": 0.42801107521919707, "grad_norm": 0.28748229146003723, "learning_rate": 0.00019889391745432113, "loss": 0.9768, "step": 3710 }, { "epoch": 0.4285879095523766, "grad_norm": 0.28073740005493164, "learning_rate": 0.00019888393991320487, "loss": 0.9962, "step": 3715 }, { "epoch": 0.4291647438855561, "grad_norm": 0.2757289409637451, "learning_rate": 0.00019887391782468113, "loss": 1.0037, "step": 3720 }, { "epoch": 0.4297415782187356, "grad_norm": 0.2921489179134369, "learning_rate": 0.00019886385119326488, "loss": 0.9662, "step": 3725 }, { "epoch": 0.4303184125519151, "grad_norm": 0.2666405141353607, "learning_rate": 0.0001988537400234911, "loss": 0.9689, "step": 3730 }, { "epoch": 0.4308952468850946, "grad_norm": 0.3027278780937195, "learning_rate": 0.000198843584319915, "loss": 0.9806, "step": 3735 }, { "epoch": 0.43147208121827413, "grad_norm": 0.28119519352912903, "learning_rate": 0.00019883338408711168, "loss": 1.0291, "step": 3740 }, { "epoch": 0.43204891555145364, "grad_norm": 0.2693753242492676, "learning_rate": 0.0001988231393296764, "loss": 1.0011, "step": 3745 }, { "epoch": 0.43262574988463315, "grad_norm": 0.2855510413646698, "learning_rate": 0.0001988128500522244, "loss": 1.0099, "step": 3750 }, { "epoch": 0.43320258421781266, "grad_norm": 0.2987017035484314, "learning_rate": 0.00019880251625939104, "loss": 1.0431, "step": 3755 }, { "epoch": 0.43377941855099217, "grad_norm": 0.29469966888427734, "learning_rate": 0.0001987921379558317, "loss": 0.9965, "step": 3760 }, { "epoch": 0.4343562528841717, "grad_norm": 0.30328744649887085, "learning_rate": 0.00019878171514622187, "loss": 0.9773, "step": 3765 }, { "epoch": 0.4349330872173512, "grad_norm": 0.2655210494995117, "learning_rate": 0.00019877124783525697, "loss": 0.9963, "step": 3770 }, { "epoch": 0.4355099215505307, "grad_norm": 0.27883434295654297, "learning_rate": 0.00019876073602765262, "loss": 1.0189, "step": 3775 }, { "epoch": 0.4360867558837102, "grad_norm": 0.3105422556400299, "learning_rate": 0.00019875017972814435, "loss": 0.931, "step": 3780 }, { "epoch": 0.4366635902168897, "grad_norm": 0.2792617678642273, "learning_rate": 0.00019873957894148782, "loss": 0.9956, "step": 3785 }, { "epoch": 0.43724042455006923, "grad_norm": 0.2996716797351837, "learning_rate": 0.00019872893367245875, "loss": 0.9462, "step": 3790 }, { "epoch": 0.43781725888324874, "grad_norm": 0.29715242981910706, "learning_rate": 0.00019871824392585276, "loss": 0.941, "step": 3795 }, { "epoch": 0.43839409321642825, "grad_norm": 0.2681552767753601, "learning_rate": 0.00019870750970648568, "loss": 0.9552, "step": 3800 }, { "epoch": 0.43897092754960776, "grad_norm": 0.2906797528266907, "learning_rate": 0.00019869673101919325, "loss": 0.974, "step": 3805 }, { "epoch": 0.43954776188278727, "grad_norm": 0.3114708960056305, "learning_rate": 0.00019868590786883134, "loss": 0.9228, "step": 3810 }, { "epoch": 0.4401245962159668, "grad_norm": 0.30557358264923096, "learning_rate": 0.00019867504026027576, "loss": 0.991, "step": 3815 }, { "epoch": 0.4407014305491463, "grad_norm": 0.31230202317237854, "learning_rate": 0.00019866412819842237, "loss": 0.9541, "step": 3820 }, { "epoch": 0.4412782648823258, "grad_norm": 0.2935572564601898, "learning_rate": 0.00019865317168818713, "loss": 0.951, "step": 3825 }, { "epoch": 0.4418550992155053, "grad_norm": 0.2815152406692505, "learning_rate": 0.00019864217073450595, "loss": 0.966, "step": 3830 }, { "epoch": 0.4424319335486848, "grad_norm": 0.2667374312877655, "learning_rate": 0.00019863112534233474, "loss": 0.9722, "step": 3835 }, { "epoch": 0.44300876788186433, "grad_norm": 0.28506049513816833, "learning_rate": 0.0001986200355166495, "loss": 0.9854, "step": 3840 }, { "epoch": 0.44358560221504384, "grad_norm": 0.28640660643577576, "learning_rate": 0.00019860890126244626, "loss": 1.0193, "step": 3845 }, { "epoch": 0.44416243654822335, "grad_norm": 0.27358683943748474, "learning_rate": 0.000198597722584741, "loss": 1.0066, "step": 3850 }, { "epoch": 0.44473927088140286, "grad_norm": 0.29887109994888306, "learning_rate": 0.0001985864994885697, "loss": 0.997, "step": 3855 }, { "epoch": 0.44531610521458237, "grad_norm": 0.27347439527511597, "learning_rate": 0.00019857523197898836, "loss": 0.9212, "step": 3860 }, { "epoch": 0.4458929395477619, "grad_norm": 0.26219651103019714, "learning_rate": 0.0001985639200610731, "loss": 0.9621, "step": 3865 }, { "epoch": 0.4464697738809414, "grad_norm": 0.3028033673763275, "learning_rate": 0.00019855256373991993, "loss": 0.9535, "step": 3870 }, { "epoch": 0.4470466082141209, "grad_norm": 0.31690603494644165, "learning_rate": 0.00019854116302064488, "loss": 1.0083, "step": 3875 }, { "epoch": 0.4476234425473004, "grad_norm": 0.3070354759693146, "learning_rate": 0.00019852971790838402, "loss": 0.99, "step": 3880 }, { "epoch": 0.4482002768804799, "grad_norm": 0.26783743500709534, "learning_rate": 0.00019851822840829338, "loss": 1.0102, "step": 3885 }, { "epoch": 0.44877711121365943, "grad_norm": 0.315676748752594, "learning_rate": 0.00019850669452554898, "loss": 0.9339, "step": 3890 }, { "epoch": 0.44935394554683894, "grad_norm": 0.2907865345478058, "learning_rate": 0.00019849511626534688, "loss": 0.973, "step": 3895 }, { "epoch": 0.44993077988001845, "grad_norm": 0.2988938093185425, "learning_rate": 0.0001984834936329031, "loss": 0.9626, "step": 3900 }, { "epoch": 0.45050761421319796, "grad_norm": 0.27597156167030334, "learning_rate": 0.00019847182663345372, "loss": 0.968, "step": 3905 }, { "epoch": 0.45108444854637747, "grad_norm": 0.28698641061782837, "learning_rate": 0.00019846011527225463, "loss": 0.9504, "step": 3910 }, { "epoch": 0.451661282879557, "grad_norm": 0.31607502698898315, "learning_rate": 0.00019844835955458193, "loss": 1.0293, "step": 3915 }, { "epoch": 0.4522381172127365, "grad_norm": 0.28895917534828186, "learning_rate": 0.00019843655948573153, "loss": 0.9979, "step": 3920 }, { "epoch": 0.452814951545916, "grad_norm": 0.2905782461166382, "learning_rate": 0.00019842471507101937, "loss": 0.9544, "step": 3925 }, { "epoch": 0.4533917858790955, "grad_norm": 0.3004554510116577, "learning_rate": 0.00019841282631578145, "loss": 0.9802, "step": 3930 }, { "epoch": 0.453968620212275, "grad_norm": 0.2894156277179718, "learning_rate": 0.00019840089322537363, "loss": 0.996, "step": 3935 }, { "epoch": 0.45454545454545453, "grad_norm": 0.30515992641448975, "learning_rate": 0.0001983889158051718, "loss": 0.9647, "step": 3940 }, { "epoch": 0.45512228887863404, "grad_norm": 0.2915753126144409, "learning_rate": 0.00019837689406057183, "loss": 0.9816, "step": 3945 }, { "epoch": 0.45569912321181355, "grad_norm": 0.29045170545578003, "learning_rate": 0.0001983648279969895, "loss": 0.9855, "step": 3950 }, { "epoch": 0.45627595754499306, "grad_norm": 0.31583306193351746, "learning_rate": 0.00019835271761986062, "loss": 0.9932, "step": 3955 }, { "epoch": 0.45685279187817257, "grad_norm": 0.2948096990585327, "learning_rate": 0.00019834056293464093, "loss": 0.9854, "step": 3960 }, { "epoch": 0.4574296262113521, "grad_norm": 0.29406291246414185, "learning_rate": 0.00019832836394680615, "loss": 0.9591, "step": 3965 }, { "epoch": 0.4580064605445316, "grad_norm": 0.36170923709869385, "learning_rate": 0.00019831612066185193, "loss": 0.9963, "step": 3970 }, { "epoch": 0.4585832948777111, "grad_norm": 0.29276424646377563, "learning_rate": 0.00019830383308529393, "loss": 0.9363, "step": 3975 }, { "epoch": 0.4591601292108906, "grad_norm": 0.33209964632987976, "learning_rate": 0.0001982915012226677, "loss": 0.937, "step": 3980 }, { "epoch": 0.4597369635440701, "grad_norm": 0.2749769389629364, "learning_rate": 0.00019827912507952876, "loss": 0.968, "step": 3985 }, { "epoch": 0.46031379787724963, "grad_norm": 0.29793781042099, "learning_rate": 0.00019826670466145262, "loss": 0.9565, "step": 3990 }, { "epoch": 0.46089063221042914, "grad_norm": 0.2820662260055542, "learning_rate": 0.00019825423997403462, "loss": 1.0132, "step": 3995 }, { "epoch": 0.46146746654360865, "grad_norm": 0.2783183157444, "learning_rate": 0.00019824173102289027, "loss": 1.0136, "step": 4000 }, { "epoch": 0.46204430087678816, "grad_norm": 0.29929864406585693, "learning_rate": 0.00019822917781365474, "loss": 0.9783, "step": 4005 }, { "epoch": 0.46262113520996767, "grad_norm": 0.278143048286438, "learning_rate": 0.00019821658035198332, "loss": 0.9579, "step": 4010 }, { "epoch": 0.4631979695431472, "grad_norm": 0.28964924812316895, "learning_rate": 0.00019820393864355122, "loss": 0.9698, "step": 4015 }, { "epoch": 0.46377480387632675, "grad_norm": 0.29153019189834595, "learning_rate": 0.00019819125269405352, "loss": 0.9622, "step": 4020 }, { "epoch": 0.46435163820950626, "grad_norm": 0.28546687960624695, "learning_rate": 0.0001981785225092053, "loss": 1.0262, "step": 4025 }, { "epoch": 0.46492847254268577, "grad_norm": 0.30681976675987244, "learning_rate": 0.00019816574809474152, "loss": 0.9656, "step": 4030 }, { "epoch": 0.4655053068758653, "grad_norm": 0.29933658242225647, "learning_rate": 0.00019815292945641705, "loss": 1.0036, "step": 4035 }, { "epoch": 0.4660821412090448, "grad_norm": 0.2728285491466522, "learning_rate": 0.0001981400666000067, "loss": 0.9178, "step": 4040 }, { "epoch": 0.4666589755422243, "grad_norm": 0.27215078473091125, "learning_rate": 0.0001981271595313053, "loss": 0.9528, "step": 4045 }, { "epoch": 0.4672358098754038, "grad_norm": 0.29951199889183044, "learning_rate": 0.0001981142082561274, "loss": 1.0329, "step": 4050 }, { "epoch": 0.4678126442085833, "grad_norm": 0.3168124556541443, "learning_rate": 0.00019810121278030768, "loss": 0.9542, "step": 4055 }, { "epoch": 0.4683894785417628, "grad_norm": 0.31161144375801086, "learning_rate": 0.00019808817310970053, "loss": 0.9796, "step": 4060 }, { "epoch": 0.46896631287494234, "grad_norm": 0.32224801182746887, "learning_rate": 0.0001980750892501804, "loss": 1.0188, "step": 4065 }, { "epoch": 0.46954314720812185, "grad_norm": 0.30205318331718445, "learning_rate": 0.0001980619612076416, "loss": 0.9637, "step": 4070 }, { "epoch": 0.47011998154130136, "grad_norm": 0.28752511739730835, "learning_rate": 0.00019804878898799835, "loss": 0.9901, "step": 4075 }, { "epoch": 0.47069681587448087, "grad_norm": 0.2915705740451813, "learning_rate": 0.0001980355725971847, "loss": 0.9823, "step": 4080 }, { "epoch": 0.4712736502076604, "grad_norm": 0.29493892192840576, "learning_rate": 0.00019802231204115472, "loss": 1.0158, "step": 4085 }, { "epoch": 0.4718504845408399, "grad_norm": 0.30375000834465027, "learning_rate": 0.00019800900732588227, "loss": 0.9978, "step": 4090 }, { "epoch": 0.4724273188740194, "grad_norm": 0.30259644985198975, "learning_rate": 0.0001979956584573612, "loss": 0.9674, "step": 4095 }, { "epoch": 0.4730041532071989, "grad_norm": 0.2954089939594269, "learning_rate": 0.00019798226544160511, "loss": 0.9954, "step": 4100 }, { "epoch": 0.4735809875403784, "grad_norm": 0.3070552349090576, "learning_rate": 0.00019796882828464768, "loss": 0.9597, "step": 4105 }, { "epoch": 0.4741578218735579, "grad_norm": 0.3012619614601135, "learning_rate": 0.00019795534699254238, "loss": 0.9749, "step": 4110 }, { "epoch": 0.47473465620673744, "grad_norm": 0.30565857887268066, "learning_rate": 0.00019794182157136246, "loss": 0.946, "step": 4115 }, { "epoch": 0.47531149053991695, "grad_norm": 0.2922807037830353, "learning_rate": 0.0001979282520272012, "loss": 0.9334, "step": 4120 }, { "epoch": 0.47588832487309646, "grad_norm": 0.2870953381061554, "learning_rate": 0.00019791463836617176, "loss": 1.0199, "step": 4125 }, { "epoch": 0.47646515920627597, "grad_norm": 0.29794153571128845, "learning_rate": 0.00019790098059440704, "loss": 1.0163, "step": 4130 }, { "epoch": 0.4770419935394555, "grad_norm": 0.26819470524787903, "learning_rate": 0.00019788727871805994, "loss": 0.988, "step": 4135 }, { "epoch": 0.477618827872635, "grad_norm": 0.28677570819854736, "learning_rate": 0.00019787353274330313, "loss": 0.9704, "step": 4140 }, { "epoch": 0.4781956622058145, "grad_norm": 0.30988967418670654, "learning_rate": 0.00019785974267632928, "loss": 0.9583, "step": 4145 }, { "epoch": 0.478772496538994, "grad_norm": 0.29228535294532776, "learning_rate": 0.00019784590852335078, "loss": 0.9948, "step": 4150 }, { "epoch": 0.4793493308721735, "grad_norm": 0.29787692427635193, "learning_rate": 0.00019783203029059997, "loss": 1.0165, "step": 4155 }, { "epoch": 0.47992616520535303, "grad_norm": 0.29377439618110657, "learning_rate": 0.000197818107984329, "loss": 0.9344, "step": 4160 }, { "epoch": 0.48050299953853254, "grad_norm": 0.26499155163764954, "learning_rate": 0.0001978041416108099, "loss": 0.9622, "step": 4165 }, { "epoch": 0.48107983387171205, "grad_norm": 0.2975757122039795, "learning_rate": 0.00019779013117633454, "loss": 0.9544, "step": 4170 }, { "epoch": 0.48165666820489156, "grad_norm": 0.2974706292152405, "learning_rate": 0.00019777607668721467, "loss": 0.9379, "step": 4175 }, { "epoch": 0.48223350253807107, "grad_norm": 0.29165658354759216, "learning_rate": 0.00019776197814978187, "loss": 0.9735, "step": 4180 }, { "epoch": 0.4828103368712506, "grad_norm": 0.29878005385398865, "learning_rate": 0.00019774783557038755, "loss": 0.9626, "step": 4185 }, { "epoch": 0.4833871712044301, "grad_norm": 0.2908392548561096, "learning_rate": 0.00019773364895540296, "loss": 0.9788, "step": 4190 }, { "epoch": 0.4839640055376096, "grad_norm": 0.328003466129303, "learning_rate": 0.00019771941831121922, "loss": 0.9278, "step": 4195 }, { "epoch": 0.4845408398707891, "grad_norm": 0.30477482080459595, "learning_rate": 0.00019770514364424725, "loss": 0.9544, "step": 4200 }, { "epoch": 0.4851176742039686, "grad_norm": 0.2799585461616516, "learning_rate": 0.0001976908249609178, "loss": 0.9221, "step": 4205 }, { "epoch": 0.48569450853714813, "grad_norm": 0.2844686210155487, "learning_rate": 0.00019767646226768147, "loss": 0.9881, "step": 4210 }, { "epoch": 0.48627134287032764, "grad_norm": 0.3142208158969879, "learning_rate": 0.00019766205557100868, "loss": 0.9814, "step": 4215 }, { "epoch": 0.48684817720350715, "grad_norm": 0.3052613139152527, "learning_rate": 0.0001976476048773897, "loss": 1.0083, "step": 4220 }, { "epoch": 0.48742501153668666, "grad_norm": 0.2918776869773865, "learning_rate": 0.00019763311019333456, "loss": 1.0219, "step": 4225 }, { "epoch": 0.48800184586986617, "grad_norm": 0.30725133419036865, "learning_rate": 0.0001976185715253732, "loss": 1.0352, "step": 4230 }, { "epoch": 0.4885786802030457, "grad_norm": 0.3034648895263672, "learning_rate": 0.00019760398888005526, "loss": 0.9231, "step": 4235 }, { "epoch": 0.4891555145362252, "grad_norm": 0.30674096941947937, "learning_rate": 0.00019758936226395025, "loss": 1.0671, "step": 4240 }, { "epoch": 0.4897323488694047, "grad_norm": 0.3024204969406128, "learning_rate": 0.0001975746916836475, "loss": 0.9656, "step": 4245 }, { "epoch": 0.4903091832025842, "grad_norm": 0.26722845435142517, "learning_rate": 0.0001975599771457562, "loss": 0.9844, "step": 4250 }, { "epoch": 0.4908860175357637, "grad_norm": 0.29083022475242615, "learning_rate": 0.00019754521865690517, "loss": 0.9373, "step": 4255 }, { "epoch": 0.49146285186894323, "grad_norm": 0.3160659670829773, "learning_rate": 0.0001975304162237432, "loss": 1.0018, "step": 4260 }, { "epoch": 0.49203968620212274, "grad_norm": 0.2868952453136444, "learning_rate": 0.0001975155698529388, "loss": 0.9585, "step": 4265 }, { "epoch": 0.49261652053530225, "grad_norm": 0.309334397315979, "learning_rate": 0.00019750067955118033, "loss": 0.9586, "step": 4270 }, { "epoch": 0.49319335486848176, "grad_norm": 0.3061840832233429, "learning_rate": 0.00019748574532517586, "loss": 0.945, "step": 4275 }, { "epoch": 0.49377018920166127, "grad_norm": 0.2853107452392578, "learning_rate": 0.00019747076718165324, "loss": 0.8572, "step": 4280 }, { "epoch": 0.4943470235348408, "grad_norm": 0.30467069149017334, "learning_rate": 0.00019745574512736026, "loss": 0.9617, "step": 4285 }, { "epoch": 0.4949238578680203, "grad_norm": 0.3257976472377777, "learning_rate": 0.0001974406791690643, "loss": 0.9749, "step": 4290 }, { "epoch": 0.4955006922011998, "grad_norm": 0.2866266369819641, "learning_rate": 0.00019742556931355261, "loss": 0.9973, "step": 4295 }, { "epoch": 0.4960775265343793, "grad_norm": 0.2891503572463989, "learning_rate": 0.0001974104155676323, "loss": 1.0119, "step": 4300 }, { "epoch": 0.4966543608675588, "grad_norm": 0.27966293692588806, "learning_rate": 0.00019739521793813006, "loss": 0.8837, "step": 4305 }, { "epoch": 0.49723119520073833, "grad_norm": 0.30494049191474915, "learning_rate": 0.00019737997643189248, "loss": 0.9661, "step": 4310 }, { "epoch": 0.49780802953391784, "grad_norm": 0.3047969937324524, "learning_rate": 0.0001973646910557859, "loss": 1.004, "step": 4315 }, { "epoch": 0.49838486386709735, "grad_norm": 0.2946149706840515, "learning_rate": 0.00019734936181669638, "loss": 0.9053, "step": 4320 }, { "epoch": 0.49896169820027686, "grad_norm": 0.3126681447029114, "learning_rate": 0.00019733398872152984, "loss": 0.9533, "step": 4325 }, { "epoch": 0.49953853253345637, "grad_norm": 0.30080464482307434, "learning_rate": 0.00019731857177721182, "loss": 1.0052, "step": 4330 }, { "epoch": 0.5001153668666359, "grad_norm": 0.2873613238334656, "learning_rate": 0.00019730311099068771, "loss": 0.9475, "step": 4335 }, { "epoch": 0.5006922011998154, "grad_norm": 0.3170192539691925, "learning_rate": 0.00019728760636892267, "loss": 0.9549, "step": 4340 }, { "epoch": 0.501269035532995, "grad_norm": 0.3064529299736023, "learning_rate": 0.0001972720579189015, "loss": 0.9708, "step": 4345 }, { "epoch": 0.5018458698661744, "grad_norm": 0.30155086517333984, "learning_rate": 0.00019725646564762878, "loss": 0.9321, "step": 4350 }, { "epoch": 0.502422704199354, "grad_norm": 0.3222599923610687, "learning_rate": 0.00019724082956212895, "loss": 0.9894, "step": 4355 }, { "epoch": 0.5029995385325334, "grad_norm": 0.30701473355293274, "learning_rate": 0.00019722514966944604, "loss": 0.9928, "step": 4360 }, { "epoch": 0.503576372865713, "grad_norm": 0.32665449380874634, "learning_rate": 0.00019720942597664385, "loss": 1.0055, "step": 4365 }, { "epoch": 0.5041532071988925, "grad_norm": 0.2928261458873749, "learning_rate": 0.00019719365849080598, "loss": 0.9232, "step": 4370 }, { "epoch": 0.504730041532072, "grad_norm": 0.3082767128944397, "learning_rate": 0.00019717784721903572, "loss": 1.019, "step": 4375 }, { "epoch": 0.5053068758652515, "grad_norm": 0.2902364432811737, "learning_rate": 0.00019716199216845604, "loss": 1.0212, "step": 4380 }, { "epoch": 0.505883710198431, "grad_norm": 0.30737555027008057, "learning_rate": 0.0001971460933462097, "loss": 0.9832, "step": 4385 }, { "epoch": 0.5064605445316105, "grad_norm": 0.27934834361076355, "learning_rate": 0.00019713015075945912, "loss": 0.9313, "step": 4390 }, { "epoch": 0.5070373788647901, "grad_norm": 0.29655081033706665, "learning_rate": 0.00019711416441538652, "loss": 0.9946, "step": 4395 }, { "epoch": 0.5076142131979695, "grad_norm": 0.31587305665016174, "learning_rate": 0.00019709813432119372, "loss": 0.9534, "step": 4400 }, { "epoch": 0.5081910475311491, "grad_norm": 0.3084726631641388, "learning_rate": 0.00019708206048410233, "loss": 1.0091, "step": 4405 }, { "epoch": 0.5087678818643285, "grad_norm": 0.3127329647541046, "learning_rate": 0.00019706594291135366, "loss": 0.9801, "step": 4410 }, { "epoch": 0.5093447161975081, "grad_norm": 0.30119284987449646, "learning_rate": 0.00019704978161020871, "loss": 0.9964, "step": 4415 }, { "epoch": 0.5099215505306876, "grad_norm": 0.31809040904045105, "learning_rate": 0.00019703357658794817, "loss": 1.0082, "step": 4420 }, { "epoch": 0.5104983848638671, "grad_norm": 0.2844845652580261, "learning_rate": 0.0001970173278518724, "loss": 0.961, "step": 4425 }, { "epoch": 0.5110752191970466, "grad_norm": 0.3252412676811218, "learning_rate": 0.00019700103540930153, "loss": 0.9484, "step": 4430 }, { "epoch": 0.5116520535302261, "grad_norm": 0.32319343090057373, "learning_rate": 0.00019698469926757533, "loss": 0.9962, "step": 4435 }, { "epoch": 0.5122288878634056, "grad_norm": 0.2903590202331543, "learning_rate": 0.00019696831943405324, "loss": 0.971, "step": 4440 }, { "epoch": 0.5128057221965852, "grad_norm": 0.30612602829933167, "learning_rate": 0.00019695189591611441, "loss": 1.0329, "step": 4445 }, { "epoch": 0.5133825565297646, "grad_norm": 0.3082347810268402, "learning_rate": 0.00019693542872115772, "loss": 0.9756, "step": 4450 }, { "epoch": 0.5139593908629442, "grad_norm": 0.31226348876953125, "learning_rate": 0.0001969189178566016, "loss": 0.9693, "step": 4455 }, { "epoch": 0.5145362251961236, "grad_norm": 0.30261221528053284, "learning_rate": 0.00019690236332988427, "loss": 0.9734, "step": 4460 }, { "epoch": 0.5151130595293032, "grad_norm": 0.301273375749588, "learning_rate": 0.00019688576514846357, "loss": 1.0081, "step": 4465 }, { "epoch": 0.5156898938624827, "grad_norm": 0.27689802646636963, "learning_rate": 0.00019686912331981702, "loss": 1.0053, "step": 4470 }, { "epoch": 0.5162667281956622, "grad_norm": 0.3226846158504486, "learning_rate": 0.00019685243785144175, "loss": 0.9996, "step": 4475 }, { "epoch": 0.5168435625288417, "grad_norm": 0.29088863730430603, "learning_rate": 0.00019683570875085469, "loss": 0.9376, "step": 4480 }, { "epoch": 0.5174203968620212, "grad_norm": 0.3116596043109894, "learning_rate": 0.00019681893602559224, "loss": 0.9622, "step": 4485 }, { "epoch": 0.5179972311952007, "grad_norm": 0.302528977394104, "learning_rate": 0.00019680211968321057, "loss": 1.002, "step": 4490 }, { "epoch": 0.5185740655283803, "grad_norm": 0.30661019682884216, "learning_rate": 0.0001967852597312855, "loss": 0.9535, "step": 4495 }, { "epoch": 0.5191508998615597, "grad_norm": 0.31948038935661316, "learning_rate": 0.00019676835617741249, "loss": 0.9869, "step": 4500 }, { "epoch": 0.5197277341947393, "grad_norm": 0.32470494508743286, "learning_rate": 0.0001967514090292065, "loss": 1.0095, "step": 4505 }, { "epoch": 0.5203045685279187, "grad_norm": 0.3115653395652771, "learning_rate": 0.0001967344182943024, "loss": 0.9293, "step": 4510 }, { "epoch": 0.5208814028610983, "grad_norm": 0.2958611845970154, "learning_rate": 0.0001967173839803545, "loss": 0.9401, "step": 4515 }, { "epoch": 0.5214582371942778, "grad_norm": 0.30965474247932434, "learning_rate": 0.00019670030609503678, "loss": 0.967, "step": 4520 }, { "epoch": 0.5220350715274573, "grad_norm": 0.3066132068634033, "learning_rate": 0.00019668318464604285, "loss": 0.9297, "step": 4525 }, { "epoch": 0.5226119058606368, "grad_norm": 0.29480454325675964, "learning_rate": 0.00019666601964108598, "loss": 1.0089, "step": 4530 }, { "epoch": 0.5231887401938163, "grad_norm": 0.3011881709098816, "learning_rate": 0.000196648811087899, "loss": 0.9437, "step": 4535 }, { "epoch": 0.5237655745269958, "grad_norm": 0.3015040159225464, "learning_rate": 0.00019663155899423445, "loss": 1.0103, "step": 4540 }, { "epoch": 0.5243424088601754, "grad_norm": 0.2976595163345337, "learning_rate": 0.00019661426336786445, "loss": 1.0073, "step": 4545 }, { "epoch": 0.5249192431933549, "grad_norm": 0.2739868760108948, "learning_rate": 0.0001965969242165806, "loss": 0.9681, "step": 4550 }, { "epoch": 0.5254960775265344, "grad_norm": 0.3420281708240509, "learning_rate": 0.00019657954154819434, "loss": 0.9661, "step": 4555 }, { "epoch": 0.5260729118597139, "grad_norm": 0.2705232501029968, "learning_rate": 0.00019656211537053654, "loss": 0.9766, "step": 4560 }, { "epoch": 0.5266497461928934, "grad_norm": 0.3390316069126129, "learning_rate": 0.0001965446456914577, "loss": 0.9616, "step": 4565 }, { "epoch": 0.527226580526073, "grad_norm": 0.31515753269195557, "learning_rate": 0.00019652713251882802, "loss": 0.928, "step": 4570 }, { "epoch": 0.5278034148592524, "grad_norm": 0.31040945649147034, "learning_rate": 0.00019650957586053716, "loss": 1.0218, "step": 4575 }, { "epoch": 0.528380249192432, "grad_norm": 0.3205651342868805, "learning_rate": 0.00019649197572449442, "loss": 1.0069, "step": 4580 }, { "epoch": 0.5289570835256114, "grad_norm": 0.32492998242378235, "learning_rate": 0.00019647433211862877, "loss": 0.9838, "step": 4585 }, { "epoch": 0.529533917858791, "grad_norm": 0.33265379071235657, "learning_rate": 0.00019645664505088864, "loss": 0.9834, "step": 4590 }, { "epoch": 0.5301107521919705, "grad_norm": 0.2898595631122589, "learning_rate": 0.00019643891452924205, "loss": 0.9763, "step": 4595 }, { "epoch": 0.53068758652515, "grad_norm": 0.3158058226108551, "learning_rate": 0.0001964211405616767, "loss": 0.9989, "step": 4600 }, { "epoch": 0.5312644208583295, "grad_norm": 0.3105302155017853, "learning_rate": 0.00019640332315619977, "loss": 0.9999, "step": 4605 }, { "epoch": 0.531841255191509, "grad_norm": 0.3675985634326935, "learning_rate": 0.000196385462320838, "loss": 0.9716, "step": 4610 }, { "epoch": 0.5324180895246885, "grad_norm": 0.3245541453361511, "learning_rate": 0.00019636755806363783, "loss": 0.9309, "step": 4615 }, { "epoch": 0.5329949238578681, "grad_norm": 0.39721789956092834, "learning_rate": 0.00019634961039266506, "loss": 0.9995, "step": 4620 }, { "epoch": 0.5335717581910475, "grad_norm": 0.32997554540634155, "learning_rate": 0.00019633161931600522, "loss": 1.0062, "step": 4625 }, { "epoch": 0.5341485925242271, "grad_norm": 0.30046844482421875, "learning_rate": 0.00019631358484176325, "loss": 0.9584, "step": 4630 }, { "epoch": 0.5347254268574065, "grad_norm": 0.3374291658401489, "learning_rate": 0.0001962955069780638, "loss": 1.0601, "step": 4635 }, { "epoch": 0.5353022611905861, "grad_norm": 0.3347518742084503, "learning_rate": 0.00019627738573305093, "loss": 0.9493, "step": 4640 }, { "epoch": 0.5358790955237656, "grad_norm": 0.3199424147605896, "learning_rate": 0.00019625922111488831, "loss": 0.9892, "step": 4645 }, { "epoch": 0.5364559298569451, "grad_norm": 0.30987706780433655, "learning_rate": 0.00019624101313175918, "loss": 0.9624, "step": 4650 }, { "epoch": 0.5370327641901246, "grad_norm": 0.2967313826084137, "learning_rate": 0.00019622276179186615, "loss": 0.977, "step": 4655 }, { "epoch": 0.5376095985233041, "grad_norm": 0.32611724734306335, "learning_rate": 0.00019620446710343162, "loss": 0.994, "step": 4660 }, { "epoch": 0.5381864328564836, "grad_norm": 0.32929539680480957, "learning_rate": 0.00019618612907469732, "loss": 0.9307, "step": 4665 }, { "epoch": 0.5387632671896632, "grad_norm": 0.36718735098838806, "learning_rate": 0.00019616774771392457, "loss": 0.9736, "step": 4670 }, { "epoch": 0.5393401015228426, "grad_norm": 0.3094222843647003, "learning_rate": 0.0001961493230293942, "loss": 1.0393, "step": 4675 }, { "epoch": 0.5399169358560222, "grad_norm": 0.30510690808296204, "learning_rate": 0.00019613085502940658, "loss": 0.9187, "step": 4680 }, { "epoch": 0.5404937701892016, "grad_norm": 0.302712082862854, "learning_rate": 0.0001961123437222816, "loss": 0.991, "step": 4685 }, { "epoch": 0.5410706045223812, "grad_norm": 0.3168560266494751, "learning_rate": 0.0001960937891163586, "loss": 0.9739, "step": 4690 }, { "epoch": 0.5416474388555607, "grad_norm": 0.3183704614639282, "learning_rate": 0.00019607519121999647, "loss": 0.9794, "step": 4695 }, { "epoch": 0.5422242731887402, "grad_norm": 0.2977442741394043, "learning_rate": 0.00019605655004157363, "loss": 0.9596, "step": 4700 }, { "epoch": 0.5428011075219197, "grad_norm": 0.34837374091148376, "learning_rate": 0.00019603786558948795, "loss": 1.0085, "step": 4705 }, { "epoch": 0.5433779418550992, "grad_norm": 0.29827648401260376, "learning_rate": 0.00019601913787215683, "loss": 0.9767, "step": 4710 }, { "epoch": 0.5439547761882787, "grad_norm": 0.3032287061214447, "learning_rate": 0.0001960003668980171, "loss": 0.9961, "step": 4715 }, { "epoch": 0.5445316105214583, "grad_norm": 0.3106001019477844, "learning_rate": 0.00019598155267552513, "loss": 1.0001, "step": 4720 }, { "epoch": 0.5451084448546377, "grad_norm": 0.3098970055580139, "learning_rate": 0.0001959626952131568, "loss": 0.9958, "step": 4725 }, { "epoch": 0.5456852791878173, "grad_norm": 0.3304188847541809, "learning_rate": 0.00019594379451940742, "loss": 0.9547, "step": 4730 }, { "epoch": 0.5462621135209967, "grad_norm": 0.2954466938972473, "learning_rate": 0.0001959248506027918, "loss": 0.9614, "step": 4735 }, { "epoch": 0.5468389478541763, "grad_norm": 0.3072229325771332, "learning_rate": 0.00019590586347184417, "loss": 0.931, "step": 4740 }, { "epoch": 0.5474157821873558, "grad_norm": 0.2952127754688263, "learning_rate": 0.00019588683313511828, "loss": 0.9975, "step": 4745 }, { "epoch": 0.5479926165205353, "grad_norm": 0.27312129735946655, "learning_rate": 0.00019586775960118738, "loss": 0.9927, "step": 4750 }, { "epoch": 0.5485694508537148, "grad_norm": 0.30434802174568176, "learning_rate": 0.00019584864287864408, "loss": 1.0163, "step": 4755 }, { "epoch": 0.5491462851868943, "grad_norm": 0.3188215494155884, "learning_rate": 0.00019582948297610053, "loss": 1.0148, "step": 4760 }, { "epoch": 0.5497231195200738, "grad_norm": 0.3095898926258087, "learning_rate": 0.00019581027990218827, "loss": 0.9767, "step": 4765 }, { "epoch": 0.5502999538532534, "grad_norm": 0.2899004817008972, "learning_rate": 0.0001957910336655584, "loss": 1.0051, "step": 4770 }, { "epoch": 0.5508767881864328, "grad_norm": 0.3027417063713074, "learning_rate": 0.00019577174427488128, "loss": 0.9726, "step": 4775 }, { "epoch": 0.5514536225196124, "grad_norm": 0.319034218788147, "learning_rate": 0.00019575241173884692, "loss": 0.9724, "step": 4780 }, { "epoch": 0.5520304568527918, "grad_norm": 0.3255804479122162, "learning_rate": 0.00019573303606616459, "loss": 0.9959, "step": 4785 }, { "epoch": 0.5526072911859714, "grad_norm": 0.31864824891090393, "learning_rate": 0.00019571361726556307, "loss": 0.9799, "step": 4790 }, { "epoch": 0.5531841255191509, "grad_norm": 0.3090226352214813, "learning_rate": 0.00019569415534579062, "loss": 0.9547, "step": 4795 }, { "epoch": 0.5537609598523304, "grad_norm": 0.342669814825058, "learning_rate": 0.00019567465031561487, "loss": 0.9725, "step": 4800 }, { "epoch": 0.5543377941855099, "grad_norm": 0.3012937903404236, "learning_rate": 0.0001956551021838228, "loss": 1.0521, "step": 4805 }, { "epoch": 0.5549146285186894, "grad_norm": 0.30988457798957825, "learning_rate": 0.000195635510959221, "loss": 0.9583, "step": 4810 }, { "epoch": 0.5554914628518689, "grad_norm": 0.29991039633750916, "learning_rate": 0.0001956158766506352, "loss": 0.976, "step": 4815 }, { "epoch": 0.5560682971850485, "grad_norm": 0.300327867269516, "learning_rate": 0.00019559619926691086, "loss": 0.9673, "step": 4820 }, { "epoch": 0.5566451315182279, "grad_norm": 0.3010301887989044, "learning_rate": 0.00019557647881691254, "loss": 0.9791, "step": 4825 }, { "epoch": 0.5572219658514075, "grad_norm": 0.3046521246433258, "learning_rate": 0.00019555671530952445, "loss": 0.9923, "step": 4830 }, { "epoch": 0.5577988001845869, "grad_norm": 0.2898954451084137, "learning_rate": 0.00019553690875365, "loss": 0.9813, "step": 4835 }, { "epoch": 0.5583756345177665, "grad_norm": 0.30507126450538635, "learning_rate": 0.00019551705915821216, "loss": 0.9931, "step": 4840 }, { "epoch": 0.558952468850946, "grad_norm": 0.3123108446598053, "learning_rate": 0.00019549716653215318, "loss": 0.9476, "step": 4845 }, { "epoch": 0.5595293031841255, "grad_norm": 0.2869836986064911, "learning_rate": 0.00019547723088443467, "loss": 0.9031, "step": 4850 }, { "epoch": 0.560106137517305, "grad_norm": 0.4195156991481781, "learning_rate": 0.00019545725222403775, "loss": 0.9947, "step": 4855 }, { "epoch": 0.5606829718504845, "grad_norm": 0.3179481029510498, "learning_rate": 0.00019543723055996282, "loss": 1.0052, "step": 4860 }, { "epoch": 0.561259806183664, "grad_norm": 0.30937254428863525, "learning_rate": 0.00019541716590122971, "loss": 0.9722, "step": 4865 }, { "epoch": 0.5618366405168436, "grad_norm": 0.3025272488594055, "learning_rate": 0.00019539705825687755, "loss": 0.9893, "step": 4870 }, { "epoch": 0.562413474850023, "grad_norm": 0.28743186593055725, "learning_rate": 0.00019537690763596487, "loss": 0.9719, "step": 4875 }, { "epoch": 0.5629903091832026, "grad_norm": 0.3209276795387268, "learning_rate": 0.00019535671404756957, "loss": 1.0181, "step": 4880 }, { "epoch": 0.563567143516382, "grad_norm": 0.3113163411617279, "learning_rate": 0.0001953364775007889, "loss": 1.0092, "step": 4885 }, { "epoch": 0.5641439778495616, "grad_norm": 0.30241066217422485, "learning_rate": 0.00019531619800473952, "loss": 0.9724, "step": 4890 }, { "epoch": 0.5647208121827412, "grad_norm": 0.3012004792690277, "learning_rate": 0.0001952958755685573, "loss": 1.0069, "step": 4895 }, { "epoch": 0.5652976465159206, "grad_norm": 4.258053302764893, "learning_rate": 0.00019527551020139759, "loss": 0.9895, "step": 4900 }, { "epoch": 0.5658744808491002, "grad_norm": 0.3009074926376343, "learning_rate": 0.00019525510191243498, "loss": 1.0587, "step": 4905 }, { "epoch": 0.5664513151822796, "grad_norm": 0.2939055860042572, "learning_rate": 0.0001952346507108635, "loss": 0.9872, "step": 4910 }, { "epoch": 0.5670281495154592, "grad_norm": 0.2984018623828888, "learning_rate": 0.00019521415660589644, "loss": 0.9779, "step": 4915 }, { "epoch": 0.5676049838486387, "grad_norm": 0.30733126401901245, "learning_rate": 0.0001951936196067664, "loss": 0.9718, "step": 4920 }, { "epoch": 0.5681818181818182, "grad_norm": 0.28078576922416687, "learning_rate": 0.00019517303972272536, "loss": 0.9951, "step": 4925 }, { "epoch": 0.5687586525149977, "grad_norm": 0.31320542097091675, "learning_rate": 0.0001951524169630446, "loss": 1.0027, "step": 4930 }, { "epoch": 0.5693354868481773, "grad_norm": 0.2962873578071594, "learning_rate": 0.00019513175133701474, "loss": 0.9434, "step": 4935 }, { "epoch": 0.5699123211813567, "grad_norm": 0.31314268708229065, "learning_rate": 0.0001951110428539456, "loss": 0.9575, "step": 4940 }, { "epoch": 0.5704891555145363, "grad_norm": 0.30815884470939636, "learning_rate": 0.00019509029152316648, "loss": 1.0102, "step": 4945 }, { "epoch": 0.5710659898477157, "grad_norm": 0.31001782417297363, "learning_rate": 0.00019506949735402588, "loss": 0.9899, "step": 4950 }, { "epoch": 0.5716428241808953, "grad_norm": 0.2955753803253174, "learning_rate": 0.0001950486603558916, "loss": 0.9249, "step": 4955 }, { "epoch": 0.5722196585140747, "grad_norm": 0.28989970684051514, "learning_rate": 0.00019502778053815073, "loss": 0.9685, "step": 4960 }, { "epoch": 0.5727964928472543, "grad_norm": 0.3111148178577423, "learning_rate": 0.00019500685791020968, "loss": 0.9826, "step": 4965 }, { "epoch": 0.5733733271804338, "grad_norm": 0.32383227348327637, "learning_rate": 0.00019498589248149415, "loss": 0.9688, "step": 4970 }, { "epoch": 0.5739501615136133, "grad_norm": 0.29687562584877014, "learning_rate": 0.0001949648842614491, "loss": 0.924, "step": 4975 }, { "epoch": 0.5745269958467928, "grad_norm": 0.3350512683391571, "learning_rate": 0.00019494383325953875, "loss": 1.0041, "step": 4980 }, { "epoch": 0.5751038301799724, "grad_norm": 0.2898659110069275, "learning_rate": 0.00019492273948524665, "loss": 0.9846, "step": 4985 }, { "epoch": 0.5756806645131518, "grad_norm": 0.31808215379714966, "learning_rate": 0.00019490160294807556, "loss": 0.977, "step": 4990 }, { "epoch": 0.5762574988463314, "grad_norm": 0.3810345530509949, "learning_rate": 0.00019488042365754758, "loss": 1.0013, "step": 4995 }, { "epoch": 0.5768343331795108, "grad_norm": 0.27463066577911377, "learning_rate": 0.00019485920162320394, "loss": 0.947, "step": 5000 }, { "epoch": 0.5774111675126904, "grad_norm": 0.30058354139328003, "learning_rate": 0.0001948379368546053, "loss": 0.936, "step": 5005 }, { "epoch": 0.5779880018458698, "grad_norm": 0.34601083397865295, "learning_rate": 0.0001948166293613314, "loss": 1.0091, "step": 5010 }, { "epoch": 0.5785648361790494, "grad_norm": 0.2960231602191925, "learning_rate": 0.00019479527915298135, "loss": 0.9458, "step": 5015 }, { "epoch": 0.5791416705122289, "grad_norm": 0.2993627190589905, "learning_rate": 0.00019477388623917344, "loss": 0.9932, "step": 5020 }, { "epoch": 0.5797185048454084, "grad_norm": 0.3270370066165924, "learning_rate": 0.00019475245062954523, "loss": 0.9759, "step": 5025 }, { "epoch": 0.5802953391785879, "grad_norm": 0.32682403922080994, "learning_rate": 0.00019473097233375355, "loss": 0.9963, "step": 5030 }, { "epoch": 0.5808721735117675, "grad_norm": 0.29264119267463684, "learning_rate": 0.00019470945136147431, "loss": 0.9417, "step": 5035 }, { "epoch": 0.5814490078449469, "grad_norm": 0.3218499720096588, "learning_rate": 0.00019468788772240286, "loss": 0.962, "step": 5040 }, { "epoch": 0.5820258421781265, "grad_norm": 0.2924599349498749, "learning_rate": 0.00019466628142625358, "loss": 0.9967, "step": 5045 }, { "epoch": 0.5826026765113059, "grad_norm": 0.3072007894515991, "learning_rate": 0.00019464463248276018, "loss": 0.9936, "step": 5050 }, { "epoch": 0.5831795108444855, "grad_norm": 0.33545422554016113, "learning_rate": 0.00019462294090167554, "loss": 0.9554, "step": 5055 }, { "epoch": 0.583756345177665, "grad_norm": 0.3114611506462097, "learning_rate": 0.0001946012066927718, "loss": 0.9685, "step": 5060 }, { "epoch": 0.5843331795108445, "grad_norm": 0.3209623396396637, "learning_rate": 0.00019457942986584022, "loss": 0.9987, "step": 5065 }, { "epoch": 0.584910013844024, "grad_norm": 0.3333243131637573, "learning_rate": 0.0001945576104306913, "loss": 1.0107, "step": 5070 }, { "epoch": 0.5854868481772035, "grad_norm": 0.31956392526626587, "learning_rate": 0.0001945357483971548, "loss": 0.9694, "step": 5075 }, { "epoch": 0.586063682510383, "grad_norm": 0.31336089968681335, "learning_rate": 0.0001945138437750795, "loss": 0.9743, "step": 5080 }, { "epoch": 0.5866405168435626, "grad_norm": 0.311122328042984, "learning_rate": 0.00019449189657433358, "loss": 0.9676, "step": 5085 }, { "epoch": 0.587217351176742, "grad_norm": 0.30517736077308655, "learning_rate": 0.00019446990680480424, "loss": 1.0225, "step": 5090 }, { "epoch": 0.5877941855099216, "grad_norm": 0.3143622875213623, "learning_rate": 0.00019444787447639791, "loss": 0.9529, "step": 5095 }, { "epoch": 0.588371019843101, "grad_norm": 0.29679304361343384, "learning_rate": 0.00019442579959904024, "loss": 0.984, "step": 5100 }, { "epoch": 0.5889478541762806, "grad_norm": 0.32181552052497864, "learning_rate": 0.00019440368218267596, "loss": 0.9447, "step": 5105 }, { "epoch": 0.58952468850946, "grad_norm": 0.3025820851325989, "learning_rate": 0.00019438152223726904, "loss": 0.98, "step": 5110 }, { "epoch": 0.5901015228426396, "grad_norm": 0.3288577198982239, "learning_rate": 0.0001943593197728026, "loss": 0.9507, "step": 5115 }, { "epoch": 0.5906783571758191, "grad_norm": 0.30060875415802, "learning_rate": 0.0001943370747992788, "loss": 0.9607, "step": 5120 }, { "epoch": 0.5912551915089986, "grad_norm": 0.34586185216903687, "learning_rate": 0.00019431478732671916, "loss": 0.9845, "step": 5125 }, { "epoch": 0.5918320258421781, "grad_norm": 0.32572728395462036, "learning_rate": 0.00019429245736516415, "loss": 1.0146, "step": 5130 }, { "epoch": 0.5924088601753577, "grad_norm": 0.39712613821029663, "learning_rate": 0.00019427008492467346, "loss": 0.9785, "step": 5135 }, { "epoch": 0.5929856945085371, "grad_norm": 0.32232365012168884, "learning_rate": 0.00019424767001532598, "loss": 0.9658, "step": 5140 }, { "epoch": 0.5935625288417167, "grad_norm": 0.3060401380062103, "learning_rate": 0.00019422521264721962, "loss": 1.0234, "step": 5145 }, { "epoch": 0.5941393631748961, "grad_norm": 0.33208033442497253, "learning_rate": 0.0001942027128304715, "loss": 0.9906, "step": 5150 }, { "epoch": 0.5947161975080757, "grad_norm": 0.3264749348163605, "learning_rate": 0.0001941801705752178, "loss": 1.0163, "step": 5155 }, { "epoch": 0.5952930318412551, "grad_norm": 0.3408641219139099, "learning_rate": 0.00019415758589161385, "loss": 0.9299, "step": 5160 }, { "epoch": 0.5958698661744347, "grad_norm": 0.2970794141292572, "learning_rate": 0.00019413495878983414, "loss": 0.9351, "step": 5165 }, { "epoch": 0.5964467005076142, "grad_norm": 0.2991337180137634, "learning_rate": 0.00019411228928007215, "loss": 0.8861, "step": 5170 }, { "epoch": 0.5970235348407937, "grad_norm": 0.3135198652744293, "learning_rate": 0.0001940895773725406, "loss": 0.9857, "step": 5175 }, { "epoch": 0.5976003691739732, "grad_norm": 0.3125375807285309, "learning_rate": 0.00019406682307747123, "loss": 0.9865, "step": 5180 }, { "epoch": 0.5981772035071528, "grad_norm": 0.30986109375953674, "learning_rate": 0.0001940440264051149, "loss": 0.9213, "step": 5185 }, { "epoch": 0.5987540378403322, "grad_norm": 0.32293030619621277, "learning_rate": 0.00019402118736574155, "loss": 0.9697, "step": 5190 }, { "epoch": 0.5993308721735118, "grad_norm": 0.304883748292923, "learning_rate": 0.0001939983059696402, "loss": 0.9424, "step": 5195 }, { "epoch": 0.5999077065066912, "grad_norm": 0.30604103207588196, "learning_rate": 0.00019397538222711895, "loss": 0.9705, "step": 5200 }, { "epoch": 0.6004845408398708, "grad_norm": 0.33686572313308716, "learning_rate": 0.00019395241614850504, "loss": 0.9414, "step": 5205 }, { "epoch": 0.6010613751730502, "grad_norm": 0.30786630511283875, "learning_rate": 0.0001939294077441447, "loss": 0.9526, "step": 5210 }, { "epoch": 0.6016382095062298, "grad_norm": 0.31119078397750854, "learning_rate": 0.00019390635702440324, "loss": 1.0046, "step": 5215 }, { "epoch": 0.6022150438394093, "grad_norm": 0.32397225499153137, "learning_rate": 0.00019388326399966515, "loss": 1.0064, "step": 5220 }, { "epoch": 0.6027918781725888, "grad_norm": 0.31831094622612, "learning_rate": 0.00019386012868033374, "loss": 0.9344, "step": 5225 }, { "epoch": 0.6033687125057683, "grad_norm": 0.3024968206882477, "learning_rate": 0.00019383695107683165, "loss": 0.9749, "step": 5230 }, { "epoch": 0.6039455468389479, "grad_norm": 0.2912804186344147, "learning_rate": 0.00019381373119960033, "loss": 0.9439, "step": 5235 }, { "epoch": 0.6045223811721273, "grad_norm": 0.3093802332878113, "learning_rate": 0.00019379046905910045, "loss": 0.9043, "step": 5240 }, { "epoch": 0.6050992155053069, "grad_norm": 0.3029499650001526, "learning_rate": 0.00019376716466581163, "loss": 0.9259, "step": 5245 }, { "epoch": 0.6056760498384864, "grad_norm": 0.32635313272476196, "learning_rate": 0.00019374381803023252, "loss": 0.9789, "step": 5250 }, { "epoch": 0.6062528841716659, "grad_norm": 0.31280118227005005, "learning_rate": 0.00019372042916288083, "loss": 0.9825, "step": 5255 }, { "epoch": 0.6068297185048455, "grad_norm": 0.2918443977832794, "learning_rate": 0.00019369699807429336, "loss": 0.9931, "step": 5260 }, { "epoch": 0.6074065528380249, "grad_norm": 0.3114100992679596, "learning_rate": 0.00019367352477502576, "loss": 0.9962, "step": 5265 }, { "epoch": 0.6079833871712045, "grad_norm": 0.3649381697177887, "learning_rate": 0.00019365000927565285, "loss": 0.9618, "step": 5270 }, { "epoch": 0.6085602215043839, "grad_norm": 0.3101176917552948, "learning_rate": 0.00019362645158676843, "loss": 0.9584, "step": 5275 }, { "epoch": 0.6091370558375635, "grad_norm": 0.3260380029678345, "learning_rate": 0.0001936028517189852, "loss": 0.9658, "step": 5280 }, { "epoch": 0.609713890170743, "grad_norm": 0.3273615539073944, "learning_rate": 0.00019357920968293506, "loss": 0.9475, "step": 5285 }, { "epoch": 0.6102907245039225, "grad_norm": 0.334358811378479, "learning_rate": 0.00019355552548926873, "loss": 0.9985, "step": 5290 }, { "epoch": 0.610867558837102, "grad_norm": 0.3484663665294647, "learning_rate": 0.00019353179914865596, "loss": 0.9556, "step": 5295 }, { "epoch": 0.6114443931702815, "grad_norm": 0.3266102373600006, "learning_rate": 0.00019350803067178556, "loss": 0.9634, "step": 5300 }, { "epoch": 0.612021227503461, "grad_norm": 0.29789167642593384, "learning_rate": 0.00019348422006936527, "loss": 0.9699, "step": 5305 }, { "epoch": 0.6125980618366406, "grad_norm": 0.31655624508857727, "learning_rate": 0.00019346036735212177, "loss": 0.9982, "step": 5310 }, { "epoch": 0.61317489616982, "grad_norm": 0.3286411762237549, "learning_rate": 0.0001934364725308008, "loss": 0.9968, "step": 5315 }, { "epoch": 0.6137517305029996, "grad_norm": 0.2995690703392029, "learning_rate": 0.00019341253561616704, "loss": 0.9343, "step": 5320 }, { "epoch": 0.614328564836179, "grad_norm": 0.33955976366996765, "learning_rate": 0.00019338855661900405, "loss": 0.9588, "step": 5325 }, { "epoch": 0.6149053991693586, "grad_norm": 0.3300839960575104, "learning_rate": 0.00019336453555011447, "loss": 0.9648, "step": 5330 }, { "epoch": 0.6154822335025381, "grad_norm": 0.30221325159072876, "learning_rate": 0.0001933404724203198, "loss": 0.9968, "step": 5335 }, { "epoch": 0.6160590678357176, "grad_norm": 0.3104861080646515, "learning_rate": 0.00019331636724046058, "loss": 1.0049, "step": 5340 }, { "epoch": 0.6166359021688971, "grad_norm": 0.310660719871521, "learning_rate": 0.0001932922200213962, "loss": 0.9425, "step": 5345 }, { "epoch": 0.6172127365020766, "grad_norm": 0.2998371422290802, "learning_rate": 0.00019326803077400503, "loss": 0.9332, "step": 5350 }, { "epoch": 0.6177895708352561, "grad_norm": 0.3054348826408386, "learning_rate": 0.00019324379950918437, "loss": 0.9569, "step": 5355 }, { "epoch": 0.6183664051684357, "grad_norm": 0.31838247179985046, "learning_rate": 0.00019321952623785048, "loss": 0.957, "step": 5360 }, { "epoch": 0.6189432395016151, "grad_norm": 0.30139824748039246, "learning_rate": 0.00019319521097093846, "loss": 0.9397, "step": 5365 }, { "epoch": 0.6195200738347947, "grad_norm": 0.3231588900089264, "learning_rate": 0.00019317085371940246, "loss": 0.9888, "step": 5370 }, { "epoch": 0.6200969081679741, "grad_norm": 0.2997867465019226, "learning_rate": 0.00019314645449421543, "loss": 0.9553, "step": 5375 }, { "epoch": 0.6206737425011537, "grad_norm": 0.2987963855266571, "learning_rate": 0.00019312201330636927, "loss": 0.9653, "step": 5380 }, { "epoch": 0.6212505768343332, "grad_norm": 0.28030556440353394, "learning_rate": 0.00019309753016687477, "loss": 0.9596, "step": 5385 }, { "epoch": 0.6218274111675127, "grad_norm": 0.3068777620792389, "learning_rate": 0.00019307300508676165, "loss": 0.9994, "step": 5390 }, { "epoch": 0.6224042455006922, "grad_norm": 0.33228716254234314, "learning_rate": 0.00019304843807707852, "loss": 0.9922, "step": 5395 }, { "epoch": 0.6229810798338717, "grad_norm": 0.31027698516845703, "learning_rate": 0.00019302382914889284, "loss": 1.0011, "step": 5400 }, { "epoch": 0.6235579141670512, "grad_norm": 0.29779738187789917, "learning_rate": 0.00019299917831329099, "loss": 0.995, "step": 5405 }, { "epoch": 0.6241347485002308, "grad_norm": 0.3637996315956116, "learning_rate": 0.0001929744855813782, "loss": 0.9827, "step": 5410 }, { "epoch": 0.6247115828334102, "grad_norm": 0.31740111112594604, "learning_rate": 0.00019294975096427862, "loss": 0.9959, "step": 5415 }, { "epoch": 0.6252884171665898, "grad_norm": 0.34880760312080383, "learning_rate": 0.0001929249744731352, "loss": 0.9982, "step": 5420 }, { "epoch": 0.6258652514997692, "grad_norm": 0.3125605285167694, "learning_rate": 0.0001929001561191099, "loss": 0.9531, "step": 5425 }, { "epoch": 0.6264420858329488, "grad_norm": 0.34269124269485474, "learning_rate": 0.00019287529591338333, "loss": 1.0116, "step": 5430 }, { "epoch": 0.6270189201661283, "grad_norm": 0.3100661039352417, "learning_rate": 0.00019285039386715512, "loss": 0.9815, "step": 5435 }, { "epoch": 0.6275957544993078, "grad_norm": 0.35185715556144714, "learning_rate": 0.00019282544999164365, "loss": 0.9986, "step": 5440 }, { "epoch": 0.6281725888324873, "grad_norm": 0.29009369015693665, "learning_rate": 0.0001928004642980862, "loss": 0.9381, "step": 5445 }, { "epoch": 0.6287494231656668, "grad_norm": 0.3412320017814636, "learning_rate": 0.00019277543679773889, "loss": 0.945, "step": 5450 }, { "epoch": 0.6293262574988463, "grad_norm": 0.3375879228115082, "learning_rate": 0.00019275036750187664, "loss": 1.0051, "step": 5455 }, { "epoch": 0.6299030918320259, "grad_norm": 0.3794129490852356, "learning_rate": 0.00019272525642179323, "loss": 0.9496, "step": 5460 }, { "epoch": 0.6304799261652053, "grad_norm": 0.2998787462711334, "learning_rate": 0.00019270010356880124, "loss": 0.9871, "step": 5465 }, { "epoch": 0.6310567604983849, "grad_norm": 0.3201323449611664, "learning_rate": 0.00019267490895423208, "loss": 0.9909, "step": 5470 }, { "epoch": 0.6316335948315643, "grad_norm": 0.30740267038345337, "learning_rate": 0.00019264967258943595, "loss": 0.9674, "step": 5475 }, { "epoch": 0.6322104291647439, "grad_norm": 0.31996774673461914, "learning_rate": 0.00019262439448578195, "loss": 0.9257, "step": 5480 }, { "epoch": 0.6327872634979234, "grad_norm": 0.31498971581459045, "learning_rate": 0.00019259907465465784, "loss": 0.9565, "step": 5485 }, { "epoch": 0.6333640978311029, "grad_norm": 0.3249649405479431, "learning_rate": 0.0001925737131074703, "loss": 0.951, "step": 5490 }, { "epoch": 0.6339409321642824, "grad_norm": 0.29183363914489746, "learning_rate": 0.00019254830985564474, "loss": 0.9347, "step": 5495 }, { "epoch": 0.6345177664974619, "grad_norm": 0.3501313626766205, "learning_rate": 0.0001925228649106254, "loss": 0.98, "step": 5500 }, { "epoch": 0.6350946008306414, "grad_norm": 0.3065842390060425, "learning_rate": 0.00019249737828387522, "loss": 0.9954, "step": 5505 }, { "epoch": 0.635671435163821, "grad_norm": 0.3278423845767975, "learning_rate": 0.000192471849986876, "loss": 0.9612, "step": 5510 }, { "epoch": 0.6362482694970004, "grad_norm": 0.33244070410728455, "learning_rate": 0.0001924462800311283, "loss": 0.9869, "step": 5515 }, { "epoch": 0.63682510383018, "grad_norm": 0.3195953369140625, "learning_rate": 0.00019242066842815146, "loss": 0.9758, "step": 5520 }, { "epoch": 0.6374019381633594, "grad_norm": 0.3243964910507202, "learning_rate": 0.0001923950151894835, "loss": 0.9614, "step": 5525 }, { "epoch": 0.637978772496539, "grad_norm": 0.3167535662651062, "learning_rate": 0.0001923693203266813, "loss": 0.9606, "step": 5530 }, { "epoch": 0.6385556068297185, "grad_norm": 0.32592856884002686, "learning_rate": 0.00019234358385132038, "loss": 0.9483, "step": 5535 }, { "epoch": 0.639132441162898, "grad_norm": 0.31928128004074097, "learning_rate": 0.00019231780577499516, "loss": 0.9309, "step": 5540 }, { "epoch": 0.6397092754960775, "grad_norm": 0.3262840509414673, "learning_rate": 0.00019229198610931866, "loss": 0.9872, "step": 5545 }, { "epoch": 0.640286109829257, "grad_norm": 0.3317157030105591, "learning_rate": 0.00019226612486592271, "loss": 0.9277, "step": 5550 }, { "epoch": 0.6408629441624365, "grad_norm": 0.3596244156360626, "learning_rate": 0.00019224022205645785, "loss": 0.9683, "step": 5555 }, { "epoch": 0.6414397784956161, "grad_norm": 0.3185982406139374, "learning_rate": 0.00019221427769259333, "loss": 0.9785, "step": 5560 }, { "epoch": 0.6420166128287955, "grad_norm": 0.301542729139328, "learning_rate": 0.00019218829178601713, "loss": 0.9568, "step": 5565 }, { "epoch": 0.6425934471619751, "grad_norm": 0.32796069979667664, "learning_rate": 0.00019216226434843597, "loss": 1.0116, "step": 5570 }, { "epoch": 0.6431702814951545, "grad_norm": 0.2905179560184479, "learning_rate": 0.0001921361953915753, "loss": 0.9531, "step": 5575 }, { "epoch": 0.6437471158283341, "grad_norm": 0.33059805631637573, "learning_rate": 0.00019211008492717914, "loss": 0.9578, "step": 5580 }, { "epoch": 0.6443239501615136, "grad_norm": 0.3101516366004944, "learning_rate": 0.00019208393296701038, "loss": 1.0044, "step": 5585 }, { "epoch": 0.6449007844946931, "grad_norm": 0.3204366862773895, "learning_rate": 0.00019205773952285052, "loss": 0.9383, "step": 5590 }, { "epoch": 0.6454776188278727, "grad_norm": 0.308737188577652, "learning_rate": 0.0001920315046064997, "loss": 0.9672, "step": 5595 }, { "epoch": 0.6460544531610521, "grad_norm": 0.2993175983428955, "learning_rate": 0.0001920052282297769, "loss": 0.9724, "step": 5600 }, { "epoch": 0.6466312874942317, "grad_norm": 0.35242629051208496, "learning_rate": 0.00019197891040451963, "loss": 0.9366, "step": 5605 }, { "epoch": 0.6472081218274112, "grad_norm": 0.3320680260658264, "learning_rate": 0.00019195255114258408, "loss": 1.0048, "step": 5610 }, { "epoch": 0.6477849561605907, "grad_norm": 0.3301430940628052, "learning_rate": 0.00019192615045584522, "loss": 0.9553, "step": 5615 }, { "epoch": 0.6483617904937702, "grad_norm": 0.327357679605484, "learning_rate": 0.00019189970835619652, "loss": 0.963, "step": 5620 }, { "epoch": 0.6489386248269498, "grad_norm": 0.3190898597240448, "learning_rate": 0.00019187322485555031, "loss": 0.9617, "step": 5625 }, { "epoch": 0.6495154591601292, "grad_norm": 0.3255945146083832, "learning_rate": 0.00019184669996583737, "loss": 0.9493, "step": 5630 }, { "epoch": 0.6500922934933088, "grad_norm": 0.3097761273384094, "learning_rate": 0.00019182013369900726, "loss": 0.9099, "step": 5635 }, { "epoch": 0.6506691278264882, "grad_norm": 0.3031613528728485, "learning_rate": 0.00019179352606702813, "loss": 0.9767, "step": 5640 }, { "epoch": 0.6512459621596678, "grad_norm": 0.3292886018753052, "learning_rate": 0.00019176687708188675, "loss": 1.0219, "step": 5645 }, { "epoch": 0.6518227964928472, "grad_norm": 0.3024533689022064, "learning_rate": 0.00019174018675558854, "loss": 0.9109, "step": 5650 }, { "epoch": 0.6523996308260268, "grad_norm": 0.3066127896308899, "learning_rate": 0.00019171345510015758, "loss": 0.9726, "step": 5655 }, { "epoch": 0.6529764651592063, "grad_norm": 0.3431326150894165, "learning_rate": 0.0001916866821276365, "loss": 0.9434, "step": 5660 }, { "epoch": 0.6535532994923858, "grad_norm": 0.3115648627281189, "learning_rate": 0.00019165986785008658, "loss": 0.9453, "step": 5665 }, { "epoch": 0.6541301338255653, "grad_norm": 0.3325398862361908, "learning_rate": 0.0001916330122795877, "loss": 0.9581, "step": 5670 }, { "epoch": 0.6547069681587449, "grad_norm": 0.3112443685531616, "learning_rate": 0.00019160611542823837, "loss": 0.9718, "step": 5675 }, { "epoch": 0.6552838024919243, "grad_norm": 0.29900282621383667, "learning_rate": 0.00019157917730815567, "loss": 0.9952, "step": 5680 }, { "epoch": 0.6558606368251039, "grad_norm": 0.31644630432128906, "learning_rate": 0.00019155219793147522, "loss": 0.96, "step": 5685 }, { "epoch": 0.6564374711582833, "grad_norm": 0.3477376103401184, "learning_rate": 0.00019152517731035139, "loss": 0.9834, "step": 5690 }, { "epoch": 0.6570143054914629, "grad_norm": 0.30350586771965027, "learning_rate": 0.00019149811545695692, "loss": 1.0281, "step": 5695 }, { "epoch": 0.6575911398246423, "grad_norm": 0.2941493093967438, "learning_rate": 0.00019147101238348326, "loss": 0.9535, "step": 5700 }, { "epoch": 0.6581679741578219, "grad_norm": 0.32863160967826843, "learning_rate": 0.00019144386810214043, "loss": 1.0231, "step": 5705 }, { "epoch": 0.6587448084910014, "grad_norm": 0.32498711347579956, "learning_rate": 0.00019141668262515692, "loss": 0.9913, "step": 5710 }, { "epoch": 0.6593216428241809, "grad_norm": 0.31866273283958435, "learning_rate": 0.00019138945596477994, "loss": 1.0029, "step": 5715 }, { "epoch": 0.6598984771573604, "grad_norm": 0.3033033013343811, "learning_rate": 0.00019136218813327503, "loss": 0.98, "step": 5720 }, { "epoch": 0.66047531149054, "grad_norm": 0.3383612036705017, "learning_rate": 0.0001913348791429265, "loss": 1.02, "step": 5725 }, { "epoch": 0.6610521458237194, "grad_norm": 0.3032139539718628, "learning_rate": 0.00019130752900603702, "loss": 0.9907, "step": 5730 }, { "epoch": 0.661628980156899, "grad_norm": 0.3184412717819214, "learning_rate": 0.00019128013773492795, "loss": 0.9988, "step": 5735 }, { "epoch": 0.6622058144900784, "grad_norm": 0.4634701907634735, "learning_rate": 0.0001912527053419391, "loss": 0.9851, "step": 5740 }, { "epoch": 0.662782648823258, "grad_norm": 0.3319401443004608, "learning_rate": 0.00019122523183942879, "loss": 0.9319, "step": 5745 }, { "epoch": 0.6633594831564374, "grad_norm": 0.3245648443698883, "learning_rate": 0.00019119771723977386, "loss": 0.9687, "step": 5750 }, { "epoch": 0.663936317489617, "grad_norm": 0.3270566463470459, "learning_rate": 0.00019117016155536978, "loss": 0.9949, "step": 5755 }, { "epoch": 0.6645131518227965, "grad_norm": 0.32096368074417114, "learning_rate": 0.00019114256479863038, "loss": 0.9699, "step": 5760 }, { "epoch": 0.665089986155976, "grad_norm": 0.29931652545928955, "learning_rate": 0.00019111492698198804, "loss": 0.9539, "step": 5765 }, { "epoch": 0.6656668204891555, "grad_norm": 0.40578532218933105, "learning_rate": 0.00019108724811789366, "loss": 0.9947, "step": 5770 }, { "epoch": 0.666243654822335, "grad_norm": 0.3238760530948639, "learning_rate": 0.00019105952821881668, "loss": 0.9727, "step": 5775 }, { "epoch": 0.6668204891555145, "grad_norm": 0.30622220039367676, "learning_rate": 0.0001910317672972449, "loss": 0.9811, "step": 5780 }, { "epoch": 0.6673973234886941, "grad_norm": 0.29258066415786743, "learning_rate": 0.0001910039653656847, "loss": 0.996, "step": 5785 }, { "epoch": 0.6679741578218735, "grad_norm": 0.3499821126461029, "learning_rate": 0.00019097612243666086, "loss": 0.9957, "step": 5790 }, { "epoch": 0.6685509921550531, "grad_norm": 0.32826563715934753, "learning_rate": 0.00019094823852271674, "loss": 0.9775, "step": 5795 }, { "epoch": 0.6691278264882325, "grad_norm": 0.2916145324707031, "learning_rate": 0.00019092031363641406, "loss": 0.9277, "step": 5800 }, { "epoch": 0.6697046608214121, "grad_norm": 0.30102667212486267, "learning_rate": 0.00019089234779033306, "loss": 1.018, "step": 5805 }, { "epoch": 0.6702814951545916, "grad_norm": 0.3221781551837921, "learning_rate": 0.00019086434099707238, "loss": 0.9232, "step": 5810 }, { "epoch": 0.6708583294877711, "grad_norm": 0.3488676846027374, "learning_rate": 0.0001908362932692491, "loss": 0.9638, "step": 5815 }, { "epoch": 0.6714351638209506, "grad_norm": 0.3418358564376831, "learning_rate": 0.00019080820461949886, "loss": 0.9207, "step": 5820 }, { "epoch": 0.6720119981541302, "grad_norm": 0.32656848430633545, "learning_rate": 0.00019078007506047564, "loss": 0.9877, "step": 5825 }, { "epoch": 0.6725888324873096, "grad_norm": 0.32590511441230774, "learning_rate": 0.0001907519046048518, "loss": 1.0079, "step": 5830 }, { "epoch": 0.6731656668204892, "grad_norm": 0.32146522402763367, "learning_rate": 0.00019072369326531824, "loss": 0.9815, "step": 5835 }, { "epoch": 0.6737425011536686, "grad_norm": 0.34426286816596985, "learning_rate": 0.00019069544105458416, "loss": 0.9684, "step": 5840 }, { "epoch": 0.6743193354868482, "grad_norm": 0.3510322868824005, "learning_rate": 0.0001906671479853773, "loss": 0.9691, "step": 5845 }, { "epoch": 0.6748961698200276, "grad_norm": 0.31030935049057007, "learning_rate": 0.00019063881407044373, "loss": 1.0024, "step": 5850 }, { "epoch": 0.6754730041532072, "grad_norm": 0.3513205647468567, "learning_rate": 0.00019061043932254795, "loss": 0.9485, "step": 5855 }, { "epoch": 0.6760498384863867, "grad_norm": 0.323853462934494, "learning_rate": 0.00019058202375447277, "loss": 0.9441, "step": 5860 }, { "epoch": 0.6766266728195662, "grad_norm": 0.32843998074531555, "learning_rate": 0.00019055356737901952, "loss": 1.0128, "step": 5865 }, { "epoch": 0.6772035071527457, "grad_norm": 0.35758525133132935, "learning_rate": 0.00019052507020900783, "loss": 0.9361, "step": 5870 }, { "epoch": 0.6777803414859253, "grad_norm": 0.31614959239959717, "learning_rate": 0.00019049653225727573, "loss": 0.9649, "step": 5875 }, { "epoch": 0.6783571758191047, "grad_norm": 0.2904086410999298, "learning_rate": 0.00019046795353667965, "loss": 0.9506, "step": 5880 }, { "epoch": 0.6789340101522843, "grad_norm": 0.30251458287239075, "learning_rate": 0.00019043933406009432, "loss": 0.9243, "step": 5885 }, { "epoch": 0.6795108444854637, "grad_norm": 0.3204381763935089, "learning_rate": 0.0001904106738404129, "loss": 0.9446, "step": 5890 }, { "epoch": 0.6800876788186433, "grad_norm": 0.3202415704727173, "learning_rate": 0.00019038197289054684, "loss": 0.9668, "step": 5895 }, { "epoch": 0.6806645131518227, "grad_norm": 0.3108007609844208, "learning_rate": 0.000190353231223426, "loss": 0.9507, "step": 5900 }, { "epoch": 0.6812413474850023, "grad_norm": 0.31462720036506653, "learning_rate": 0.00019032444885199858, "loss": 0.9459, "step": 5905 }, { "epoch": 0.6818181818181818, "grad_norm": 0.3226790726184845, "learning_rate": 0.00019029562578923106, "loss": 0.9564, "step": 5910 }, { "epoch": 0.6823950161513613, "grad_norm": 0.35159701108932495, "learning_rate": 0.00019026676204810826, "loss": 1.0154, "step": 5915 }, { "epoch": 0.6829718504845408, "grad_norm": 0.3501424193382263, "learning_rate": 0.00019023785764163344, "loss": 0.9834, "step": 5920 }, { "epoch": 0.6835486848177204, "grad_norm": 0.34057801961898804, "learning_rate": 0.000190208912582828, "loss": 1.0144, "step": 5925 }, { "epoch": 0.6841255191508998, "grad_norm": 0.3468494415283203, "learning_rate": 0.0001901799268847318, "loss": 0.9348, "step": 5930 }, { "epoch": 0.6847023534840794, "grad_norm": 0.30606213212013245, "learning_rate": 0.00019015090056040293, "loss": 0.9893, "step": 5935 }, { "epoch": 0.6852791878172588, "grad_norm": 0.3168538808822632, "learning_rate": 0.0001901218336229178, "loss": 0.9701, "step": 5940 }, { "epoch": 0.6858560221504384, "grad_norm": 0.3141446113586426, "learning_rate": 0.00019009272608537113, "loss": 1.025, "step": 5945 }, { "epoch": 0.686432856483618, "grad_norm": 0.3107317388057709, "learning_rate": 0.00019006357796087596, "loss": 1.0144, "step": 5950 }, { "epoch": 0.6870096908167974, "grad_norm": 0.31770557165145874, "learning_rate": 0.0001900343892625635, "loss": 0.9909, "step": 5955 }, { "epoch": 0.687586525149977, "grad_norm": 0.3323291838169098, "learning_rate": 0.0001900051600035834, "loss": 1.0551, "step": 5960 }, { "epoch": 0.6881633594831564, "grad_norm": 0.3626686632633209, "learning_rate": 0.00018997589019710342, "loss": 0.9785, "step": 5965 }, { "epoch": 0.688740193816336, "grad_norm": 0.34098559617996216, "learning_rate": 0.00018994657985630972, "loss": 0.9226, "step": 5970 }, { "epoch": 0.6893170281495155, "grad_norm": 0.3267138600349426, "learning_rate": 0.00018991722899440664, "loss": 1.0036, "step": 5975 }, { "epoch": 0.689893862482695, "grad_norm": 0.33355411887168884, "learning_rate": 0.0001898878376246168, "loss": 0.9962, "step": 5980 }, { "epoch": 0.6904706968158745, "grad_norm": 0.3542887270450592, "learning_rate": 0.00018985840576018107, "loss": 0.9529, "step": 5985 }, { "epoch": 0.691047531149054, "grad_norm": 0.3203210234642029, "learning_rate": 0.0001898289334143586, "loss": 0.9599, "step": 5990 }, { "epoch": 0.6916243654822335, "grad_norm": 0.3354796767234802, "learning_rate": 0.00018979942060042668, "loss": 0.9762, "step": 5995 }, { "epoch": 0.6922011998154131, "grad_norm": 0.30847635865211487, "learning_rate": 0.00018976986733168093, "loss": 1.0016, "step": 6000 }, { "epoch": 0.6927780341485925, "grad_norm": 0.33731263875961304, "learning_rate": 0.00018974027362143514, "loss": 0.9719, "step": 6005 }, { "epoch": 0.6933548684817721, "grad_norm": 0.31261542439460754, "learning_rate": 0.00018971063948302133, "loss": 1.0101, "step": 6010 }, { "epoch": 0.6939317028149515, "grad_norm": 0.33092984557151794, "learning_rate": 0.00018968096492978976, "loss": 0.969, "step": 6015 }, { "epoch": 0.6945085371481311, "grad_norm": 0.30494657158851624, "learning_rate": 0.00018965124997510883, "loss": 0.9667, "step": 6020 }, { "epoch": 0.6950853714813106, "grad_norm": 0.3187716007232666, "learning_rate": 0.00018962149463236524, "loss": 0.9676, "step": 6025 }, { "epoch": 0.6956622058144901, "grad_norm": 0.3075034022331238, "learning_rate": 0.0001895916989149638, "loss": 1.0125, "step": 6030 }, { "epoch": 0.6962390401476696, "grad_norm": 0.3571907877922058, "learning_rate": 0.00018956186283632754, "loss": 0.9668, "step": 6035 }, { "epoch": 0.6968158744808491, "grad_norm": 0.3178498148918152, "learning_rate": 0.00018953198640989764, "loss": 0.9635, "step": 6040 }, { "epoch": 0.6973927088140286, "grad_norm": 0.309283584356308, "learning_rate": 0.00018950206964913355, "loss": 0.9705, "step": 6045 }, { "epoch": 0.6979695431472082, "grad_norm": 0.3307679295539856, "learning_rate": 0.0001894721125675128, "loss": 0.9601, "step": 6050 }, { "epoch": 0.6985463774803876, "grad_norm": 0.33936187624931335, "learning_rate": 0.00018944211517853113, "loss": 0.9583, "step": 6055 }, { "epoch": 0.6991232118135672, "grad_norm": 0.33675289154052734, "learning_rate": 0.00018941207749570237, "loss": 0.9807, "step": 6060 }, { "epoch": 0.6997000461467466, "grad_norm": 0.2894688546657562, "learning_rate": 0.00018938199953255863, "loss": 0.9624, "step": 6065 }, { "epoch": 0.7002768804799262, "grad_norm": 0.325270414352417, "learning_rate": 0.00018935188130265004, "loss": 0.9874, "step": 6070 }, { "epoch": 0.7008537148131057, "grad_norm": 0.32608747482299805, "learning_rate": 0.0001893217228195449, "loss": 1.0085, "step": 6075 }, { "epoch": 0.7014305491462852, "grad_norm": 0.3333377540111542, "learning_rate": 0.00018929152409682972, "loss": 0.927, "step": 6080 }, { "epoch": 0.7020073834794647, "grad_norm": 0.3285461366176605, "learning_rate": 0.00018926128514810907, "loss": 0.9696, "step": 6085 }, { "epoch": 0.7025842178126442, "grad_norm": 0.32103848457336426, "learning_rate": 0.00018923100598700561, "loss": 1.017, "step": 6090 }, { "epoch": 0.7031610521458237, "grad_norm": 0.3383493423461914, "learning_rate": 0.00018920068662716023, "loss": 0.9833, "step": 6095 }, { "epoch": 0.7037378864790033, "grad_norm": 0.36620640754699707, "learning_rate": 0.00018917032708223183, "loss": 0.9836, "step": 6100 }, { "epoch": 0.7043147208121827, "grad_norm": 0.33626264333724976, "learning_rate": 0.00018913992736589746, "loss": 0.9322, "step": 6105 }, { "epoch": 0.7048915551453623, "grad_norm": 0.3257678151130676, "learning_rate": 0.0001891094874918522, "loss": 1.0343, "step": 6110 }, { "epoch": 0.7054683894785417, "grad_norm": 0.3210621178150177, "learning_rate": 0.00018907900747380932, "loss": 0.9852, "step": 6115 }, { "epoch": 0.7060452238117213, "grad_norm": 0.3681231141090393, "learning_rate": 0.0001890484873255001, "loss": 0.9589, "step": 6120 }, { "epoch": 0.7066220581449008, "grad_norm": 0.3156203627586365, "learning_rate": 0.00018901792706067395, "loss": 0.9706, "step": 6125 }, { "epoch": 0.7071988924780803, "grad_norm": 0.3281550407409668, "learning_rate": 0.00018898732669309833, "loss": 0.9647, "step": 6130 }, { "epoch": 0.7077757268112598, "grad_norm": 0.3214692771434784, "learning_rate": 0.00018895668623655873, "loss": 0.9394, "step": 6135 }, { "epoch": 0.7083525611444393, "grad_norm": 0.3246460258960724, "learning_rate": 0.0001889260057048588, "loss": 1.0101, "step": 6140 }, { "epoch": 0.7089293954776188, "grad_norm": 0.3262513279914856, "learning_rate": 0.0001888952851118201, "loss": 0.9386, "step": 6145 }, { "epoch": 0.7095062298107984, "grad_norm": 0.32824644446372986, "learning_rate": 0.0001888645244712824, "loss": 1.0007, "step": 6150 }, { "epoch": 0.7100830641439778, "grad_norm": 0.33368590474128723, "learning_rate": 0.00018883372379710332, "loss": 0.9683, "step": 6155 }, { "epoch": 0.7106598984771574, "grad_norm": 0.3157143294811249, "learning_rate": 0.00018880288310315873, "loss": 0.9925, "step": 6160 }, { "epoch": 0.7112367328103368, "grad_norm": 0.3037188947200775, "learning_rate": 0.00018877200240334236, "loss": 0.972, "step": 6165 }, { "epoch": 0.7118135671435164, "grad_norm": 0.3481830358505249, "learning_rate": 0.00018874108171156606, "loss": 0.9672, "step": 6170 }, { "epoch": 0.7123904014766959, "grad_norm": 0.33353695273399353, "learning_rate": 0.00018871012104175968, "loss": 0.9719, "step": 6175 }, { "epoch": 0.7129672358098754, "grad_norm": 0.35782551765441895, "learning_rate": 0.00018867912040787096, "loss": 0.9503, "step": 6180 }, { "epoch": 0.7135440701430549, "grad_norm": 0.32563239336013794, "learning_rate": 0.00018864807982386586, "loss": 1.043, "step": 6185 }, { "epoch": 0.7141209044762344, "grad_norm": 0.32650572061538696, "learning_rate": 0.00018861699930372816, "loss": 0.9347, "step": 6190 }, { "epoch": 0.7146977388094139, "grad_norm": 0.3114981949329376, "learning_rate": 0.00018858587886145975, "loss": 0.9725, "step": 6195 }, { "epoch": 0.7152745731425935, "grad_norm": 0.3412109613418579, "learning_rate": 0.00018855471851108037, "loss": 0.967, "step": 6200 }, { "epoch": 0.7158514074757729, "grad_norm": 0.3184211850166321, "learning_rate": 0.0001885235182666279, "loss": 0.9883, "step": 6205 }, { "epoch": 0.7164282418089525, "grad_norm": 0.335102379322052, "learning_rate": 0.00018849227814215805, "loss": 0.9357, "step": 6210 }, { "epoch": 0.7170050761421319, "grad_norm": 0.33024275302886963, "learning_rate": 0.00018846099815174458, "loss": 0.962, "step": 6215 }, { "epoch": 0.7175819104753115, "grad_norm": 0.31068938970565796, "learning_rate": 0.00018842967830947916, "loss": 0.9599, "step": 6220 }, { "epoch": 0.718158744808491, "grad_norm": 0.3194611668586731, "learning_rate": 0.00018839831862947152, "loss": 0.9768, "step": 6225 }, { "epoch": 0.7187355791416705, "grad_norm": 0.34374377131462097, "learning_rate": 0.0001883669191258492, "loss": 0.9773, "step": 6230 }, { "epoch": 0.71931241347485, "grad_norm": 0.33975133299827576, "learning_rate": 0.00018833547981275773, "loss": 1.0063, "step": 6235 }, { "epoch": 0.7198892478080295, "grad_norm": 0.3632505238056183, "learning_rate": 0.00018830400070436057, "loss": 0.999, "step": 6240 }, { "epoch": 0.720466082141209, "grad_norm": 0.2948671281337738, "learning_rate": 0.00018827248181483915, "loss": 0.9911, "step": 6245 }, { "epoch": 0.7210429164743886, "grad_norm": 0.36003610491752625, "learning_rate": 0.0001882409231583928, "loss": 0.9918, "step": 6250 }, { "epoch": 0.721619750807568, "grad_norm": 0.33334627747535706, "learning_rate": 0.00018820932474923873, "loss": 0.9975, "step": 6255 }, { "epoch": 0.7221965851407476, "grad_norm": 0.4000126123428345, "learning_rate": 0.0001881776866016121, "loss": 0.9123, "step": 6260 }, { "epoch": 0.722773419473927, "grad_norm": 0.3385787606239319, "learning_rate": 0.00018814600872976594, "loss": 0.9299, "step": 6265 }, { "epoch": 0.7233502538071066, "grad_norm": 0.31627988815307617, "learning_rate": 0.00018811429114797123, "loss": 0.9416, "step": 6270 }, { "epoch": 0.7239270881402861, "grad_norm": 0.33831337094306946, "learning_rate": 0.00018808253387051678, "loss": 0.9513, "step": 6275 }, { "epoch": 0.7245039224734656, "grad_norm": 0.31706488132476807, "learning_rate": 0.00018805073691170927, "loss": 0.9834, "step": 6280 }, { "epoch": 0.7250807568066451, "grad_norm": 0.3332441449165344, "learning_rate": 0.00018801890028587333, "loss": 0.9409, "step": 6285 }, { "epoch": 0.7256575911398246, "grad_norm": 0.34476813673973083, "learning_rate": 0.00018798702400735145, "loss": 0.9399, "step": 6290 }, { "epoch": 0.7262344254730042, "grad_norm": 0.31431716680526733, "learning_rate": 0.0001879551080905039, "loss": 1.0019, "step": 6295 }, { "epoch": 0.7268112598061837, "grad_norm": 0.3677690625190735, "learning_rate": 0.0001879231525497089, "loss": 0.9862, "step": 6300 }, { "epoch": 0.7273880941393632, "grad_norm": 0.31107670068740845, "learning_rate": 0.00018789115739936243, "loss": 0.9535, "step": 6305 }, { "epoch": 0.7279649284725427, "grad_norm": 0.35047629475593567, "learning_rate": 0.00018785912265387845, "loss": 0.9733, "step": 6310 }, { "epoch": 0.7285417628057222, "grad_norm": 0.33065545558929443, "learning_rate": 0.0001878270483276886, "loss": 0.99, "step": 6315 }, { "epoch": 0.7291185971389017, "grad_norm": 0.3244021236896515, "learning_rate": 0.00018779493443524245, "loss": 0.9656, "step": 6320 }, { "epoch": 0.7296954314720813, "grad_norm": 0.36192965507507324, "learning_rate": 0.0001877627809910074, "loss": 1.0014, "step": 6325 }, { "epoch": 0.7302722658052607, "grad_norm": 0.3353778123855591, "learning_rate": 0.00018773058800946858, "loss": 0.9828, "step": 6330 }, { "epoch": 0.7308491001384403, "grad_norm": 0.32873064279556274, "learning_rate": 0.00018769835550512908, "loss": 1.0209, "step": 6335 }, { "epoch": 0.7314259344716197, "grad_norm": 0.3185397684574127, "learning_rate": 0.00018766608349250966, "loss": 0.996, "step": 6340 }, { "epoch": 0.7320027688047993, "grad_norm": 0.30948343873023987, "learning_rate": 0.00018763377198614887, "loss": 0.9423, "step": 6345 }, { "epoch": 0.7325796031379788, "grad_norm": 0.31592267751693726, "learning_rate": 0.0001876014210006032, "loss": 0.9255, "step": 6350 }, { "epoch": 0.7331564374711583, "grad_norm": 0.3489915430545807, "learning_rate": 0.00018756903055044675, "loss": 0.9889, "step": 6355 }, { "epoch": 0.7337332718043378, "grad_norm": 0.3357676863670349, "learning_rate": 0.00018753660065027152, "loss": 0.915, "step": 6360 }, { "epoch": 0.7343101061375173, "grad_norm": 0.30153629183769226, "learning_rate": 0.00018750413131468725, "loss": 1.016, "step": 6365 }, { "epoch": 0.7348869404706968, "grad_norm": 0.3336898386478424, "learning_rate": 0.00018747162255832142, "loss": 0.9868, "step": 6370 }, { "epoch": 0.7354637748038764, "grad_norm": 0.3211246430873871, "learning_rate": 0.00018743907439581933, "loss": 0.9637, "step": 6375 }, { "epoch": 0.7360406091370558, "grad_norm": 0.32898247241973877, "learning_rate": 0.00018740648684184395, "loss": 0.9667, "step": 6380 }, { "epoch": 0.7366174434702354, "grad_norm": 0.33533334732055664, "learning_rate": 0.00018737385991107603, "loss": 0.947, "step": 6385 }, { "epoch": 0.7371942778034148, "grad_norm": 0.33783718943595886, "learning_rate": 0.0001873411936182141, "loss": 0.9626, "step": 6390 }, { "epoch": 0.7377711121365944, "grad_norm": 0.33185064792633057, "learning_rate": 0.00018730848797797437, "loss": 0.9885, "step": 6395 }, { "epoch": 0.7383479464697739, "grad_norm": 0.35967978835105896, "learning_rate": 0.00018727574300509076, "loss": 0.9882, "step": 6400 }, { "epoch": 0.7389247808029534, "grad_norm": 0.3004412055015564, "learning_rate": 0.000187242958714315, "loss": 0.9386, "step": 6405 }, { "epoch": 0.7395016151361329, "grad_norm": 0.3231200575828552, "learning_rate": 0.00018721013512041647, "loss": 0.9473, "step": 6410 }, { "epoch": 0.7400784494693124, "grad_norm": 0.35318905115127563, "learning_rate": 0.00018717727223818223, "loss": 0.9524, "step": 6415 }, { "epoch": 0.7406552838024919, "grad_norm": 0.32204577326774597, "learning_rate": 0.00018714437008241709, "loss": 0.9413, "step": 6420 }, { "epoch": 0.7412321181356715, "grad_norm": 0.35092103481292725, "learning_rate": 0.00018711142866794354, "loss": 1.0402, "step": 6425 }, { "epoch": 0.7418089524688509, "grad_norm": 0.3684171736240387, "learning_rate": 0.00018707844800960177, "loss": 0.9495, "step": 6430 }, { "epoch": 0.7423857868020305, "grad_norm": 0.34715521335601807, "learning_rate": 0.00018704542812224956, "loss": 0.9848, "step": 6435 }, { "epoch": 0.7429626211352099, "grad_norm": 0.3401312828063965, "learning_rate": 0.0001870123690207625, "loss": 0.9969, "step": 6440 }, { "epoch": 0.7435394554683895, "grad_norm": 0.33639106154441833, "learning_rate": 0.00018697927072003378, "loss": 0.9768, "step": 6445 }, { "epoch": 0.744116289801569, "grad_norm": 0.3611484169960022, "learning_rate": 0.00018694613323497422, "loss": 0.9478, "step": 6450 }, { "epoch": 0.7446931241347485, "grad_norm": 0.3171774446964264, "learning_rate": 0.00018691295658051233, "loss": 1.0148, "step": 6455 }, { "epoch": 0.745269958467928, "grad_norm": 0.33016425371170044, "learning_rate": 0.00018687974077159428, "loss": 0.9979, "step": 6460 }, { "epoch": 0.7458467928011075, "grad_norm": 0.3236633241176605, "learning_rate": 0.0001868464858231838, "loss": 0.9243, "step": 6465 }, { "epoch": 0.746423627134287, "grad_norm": 0.3428235650062561, "learning_rate": 0.00018681319175026237, "loss": 0.9754, "step": 6470 }, { "epoch": 0.7470004614674666, "grad_norm": 0.33331015706062317, "learning_rate": 0.000186779858567829, "loss": 0.9638, "step": 6475 }, { "epoch": 0.747577295800646, "grad_norm": 0.32325270771980286, "learning_rate": 0.0001867464862909004, "loss": 0.9898, "step": 6480 }, { "epoch": 0.7481541301338256, "grad_norm": 0.3331642746925354, "learning_rate": 0.00018671307493451074, "loss": 0.9401, "step": 6485 }, { "epoch": 0.748730964467005, "grad_norm": 0.33681750297546387, "learning_rate": 0.000186679624513712, "loss": 0.9885, "step": 6490 }, { "epoch": 0.7493077988001846, "grad_norm": 0.3355301320552826, "learning_rate": 0.00018664613504357366, "loss": 0.9511, "step": 6495 }, { "epoch": 0.7498846331333641, "grad_norm": 0.33696606755256653, "learning_rate": 0.0001866126065391827, "loss": 0.951, "step": 6500 }, { "epoch": 0.7504614674665436, "grad_norm": 0.317131370306015, "learning_rate": 0.00018657903901564388, "loss": 0.9431, "step": 6505 }, { "epoch": 0.7510383017997231, "grad_norm": 0.31651514768600464, "learning_rate": 0.0001865454324880794, "loss": 0.9611, "step": 6510 }, { "epoch": 0.7516151361329027, "grad_norm": 0.3429376780986786, "learning_rate": 0.00018651178697162902, "loss": 0.9579, "step": 6515 }, { "epoch": 0.7521919704660821, "grad_norm": 0.31091445684432983, "learning_rate": 0.00018647810248145018, "loss": 1.0021, "step": 6520 }, { "epoch": 0.7527688047992617, "grad_norm": 0.38521134853363037, "learning_rate": 0.00018644437903271778, "loss": 0.9911, "step": 6525 }, { "epoch": 0.7533456391324411, "grad_norm": 0.33354058861732483, "learning_rate": 0.00018641061664062428, "loss": 0.9634, "step": 6530 }, { "epoch": 0.7539224734656207, "grad_norm": 0.3340102434158325, "learning_rate": 0.0001863768153203797, "loss": 0.9433, "step": 6535 }, { "epoch": 0.7544993077988001, "grad_norm": 0.3207364082336426, "learning_rate": 0.00018634297508721167, "loss": 0.9275, "step": 6540 }, { "epoch": 0.7550761421319797, "grad_norm": 0.3502042889595032, "learning_rate": 0.00018630909595636523, "loss": 1.0206, "step": 6545 }, { "epoch": 0.7556529764651592, "grad_norm": 0.31954869627952576, "learning_rate": 0.00018627517794310298, "loss": 0.9621, "step": 6550 }, { "epoch": 0.7562298107983387, "grad_norm": 0.33266472816467285, "learning_rate": 0.00018624122106270506, "loss": 0.9419, "step": 6555 }, { "epoch": 0.7568066451315182, "grad_norm": 0.337035596370697, "learning_rate": 0.0001862072253304691, "loss": 0.9389, "step": 6560 }, { "epoch": 0.7573834794646978, "grad_norm": 0.31855568289756775, "learning_rate": 0.00018617319076171028, "loss": 0.9541, "step": 6565 }, { "epoch": 0.7579603137978772, "grad_norm": 0.322386234998703, "learning_rate": 0.00018613911737176125, "loss": 0.978, "step": 6570 }, { "epoch": 0.7585371481310568, "grad_norm": 0.34818416833877563, "learning_rate": 0.00018610500517597206, "loss": 1.0214, "step": 6575 }, { "epoch": 0.7591139824642362, "grad_norm": 0.30302146077156067, "learning_rate": 0.0001860708541897104, "loss": 0.9401, "step": 6580 }, { "epoch": 0.7596908167974158, "grad_norm": 0.3100593388080597, "learning_rate": 0.0001860366644283613, "loss": 1.0366, "step": 6585 }, { "epoch": 0.7602676511305952, "grad_norm": 0.32050052285194397, "learning_rate": 0.0001860024359073274, "loss": 0.9593, "step": 6590 }, { "epoch": 0.7608444854637748, "grad_norm": 0.3930646479129791, "learning_rate": 0.00018596816864202862, "loss": 0.9761, "step": 6595 }, { "epoch": 0.7614213197969543, "grad_norm": 0.31919702887535095, "learning_rate": 0.00018593386264790243, "loss": 0.9626, "step": 6600 }, { "epoch": 0.7619981541301338, "grad_norm": 0.33068013191223145, "learning_rate": 0.0001858995179404038, "loss": 0.9705, "step": 6605 }, { "epoch": 0.7625749884633133, "grad_norm": 0.31021422147750854, "learning_rate": 0.00018586513453500508, "loss": 0.9527, "step": 6610 }, { "epoch": 0.7631518227964929, "grad_norm": 0.38333725929260254, "learning_rate": 0.00018583071244719607, "loss": 0.9773, "step": 6615 }, { "epoch": 0.7637286571296723, "grad_norm": 0.3238351047039032, "learning_rate": 0.00018579625169248395, "loss": 1.0035, "step": 6620 }, { "epoch": 0.7643054914628519, "grad_norm": 0.304609477519989, "learning_rate": 0.0001857617522863934, "loss": 1.0089, "step": 6625 }, { "epoch": 0.7648823257960313, "grad_norm": 0.32782503962516785, "learning_rate": 0.0001857272142444664, "loss": 0.9194, "step": 6630 }, { "epoch": 0.7654591601292109, "grad_norm": 0.3101833462715149, "learning_rate": 0.0001856926375822625, "loss": 0.9782, "step": 6635 }, { "epoch": 0.7660359944623903, "grad_norm": 0.3171398639678955, "learning_rate": 0.00018565802231535847, "loss": 0.9407, "step": 6640 }, { "epoch": 0.7666128287955699, "grad_norm": 0.3236709237098694, "learning_rate": 0.0001856233684593486, "loss": 1.0112, "step": 6645 }, { "epoch": 0.7671896631287495, "grad_norm": 0.36800485849380493, "learning_rate": 0.0001855886760298445, "loss": 0.9796, "step": 6650 }, { "epoch": 0.7677664974619289, "grad_norm": 0.34232667088508606, "learning_rate": 0.00018555394504247521, "loss": 0.9916, "step": 6655 }, { "epoch": 0.7683433317951085, "grad_norm": 0.33114129304885864, "learning_rate": 0.00018551917551288706, "loss": 0.9857, "step": 6660 }, { "epoch": 0.768920166128288, "grad_norm": 0.3472602367401123, "learning_rate": 0.00018548436745674383, "loss": 0.9574, "step": 6665 }, { "epoch": 0.7694970004614675, "grad_norm": 0.3226907551288605, "learning_rate": 0.00018544952088972658, "loss": 0.9783, "step": 6670 }, { "epoch": 0.770073834794647, "grad_norm": 0.34243959188461304, "learning_rate": 0.0001854146358275338, "loss": 0.9587, "step": 6675 }, { "epoch": 0.7706506691278265, "grad_norm": 0.32891300320625305, "learning_rate": 0.00018537971228588124, "loss": 0.9303, "step": 6680 }, { "epoch": 0.771227503461006, "grad_norm": 0.3494172692298889, "learning_rate": 0.00018534475028050205, "loss": 0.9454, "step": 6685 }, { "epoch": 0.7718043377941856, "grad_norm": 0.33006858825683594, "learning_rate": 0.00018530974982714667, "loss": 0.9697, "step": 6690 }, { "epoch": 0.772381172127365, "grad_norm": 0.33270421624183655, "learning_rate": 0.00018527471094158287, "loss": 0.9943, "step": 6695 }, { "epoch": 0.7729580064605446, "grad_norm": 0.34100016951560974, "learning_rate": 0.00018523963363959573, "loss": 0.9427, "step": 6700 }, { "epoch": 0.773534840793724, "grad_norm": 0.32470348477363586, "learning_rate": 0.0001852045179369877, "loss": 0.9585, "step": 6705 }, { "epoch": 0.7741116751269036, "grad_norm": 0.3840593993663788, "learning_rate": 0.00018516936384957834, "loss": 0.9694, "step": 6710 }, { "epoch": 0.774688509460083, "grad_norm": 0.32040587067604065, "learning_rate": 0.0001851341713932048, "loss": 1.0201, "step": 6715 }, { "epoch": 0.7752653437932626, "grad_norm": 0.3363640606403351, "learning_rate": 0.0001850989405837212, "loss": 0.9696, "step": 6720 }, { "epoch": 0.7758421781264421, "grad_norm": 0.3235708177089691, "learning_rate": 0.00018506367143699922, "loss": 0.917, "step": 6725 }, { "epoch": 0.7764190124596216, "grad_norm": 0.32720041275024414, "learning_rate": 0.0001850283639689276, "loss": 1.0065, "step": 6730 }, { "epoch": 0.7769958467928011, "grad_norm": 0.34144657850265503, "learning_rate": 0.0001849930181954124, "loss": 1.0124, "step": 6735 }, { "epoch": 0.7775726811259807, "grad_norm": 0.3346642851829529, "learning_rate": 0.00018495763413237706, "loss": 0.9115, "step": 6740 }, { "epoch": 0.7781495154591601, "grad_norm": 0.3458048105239868, "learning_rate": 0.00018492221179576207, "loss": 0.9607, "step": 6745 }, { "epoch": 0.7787263497923397, "grad_norm": 0.33692750334739685, "learning_rate": 0.00018488675120152532, "loss": 0.986, "step": 6750 }, { "epoch": 0.7793031841255191, "grad_norm": 0.3468713164329529, "learning_rate": 0.00018485125236564185, "loss": 0.9971, "step": 6755 }, { "epoch": 0.7798800184586987, "grad_norm": 0.34560665488243103, "learning_rate": 0.00018481571530410397, "loss": 0.9439, "step": 6760 }, { "epoch": 0.7804568527918782, "grad_norm": 0.3573435842990875, "learning_rate": 0.00018478014003292116, "loss": 1.0003, "step": 6765 }, { "epoch": 0.7810336871250577, "grad_norm": 0.3445967137813568, "learning_rate": 0.0001847445265681202, "loss": 0.949, "step": 6770 }, { "epoch": 0.7816105214582372, "grad_norm": 0.34904471039772034, "learning_rate": 0.00018470887492574503, "loss": 0.9932, "step": 6775 }, { "epoch": 0.7821873557914167, "grad_norm": 0.31675389409065247, "learning_rate": 0.0001846731851218567, "loss": 1.0151, "step": 6780 }, { "epoch": 0.7827641901245962, "grad_norm": 0.35233479738235474, "learning_rate": 0.00018463745717253364, "loss": 0.9555, "step": 6785 }, { "epoch": 0.7833410244577758, "grad_norm": 0.3339458703994751, "learning_rate": 0.0001846016910938713, "loss": 0.9464, "step": 6790 }, { "epoch": 0.7839178587909552, "grad_norm": 0.35707125067710876, "learning_rate": 0.00018456588690198236, "loss": 0.9831, "step": 6795 }, { "epoch": 0.7844946931241348, "grad_norm": 0.3830420672893524, "learning_rate": 0.00018453004461299672, "loss": 0.9704, "step": 6800 }, { "epoch": 0.7850715274573142, "grad_norm": 0.3077176809310913, "learning_rate": 0.00018449416424306137, "loss": 0.9407, "step": 6805 }, { "epoch": 0.7856483617904938, "grad_norm": 0.3609665632247925, "learning_rate": 0.0001844582458083405, "loss": 0.9694, "step": 6810 }, { "epoch": 0.7862251961236733, "grad_norm": 0.34184277057647705, "learning_rate": 0.00018442228932501545, "loss": 0.9996, "step": 6815 }, { "epoch": 0.7868020304568528, "grad_norm": 0.3383423686027527, "learning_rate": 0.00018438629480928466, "loss": 1.0248, "step": 6820 }, { "epoch": 0.7873788647900323, "grad_norm": 0.32342755794525146, "learning_rate": 0.0001843502622773637, "loss": 0.9698, "step": 6825 }, { "epoch": 0.7879556991232118, "grad_norm": 0.3433450758457184, "learning_rate": 0.00018431419174548539, "loss": 0.9495, "step": 6830 }, { "epoch": 0.7885325334563913, "grad_norm": 0.32221582531929016, "learning_rate": 0.0001842780832298995, "loss": 0.9998, "step": 6835 }, { "epoch": 0.7891093677895709, "grad_norm": 0.3204023838043213, "learning_rate": 0.00018424193674687297, "loss": 0.9729, "step": 6840 }, { "epoch": 0.7896862021227503, "grad_norm": 0.3542555868625641, "learning_rate": 0.00018420575231268993, "loss": 0.9883, "step": 6845 }, { "epoch": 0.7902630364559299, "grad_norm": 0.3774206340312958, "learning_rate": 0.0001841695299436515, "loss": 0.9701, "step": 6850 }, { "epoch": 0.7908398707891093, "grad_norm": 0.337774395942688, "learning_rate": 0.00018413326965607593, "loss": 1.0354, "step": 6855 }, { "epoch": 0.7914167051222889, "grad_norm": 0.35492873191833496, "learning_rate": 0.00018409697146629854, "loss": 0.9896, "step": 6860 }, { "epoch": 0.7919935394554684, "grad_norm": 0.3379327952861786, "learning_rate": 0.00018406063539067174, "loss": 0.9961, "step": 6865 }, { "epoch": 0.7925703737886479, "grad_norm": 0.3227647840976715, "learning_rate": 0.00018402426144556504, "loss": 1.0273, "step": 6870 }, { "epoch": 0.7931472081218274, "grad_norm": 0.3234160244464874, "learning_rate": 0.00018398784964736493, "loss": 0.896, "step": 6875 }, { "epoch": 0.7937240424550069, "grad_norm": 0.32284024357795715, "learning_rate": 0.00018395140001247498, "loss": 0.9938, "step": 6880 }, { "epoch": 0.7943008767881864, "grad_norm": 0.34763550758361816, "learning_rate": 0.0001839149125573159, "loss": 0.9291, "step": 6885 }, { "epoch": 0.794877711121366, "grad_norm": 0.363441526889801, "learning_rate": 0.00018387838729832528, "loss": 0.9537, "step": 6890 }, { "epoch": 0.7954545454545454, "grad_norm": 0.3477483093738556, "learning_rate": 0.00018384182425195786, "loss": 1.0244, "step": 6895 }, { "epoch": 0.796031379787725, "grad_norm": 0.398753821849823, "learning_rate": 0.00018380522343468532, "loss": 0.9409, "step": 6900 }, { "epoch": 0.7966082141209044, "grad_norm": 0.3567346930503845, "learning_rate": 0.00018376858486299647, "loss": 0.9713, "step": 6905 }, { "epoch": 0.797185048454084, "grad_norm": 0.3397413492202759, "learning_rate": 0.000183731908553397, "loss": 0.9822, "step": 6910 }, { "epoch": 0.7977618827872635, "grad_norm": 0.3155917823314667, "learning_rate": 0.00018369519452240973, "loss": 0.9795, "step": 6915 }, { "epoch": 0.798338717120443, "grad_norm": 0.37791383266448975, "learning_rate": 0.00018365844278657432, "loss": 0.9761, "step": 6920 }, { "epoch": 0.7989155514536225, "grad_norm": 0.35001733899116516, "learning_rate": 0.00018362165336244753, "loss": 1.0093, "step": 6925 }, { "epoch": 0.799492385786802, "grad_norm": 0.3731771409511566, "learning_rate": 0.00018358482626660303, "loss": 0.9313, "step": 6930 }, { "epoch": 0.8000692201199815, "grad_norm": 0.36500659584999084, "learning_rate": 0.00018354796151563157, "loss": 0.951, "step": 6935 }, { "epoch": 0.8006460544531611, "grad_norm": 0.3376425802707672, "learning_rate": 0.00018351105912614078, "loss": 0.9301, "step": 6940 }, { "epoch": 0.8012228887863405, "grad_norm": 0.4149805009365082, "learning_rate": 0.0001834741191147552, "loss": 0.9878, "step": 6945 }, { "epoch": 0.8017997231195201, "grad_norm": 0.32159149646759033, "learning_rate": 0.00018343714149811642, "loss": 0.9877, "step": 6950 }, { "epoch": 0.8023765574526995, "grad_norm": 0.3310393691062927, "learning_rate": 0.00018340012629288293, "loss": 0.9769, "step": 6955 }, { "epoch": 0.8029533917858791, "grad_norm": 0.36446118354797363, "learning_rate": 0.00018336307351573018, "loss": 1.0259, "step": 6960 }, { "epoch": 0.8035302261190586, "grad_norm": 0.325137734413147, "learning_rate": 0.0001833259831833504, "loss": 1.0434, "step": 6965 }, { "epoch": 0.8041070604522381, "grad_norm": 0.37154528498649597, "learning_rate": 0.00018328885531245298, "loss": 0.9711, "step": 6970 }, { "epoch": 0.8046838947854176, "grad_norm": 0.30865800380706787, "learning_rate": 0.00018325168991976408, "loss": 0.9248, "step": 6975 }, { "epoch": 0.8052607291185971, "grad_norm": 0.33374345302581787, "learning_rate": 0.00018321448702202675, "loss": 0.9578, "step": 6980 }, { "epoch": 0.8058375634517766, "grad_norm": 0.3469264805316925, "learning_rate": 0.00018317724663600098, "loss": 1.0174, "step": 6985 }, { "epoch": 0.8064143977849562, "grad_norm": 0.32863757014274597, "learning_rate": 0.00018313996877846361, "loss": 0.9553, "step": 6990 }, { "epoch": 0.8069912321181357, "grad_norm": 0.34554439783096313, "learning_rate": 0.00018310265346620843, "loss": 0.9881, "step": 6995 }, { "epoch": 0.8075680664513152, "grad_norm": 0.3216921389102936, "learning_rate": 0.00018306530071604603, "loss": 0.9685, "step": 7000 }, { "epoch": 0.8081449007844947, "grad_norm": 0.315304160118103, "learning_rate": 0.00018302791054480394, "loss": 0.9431, "step": 7005 }, { "epoch": 0.8087217351176742, "grad_norm": 0.34708473086357117, "learning_rate": 0.00018299048296932643, "loss": 0.9449, "step": 7010 }, { "epoch": 0.8092985694508538, "grad_norm": 0.32933709025382996, "learning_rate": 0.00018295301800647475, "loss": 0.9696, "step": 7015 }, { "epoch": 0.8098754037840332, "grad_norm": 0.3329271078109741, "learning_rate": 0.00018291551567312694, "loss": 0.9897, "step": 7020 }, { "epoch": 0.8104522381172128, "grad_norm": 0.35639676451683044, "learning_rate": 0.00018287797598617785, "loss": 0.9221, "step": 7025 }, { "epoch": 0.8110290724503922, "grad_norm": 0.3705461919307709, "learning_rate": 0.00018284039896253923, "loss": 0.9838, "step": 7030 }, { "epoch": 0.8116059067835718, "grad_norm": 0.3558962345123291, "learning_rate": 0.00018280278461913952, "loss": 0.935, "step": 7035 }, { "epoch": 0.8121827411167513, "grad_norm": 0.3236217200756073, "learning_rate": 0.00018276513297292414, "loss": 0.9597, "step": 7040 }, { "epoch": 0.8127595754499308, "grad_norm": 0.32449623942375183, "learning_rate": 0.00018272744404085512, "loss": 1.018, "step": 7045 }, { "epoch": 0.8133364097831103, "grad_norm": 0.3508262038230896, "learning_rate": 0.00018268971783991152, "loss": 1.024, "step": 7050 }, { "epoch": 0.8139132441162898, "grad_norm": 0.31073689460754395, "learning_rate": 0.00018265195438708904, "loss": 0.9202, "step": 7055 }, { "epoch": 0.8144900784494693, "grad_norm": 0.3384758532047272, "learning_rate": 0.00018261415369940013, "loss": 1.0022, "step": 7060 }, { "epoch": 0.8150669127826489, "grad_norm": 0.3505370318889618, "learning_rate": 0.00018257631579387412, "loss": 0.9807, "step": 7065 }, { "epoch": 0.8156437471158283, "grad_norm": 0.344837486743927, "learning_rate": 0.00018253844068755702, "loss": 0.9415, "step": 7070 }, { "epoch": 0.8162205814490079, "grad_norm": 0.32771411538124084, "learning_rate": 0.00018250052839751172, "loss": 0.9546, "step": 7075 }, { "epoch": 0.8167974157821873, "grad_norm": 0.3505322337150574, "learning_rate": 0.0001824625789408177, "loss": 0.9879, "step": 7080 }, { "epoch": 0.8173742501153669, "grad_norm": 0.3172287940979004, "learning_rate": 0.00018242459233457127, "loss": 0.9891, "step": 7085 }, { "epoch": 0.8179510844485464, "grad_norm": 0.3298950493335724, "learning_rate": 0.00018238656859588553, "loss": 0.9842, "step": 7090 }, { "epoch": 0.8185279187817259, "grad_norm": 0.3447635769844055, "learning_rate": 0.00018234850774189018, "loss": 0.9829, "step": 7095 }, { "epoch": 0.8191047531149054, "grad_norm": 0.35230880975723267, "learning_rate": 0.00018231040978973178, "loss": 0.9781, "step": 7100 }, { "epoch": 0.819681587448085, "grad_norm": 0.3099910318851471, "learning_rate": 0.00018227227475657346, "loss": 1.0082, "step": 7105 }, { "epoch": 0.8202584217812644, "grad_norm": 0.3409811854362488, "learning_rate": 0.00018223410265959516, "loss": 1.0083, "step": 7110 }, { "epoch": 0.820835256114444, "grad_norm": 0.3492036759853363, "learning_rate": 0.0001821958935159935, "loss": 1.0077, "step": 7115 }, { "epoch": 0.8214120904476234, "grad_norm": 0.3637070655822754, "learning_rate": 0.00018215764734298172, "loss": 1.0113, "step": 7120 }, { "epoch": 0.821988924780803, "grad_norm": 0.37920454144477844, "learning_rate": 0.00018211936415778984, "loss": 0.9417, "step": 7125 }, { "epoch": 0.8225657591139824, "grad_norm": 0.32240965962409973, "learning_rate": 0.00018208104397766453, "loss": 0.9896, "step": 7130 }, { "epoch": 0.823142593447162, "grad_norm": 0.3595122694969177, "learning_rate": 0.00018204268681986903, "loss": 0.9406, "step": 7135 }, { "epoch": 0.8237194277803415, "grad_norm": 0.3721562623977661, "learning_rate": 0.0001820042927016834, "loss": 0.981, "step": 7140 }, { "epoch": 0.824296262113521, "grad_norm": 0.44455429911613464, "learning_rate": 0.0001819658616404042, "loss": 0.9788, "step": 7145 }, { "epoch": 0.8248730964467005, "grad_norm": 0.33321380615234375, "learning_rate": 0.00018192739365334473, "loss": 1.009, "step": 7150 }, { "epoch": 0.82544993077988, "grad_norm": 0.3545369803905487, "learning_rate": 0.0001818888887578349, "loss": 0.9786, "step": 7155 }, { "epoch": 0.8260267651130595, "grad_norm": 0.3792615532875061, "learning_rate": 0.0001818503469712212, "loss": 1.0339, "step": 7160 }, { "epoch": 0.8266035994462391, "grad_norm": 0.3251761794090271, "learning_rate": 0.00018181176831086684, "loss": 0.9957, "step": 7165 }, { "epoch": 0.8271804337794185, "grad_norm": 0.3693098723888397, "learning_rate": 0.00018177315279415153, "loss": 1.0073, "step": 7170 }, { "epoch": 0.8277572681125981, "grad_norm": 0.3383295238018036, "learning_rate": 0.00018173450043847163, "loss": 0.979, "step": 7175 }, { "epoch": 0.8283341024457775, "grad_norm": 0.32615846395492554, "learning_rate": 0.00018169581126124015, "loss": 0.9472, "step": 7180 }, { "epoch": 0.8289109367789571, "grad_norm": 0.3272216022014618, "learning_rate": 0.00018165708527988664, "loss": 1.0187, "step": 7185 }, { "epoch": 0.8294877711121366, "grad_norm": 0.3196505606174469, "learning_rate": 0.00018161832251185715, "loss": 1.0547, "step": 7190 }, { "epoch": 0.8300646054453161, "grad_norm": 0.33881649374961853, "learning_rate": 0.00018157952297461448, "loss": 0.9226, "step": 7195 }, { "epoch": 0.8306414397784956, "grad_norm": 0.3978832960128784, "learning_rate": 0.00018154068668563782, "loss": 0.99, "step": 7200 }, { "epoch": 0.8312182741116751, "grad_norm": 0.3536497950553894, "learning_rate": 0.00018150181366242304, "loss": 0.9566, "step": 7205 }, { "epoch": 0.8317951084448546, "grad_norm": 0.34969255328178406, "learning_rate": 0.00018146290392248254, "loss": 0.9805, "step": 7210 }, { "epoch": 0.8323719427780342, "grad_norm": 0.3390229046344757, "learning_rate": 0.00018142395748334513, "loss": 0.972, "step": 7215 }, { "epoch": 0.8329487771112136, "grad_norm": 0.34188541769981384, "learning_rate": 0.00018138497436255636, "loss": 0.9357, "step": 7220 }, { "epoch": 0.8335256114443932, "grad_norm": 0.3517703711986542, "learning_rate": 0.00018134595457767815, "loss": 0.954, "step": 7225 }, { "epoch": 0.8341024457775726, "grad_norm": 0.361482173204422, "learning_rate": 0.000181306898146289, "loss": 0.9047, "step": 7230 }, { "epoch": 0.8346792801107522, "grad_norm": 0.3323913812637329, "learning_rate": 0.00018126780508598392, "loss": 0.9574, "step": 7235 }, { "epoch": 0.8352561144439317, "grad_norm": 0.34358808398246765, "learning_rate": 0.0001812286754143744, "loss": 0.9792, "step": 7240 }, { "epoch": 0.8358329487771112, "grad_norm": 0.3494786024093628, "learning_rate": 0.00018118950914908843, "loss": 0.9597, "step": 7245 }, { "epoch": 0.8364097831102907, "grad_norm": 0.35309937596321106, "learning_rate": 0.0001811503063077705, "loss": 0.9857, "step": 7250 }, { "epoch": 0.8369866174434702, "grad_norm": 0.3490305542945862, "learning_rate": 0.00018111106690808155, "loss": 0.9398, "step": 7255 }, { "epoch": 0.8375634517766497, "grad_norm": 0.36818861961364746, "learning_rate": 0.00018107179096769901, "loss": 0.9171, "step": 7260 }, { "epoch": 0.8381402861098293, "grad_norm": 0.33169808983802795, "learning_rate": 0.0001810324785043168, "loss": 0.9641, "step": 7265 }, { "epoch": 0.8387171204430087, "grad_norm": 0.3376959264278412, "learning_rate": 0.0001809931295356452, "loss": 0.9399, "step": 7270 }, { "epoch": 0.8392939547761883, "grad_norm": 0.348026841878891, "learning_rate": 0.00018095374407941104, "loss": 0.9203, "step": 7275 }, { "epoch": 0.8398707891093677, "grad_norm": 0.3790396749973297, "learning_rate": 0.00018091432215335752, "loss": 0.9878, "step": 7280 }, { "epoch": 0.8404476234425473, "grad_norm": 0.35788261890411377, "learning_rate": 0.00018087486377524434, "loss": 0.9829, "step": 7285 }, { "epoch": 0.8410244577757268, "grad_norm": 0.35284462571144104, "learning_rate": 0.0001808353689628475, "loss": 0.9356, "step": 7290 }, { "epoch": 0.8416012921089063, "grad_norm": 0.3505180776119232, "learning_rate": 0.00018079583773395957, "loss": 0.9599, "step": 7295 }, { "epoch": 0.8421781264420858, "grad_norm": 0.35011520981788635, "learning_rate": 0.00018075627010638942, "loss": 1.0106, "step": 7300 }, { "epoch": 0.8427549607752653, "grad_norm": 0.3106728792190552, "learning_rate": 0.0001807166660979623, "loss": 1.0036, "step": 7305 }, { "epoch": 0.8433317951084448, "grad_norm": 0.3722745180130005, "learning_rate": 0.00018067702572651997, "loss": 0.9706, "step": 7310 }, { "epoch": 0.8439086294416244, "grad_norm": 0.36281153559684753, "learning_rate": 0.00018063734900992045, "loss": 0.956, "step": 7315 }, { "epoch": 0.8444854637748038, "grad_norm": 0.32843315601348877, "learning_rate": 0.00018059763596603814, "loss": 0.9172, "step": 7320 }, { "epoch": 0.8450622981079834, "grad_norm": 0.32707735896110535, "learning_rate": 0.00018055788661276392, "loss": 0.975, "step": 7325 }, { "epoch": 0.8456391324411628, "grad_norm": 0.36803141236305237, "learning_rate": 0.0001805181009680049, "loss": 0.9455, "step": 7330 }, { "epoch": 0.8462159667743424, "grad_norm": 0.3357301950454712, "learning_rate": 0.0001804782790496846, "loss": 0.9609, "step": 7335 }, { "epoch": 0.846792801107522, "grad_norm": 0.3335096538066864, "learning_rate": 0.00018043842087574286, "loss": 0.9276, "step": 7340 }, { "epoch": 0.8473696354407014, "grad_norm": 0.3681318759918213, "learning_rate": 0.00018039852646413592, "loss": 1.0033, "step": 7345 }, { "epoch": 0.847946469773881, "grad_norm": 0.3279605507850647, "learning_rate": 0.00018035859583283626, "loss": 0.9662, "step": 7350 }, { "epoch": 0.8485233041070604, "grad_norm": 0.38645607233047485, "learning_rate": 0.00018031862899983264, "loss": 0.9416, "step": 7355 }, { "epoch": 0.84910013844024, "grad_norm": 0.3565409183502197, "learning_rate": 0.0001802786259831303, "loss": 0.9579, "step": 7360 }, { "epoch": 0.8496769727734195, "grad_norm": 0.3465147614479065, "learning_rate": 0.00018023858680075061, "loss": 0.9472, "step": 7365 }, { "epoch": 0.850253807106599, "grad_norm": 0.32295745611190796, "learning_rate": 0.00018019851147073134, "loss": 0.9665, "step": 7370 }, { "epoch": 0.8508306414397785, "grad_norm": 0.3312920331954956, "learning_rate": 0.0001801584000111265, "loss": 0.9478, "step": 7375 }, { "epoch": 0.8514074757729581, "grad_norm": 0.35227471590042114, "learning_rate": 0.00018011825244000632, "loss": 0.9475, "step": 7380 }, { "epoch": 0.8519843101061375, "grad_norm": 0.3861836791038513, "learning_rate": 0.00018007806877545744, "loss": 0.9884, "step": 7385 }, { "epoch": 0.8525611444393171, "grad_norm": 0.3391641080379486, "learning_rate": 0.00018003784903558264, "loss": 0.9773, "step": 7390 }, { "epoch": 0.8531379787724965, "grad_norm": 0.37857285141944885, "learning_rate": 0.00017999759323850098, "loss": 1.0079, "step": 7395 }, { "epoch": 0.8537148131056761, "grad_norm": 0.3548443019390106, "learning_rate": 0.0001799573014023478, "loss": 0.9685, "step": 7400 }, { "epoch": 0.8542916474388556, "grad_norm": 0.3362700045108795, "learning_rate": 0.00017991697354527463, "loss": 0.9403, "step": 7405 }, { "epoch": 0.8548684817720351, "grad_norm": 0.41143998503685, "learning_rate": 0.0001798766096854493, "loss": 0.9746, "step": 7410 }, { "epoch": 0.8554453161052146, "grad_norm": 0.33060017228126526, "learning_rate": 0.00017983620984105572, "loss": 1.013, "step": 7415 }, { "epoch": 0.8560221504383941, "grad_norm": 0.35252559185028076, "learning_rate": 0.00017979577403029416, "loss": 0.9738, "step": 7420 }, { "epoch": 0.8565989847715736, "grad_norm": 0.3686401844024658, "learning_rate": 0.00017975530227138105, "loss": 0.9608, "step": 7425 }, { "epoch": 0.8571758191047532, "grad_norm": 0.34325218200683594, "learning_rate": 0.00017971479458254894, "loss": 0.9885, "step": 7430 }, { "epoch": 0.8577526534379326, "grad_norm": 0.33591338992118835, "learning_rate": 0.00017967425098204664, "loss": 0.9853, "step": 7435 }, { "epoch": 0.8583294877711122, "grad_norm": 0.3713604509830475, "learning_rate": 0.00017963367148813913, "loss": 0.991, "step": 7440 }, { "epoch": 0.8589063221042916, "grad_norm": 0.34418532252311707, "learning_rate": 0.00017959305611910752, "loss": 0.9596, "step": 7445 }, { "epoch": 0.8594831564374712, "grad_norm": 0.32015010714530945, "learning_rate": 0.00017955240489324917, "loss": 0.9974, "step": 7450 }, { "epoch": 0.8600599907706507, "grad_norm": 0.3452713191509247, "learning_rate": 0.0001795117178288775, "loss": 0.9906, "step": 7455 }, { "epoch": 0.8606368251038302, "grad_norm": 0.33043158054351807, "learning_rate": 0.00017947099494432212, "loss": 0.9454, "step": 7460 }, { "epoch": 0.8612136594370097, "grad_norm": 0.34975555539131165, "learning_rate": 0.00017943023625792878, "loss": 0.9766, "step": 7465 }, { "epoch": 0.8617904937701892, "grad_norm": 0.37061572074890137, "learning_rate": 0.00017938944178805933, "loss": 1.0097, "step": 7470 }, { "epoch": 0.8623673281033687, "grad_norm": 0.349991112947464, "learning_rate": 0.00017934861155309174, "loss": 0.9862, "step": 7475 }, { "epoch": 0.8629441624365483, "grad_norm": 0.3499099016189575, "learning_rate": 0.0001793077455714202, "loss": 0.9513, "step": 7480 }, { "epoch": 0.8635209967697277, "grad_norm": 0.34608426690101624, "learning_rate": 0.00017926684386145478, "loss": 0.9782, "step": 7485 }, { "epoch": 0.8640978311029073, "grad_norm": 0.4855821132659912, "learning_rate": 0.00017922590644162188, "loss": 0.9442, "step": 7490 }, { "epoch": 0.8646746654360867, "grad_norm": 0.32568657398223877, "learning_rate": 0.00017918493333036383, "loss": 0.9528, "step": 7495 }, { "epoch": 0.8652514997692663, "grad_norm": 0.37212124466896057, "learning_rate": 0.00017914392454613913, "loss": 1.0034, "step": 7500 }, { "epoch": 0.8658283341024458, "grad_norm": 0.3470038175582886, "learning_rate": 0.00017910288010742235, "loss": 0.9732, "step": 7505 }, { "epoch": 0.8664051684356253, "grad_norm": 0.3674761950969696, "learning_rate": 0.00017906180003270396, "loss": 0.9784, "step": 7510 }, { "epoch": 0.8669820027688048, "grad_norm": 0.3694908916950226, "learning_rate": 0.00017902068434049077, "loss": 0.9666, "step": 7515 }, { "epoch": 0.8675588371019843, "grad_norm": 0.35502108931541443, "learning_rate": 0.00017897953304930542, "loss": 1.0092, "step": 7520 }, { "epoch": 0.8681356714351638, "grad_norm": 0.3409530222415924, "learning_rate": 0.0001789383461776866, "loss": 0.9093, "step": 7525 }, { "epoch": 0.8687125057683434, "grad_norm": 0.38244783878326416, "learning_rate": 0.00017889712374418912, "loss": 1.0119, "step": 7530 }, { "epoch": 0.8692893401015228, "grad_norm": 0.3554026782512665, "learning_rate": 0.0001788558657673838, "loss": 0.9517, "step": 7535 }, { "epoch": 0.8698661744347024, "grad_norm": 0.3271549940109253, "learning_rate": 0.00017881457226585735, "loss": 0.952, "step": 7540 }, { "epoch": 0.8704430087678818, "grad_norm": 0.3484821021556854, "learning_rate": 0.00017877324325821264, "loss": 1.0257, "step": 7545 }, { "epoch": 0.8710198431010614, "grad_norm": 0.3790343105792999, "learning_rate": 0.00017873187876306848, "loss": 0.9899, "step": 7550 }, { "epoch": 0.8715966774342409, "grad_norm": 0.3237937390804291, "learning_rate": 0.00017869047879905958, "loss": 0.977, "step": 7555 }, { "epoch": 0.8721735117674204, "grad_norm": 0.2987980246543884, "learning_rate": 0.00017864904338483676, "loss": 0.93, "step": 7560 }, { "epoch": 0.8727503461005999, "grad_norm": 0.3538360893726349, "learning_rate": 0.00017860757253906675, "loss": 1.0224, "step": 7565 }, { "epoch": 0.8733271804337794, "grad_norm": 0.349717915058136, "learning_rate": 0.00017856606628043227, "loss": 0.9598, "step": 7570 }, { "epoch": 0.8739040147669589, "grad_norm": 0.34832173585891724, "learning_rate": 0.00017852452462763192, "loss": 0.9837, "step": 7575 }, { "epoch": 0.8744808491001385, "grad_norm": 0.396838515996933, "learning_rate": 0.00017848294759938033, "loss": 1.0234, "step": 7580 }, { "epoch": 0.8750576834333179, "grad_norm": 0.3279632329940796, "learning_rate": 0.000178441335214408, "loss": 0.9785, "step": 7585 }, { "epoch": 0.8756345177664975, "grad_norm": 0.3444277048110962, "learning_rate": 0.00017839968749146142, "loss": 0.969, "step": 7590 }, { "epoch": 0.8762113520996769, "grad_norm": 0.3291734755039215, "learning_rate": 0.00017835800444930298, "loss": 0.9432, "step": 7595 }, { "epoch": 0.8767881864328565, "grad_norm": 0.3443477749824524, "learning_rate": 0.00017831628610671092, "loss": 0.9852, "step": 7600 }, { "epoch": 0.877365020766036, "grad_norm": 0.3432978093624115, "learning_rate": 0.0001782745324824795, "loss": 0.9759, "step": 7605 }, { "epoch": 0.8779418550992155, "grad_norm": 0.36630696058273315, "learning_rate": 0.00017823274359541876, "loss": 1.0005, "step": 7610 }, { "epoch": 0.878518689432395, "grad_norm": 0.35049259662628174, "learning_rate": 0.0001781909194643547, "loss": 0.9894, "step": 7615 }, { "epoch": 0.8790955237655745, "grad_norm": 0.3564181923866272, "learning_rate": 0.00017814906010812912, "loss": 1.0125, "step": 7620 }, { "epoch": 0.879672358098754, "grad_norm": 0.3521508574485779, "learning_rate": 0.00017810716554559982, "loss": 0.9931, "step": 7625 }, { "epoch": 0.8802491924319336, "grad_norm": 0.37031182646751404, "learning_rate": 0.00017806523579564037, "loss": 0.985, "step": 7630 }, { "epoch": 0.880826026765113, "grad_norm": 0.3313467800617218, "learning_rate": 0.00017802327087714016, "loss": 0.9245, "step": 7635 }, { "epoch": 0.8814028610982926, "grad_norm": 0.32318732142448425, "learning_rate": 0.0001779812708090045, "loss": 0.9655, "step": 7640 }, { "epoch": 0.881979695431472, "grad_norm": 0.3676154315471649, "learning_rate": 0.0001779392356101545, "loss": 0.9997, "step": 7645 }, { "epoch": 0.8825565297646516, "grad_norm": 0.3498569130897522, "learning_rate": 0.00017789716529952704, "loss": 1.0039, "step": 7650 }, { "epoch": 0.883133364097831, "grad_norm": 0.3374147117137909, "learning_rate": 0.00017785505989607495, "loss": 0.982, "step": 7655 }, { "epoch": 0.8837101984310106, "grad_norm": 0.31364133954048157, "learning_rate": 0.0001778129194187668, "loss": 1.0058, "step": 7660 }, { "epoch": 0.8842870327641901, "grad_norm": 0.33285027742385864, "learning_rate": 0.00017777074388658693, "loss": 0.9447, "step": 7665 }, { "epoch": 0.8848638670973696, "grad_norm": 0.31633734703063965, "learning_rate": 0.00017772853331853548, "loss": 0.9616, "step": 7670 }, { "epoch": 0.8854407014305491, "grad_norm": 0.3172147572040558, "learning_rate": 0.0001776862877336284, "loss": 0.9363, "step": 7675 }, { "epoch": 0.8860175357637287, "grad_norm": 0.3388344347476959, "learning_rate": 0.00017764400715089744, "loss": 0.9531, "step": 7680 }, { "epoch": 0.8865943700969081, "grad_norm": 0.3313317596912384, "learning_rate": 0.00017760169158939005, "loss": 1.0407, "step": 7685 }, { "epoch": 0.8871712044300877, "grad_norm": 0.35782188177108765, "learning_rate": 0.00017755934106816951, "loss": 0.9826, "step": 7690 }, { "epoch": 0.8877480387632672, "grad_norm": 0.36099013686180115, "learning_rate": 0.0001775169556063148, "loss": 0.981, "step": 7695 }, { "epoch": 0.8883248730964467, "grad_norm": 0.335542768239975, "learning_rate": 0.00017747453522292065, "loss": 0.9161, "step": 7700 }, { "epoch": 0.8889017074296263, "grad_norm": 0.31969940662384033, "learning_rate": 0.00017743207993709746, "loss": 0.9504, "step": 7705 }, { "epoch": 0.8894785417628057, "grad_norm": 0.364268034696579, "learning_rate": 0.00017738958976797157, "loss": 1.0168, "step": 7710 }, { "epoch": 0.8900553760959853, "grad_norm": 0.3400898873806, "learning_rate": 0.0001773470647346847, "loss": 0.9774, "step": 7715 }, { "epoch": 0.8906322104291647, "grad_norm": 0.33001792430877686, "learning_rate": 0.0001773045048563946, "loss": 1.0044, "step": 7720 }, { "epoch": 0.8912090447623443, "grad_norm": 0.33356598019599915, "learning_rate": 0.00017726191015227452, "loss": 0.927, "step": 7725 }, { "epoch": 0.8917858790955238, "grad_norm": 0.33303821086883545, "learning_rate": 0.00017721928064151347, "loss": 0.9675, "step": 7730 }, { "epoch": 0.8923627134287033, "grad_norm": 0.3559563159942627, "learning_rate": 0.00017717661634331612, "loss": 1.0159, "step": 7735 }, { "epoch": 0.8929395477618828, "grad_norm": 0.4187619686126709, "learning_rate": 0.00017713391727690284, "loss": 1.0143, "step": 7740 }, { "epoch": 0.8935163820950623, "grad_norm": 0.3573208749294281, "learning_rate": 0.00017709118346150964, "loss": 0.9379, "step": 7745 }, { "epoch": 0.8940932164282418, "grad_norm": 0.3350745141506195, "learning_rate": 0.00017704841491638816, "loss": 0.9326, "step": 7750 }, { "epoch": 0.8946700507614214, "grad_norm": 0.35027506947517395, "learning_rate": 0.0001770056116608057, "loss": 0.989, "step": 7755 }, { "epoch": 0.8952468850946008, "grad_norm": 0.4128049910068512, "learning_rate": 0.00017696277371404527, "loss": 0.9921, "step": 7760 }, { "epoch": 0.8958237194277804, "grad_norm": 0.32600775361061096, "learning_rate": 0.00017691990109540542, "loss": 0.9384, "step": 7765 }, { "epoch": 0.8964005537609598, "grad_norm": 0.3520258367061615, "learning_rate": 0.0001768769938242003, "loss": 0.9502, "step": 7770 }, { "epoch": 0.8969773880941394, "grad_norm": 0.34424781799316406, "learning_rate": 0.00017683405191975981, "loss": 0.9873, "step": 7775 }, { "epoch": 0.8975542224273189, "grad_norm": 0.4074617028236389, "learning_rate": 0.0001767910754014293, "loss": 0.975, "step": 7780 }, { "epoch": 0.8981310567604984, "grad_norm": 0.351262629032135, "learning_rate": 0.0001767480642885698, "loss": 0.9896, "step": 7785 }, { "epoch": 0.8987078910936779, "grad_norm": 0.41109660267829895, "learning_rate": 0.00017670501860055787, "loss": 0.9817, "step": 7790 }, { "epoch": 0.8992847254268574, "grad_norm": 0.3734431564807892, "learning_rate": 0.00017666193835678571, "loss": 0.9658, "step": 7795 }, { "epoch": 0.8998615597600369, "grad_norm": 0.3481912612915039, "learning_rate": 0.00017661882357666105, "loss": 0.9937, "step": 7800 }, { "epoch": 0.9004383940932165, "grad_norm": 0.3160158395767212, "learning_rate": 0.00017657567427960716, "loss": 1.0098, "step": 7805 }, { "epoch": 0.9010152284263959, "grad_norm": 0.35561150312423706, "learning_rate": 0.00017653249048506288, "loss": 0.9887, "step": 7810 }, { "epoch": 0.9015920627595755, "grad_norm": 0.35436055064201355, "learning_rate": 0.00017648927221248264, "loss": 1.0033, "step": 7815 }, { "epoch": 0.9021688970927549, "grad_norm": 0.3714834153652191, "learning_rate": 0.0001764460194813363, "loss": 1.0272, "step": 7820 }, { "epoch": 0.9027457314259345, "grad_norm": 0.35957440733909607, "learning_rate": 0.00017640273231110933, "loss": 0.9775, "step": 7825 }, { "epoch": 0.903322565759114, "grad_norm": 0.36524468660354614, "learning_rate": 0.00017635941072130268, "loss": 0.9737, "step": 7830 }, { "epoch": 0.9038994000922935, "grad_norm": 0.34316587448120117, "learning_rate": 0.00017631605473143283, "loss": 0.9265, "step": 7835 }, { "epoch": 0.904476234425473, "grad_norm": 0.3439452648162842, "learning_rate": 0.00017627266436103168, "loss": 0.9628, "step": 7840 }, { "epoch": 0.9050530687586525, "grad_norm": 0.3037916421890259, "learning_rate": 0.00017622923962964672, "loss": 0.9687, "step": 7845 }, { "epoch": 0.905629903091832, "grad_norm": 0.3490997850894928, "learning_rate": 0.0001761857805568409, "loss": 0.9824, "step": 7850 }, { "epoch": 0.9062067374250116, "grad_norm": 0.35158243775367737, "learning_rate": 0.00017614228716219255, "loss": 0.9667, "step": 7855 }, { "epoch": 0.906783571758191, "grad_norm": 0.38031184673309326, "learning_rate": 0.0001760987594652956, "loss": 1.0161, "step": 7860 }, { "epoch": 0.9073604060913706, "grad_norm": 0.34765803813934326, "learning_rate": 0.0001760551974857593, "loss": 0.9449, "step": 7865 }, { "epoch": 0.90793724042455, "grad_norm": 0.36684057116508484, "learning_rate": 0.00017601160124320844, "loss": 0.9568, "step": 7870 }, { "epoch": 0.9085140747577296, "grad_norm": 0.3498166501522064, "learning_rate": 0.00017596797075728322, "loss": 0.9692, "step": 7875 }, { "epoch": 0.9090909090909091, "grad_norm": 0.32973629236221313, "learning_rate": 0.00017592430604763924, "loss": 0.9436, "step": 7880 }, { "epoch": 0.9096677434240886, "grad_norm": 0.3311896026134491, "learning_rate": 0.0001758806071339475, "loss": 0.9726, "step": 7885 }, { "epoch": 0.9102445777572681, "grad_norm": 0.3403119742870331, "learning_rate": 0.00017583687403589454, "loss": 0.9592, "step": 7890 }, { "epoch": 0.9108214120904476, "grad_norm": 0.3610020577907562, "learning_rate": 0.00017579310677318214, "loss": 0.9674, "step": 7895 }, { "epoch": 0.9113982464236271, "grad_norm": 0.8834409117698669, "learning_rate": 0.00017574930536552757, "loss": 0.977, "step": 7900 }, { "epoch": 0.9119750807568067, "grad_norm": 0.3714768588542938, "learning_rate": 0.0001757054698326634, "loss": 1.0151, "step": 7905 }, { "epoch": 0.9125519150899861, "grad_norm": 0.3592800796031952, "learning_rate": 0.00017566160019433767, "loss": 0.9825, "step": 7910 }, { "epoch": 0.9131287494231657, "grad_norm": 0.334396630525589, "learning_rate": 0.0001756176964703137, "loss": 0.9823, "step": 7915 }, { "epoch": 0.9137055837563451, "grad_norm": 0.5015032291412354, "learning_rate": 0.00017557375868037026, "loss": 0.9493, "step": 7920 }, { "epoch": 0.9142824180895247, "grad_norm": 0.34532681107521057, "learning_rate": 0.00017552978684430134, "loss": 0.9458, "step": 7925 }, { "epoch": 0.9148592524227042, "grad_norm": 0.3358142077922821, "learning_rate": 0.00017548578098191636, "loss": 0.9446, "step": 7930 }, { "epoch": 0.9154360867558837, "grad_norm": 0.334096759557724, "learning_rate": 0.0001754417411130401, "loss": 0.9478, "step": 7935 }, { "epoch": 0.9160129210890632, "grad_norm": 0.38140803575515747, "learning_rate": 0.00017539766725751252, "loss": 0.9654, "step": 7940 }, { "epoch": 0.9165897554222427, "grad_norm": 0.3713916540145874, "learning_rate": 0.00017535355943518906, "loss": 1.002, "step": 7945 }, { "epoch": 0.9171665897554222, "grad_norm": 0.34454530477523804, "learning_rate": 0.0001753094176659403, "loss": 1.0134, "step": 7950 }, { "epoch": 0.9177434240886018, "grad_norm": 0.32925593852996826, "learning_rate": 0.0001752652419696523, "loss": 1.013, "step": 7955 }, { "epoch": 0.9183202584217812, "grad_norm": 0.3872643709182739, "learning_rate": 0.0001752210323662262, "loss": 1.0731, "step": 7960 }, { "epoch": 0.9188970927549608, "grad_norm": 0.3960958421230316, "learning_rate": 0.0001751767888755785, "loss": 1.0367, "step": 7965 }, { "epoch": 0.9194739270881402, "grad_norm": 0.4250330626964569, "learning_rate": 0.00017513251151764109, "loss": 0.993, "step": 7970 }, { "epoch": 0.9200507614213198, "grad_norm": 0.3722861409187317, "learning_rate": 0.0001750882003123609, "loss": 1.0253, "step": 7975 }, { "epoch": 0.9206275957544993, "grad_norm": 0.34457963705062866, "learning_rate": 0.00017504385527970028, "loss": 0.9638, "step": 7980 }, { "epoch": 0.9212044300876788, "grad_norm": 0.42666009068489075, "learning_rate": 0.00017499947643963672, "loss": 1.0463, "step": 7985 }, { "epoch": 0.9217812644208583, "grad_norm": 0.34574225544929504, "learning_rate": 0.00017495506381216296, "loss": 1.034, "step": 7990 }, { "epoch": 0.9223580987540378, "grad_norm": 0.37822163105010986, "learning_rate": 0.00017491061741728702, "loss": 0.9528, "step": 7995 }, { "epoch": 0.9229349330872173, "grad_norm": 0.36716076731681824, "learning_rate": 0.00017486613727503206, "loss": 0.986, "step": 8000 }, { "epoch": 0.9235117674203969, "grad_norm": 0.32711061835289, "learning_rate": 0.00017482162340543646, "loss": 0.9237, "step": 8005 }, { "epoch": 0.9240886017535763, "grad_norm": 0.39083343744277954, "learning_rate": 0.00017477707582855384, "loss": 0.9882, "step": 8010 }, { "epoch": 0.9246654360867559, "grad_norm": 0.3748285472393036, "learning_rate": 0.00017473249456445293, "loss": 1.025, "step": 8015 }, { "epoch": 0.9252422704199353, "grad_norm": 0.3572528064250946, "learning_rate": 0.0001746878796332177, "loss": 0.9893, "step": 8020 }, { "epoch": 0.9258191047531149, "grad_norm": 0.33131203055381775, "learning_rate": 0.00017464323105494727, "loss": 0.9768, "step": 8025 }, { "epoch": 0.9263959390862944, "grad_norm": 0.3413887917995453, "learning_rate": 0.0001745985488497559, "loss": 0.9692, "step": 8030 }, { "epoch": 0.9269727734194739, "grad_norm": 0.33206212520599365, "learning_rate": 0.000174553833037773, "loss": 0.9642, "step": 8035 }, { "epoch": 0.9275496077526535, "grad_norm": 0.3740399181842804, "learning_rate": 0.00017450908363914316, "loss": 0.9518, "step": 8040 }, { "epoch": 0.928126442085833, "grad_norm": 0.32293087244033813, "learning_rate": 0.00017446430067402603, "loss": 0.9629, "step": 8045 }, { "epoch": 0.9287032764190125, "grad_norm": 0.3599385619163513, "learning_rate": 0.00017441948416259645, "loss": 0.9665, "step": 8050 }, { "epoch": 0.929280110752192, "grad_norm": 0.36823639273643494, "learning_rate": 0.00017437463412504437, "loss": 0.942, "step": 8055 }, { "epoch": 0.9298569450853715, "grad_norm": 0.33894339203834534, "learning_rate": 0.00017432975058157473, "loss": 0.9273, "step": 8060 }, { "epoch": 0.930433779418551, "grad_norm": 0.3650658428668976, "learning_rate": 0.0001742848335524078, "loss": 1.0035, "step": 8065 }, { "epoch": 0.9310106137517306, "grad_norm": 0.31818750500679016, "learning_rate": 0.00017423988305777864, "loss": 0.972, "step": 8070 }, { "epoch": 0.93158744808491, "grad_norm": 0.35458680987358093, "learning_rate": 0.0001741948991179376, "loss": 0.9694, "step": 8075 }, { "epoch": 0.9321642824180896, "grad_norm": 0.37025484442710876, "learning_rate": 0.00017414988175315006, "loss": 0.9765, "step": 8080 }, { "epoch": 0.932741116751269, "grad_norm": 0.3461233377456665, "learning_rate": 0.0001741048309836964, "loss": 0.9824, "step": 8085 }, { "epoch": 0.9333179510844486, "grad_norm": 0.3459435701370239, "learning_rate": 0.00017405974682987204, "loss": 0.9828, "step": 8090 }, { "epoch": 0.933894785417628, "grad_norm": 0.6978673338890076, "learning_rate": 0.00017401462931198756, "loss": 0.9872, "step": 8095 }, { "epoch": 0.9344716197508076, "grad_norm": 0.3274736702442169, "learning_rate": 0.00017396947845036844, "loss": 0.9817, "step": 8100 }, { "epoch": 0.9350484540839871, "grad_norm": 0.357372522354126, "learning_rate": 0.00017392429426535527, "loss": 0.9568, "step": 8105 }, { "epoch": 0.9356252884171666, "grad_norm": 0.34130859375, "learning_rate": 0.00017387907677730353, "loss": 0.998, "step": 8110 }, { "epoch": 0.9362021227503461, "grad_norm": 0.3448001742362976, "learning_rate": 0.00017383382600658388, "loss": 1.0056, "step": 8115 }, { "epoch": 0.9367789570835257, "grad_norm": 0.33317503333091736, "learning_rate": 0.00017378854197358181, "loss": 0.9587, "step": 8120 }, { "epoch": 0.9373557914167051, "grad_norm": 0.328156054019928, "learning_rate": 0.0001737432246986979, "loss": 0.973, "step": 8125 }, { "epoch": 0.9379326257498847, "grad_norm": 0.3665638267993927, "learning_rate": 0.0001736978742023477, "loss": 0.9792, "step": 8130 }, { "epoch": 0.9385094600830641, "grad_norm": 0.3370935022830963, "learning_rate": 0.00017365249050496165, "loss": 0.9701, "step": 8135 }, { "epoch": 0.9390862944162437, "grad_norm": 0.3585437536239624, "learning_rate": 0.00017360707362698517, "loss": 0.9784, "step": 8140 }, { "epoch": 0.9396631287494231, "grad_norm": 0.3112260103225708, "learning_rate": 0.00017356162358887875, "loss": 0.9374, "step": 8145 }, { "epoch": 0.9402399630826027, "grad_norm": 0.3765519857406616, "learning_rate": 0.00017351614041111763, "loss": 0.987, "step": 8150 }, { "epoch": 0.9408167974157822, "grad_norm": 0.34944432973861694, "learning_rate": 0.00017347062411419208, "loss": 0.9721, "step": 8155 }, { "epoch": 0.9413936317489617, "grad_norm": 0.31840527057647705, "learning_rate": 0.00017342507471860733, "loss": 0.9476, "step": 8160 }, { "epoch": 0.9419704660821412, "grad_norm": 0.38349196314811707, "learning_rate": 0.00017337949224488343, "loss": 0.9886, "step": 8165 }, { "epoch": 0.9425473004153208, "grad_norm": 0.34952884912490845, "learning_rate": 0.00017333387671355542, "loss": 0.9522, "step": 8170 }, { "epoch": 0.9431241347485002, "grad_norm": 0.35600200295448303, "learning_rate": 0.0001732882281451731, "loss": 0.9515, "step": 8175 }, { "epoch": 0.9437009690816798, "grad_norm": 0.351744681596756, "learning_rate": 0.00017324254656030132, "loss": 1.053, "step": 8180 }, { "epoch": 0.9442778034148592, "grad_norm": 0.3579868674278259, "learning_rate": 0.00017319683197951967, "loss": 0.9598, "step": 8185 }, { "epoch": 0.9448546377480388, "grad_norm": 0.363534539937973, "learning_rate": 0.0001731510844234227, "loss": 0.9699, "step": 8190 }, { "epoch": 0.9454314720812182, "grad_norm": 0.3423145115375519, "learning_rate": 0.00017310530391261976, "loss": 0.9543, "step": 8195 }, { "epoch": 0.9460083064143978, "grad_norm": 0.35032740235328674, "learning_rate": 0.00017305949046773504, "loss": 0.9589, "step": 8200 }, { "epoch": 0.9465851407475773, "grad_norm": 0.32708555459976196, "learning_rate": 0.0001730136441094076, "loss": 0.9298, "step": 8205 }, { "epoch": 0.9471619750807568, "grad_norm": 0.36810487508773804, "learning_rate": 0.0001729677648582913, "loss": 1.0214, "step": 8210 }, { "epoch": 0.9477388094139363, "grad_norm": 0.36619478464126587, "learning_rate": 0.00017292185273505486, "loss": 1.0026, "step": 8215 }, { "epoch": 0.9483156437471159, "grad_norm": 0.34805989265441895, "learning_rate": 0.00017287590776038177, "loss": 0.9813, "step": 8220 }, { "epoch": 0.9488924780802953, "grad_norm": 0.3577708303928375, "learning_rate": 0.0001728299299549703, "loss": 0.9486, "step": 8225 }, { "epoch": 0.9494693124134749, "grad_norm": 0.34766557812690735, "learning_rate": 0.00017278391933953362, "loss": 0.9412, "step": 8230 }, { "epoch": 0.9500461467466543, "grad_norm": 0.4266161620616913, "learning_rate": 0.0001727378759347995, "loss": 1.0086, "step": 8235 }, { "epoch": 0.9506229810798339, "grad_norm": 0.37093380093574524, "learning_rate": 0.00017269179976151067, "loss": 0.9979, "step": 8240 }, { "epoch": 0.9511998154130133, "grad_norm": 0.3407989740371704, "learning_rate": 0.00017264569084042447, "loss": 0.9537, "step": 8245 }, { "epoch": 0.9517766497461929, "grad_norm": 0.34715965390205383, "learning_rate": 0.0001725995491923131, "loss": 0.9931, "step": 8250 }, { "epoch": 0.9523534840793724, "grad_norm": 0.35018789768218994, "learning_rate": 0.00017255337483796344, "loss": 0.9385, "step": 8255 }, { "epoch": 0.9529303184125519, "grad_norm": 0.3394466042518616, "learning_rate": 0.00017250716779817715, "loss": 0.9758, "step": 8260 }, { "epoch": 0.9535071527457314, "grad_norm": 0.3476242125034332, "learning_rate": 0.00017246092809377058, "loss": 0.9571, "step": 8265 }, { "epoch": 0.954083987078911, "grad_norm": 0.34088173508644104, "learning_rate": 0.00017241465574557475, "loss": 0.8954, "step": 8270 }, { "epoch": 0.9546608214120904, "grad_norm": 0.3062012493610382, "learning_rate": 0.00017236835077443557, "loss": 0.919, "step": 8275 }, { "epoch": 0.95523765574527, "grad_norm": 0.33732926845550537, "learning_rate": 0.0001723220132012134, "loss": 0.9417, "step": 8280 }, { "epoch": 0.9558144900784494, "grad_norm": 0.32906705141067505, "learning_rate": 0.00017227564304678346, "loss": 0.9216, "step": 8285 }, { "epoch": 0.956391324411629, "grad_norm": 0.3598426282405853, "learning_rate": 0.0001722292403320356, "loss": 0.9826, "step": 8290 }, { "epoch": 0.9569681587448085, "grad_norm": 0.3878871202468872, "learning_rate": 0.00017218280507787435, "loss": 0.971, "step": 8295 }, { "epoch": 0.957544993077988, "grad_norm": 0.3313681185245514, "learning_rate": 0.0001721363373052188, "loss": 0.9708, "step": 8300 }, { "epoch": 0.9581218274111675, "grad_norm": 0.36274102330207825, "learning_rate": 0.00017208983703500286, "loss": 0.9782, "step": 8305 }, { "epoch": 0.958698661744347, "grad_norm": 0.36623549461364746, "learning_rate": 0.00017204330428817496, "loss": 0.9838, "step": 8310 }, { "epoch": 0.9592754960775265, "grad_norm": 0.36566877365112305, "learning_rate": 0.00017199673908569819, "loss": 0.9703, "step": 8315 }, { "epoch": 0.9598523304107061, "grad_norm": 0.4171159863471985, "learning_rate": 0.00017195014144855025, "loss": 0.9786, "step": 8320 }, { "epoch": 0.9604291647438855, "grad_norm": 0.37554582953453064, "learning_rate": 0.00017190351139772348, "loss": 0.9437, "step": 8325 }, { "epoch": 0.9610059990770651, "grad_norm": 0.34158971905708313, "learning_rate": 0.00017185684895422483, "loss": 0.9366, "step": 8330 }, { "epoch": 0.9615828334102445, "grad_norm": 0.35574156045913696, "learning_rate": 0.0001718101541390758, "loss": 0.981, "step": 8335 }, { "epoch": 0.9621596677434241, "grad_norm": 0.3361159563064575, "learning_rate": 0.00017176342697331246, "loss": 0.9917, "step": 8340 }, { "epoch": 0.9627365020766036, "grad_norm": 0.3361218571662903, "learning_rate": 0.00017171666747798557, "loss": 0.956, "step": 8345 }, { "epoch": 0.9633133364097831, "grad_norm": 0.3416488468647003, "learning_rate": 0.00017166987567416033, "loss": 0.9817, "step": 8350 }, { "epoch": 0.9638901707429626, "grad_norm": 0.35277891159057617, "learning_rate": 0.00017162305158291655, "loss": 0.939, "step": 8355 }, { "epoch": 0.9644670050761421, "grad_norm": 0.3547937572002411, "learning_rate": 0.00017157619522534853, "loss": 0.9504, "step": 8360 }, { "epoch": 0.9650438394093216, "grad_norm": 0.4167003333568573, "learning_rate": 0.00017152930662256522, "loss": 0.9831, "step": 8365 }, { "epoch": 0.9656206737425012, "grad_norm": 0.33474335074424744, "learning_rate": 0.00017148238579568995, "loss": 0.9962, "step": 8370 }, { "epoch": 0.9661975080756806, "grad_norm": 0.3461887538433075, "learning_rate": 0.00017143543276586072, "loss": 0.9769, "step": 8375 }, { "epoch": 0.9667743424088602, "grad_norm": 0.33460527658462524, "learning_rate": 0.00017138844755422992, "loss": 0.9541, "step": 8380 }, { "epoch": 0.9673511767420396, "grad_norm": 0.32638758420944214, "learning_rate": 0.00017134143018196447, "loss": 0.967, "step": 8385 }, { "epoch": 0.9679280110752192, "grad_norm": 0.3325026333332062, "learning_rate": 0.0001712943806702458, "loss": 0.9852, "step": 8390 }, { "epoch": 0.9685048454083988, "grad_norm": 0.3283535838127136, "learning_rate": 0.0001712472990402698, "loss": 0.9792, "step": 8395 }, { "epoch": 0.9690816797415782, "grad_norm": 0.34361085295677185, "learning_rate": 0.00017120018531324689, "loss": 0.9412, "step": 8400 }, { "epoch": 0.9696585140747578, "grad_norm": 0.32706576585769653, "learning_rate": 0.00017115303951040182, "loss": 0.9493, "step": 8405 }, { "epoch": 0.9702353484079372, "grad_norm": 0.34150487184524536, "learning_rate": 0.00017110586165297392, "loss": 0.9744, "step": 8410 }, { "epoch": 0.9708121827411168, "grad_norm": 0.3405235707759857, "learning_rate": 0.00017105865176221684, "loss": 0.9312, "step": 8415 }, { "epoch": 0.9713890170742963, "grad_norm": 0.3444620668888092, "learning_rate": 0.0001710114098593988, "loss": 1.0102, "step": 8420 }, { "epoch": 0.9719658514074758, "grad_norm": 0.3453844487667084, "learning_rate": 0.00017096413596580238, "loss": 0.9673, "step": 8425 }, { "epoch": 0.9725426857406553, "grad_norm": 0.3417566418647766, "learning_rate": 0.00017091683010272447, "loss": 0.9831, "step": 8430 }, { "epoch": 0.9731195200738348, "grad_norm": 0.3529415428638458, "learning_rate": 0.00017086949229147652, "loss": 0.9837, "step": 8435 }, { "epoch": 0.9736963544070143, "grad_norm": 0.3694683909416199, "learning_rate": 0.00017082212255338432, "loss": 0.9564, "step": 8440 }, { "epoch": 0.9742731887401939, "grad_norm": 0.3446985185146332, "learning_rate": 0.00017077472090978798, "loss": 0.9441, "step": 8445 }, { "epoch": 0.9748500230733733, "grad_norm": 0.3844882845878601, "learning_rate": 0.000170727287382042, "loss": 0.932, "step": 8450 }, { "epoch": 0.9754268574065529, "grad_norm": 0.3843259811401367, "learning_rate": 0.00017067982199151543, "loss": 0.9793, "step": 8455 }, { "epoch": 0.9760036917397323, "grad_norm": 0.39412060379981995, "learning_rate": 0.00017063232475959133, "loss": 1.0562, "step": 8460 }, { "epoch": 0.9765805260729119, "grad_norm": 0.3523117005825043, "learning_rate": 0.00017058479570766745, "loss": 0.938, "step": 8465 }, { "epoch": 0.9771573604060914, "grad_norm": 0.3870525658130646, "learning_rate": 0.00017053723485715563, "loss": 0.9556, "step": 8470 }, { "epoch": 0.9777341947392709, "grad_norm": 0.36875876784324646, "learning_rate": 0.00017048964222948217, "loss": 0.9364, "step": 8475 }, { "epoch": 0.9783110290724504, "grad_norm": 0.3694005310535431, "learning_rate": 0.00017044201784608762, "loss": 0.9481, "step": 8480 }, { "epoch": 0.9788878634056299, "grad_norm": 0.46674659848213196, "learning_rate": 0.00017039436172842684, "loss": 0.9839, "step": 8485 }, { "epoch": 0.9794646977388094, "grad_norm": 0.35668930411338806, "learning_rate": 0.00017034667389796904, "loss": 0.9526, "step": 8490 }, { "epoch": 0.980041532071989, "grad_norm": 0.3454805314540863, "learning_rate": 0.0001702989543761977, "loss": 0.9748, "step": 8495 }, { "epoch": 0.9806183664051684, "grad_norm": 0.3518441617488861, "learning_rate": 0.00017025120318461047, "loss": 0.9725, "step": 8500 }, { "epoch": 0.981195200738348, "grad_norm": 0.3471713066101074, "learning_rate": 0.00017020342034471944, "loss": 0.965, "step": 8505 }, { "epoch": 0.9817720350715274, "grad_norm": 0.3555985391139984, "learning_rate": 0.00017015560587805081, "loss": 0.9563, "step": 8510 }, { "epoch": 0.982348869404707, "grad_norm": 0.3868695795536041, "learning_rate": 0.00017010775980614518, "loss": 0.9863, "step": 8515 }, { "epoch": 0.9829257037378865, "grad_norm": 0.3373103737831116, "learning_rate": 0.00017005988215055718, "loss": 0.9657, "step": 8520 }, { "epoch": 0.983502538071066, "grad_norm": 0.34423285722732544, "learning_rate": 0.00017001197293285589, "loss": 0.9394, "step": 8525 }, { "epoch": 0.9840793724042455, "grad_norm": 0.3576836585998535, "learning_rate": 0.00016996403217462442, "loss": 0.9968, "step": 8530 }, { "epoch": 0.984656206737425, "grad_norm": 0.34142541885375977, "learning_rate": 0.00016991605989746025, "loss": 0.9703, "step": 8535 }, { "epoch": 0.9852330410706045, "grad_norm": 0.32961562275886536, "learning_rate": 0.00016986805612297494, "loss": 0.9336, "step": 8540 }, { "epoch": 0.9858098754037841, "grad_norm": 0.36415210366249084, "learning_rate": 0.00016982002087279432, "loss": 0.9715, "step": 8545 }, { "epoch": 0.9863867097369635, "grad_norm": 0.3524576425552368, "learning_rate": 0.00016977195416855828, "loss": 0.9801, "step": 8550 }, { "epoch": 0.9869635440701431, "grad_norm": 0.33668380975723267, "learning_rate": 0.00016972385603192106, "loss": 0.9813, "step": 8555 }, { "epoch": 0.9875403784033225, "grad_norm": 0.32700344920158386, "learning_rate": 0.00016967572648455097, "loss": 0.9726, "step": 8560 }, { "epoch": 0.9881172127365021, "grad_norm": 0.3518313765525818, "learning_rate": 0.00016962756554813037, "loss": 1.0142, "step": 8565 }, { "epoch": 0.9886940470696816, "grad_norm": 0.35196545720100403, "learning_rate": 0.00016957937324435594, "loss": 1.0086, "step": 8570 }, { "epoch": 0.9892708814028611, "grad_norm": 0.38598722219467163, "learning_rate": 0.00016953114959493835, "loss": 0.9976, "step": 8575 }, { "epoch": 0.9898477157360406, "grad_norm": 0.3629986345767975, "learning_rate": 0.0001694828946216025, "loss": 0.9404, "step": 8580 }, { "epoch": 0.9904245500692201, "grad_norm": 0.36070430278778076, "learning_rate": 0.00016943460834608728, "loss": 1.0135, "step": 8585 }, { "epoch": 0.9910013844023996, "grad_norm": 0.3393903374671936, "learning_rate": 0.0001693862907901458, "loss": 0.9574, "step": 8590 }, { "epoch": 0.9915782187355792, "grad_norm": 0.35163483023643494, "learning_rate": 0.00016933794197554524, "loss": 0.9809, "step": 8595 }, { "epoch": 0.9921550530687586, "grad_norm": 0.36458709836006165, "learning_rate": 0.00016928956192406678, "loss": 0.9738, "step": 8600 }, { "epoch": 0.9927318874019382, "grad_norm": 0.3441014587879181, "learning_rate": 0.00016924115065750575, "loss": 0.9831, "step": 8605 }, { "epoch": 0.9933087217351176, "grad_norm": 0.3553078770637512, "learning_rate": 0.00016919270819767152, "loss": 0.953, "step": 8610 }, { "epoch": 0.9938855560682972, "grad_norm": 0.36507415771484375, "learning_rate": 0.00016914423456638753, "loss": 0.9955, "step": 8615 }, { "epoch": 0.9944623904014767, "grad_norm": 0.3221859335899353, "learning_rate": 0.0001690957297854912, "loss": 0.975, "step": 8620 }, { "epoch": 0.9950392247346562, "grad_norm": 0.39070311188697815, "learning_rate": 0.00016904719387683407, "loss": 0.9846, "step": 8625 }, { "epoch": 0.9956160590678357, "grad_norm": 0.3547630310058594, "learning_rate": 0.00016899862686228163, "loss": 0.9936, "step": 8630 }, { "epoch": 0.9961928934010152, "grad_norm": 0.36742347478866577, "learning_rate": 0.00016895002876371343, "loss": 0.9796, "step": 8635 }, { "epoch": 0.9967697277341947, "grad_norm": 0.3844885230064392, "learning_rate": 0.00016890139960302304, "loss": 0.9951, "step": 8640 }, { "epoch": 0.9973465620673743, "grad_norm": 0.3780565559864044, "learning_rate": 0.00016885273940211795, "loss": 0.9467, "step": 8645 }, { "epoch": 0.9979233964005537, "grad_norm": 0.35448718070983887, "learning_rate": 0.0001688040481829197, "loss": 0.9981, "step": 8650 }, { "epoch": 0.9985002307337333, "grad_norm": 0.3584054708480835, "learning_rate": 0.00016875532596736373, "loss": 0.963, "step": 8655 }, { "epoch": 0.9990770650669127, "grad_norm": 0.343289315700531, "learning_rate": 0.00016870657277739953, "loss": 0.9238, "step": 8660 }, { "epoch": 0.9996538994000923, "grad_norm": 0.3527401089668274, "learning_rate": 0.00016865778863499054, "loss": 0.9112, "step": 8665 }, { "epoch": 1.0, "eval_loss": 0.9769963622093201, "eval_runtime": 961.8297, "eval_samples_per_second": 15.959, "eval_steps_per_second": 0.998, "step": 8668 }, { "epoch": 1.0002307337332719, "grad_norm": 0.3392125070095062, "learning_rate": 0.00016860897356211403, "loss": 0.9856, "step": 8670 }, { "epoch": 1.0008075680664512, "grad_norm": 0.36300501227378845, "learning_rate": 0.00016856012758076133, "loss": 0.9442, "step": 8675 }, { "epoch": 1.0013844023996308, "grad_norm": 0.3717747926712036, "learning_rate": 0.0001685112507129377, "loss": 0.9853, "step": 8680 }, { "epoch": 1.0019612367328103, "grad_norm": 0.39347657561302185, "learning_rate": 0.00016846234298066218, "loss": 0.9624, "step": 8685 }, { "epoch": 1.00253807106599, "grad_norm": 0.3865641951560974, "learning_rate": 0.00016841340440596785, "loss": 0.941, "step": 8690 }, { "epoch": 1.0031149053991693, "grad_norm": 0.3533646762371063, "learning_rate": 0.00016836443501090163, "loss": 0.89, "step": 8695 }, { "epoch": 1.0036917397323488, "grad_norm": 0.3816198408603668, "learning_rate": 0.0001683154348175243, "loss": 0.9631, "step": 8700 }, { "epoch": 1.0042685740655284, "grad_norm": 0.3821849822998047, "learning_rate": 0.00016826640384791052, "loss": 0.9817, "step": 8705 }, { "epoch": 1.004845408398708, "grad_norm": 0.36628416180610657, "learning_rate": 0.00016821734212414894, "loss": 0.8998, "step": 8710 }, { "epoch": 1.0054222427318873, "grad_norm": 0.38809841871261597, "learning_rate": 0.00016816824966834183, "loss": 0.9391, "step": 8715 }, { "epoch": 1.0059990770650669, "grad_norm": 0.4090449810028076, "learning_rate": 0.00016811912650260556, "loss": 0.9141, "step": 8720 }, { "epoch": 1.0065759113982464, "grad_norm": 0.35133248567581177, "learning_rate": 0.0001680699726490701, "loss": 0.9621, "step": 8725 }, { "epoch": 1.007152745731426, "grad_norm": 0.3541509807109833, "learning_rate": 0.00016802078812987948, "loss": 0.9135, "step": 8730 }, { "epoch": 1.0077295800646053, "grad_norm": 0.34470343589782715, "learning_rate": 0.0001679715729671913, "loss": 0.9444, "step": 8735 }, { "epoch": 1.008306414397785, "grad_norm": 0.3753330409526825, "learning_rate": 0.00016792232718317718, "loss": 0.9491, "step": 8740 }, { "epoch": 1.0088832487309645, "grad_norm": 0.3640606701374054, "learning_rate": 0.0001678730508000224, "loss": 0.9451, "step": 8745 }, { "epoch": 1.009460083064144, "grad_norm": 0.35499805212020874, "learning_rate": 0.00016782374383992604, "loss": 0.9287, "step": 8750 }, { "epoch": 1.0100369173973236, "grad_norm": 0.3767320513725281, "learning_rate": 0.000167774406325101, "loss": 0.9379, "step": 8755 }, { "epoch": 1.010613751730503, "grad_norm": 0.39077675342559814, "learning_rate": 0.00016772503827777396, "loss": 0.9044, "step": 8760 }, { "epoch": 1.0111905860636825, "grad_norm": 0.3857879936695099, "learning_rate": 0.0001676756397201853, "loss": 0.9548, "step": 8765 }, { "epoch": 1.011767420396862, "grad_norm": 0.37087294459342957, "learning_rate": 0.00016762621067458917, "loss": 0.9255, "step": 8770 }, { "epoch": 1.0123442547300416, "grad_norm": 0.3710249662399292, "learning_rate": 0.00016757675116325343, "loss": 0.9263, "step": 8775 }, { "epoch": 1.012921089063221, "grad_norm": 0.37042540311813354, "learning_rate": 0.00016752726120845973, "loss": 0.9117, "step": 8780 }, { "epoch": 1.0134979233964005, "grad_norm": 0.37328779697418213, "learning_rate": 0.00016747774083250333, "loss": 0.9152, "step": 8785 }, { "epoch": 1.0140747577295801, "grad_norm": 0.35958462953567505, "learning_rate": 0.0001674281900576933, "loss": 0.9049, "step": 8790 }, { "epoch": 1.0146515920627597, "grad_norm": 0.38205039501190186, "learning_rate": 0.00016737860890635235, "loss": 0.928, "step": 8795 }, { "epoch": 1.015228426395939, "grad_norm": 0.3680003881454468, "learning_rate": 0.0001673289974008169, "loss": 0.9349, "step": 8800 }, { "epoch": 1.0158052607291186, "grad_norm": 0.3842025697231293, "learning_rate": 0.00016727935556343698, "loss": 0.9005, "step": 8805 }, { "epoch": 1.0163820950622982, "grad_norm": 0.37604400515556335, "learning_rate": 0.00016722968341657642, "loss": 0.9511, "step": 8810 }, { "epoch": 1.0169589293954777, "grad_norm": 0.36115026473999023, "learning_rate": 0.00016717998098261254, "loss": 0.9738, "step": 8815 }, { "epoch": 1.017535763728657, "grad_norm": 0.3732301890850067, "learning_rate": 0.0001671302482839364, "loss": 0.9471, "step": 8820 }, { "epoch": 1.0181125980618366, "grad_norm": 0.362886518239975, "learning_rate": 0.0001670804853429527, "loss": 0.9463, "step": 8825 }, { "epoch": 1.0186894323950162, "grad_norm": 0.38515588641166687, "learning_rate": 0.00016703069218207972, "loss": 0.9291, "step": 8830 }, { "epoch": 1.0192662667281958, "grad_norm": 0.3255932331085205, "learning_rate": 0.00016698086882374939, "loss": 0.957, "step": 8835 }, { "epoch": 1.019843101061375, "grad_norm": 0.37959185242652893, "learning_rate": 0.00016693101529040725, "loss": 0.9248, "step": 8840 }, { "epoch": 1.0204199353945547, "grad_norm": 0.3593595623970032, "learning_rate": 0.00016688113160451238, "loss": 0.9432, "step": 8845 }, { "epoch": 1.0209967697277342, "grad_norm": 0.3396543264389038, "learning_rate": 0.00016683121778853746, "loss": 0.8758, "step": 8850 }, { "epoch": 1.0215736040609138, "grad_norm": 0.355819433927536, "learning_rate": 0.00016678127386496883, "loss": 0.9419, "step": 8855 }, { "epoch": 1.0221504383940931, "grad_norm": 0.38481202721595764, "learning_rate": 0.00016673129985630625, "loss": 0.9459, "step": 8860 }, { "epoch": 1.0227272727272727, "grad_norm": 0.3595040440559387, "learning_rate": 0.00016668129578506315, "loss": 0.9144, "step": 8865 }, { "epoch": 1.0233041070604523, "grad_norm": 0.401304692029953, "learning_rate": 0.00016663126167376646, "loss": 0.9194, "step": 8870 }, { "epoch": 1.0238809413936318, "grad_norm": 0.32624685764312744, "learning_rate": 0.0001665811975449566, "loss": 0.931, "step": 8875 }, { "epoch": 1.0244577757268112, "grad_norm": 0.39976969361305237, "learning_rate": 0.00016653110342118764, "loss": 0.9067, "step": 8880 }, { "epoch": 1.0250346100599907, "grad_norm": 0.34697604179382324, "learning_rate": 0.00016648097932502704, "loss": 0.9473, "step": 8885 }, { "epoch": 1.0256114443931703, "grad_norm": 0.32559990882873535, "learning_rate": 0.0001664308252790558, "loss": 0.8861, "step": 8890 }, { "epoch": 1.0261882787263499, "grad_norm": 0.3570786416530609, "learning_rate": 0.0001663806413058684, "loss": 0.9822, "step": 8895 }, { "epoch": 1.0267651130595292, "grad_norm": 0.3664936125278473, "learning_rate": 0.00016633042742807285, "loss": 0.8971, "step": 8900 }, { "epoch": 1.0273419473927088, "grad_norm": 0.34375905990600586, "learning_rate": 0.00016628018366829055, "loss": 0.9683, "step": 8905 }, { "epoch": 1.0279187817258884, "grad_norm": 0.3472381830215454, "learning_rate": 0.00016622991004915645, "loss": 0.9341, "step": 8910 }, { "epoch": 1.028495616059068, "grad_norm": 0.4032275676727295, "learning_rate": 0.00016617960659331892, "loss": 0.9457, "step": 8915 }, { "epoch": 1.0290724503922473, "grad_norm": 0.3763919174671173, "learning_rate": 0.00016612927332343975, "loss": 0.9638, "step": 8920 }, { "epoch": 1.0296492847254268, "grad_norm": 0.3896368145942688, "learning_rate": 0.00016607891026219418, "loss": 0.9588, "step": 8925 }, { "epoch": 1.0302261190586064, "grad_norm": 0.3642440736293793, "learning_rate": 0.00016602851743227083, "loss": 0.9543, "step": 8930 }, { "epoch": 1.030802953391786, "grad_norm": 0.3432563245296478, "learning_rate": 0.0001659780948563719, "loss": 0.9543, "step": 8935 }, { "epoch": 1.0313797877249653, "grad_norm": 0.3308102786540985, "learning_rate": 0.00016592764255721264, "loss": 0.9284, "step": 8940 }, { "epoch": 1.0319566220581449, "grad_norm": 0.39647376537323, "learning_rate": 0.0001658771605575221, "loss": 0.8985, "step": 8945 }, { "epoch": 1.0325334563913244, "grad_norm": 0.35071223974227905, "learning_rate": 0.00016582664888004244, "loss": 0.9836, "step": 8950 }, { "epoch": 1.033110290724504, "grad_norm": 0.4009931981563568, "learning_rate": 0.00016577610754752925, "loss": 0.9932, "step": 8955 }, { "epoch": 1.0336871250576833, "grad_norm": 0.37220340967178345, "learning_rate": 0.00016572553658275157, "loss": 0.9339, "step": 8960 }, { "epoch": 1.034263959390863, "grad_norm": 0.5049521923065186, "learning_rate": 0.00016567493600849165, "loss": 0.9203, "step": 8965 }, { "epoch": 1.0348407937240425, "grad_norm": 0.3833288550376892, "learning_rate": 0.00016562430584754516, "loss": 0.9392, "step": 8970 }, { "epoch": 1.035417628057222, "grad_norm": 0.3854983448982239, "learning_rate": 0.00016557364612272113, "loss": 0.967, "step": 8975 }, { "epoch": 1.0359944623904014, "grad_norm": 0.38779276609420776, "learning_rate": 0.0001655229568568418, "loss": 0.9716, "step": 8980 }, { "epoch": 1.036571296723581, "grad_norm": 0.38969096541404724, "learning_rate": 0.00016547223807274287, "loss": 0.9235, "step": 8985 }, { "epoch": 1.0371481310567605, "grad_norm": 0.3601762354373932, "learning_rate": 0.00016542148979327315, "loss": 0.9388, "step": 8990 }, { "epoch": 1.03772496538994, "grad_norm": 0.37787869572639465, "learning_rate": 0.00016537071204129487, "loss": 0.9323, "step": 8995 }, { "epoch": 1.0383017997231194, "grad_norm": 0.3725028336048126, "learning_rate": 0.00016531990483968357, "loss": 0.911, "step": 9000 }, { "epoch": 1.038878634056299, "grad_norm": 0.3681508004665375, "learning_rate": 0.00016526906821132792, "loss": 0.9863, "step": 9005 }, { "epoch": 1.0394554683894786, "grad_norm": 0.3951893150806427, "learning_rate": 0.00016521820217912998, "loss": 0.9698, "step": 9010 }, { "epoch": 1.0400323027226581, "grad_norm": 0.40850746631622314, "learning_rate": 0.00016516730676600493, "loss": 0.9386, "step": 9015 }, { "epoch": 1.0406091370558375, "grad_norm": 0.3597157895565033, "learning_rate": 0.0001651163819948813, "loss": 0.895, "step": 9020 }, { "epoch": 1.041185971389017, "grad_norm": 0.35712316632270813, "learning_rate": 0.00016506542788870076, "loss": 0.9504, "step": 9025 }, { "epoch": 1.0417628057221966, "grad_norm": 0.3754498064517975, "learning_rate": 0.00016501444447041824, "loss": 0.8897, "step": 9030 }, { "epoch": 1.0423396400553762, "grad_norm": 0.3659985065460205, "learning_rate": 0.00016496343176300196, "loss": 0.8833, "step": 9035 }, { "epoch": 1.0429164743885555, "grad_norm": 0.4013921022415161, "learning_rate": 0.00016491238978943312, "loss": 0.9698, "step": 9040 }, { "epoch": 1.043493308721735, "grad_norm": 0.3634096682071686, "learning_rate": 0.00016486131857270628, "loss": 0.9354, "step": 9045 }, { "epoch": 1.0440701430549146, "grad_norm": 0.4177205562591553, "learning_rate": 0.00016481021813582913, "loss": 1.0056, "step": 9050 }, { "epoch": 1.0446469773880942, "grad_norm": 0.3462320864200592, "learning_rate": 0.00016475908850182251, "loss": 0.9092, "step": 9055 }, { "epoch": 1.0452238117212735, "grad_norm": 0.3686048984527588, "learning_rate": 0.00016470792969372039, "loss": 0.9341, "step": 9060 }, { "epoch": 1.045800646054453, "grad_norm": 0.368020623922348, "learning_rate": 0.00016465674173456998, "loss": 0.9805, "step": 9065 }, { "epoch": 1.0463774803876327, "grad_norm": 0.36095044016838074, "learning_rate": 0.0001646055246474315, "loss": 0.9008, "step": 9070 }, { "epoch": 1.0469543147208122, "grad_norm": 0.4059806168079376, "learning_rate": 0.00016455427845537835, "loss": 0.9404, "step": 9075 }, { "epoch": 1.0475311490539916, "grad_norm": 0.38529303669929504, "learning_rate": 0.00016450300318149707, "loss": 0.925, "step": 9080 }, { "epoch": 1.0481079833871711, "grad_norm": 0.34358593821525574, "learning_rate": 0.00016445169884888726, "loss": 0.9386, "step": 9085 }, { "epoch": 1.0486848177203507, "grad_norm": 0.3995891213417053, "learning_rate": 0.0001644003654806616, "loss": 0.9915, "step": 9090 }, { "epoch": 1.0492616520535303, "grad_norm": 0.37649253010749817, "learning_rate": 0.00016434900309994589, "loss": 0.9543, "step": 9095 }, { "epoch": 1.0498384863867098, "grad_norm": 0.3805069029331207, "learning_rate": 0.00016429761172987898, "loss": 0.9396, "step": 9100 }, { "epoch": 1.0504153207198892, "grad_norm": 0.35094547271728516, "learning_rate": 0.00016424619139361282, "loss": 0.9467, "step": 9105 }, { "epoch": 1.0509921550530688, "grad_norm": 0.375531941652298, "learning_rate": 0.00016419474211431227, "loss": 0.9519, "step": 9110 }, { "epoch": 1.0515689893862483, "grad_norm": 0.38208749890327454, "learning_rate": 0.0001641432639151554, "loss": 0.9717, "step": 9115 }, { "epoch": 1.0521458237194279, "grad_norm": 0.40280410647392273, "learning_rate": 0.00016409175681933328, "loss": 0.9059, "step": 9120 }, { "epoch": 1.0527226580526072, "grad_norm": 0.3839852511882782, "learning_rate": 0.0001640402208500499, "loss": 0.9399, "step": 9125 }, { "epoch": 1.0532994923857868, "grad_norm": 0.3738575875759125, "learning_rate": 0.00016398865603052228, "loss": 0.9179, "step": 9130 }, { "epoch": 1.0538763267189664, "grad_norm": 0.36099398136138916, "learning_rate": 0.00016393706238398056, "loss": 0.9475, "step": 9135 }, { "epoch": 1.054453161052146, "grad_norm": 0.37569257616996765, "learning_rate": 0.00016388543993366774, "loss": 0.9202, "step": 9140 }, { "epoch": 1.0550299953853253, "grad_norm": 0.3682805001735687, "learning_rate": 0.0001638337887028398, "loss": 0.8981, "step": 9145 }, { "epoch": 1.0556068297185048, "grad_norm": 0.3938318192958832, "learning_rate": 0.00016378210871476577, "loss": 0.9511, "step": 9150 }, { "epoch": 1.0561836640516844, "grad_norm": 0.4259416460990906, "learning_rate": 0.00016373039999272756, "loss": 0.8954, "step": 9155 }, { "epoch": 1.056760498384864, "grad_norm": 0.4090641140937805, "learning_rate": 0.00016367866256002003, "loss": 0.955, "step": 9160 }, { "epoch": 1.0573373327180433, "grad_norm": 0.3798772394657135, "learning_rate": 0.00016362689643995105, "loss": 0.9026, "step": 9165 }, { "epoch": 1.0579141670512229, "grad_norm": 0.42819643020629883, "learning_rate": 0.0001635751016558413, "loss": 0.9789, "step": 9170 }, { "epoch": 1.0584910013844024, "grad_norm": 0.4076824486255646, "learning_rate": 0.00016352327823102448, "loss": 0.9453, "step": 9175 }, { "epoch": 1.059067835717582, "grad_norm": 0.42359670996665955, "learning_rate": 0.00016347142618884712, "loss": 0.8879, "step": 9180 }, { "epoch": 1.0596446700507614, "grad_norm": 0.347460001707077, "learning_rate": 0.00016341954555266865, "loss": 0.9195, "step": 9185 }, { "epoch": 1.060221504383941, "grad_norm": 0.3660471737384796, "learning_rate": 0.00016336763634586143, "loss": 0.9269, "step": 9190 }, { "epoch": 1.0607983387171205, "grad_norm": 0.40367934107780457, "learning_rate": 0.00016331569859181062, "loss": 0.937, "step": 9195 }, { "epoch": 1.0613751730503, "grad_norm": 0.3569383919239044, "learning_rate": 0.00016326373231391434, "loss": 0.9234, "step": 9200 }, { "epoch": 1.0619520073834794, "grad_norm": 0.3617344796657562, "learning_rate": 0.00016321173753558343, "loss": 0.9292, "step": 9205 }, { "epoch": 1.062528841716659, "grad_norm": 0.34451478719711304, "learning_rate": 0.00016315971428024168, "loss": 0.8996, "step": 9210 }, { "epoch": 1.0631056760498385, "grad_norm": 0.3708736300468445, "learning_rate": 0.00016310766257132567, "loss": 0.9464, "step": 9215 }, { "epoch": 1.063682510383018, "grad_norm": 0.3594589829444885, "learning_rate": 0.00016305558243228475, "loss": 0.9344, "step": 9220 }, { "epoch": 1.0642593447161974, "grad_norm": 0.4037278890609741, "learning_rate": 0.0001630034738865812, "loss": 0.9109, "step": 9225 }, { "epoch": 1.064836179049377, "grad_norm": 0.3730737864971161, "learning_rate": 0.00016295133695768996, "loss": 0.9734, "step": 9230 }, { "epoch": 1.0654130133825566, "grad_norm": 0.4240753650665283, "learning_rate": 0.00016289917166909884, "loss": 0.9375, "step": 9235 }, { "epoch": 1.0659898477157361, "grad_norm": 0.3585973381996155, "learning_rate": 0.00016284697804430843, "loss": 0.948, "step": 9240 }, { "epoch": 1.0665666820489155, "grad_norm": 0.38361573219299316, "learning_rate": 0.00016279475610683203, "loss": 0.9547, "step": 9245 }, { "epoch": 1.067143516382095, "grad_norm": 0.4068223834037781, "learning_rate": 0.00016274250588019568, "loss": 0.9829, "step": 9250 }, { "epoch": 1.0677203507152746, "grad_norm": 0.37644630670547485, "learning_rate": 0.00016269022738793832, "loss": 0.9747, "step": 9255 }, { "epoch": 1.0682971850484542, "grad_norm": 0.35857805609703064, "learning_rate": 0.00016263792065361135, "loss": 0.9538, "step": 9260 }, { "epoch": 1.0688740193816335, "grad_norm": 0.37298527359962463, "learning_rate": 0.00016258558570077925, "loss": 0.9559, "step": 9265 }, { "epoch": 1.069450853714813, "grad_norm": 0.35640233755111694, "learning_rate": 0.00016253322255301887, "loss": 0.9729, "step": 9270 }, { "epoch": 1.0700276880479926, "grad_norm": 0.3905540108680725, "learning_rate": 0.00016248083123392, "loss": 0.9356, "step": 9275 }, { "epoch": 1.0706045223811722, "grad_norm": 0.3624536395072937, "learning_rate": 0.00016242841176708497, "loss": 0.9457, "step": 9280 }, { "epoch": 1.0711813567143516, "grad_norm": 0.3997156620025635, "learning_rate": 0.0001623759641761289, "loss": 0.919, "step": 9285 }, { "epoch": 1.0717581910475311, "grad_norm": 0.34290432929992676, "learning_rate": 0.00016232348848467946, "loss": 0.9155, "step": 9290 }, { "epoch": 1.0723350253807107, "grad_norm": 0.3913106322288513, "learning_rate": 0.00016227098471637713, "loss": 0.9814, "step": 9295 }, { "epoch": 1.0729118597138902, "grad_norm": 0.3945711553096771, "learning_rate": 0.00016221845289487492, "loss": 0.9145, "step": 9300 }, { "epoch": 1.0734886940470696, "grad_norm": 0.3874426484107971, "learning_rate": 0.0001621658930438385, "loss": 0.9411, "step": 9305 }, { "epoch": 1.0740655283802492, "grad_norm": 0.37332093715667725, "learning_rate": 0.00016211330518694624, "loss": 0.9382, "step": 9310 }, { "epoch": 1.0746423627134287, "grad_norm": 0.3567892014980316, "learning_rate": 0.00016206068934788905, "loss": 0.9323, "step": 9315 }, { "epoch": 1.0752191970466083, "grad_norm": 0.3612172305583954, "learning_rate": 0.00016200804555037047, "loss": 0.9779, "step": 9320 }, { "epoch": 1.0757960313797876, "grad_norm": 0.4569231867790222, "learning_rate": 0.0001619553738181066, "loss": 0.957, "step": 9325 }, { "epoch": 1.0763728657129672, "grad_norm": 0.3518175184726715, "learning_rate": 0.0001619026741748262, "loss": 0.8803, "step": 9330 }, { "epoch": 1.0769497000461468, "grad_norm": 0.37040144205093384, "learning_rate": 0.00016184994664427053, "loss": 0.9358, "step": 9335 }, { "epoch": 1.0775265343793263, "grad_norm": 0.3865705132484436, "learning_rate": 0.00016179719125019345, "loss": 0.9072, "step": 9340 }, { "epoch": 1.0781033687125057, "grad_norm": 0.37067902088165283, "learning_rate": 0.00016174440801636138, "loss": 0.9472, "step": 9345 }, { "epoch": 1.0786802030456852, "grad_norm": 0.3600660264492035, "learning_rate": 0.0001616915969665533, "loss": 0.9108, "step": 9350 }, { "epoch": 1.0792570373788648, "grad_norm": 0.3454911410808563, "learning_rate": 0.00016163875812456063, "loss": 0.9037, "step": 9355 }, { "epoch": 1.0798338717120444, "grad_norm": 0.3558720648288727, "learning_rate": 0.0001615858915141874, "loss": 0.8982, "step": 9360 }, { "epoch": 1.0804107060452237, "grad_norm": 0.35229113698005676, "learning_rate": 0.00016153299715925012, "loss": 0.9239, "step": 9365 }, { "epoch": 1.0809875403784033, "grad_norm": 0.3825600743293762, "learning_rate": 0.00016148007508357784, "loss": 0.9469, "step": 9370 }, { "epoch": 1.0815643747115828, "grad_norm": 0.44566377997398376, "learning_rate": 0.00016142712531101196, "loss": 0.9333, "step": 9375 }, { "epoch": 1.0821412090447624, "grad_norm": 0.38880455493927, "learning_rate": 0.00016137414786540654, "loss": 0.9118, "step": 9380 }, { "epoch": 1.0827180433779418, "grad_norm": 0.39327144622802734, "learning_rate": 0.00016132114277062797, "loss": 0.9402, "step": 9385 }, { "epoch": 1.0832948777111213, "grad_norm": 0.36946901679039, "learning_rate": 0.0001612681100505552, "loss": 0.8793, "step": 9390 }, { "epoch": 1.0838717120443009, "grad_norm": 0.3460526764392853, "learning_rate": 0.00016121504972907956, "loss": 0.8921, "step": 9395 }, { "epoch": 1.0844485463774804, "grad_norm": 0.4143986999988556, "learning_rate": 0.0001611619618301048, "loss": 0.9805, "step": 9400 }, { "epoch": 1.0850253807106598, "grad_norm": 0.38757944107055664, "learning_rate": 0.00016110884637754713, "loss": 0.9422, "step": 9405 }, { "epoch": 1.0856022150438394, "grad_norm": 0.37840455770492554, "learning_rate": 0.00016105570339533518, "loss": 0.9387, "step": 9410 }, { "epoch": 1.086179049377019, "grad_norm": 0.38999855518341064, "learning_rate": 0.00016100253290740995, "loss": 0.948, "step": 9415 }, { "epoch": 1.0867558837101985, "grad_norm": 0.38250136375427246, "learning_rate": 0.00016094933493772487, "loss": 0.934, "step": 9420 }, { "epoch": 1.087332718043378, "grad_norm": 0.36484387516975403, "learning_rate": 0.0001608961095102457, "loss": 0.8776, "step": 9425 }, { "epoch": 1.0879095523765574, "grad_norm": 0.4226197600364685, "learning_rate": 0.00016084285664895066, "loss": 0.8962, "step": 9430 }, { "epoch": 1.088486386709737, "grad_norm": 0.41604354977607727, "learning_rate": 0.00016078957637783017, "loss": 0.9563, "step": 9435 }, { "epoch": 1.0890632210429165, "grad_norm": 0.37396174669265747, "learning_rate": 0.00016073626872088718, "loss": 0.9355, "step": 9440 }, { "epoch": 1.0896400553760959, "grad_norm": 0.37299230694770813, "learning_rate": 0.00016068293370213684, "loss": 0.9513, "step": 9445 }, { "epoch": 1.0902168897092754, "grad_norm": 0.38365793228149414, "learning_rate": 0.00016062957134560675, "loss": 0.94, "step": 9450 }, { "epoch": 1.090793724042455, "grad_norm": 0.3528333306312561, "learning_rate": 0.00016057618167533667, "loss": 0.9808, "step": 9455 }, { "epoch": 1.0913705583756346, "grad_norm": 0.38088691234588623, "learning_rate": 0.00016052276471537877, "loss": 0.9308, "step": 9460 }, { "epoch": 1.0919473927088141, "grad_norm": 0.377972275018692, "learning_rate": 0.0001604693204897975, "loss": 0.8924, "step": 9465 }, { "epoch": 1.0925242270419935, "grad_norm": 0.323946475982666, "learning_rate": 0.00016041584902266968, "loss": 0.9062, "step": 9470 }, { "epoch": 1.093101061375173, "grad_norm": 0.4198917746543884, "learning_rate": 0.00016036235033808417, "loss": 0.9348, "step": 9475 }, { "epoch": 1.0936778957083526, "grad_norm": 0.36399000883102417, "learning_rate": 0.00016030882446014234, "loss": 0.9395, "step": 9480 }, { "epoch": 1.0942547300415322, "grad_norm": 0.37311363220214844, "learning_rate": 0.0001602552714129576, "loss": 0.9531, "step": 9485 }, { "epoch": 1.0948315643747115, "grad_norm": 0.3694175183773041, "learning_rate": 0.00016020169122065578, "loss": 0.9254, "step": 9490 }, { "epoch": 1.095408398707891, "grad_norm": 0.4136304557323456, "learning_rate": 0.00016014808390737485, "loss": 0.9588, "step": 9495 }, { "epoch": 1.0959852330410707, "grad_norm": 0.3715604841709137, "learning_rate": 0.000160094449497265, "loss": 0.9446, "step": 9500 }, { "epoch": 1.0965620673742502, "grad_norm": 0.3993631601333618, "learning_rate": 0.0001600407880144886, "loss": 0.9291, "step": 9505 }, { "epoch": 1.0971389017074296, "grad_norm": 0.37997785210609436, "learning_rate": 0.00015998709948322027, "loss": 0.9908, "step": 9510 }, { "epoch": 1.0977157360406091, "grad_norm": 0.3883385956287384, "learning_rate": 0.00015993338392764685, "loss": 0.901, "step": 9515 }, { "epoch": 1.0982925703737887, "grad_norm": 0.3730206787586212, "learning_rate": 0.00015987964137196726, "loss": 0.8803, "step": 9520 }, { "epoch": 1.0988694047069683, "grad_norm": 0.38496091961860657, "learning_rate": 0.00015982587184039263, "loss": 0.8802, "step": 9525 }, { "epoch": 1.0994462390401476, "grad_norm": 0.3969172239303589, "learning_rate": 0.00015977207535714625, "loss": 0.9055, "step": 9530 }, { "epoch": 1.1000230733733272, "grad_norm": 0.44119396805763245, "learning_rate": 0.0001597182519464635, "loss": 1.0069, "step": 9535 }, { "epoch": 1.1005999077065067, "grad_norm": 0.3768390119075775, "learning_rate": 0.00015966440163259202, "loss": 0.9637, "step": 9540 }, { "epoch": 1.1011767420396863, "grad_norm": 0.39733919501304626, "learning_rate": 0.00015961052443979137, "loss": 0.9896, "step": 9545 }, { "epoch": 1.1017535763728656, "grad_norm": 0.3685975968837738, "learning_rate": 0.0001595566203923334, "loss": 0.9021, "step": 9550 }, { "epoch": 1.1023304107060452, "grad_norm": 0.4275849759578705, "learning_rate": 0.00015950268951450198, "loss": 0.897, "step": 9555 }, { "epoch": 1.1029072450392248, "grad_norm": 0.401770681142807, "learning_rate": 0.00015944873183059303, "loss": 0.9091, "step": 9560 }, { "epoch": 1.1034840793724043, "grad_norm": 0.38329482078552246, "learning_rate": 0.00015939474736491468, "loss": 0.9396, "step": 9565 }, { "epoch": 1.1040609137055837, "grad_norm": 0.34465184807777405, "learning_rate": 0.00015934073614178696, "loss": 0.9683, "step": 9570 }, { "epoch": 1.1046377480387632, "grad_norm": 0.40164217352867126, "learning_rate": 0.00015928669818554206, "loss": 0.9354, "step": 9575 }, { "epoch": 1.1052145823719428, "grad_norm": 0.39106857776641846, "learning_rate": 0.0001592326335205242, "loss": 0.909, "step": 9580 }, { "epoch": 1.1057914167051224, "grad_norm": 0.36791619658470154, "learning_rate": 0.00015917854217108954, "loss": 0.8927, "step": 9585 }, { "epoch": 1.1063682510383017, "grad_norm": 0.3477643132209778, "learning_rate": 0.00015912442416160644, "loss": 0.8746, "step": 9590 }, { "epoch": 1.1069450853714813, "grad_norm": 0.36076053977012634, "learning_rate": 0.0001590702795164551, "loss": 0.9222, "step": 9595 }, { "epoch": 1.1075219197046609, "grad_norm": 0.4136887490749359, "learning_rate": 0.00015901610826002787, "loss": 0.9537, "step": 9600 }, { "epoch": 1.1080987540378404, "grad_norm": 0.3848995566368103, "learning_rate": 0.0001589619104167289, "loss": 0.9322, "step": 9605 }, { "epoch": 1.1086755883710198, "grad_norm": 0.3714704215526581, "learning_rate": 0.00015890768601097447, "loss": 0.9499, "step": 9610 }, { "epoch": 1.1092524227041993, "grad_norm": 0.3850856125354767, "learning_rate": 0.0001588534350671928, "loss": 0.9638, "step": 9615 }, { "epoch": 1.109829257037379, "grad_norm": 0.3717416226863861, "learning_rate": 0.00015879915760982406, "loss": 0.943, "step": 9620 }, { "epoch": 1.1104060913705585, "grad_norm": 0.36091673374176025, "learning_rate": 0.0001587448536633203, "loss": 0.9536, "step": 9625 }, { "epoch": 1.1109829257037378, "grad_norm": 0.343685507774353, "learning_rate": 0.00015869052325214554, "loss": 0.9391, "step": 9630 }, { "epoch": 1.1115597600369174, "grad_norm": 0.40542444586753845, "learning_rate": 0.00015863616640077578, "loss": 0.8862, "step": 9635 }, { "epoch": 1.112136594370097, "grad_norm": 0.3820521831512451, "learning_rate": 0.00015858178313369893, "loss": 0.9461, "step": 9640 }, { "epoch": 1.1127134287032765, "grad_norm": 0.3812884986400604, "learning_rate": 0.00015852737347541465, "loss": 0.9391, "step": 9645 }, { "epoch": 1.1132902630364558, "grad_norm": 0.3547990918159485, "learning_rate": 0.00015847293745043466, "loss": 0.9091, "step": 9650 }, { "epoch": 1.1138670973696354, "grad_norm": 0.3752082884311676, "learning_rate": 0.0001584184750832825, "loss": 0.9802, "step": 9655 }, { "epoch": 1.114443931702815, "grad_norm": 0.4098625183105469, "learning_rate": 0.00015836398639849355, "loss": 0.8935, "step": 9660 }, { "epoch": 1.1150207660359945, "grad_norm": 0.40601786971092224, "learning_rate": 0.0001583094714206151, "loss": 0.9065, "step": 9665 }, { "epoch": 1.1155976003691739, "grad_norm": 0.3601096272468567, "learning_rate": 0.0001582549301742062, "loss": 0.942, "step": 9670 }, { "epoch": 1.1161744347023534, "grad_norm": 0.37831351161003113, "learning_rate": 0.00015820036268383785, "loss": 0.949, "step": 9675 }, { "epoch": 1.116751269035533, "grad_norm": 0.3862978219985962, "learning_rate": 0.00015814576897409273, "loss": 0.9181, "step": 9680 }, { "epoch": 1.1173281033687126, "grad_norm": 0.3954308331012726, "learning_rate": 0.00015809114906956552, "loss": 0.9699, "step": 9685 }, { "epoch": 1.117904937701892, "grad_norm": 0.39789891242980957, "learning_rate": 0.00015803650299486252, "loss": 0.9297, "step": 9690 }, { "epoch": 1.1184817720350715, "grad_norm": 0.3621467053890228, "learning_rate": 0.00015798183077460188, "loss": 0.9135, "step": 9695 }, { "epoch": 1.119058606368251, "grad_norm": 0.3689694404602051, "learning_rate": 0.0001579271324334136, "loss": 0.9608, "step": 9700 }, { "epoch": 1.1196354407014306, "grad_norm": 0.3975661098957062, "learning_rate": 0.00015787240799593937, "loss": 0.9225, "step": 9705 }, { "epoch": 1.12021227503461, "grad_norm": 0.3825543224811554, "learning_rate": 0.00015781765748683262, "loss": 0.9709, "step": 9710 }, { "epoch": 1.1207891093677895, "grad_norm": 0.434542715549469, "learning_rate": 0.0001577628809307586, "loss": 0.9354, "step": 9715 }, { "epoch": 1.121365943700969, "grad_norm": 0.36350882053375244, "learning_rate": 0.00015770807835239424, "loss": 0.9486, "step": 9720 }, { "epoch": 1.1219427780341487, "grad_norm": 0.3788936138153076, "learning_rate": 0.00015765324977642822, "loss": 1.0066, "step": 9725 }, { "epoch": 1.122519612367328, "grad_norm": 0.3581905663013458, "learning_rate": 0.00015759839522756092, "loss": 0.9538, "step": 9730 }, { "epoch": 1.1230964467005076, "grad_norm": 0.41105157136917114, "learning_rate": 0.00015754351473050435, "loss": 0.9166, "step": 9735 }, { "epoch": 1.1236732810336871, "grad_norm": 0.3867192566394806, "learning_rate": 0.0001574886083099824, "loss": 0.931, "step": 9740 }, { "epoch": 1.1242501153668667, "grad_norm": 0.3927655518054962, "learning_rate": 0.00015743367599073044, "loss": 0.9501, "step": 9745 }, { "epoch": 1.1248269497000463, "grad_norm": 0.3890318274497986, "learning_rate": 0.0001573787177974956, "loss": 0.9712, "step": 9750 }, { "epoch": 1.1254037840332256, "grad_norm": 0.3845767080783844, "learning_rate": 0.0001573237337550367, "loss": 0.9395, "step": 9755 }, { "epoch": 1.1259806183664052, "grad_norm": 0.38565099239349365, "learning_rate": 0.00015726872388812407, "loss": 0.9331, "step": 9760 }, { "epoch": 1.1265574526995847, "grad_norm": 0.3627215027809143, "learning_rate": 0.00015721368822153986, "loss": 0.9511, "step": 9765 }, { "epoch": 1.127134287032764, "grad_norm": 0.35866937041282654, "learning_rate": 0.00015715862678007767, "loss": 0.9542, "step": 9770 }, { "epoch": 1.1277111213659436, "grad_norm": 0.36874574422836304, "learning_rate": 0.0001571035395885428, "loss": 0.8729, "step": 9775 }, { "epoch": 1.1282879556991232, "grad_norm": 0.4252176880836487, "learning_rate": 0.0001570484266717522, "loss": 0.9313, "step": 9780 }, { "epoch": 1.1288647900323028, "grad_norm": 0.36453625559806824, "learning_rate": 0.00015699328805453424, "loss": 0.8858, "step": 9785 }, { "epoch": 1.1294416243654823, "grad_norm": 0.4107474982738495, "learning_rate": 0.00015693812376172902, "loss": 0.9887, "step": 9790 }, { "epoch": 1.1300184586986617, "grad_norm": 0.4069174528121948, "learning_rate": 0.00015688293381818823, "loss": 0.9478, "step": 9795 }, { "epoch": 1.1305952930318413, "grad_norm": 0.37103304266929626, "learning_rate": 0.00015682771824877494, "loss": 0.9144, "step": 9800 }, { "epoch": 1.1311721273650208, "grad_norm": 0.40417689085006714, "learning_rate": 0.00015677247707836397, "loss": 0.9464, "step": 9805 }, { "epoch": 1.1317489616982002, "grad_norm": 0.3834182918071747, "learning_rate": 0.0001567172103318415, "loss": 0.9902, "step": 9810 }, { "epoch": 1.1323257960313797, "grad_norm": 0.37694182991981506, "learning_rate": 0.00015666191803410536, "loss": 0.9339, "step": 9815 }, { "epoch": 1.1329026303645593, "grad_norm": 0.3973928987979889, "learning_rate": 0.00015660660021006478, "loss": 0.9301, "step": 9820 }, { "epoch": 1.1334794646977389, "grad_norm": 0.41115155816078186, "learning_rate": 0.00015655125688464062, "loss": 0.9196, "step": 9825 }, { "epoch": 1.1340562990309184, "grad_norm": 0.3878481686115265, "learning_rate": 0.00015649588808276505, "loss": 0.9395, "step": 9830 }, { "epoch": 1.1346331333640978, "grad_norm": 0.362389475107193, "learning_rate": 0.00015644049382938191, "loss": 0.9005, "step": 9835 }, { "epoch": 1.1352099676972773, "grad_norm": 0.40141263604164124, "learning_rate": 0.00015638507414944642, "loss": 0.9412, "step": 9840 }, { "epoch": 1.135786802030457, "grad_norm": 0.3529787063598633, "learning_rate": 0.00015632962906792522, "loss": 0.9619, "step": 9845 }, { "epoch": 1.1363636363636362, "grad_norm": 0.37504446506500244, "learning_rate": 0.00015627415860979641, "loss": 0.96, "step": 9850 }, { "epoch": 1.1369404706968158, "grad_norm": 0.39885926246643066, "learning_rate": 0.0001562186628000496, "loss": 0.9156, "step": 9855 }, { "epoch": 1.1375173050299954, "grad_norm": 0.37924274802207947, "learning_rate": 0.0001561631416636857, "loss": 0.958, "step": 9860 }, { "epoch": 1.138094139363175, "grad_norm": 0.37374287843704224, "learning_rate": 0.00015610759522571713, "loss": 0.8828, "step": 9865 }, { "epoch": 1.1386709736963545, "grad_norm": 0.37404969334602356, "learning_rate": 0.00015605202351116765, "loss": 0.9512, "step": 9870 }, { "epoch": 1.1392478080295338, "grad_norm": 0.3823772966861725, "learning_rate": 0.00015599642654507244, "loss": 0.9525, "step": 9875 }, { "epoch": 1.1398246423627134, "grad_norm": 0.45090019702911377, "learning_rate": 0.00015594080435247802, "loss": 0.9666, "step": 9880 }, { "epoch": 1.140401476695893, "grad_norm": 0.37332072854042053, "learning_rate": 0.00015588515695844234, "loss": 0.9481, "step": 9885 }, { "epoch": 1.1409783110290725, "grad_norm": 0.36044663190841675, "learning_rate": 0.0001558294843880346, "loss": 0.9188, "step": 9890 }, { "epoch": 1.1415551453622519, "grad_norm": 0.3901468515396118, "learning_rate": 0.00015577378666633545, "loss": 0.996, "step": 9895 }, { "epoch": 1.1421319796954315, "grad_norm": 0.3967418074607849, "learning_rate": 0.00015571806381843676, "loss": 0.9345, "step": 9900 }, { "epoch": 1.142708814028611, "grad_norm": 0.3700670301914215, "learning_rate": 0.00015566231586944186, "loss": 0.9379, "step": 9905 }, { "epoch": 1.1432856483617906, "grad_norm": 0.37614816427230835, "learning_rate": 0.00015560654284446526, "loss": 0.9585, "step": 9910 }, { "epoch": 1.14386248269497, "grad_norm": 0.38715970516204834, "learning_rate": 0.00015555074476863282, "loss": 0.9538, "step": 9915 }, { "epoch": 1.1444393170281495, "grad_norm": 0.38108375668525696, "learning_rate": 0.0001554949216670817, "loss": 0.9557, "step": 9920 }, { "epoch": 1.145016151361329, "grad_norm": 0.39917248487472534, "learning_rate": 0.00015543907356496033, "loss": 0.9416, "step": 9925 }, { "epoch": 1.1455929856945086, "grad_norm": 0.41090819239616394, "learning_rate": 0.00015538320048742835, "loss": 0.9457, "step": 9930 }, { "epoch": 1.146169820027688, "grad_norm": 0.38558125495910645, "learning_rate": 0.00015532730245965668, "loss": 0.9564, "step": 9935 }, { "epoch": 1.1467466543608675, "grad_norm": 0.4513038694858551, "learning_rate": 0.00015527137950682756, "loss": 0.9503, "step": 9940 }, { "epoch": 1.147323488694047, "grad_norm": 0.3840969502925873, "learning_rate": 0.00015521543165413428, "loss": 0.9037, "step": 9945 }, { "epoch": 1.1479003230272267, "grad_norm": 0.3525768220424652, "learning_rate": 0.00015515945892678157, "loss": 0.9935, "step": 9950 }, { "epoch": 1.148477157360406, "grad_norm": 0.37335145473480225, "learning_rate": 0.0001551034613499852, "loss": 0.9736, "step": 9955 }, { "epoch": 1.1490539916935856, "grad_norm": 0.4112907350063324, "learning_rate": 0.00015504743894897218, "loss": 0.9434, "step": 9960 }, { "epoch": 1.1496308260267651, "grad_norm": 0.3955402374267578, "learning_rate": 0.00015499139174898071, "loss": 0.9893, "step": 9965 }, { "epoch": 1.1502076603599447, "grad_norm": 0.39400553703308105, "learning_rate": 0.0001549353197752602, "loss": 0.9892, "step": 9970 }, { "epoch": 1.150784494693124, "grad_norm": 0.38637709617614746, "learning_rate": 0.00015487922305307118, "loss": 0.9282, "step": 9975 }, { "epoch": 1.1513613290263036, "grad_norm": 0.44860902428627014, "learning_rate": 0.00015482310160768527, "loss": 1.0229, "step": 9980 }, { "epoch": 1.1519381633594832, "grad_norm": 0.378139466047287, "learning_rate": 0.00015476695546438535, "loss": 0.9421, "step": 9985 }, { "epoch": 1.1525149976926627, "grad_norm": 0.4301804304122925, "learning_rate": 0.0001547107846484653, "loss": 0.9599, "step": 9990 }, { "epoch": 1.153091832025842, "grad_norm": 0.3928966522216797, "learning_rate": 0.0001546545891852303, "loss": 0.9652, "step": 9995 }, { "epoch": 1.1536686663590217, "grad_norm": 0.35950881242752075, "learning_rate": 0.0001545983690999964, "loss": 0.8964, "step": 10000 }, { "epoch": 1.1542455006922012, "grad_norm": 0.3532848656177521, "learning_rate": 0.00015454212441809095, "loss": 0.9069, "step": 10005 }, { "epoch": 1.1548223350253808, "grad_norm": 0.4111880958080292, "learning_rate": 0.0001544858551648522, "loss": 0.94, "step": 10010 }, { "epoch": 1.1553991693585601, "grad_norm": 0.4143196642398834, "learning_rate": 0.0001544295613656296, "loss": 0.9598, "step": 10015 }, { "epoch": 1.1559760036917397, "grad_norm": 0.4328601062297821, "learning_rate": 0.00015437324304578363, "loss": 0.9171, "step": 10020 }, { "epoch": 1.1565528380249193, "grad_norm": 0.4185430407524109, "learning_rate": 0.0001543169002306858, "loss": 0.8857, "step": 10025 }, { "epoch": 1.1571296723580988, "grad_norm": 0.34174492955207825, "learning_rate": 0.00015426053294571865, "loss": 0.9565, "step": 10030 }, { "epoch": 1.1577065066912782, "grad_norm": 0.4023081660270691, "learning_rate": 0.00015420414121627575, "loss": 0.9048, "step": 10035 }, { "epoch": 1.1582833410244577, "grad_norm": 0.3603830635547638, "learning_rate": 0.00015414772506776165, "loss": 0.9337, "step": 10040 }, { "epoch": 1.1588601753576373, "grad_norm": 0.35782790184020996, "learning_rate": 0.000154091284525592, "loss": 0.906, "step": 10045 }, { "epoch": 1.1594370096908169, "grad_norm": 0.4056355953216553, "learning_rate": 0.00015403481961519334, "loss": 0.8862, "step": 10050 }, { "epoch": 1.1600138440239962, "grad_norm": 0.3880009949207306, "learning_rate": 0.00015397833036200322, "loss": 0.9643, "step": 10055 }, { "epoch": 1.1605906783571758, "grad_norm": 0.37115952372550964, "learning_rate": 0.00015392181679147013, "loss": 0.9906, "step": 10060 }, { "epoch": 1.1611675126903553, "grad_norm": 0.40016767382621765, "learning_rate": 0.00015386527892905365, "loss": 0.928, "step": 10065 }, { "epoch": 1.161744347023535, "grad_norm": 0.38123106956481934, "learning_rate": 0.00015380871680022406, "loss": 0.9618, "step": 10070 }, { "epoch": 1.1623211813567145, "grad_norm": 0.36609750986099243, "learning_rate": 0.00015375213043046276, "loss": 0.9228, "step": 10075 }, { "epoch": 1.1628980156898938, "grad_norm": 0.39698731899261475, "learning_rate": 0.0001536955198452621, "loss": 0.9658, "step": 10080 }, { "epoch": 1.1634748500230734, "grad_norm": 0.37321779131889343, "learning_rate": 0.00015363888507012515, "loss": 0.9488, "step": 10085 }, { "epoch": 1.164051684356253, "grad_norm": 0.3812940716743469, "learning_rate": 0.00015358222613056602, "loss": 0.9386, "step": 10090 }, { "epoch": 1.1646285186894323, "grad_norm": 0.42194968461990356, "learning_rate": 0.0001535255430521097, "loss": 0.9154, "step": 10095 }, { "epoch": 1.1652053530226119, "grad_norm": 0.419939786195755, "learning_rate": 0.00015346883586029198, "loss": 0.9235, "step": 10100 }, { "epoch": 1.1657821873557914, "grad_norm": 0.35055387020111084, "learning_rate": 0.00015341210458065963, "loss": 0.9365, "step": 10105 }, { "epoch": 1.166359021688971, "grad_norm": 0.3702836334705353, "learning_rate": 0.00015335534923877013, "loss": 0.9896, "step": 10110 }, { "epoch": 1.1669358560221506, "grad_norm": 0.3982597291469574, "learning_rate": 0.00015329856986019192, "loss": 0.9258, "step": 10115 }, { "epoch": 1.16751269035533, "grad_norm": 0.41476061940193176, "learning_rate": 0.00015324176647050415, "loss": 0.9549, "step": 10120 }, { "epoch": 1.1680895246885095, "grad_norm": 0.37612512707710266, "learning_rate": 0.000153184939095297, "loss": 0.9374, "step": 10125 }, { "epoch": 1.168666359021689, "grad_norm": 0.3923216462135315, "learning_rate": 0.00015312808776017113, "loss": 0.9184, "step": 10130 }, { "epoch": 1.1692431933548684, "grad_norm": 0.35655319690704346, "learning_rate": 0.00015307121249073831, "loss": 0.9052, "step": 10135 }, { "epoch": 1.169820027688048, "grad_norm": 0.3752961754798889, "learning_rate": 0.00015301431331262095, "loss": 0.9356, "step": 10140 }, { "epoch": 1.1703968620212275, "grad_norm": 0.3986845314502716, "learning_rate": 0.0001529573902514522, "loss": 0.9992, "step": 10145 }, { "epoch": 1.170973696354407, "grad_norm": 0.39377009868621826, "learning_rate": 0.00015290044333287597, "loss": 0.9546, "step": 10150 }, { "epoch": 1.1715505306875866, "grad_norm": 0.3905738294124603, "learning_rate": 0.00015284347258254704, "loss": 0.9224, "step": 10155 }, { "epoch": 1.172127365020766, "grad_norm": 0.38576817512512207, "learning_rate": 0.00015278647802613083, "loss": 0.8831, "step": 10160 }, { "epoch": 1.1727041993539455, "grad_norm": 0.36881786584854126, "learning_rate": 0.00015272945968930346, "loss": 0.9233, "step": 10165 }, { "epoch": 1.173281033687125, "grad_norm": 0.3943594694137573, "learning_rate": 0.0001526724175977518, "loss": 0.8819, "step": 10170 }, { "epoch": 1.1738578680203045, "grad_norm": 0.3838745355606079, "learning_rate": 0.0001526153517771735, "loss": 0.9679, "step": 10175 }, { "epoch": 1.174434702353484, "grad_norm": 0.38031336665153503, "learning_rate": 0.00015255826225327675, "loss": 0.9789, "step": 10180 }, { "epoch": 1.1750115366866636, "grad_norm": 0.4234495759010315, "learning_rate": 0.0001525011490517805, "loss": 0.9495, "step": 10185 }, { "epoch": 1.1755883710198431, "grad_norm": 0.3952755033969879, "learning_rate": 0.00015244401219841438, "loss": 0.9164, "step": 10190 }, { "epoch": 1.1761652053530227, "grad_norm": 0.38901087641716003, "learning_rate": 0.00015238685171891863, "loss": 0.9748, "step": 10195 }, { "epoch": 1.176742039686202, "grad_norm": 0.39485955238342285, "learning_rate": 0.00015232966763904416, "loss": 0.9365, "step": 10200 }, { "epoch": 1.1773188740193816, "grad_norm": 0.36091843247413635, "learning_rate": 0.00015227245998455254, "loss": 0.9711, "step": 10205 }, { "epoch": 1.1778957083525612, "grad_norm": 0.3655792772769928, "learning_rate": 0.00015221522878121593, "loss": 0.9597, "step": 10210 }, { "epoch": 1.1784725426857405, "grad_norm": 0.37455499172210693, "learning_rate": 0.00015215797405481704, "loss": 0.9553, "step": 10215 }, { "epoch": 1.17904937701892, "grad_norm": 0.41447341442108154, "learning_rate": 0.00015210069583114928, "loss": 0.9201, "step": 10220 }, { "epoch": 1.1796262113520997, "grad_norm": 0.40085557103157043, "learning_rate": 0.0001520433941360166, "loss": 0.9807, "step": 10225 }, { "epoch": 1.1802030456852792, "grad_norm": 0.405367374420166, "learning_rate": 0.00015198606899523352, "loss": 0.9543, "step": 10230 }, { "epoch": 1.1807798800184588, "grad_norm": 0.3848768472671509, "learning_rate": 0.00015192872043462514, "loss": 0.917, "step": 10235 }, { "epoch": 1.1813567143516381, "grad_norm": 0.3763655126094818, "learning_rate": 0.0001518713484800271, "loss": 0.944, "step": 10240 }, { "epoch": 1.1819335486848177, "grad_norm": 0.3986711800098419, "learning_rate": 0.00015181395315728554, "loss": 0.8734, "step": 10245 }, { "epoch": 1.1825103830179973, "grad_norm": 0.3781437873840332, "learning_rate": 0.00015175653449225716, "loss": 0.9581, "step": 10250 }, { "epoch": 1.1830872173511768, "grad_norm": 0.40199145674705505, "learning_rate": 0.00015169909251080922, "loss": 0.9467, "step": 10255 }, { "epoch": 1.1836640516843562, "grad_norm": 0.3898562788963318, "learning_rate": 0.00015164162723881947, "loss": 0.9474, "step": 10260 }, { "epoch": 1.1842408860175357, "grad_norm": 0.3571324348449707, "learning_rate": 0.00015158413870217606, "loss": 0.9509, "step": 10265 }, { "epoch": 1.1848177203507153, "grad_norm": 0.4020764231681824, "learning_rate": 0.00015152662692677774, "loss": 0.9347, "step": 10270 }, { "epoch": 1.1853945546838949, "grad_norm": 0.3949398398399353, "learning_rate": 0.00015146909193853363, "loss": 0.9403, "step": 10275 }, { "epoch": 1.1859713890170742, "grad_norm": 0.37582725286483765, "learning_rate": 0.0001514115337633634, "loss": 0.9477, "step": 10280 }, { "epoch": 1.1865482233502538, "grad_norm": 0.3682326674461365, "learning_rate": 0.0001513539524271971, "loss": 0.9516, "step": 10285 }, { "epoch": 1.1871250576834333, "grad_norm": 0.3528582453727722, "learning_rate": 0.0001512963479559752, "loss": 0.9185, "step": 10290 }, { "epoch": 1.187701892016613, "grad_norm": 0.43728992342948914, "learning_rate": 0.0001512387203756487, "loss": 0.9085, "step": 10295 }, { "epoch": 1.1882787263497923, "grad_norm": 0.3973561227321625, "learning_rate": 0.00015118106971217883, "loss": 0.9589, "step": 10300 }, { "epoch": 1.1888555606829718, "grad_norm": 0.3903520405292511, "learning_rate": 0.00015112339599153746, "loss": 0.9408, "step": 10305 }, { "epoch": 1.1894323950161514, "grad_norm": 0.47012007236480713, "learning_rate": 0.00015106569923970664, "loss": 1.0075, "step": 10310 }, { "epoch": 1.190009229349331, "grad_norm": 0.41596439480781555, "learning_rate": 0.00015100797948267882, "loss": 0.9521, "step": 10315 }, { "epoch": 1.1905860636825103, "grad_norm": 0.3753730058670044, "learning_rate": 0.00015095023674645698, "loss": 0.9424, "step": 10320 }, { "epoch": 1.1911628980156899, "grad_norm": 0.37456628680229187, "learning_rate": 0.00015089247105705425, "loss": 0.9179, "step": 10325 }, { "epoch": 1.1917397323488694, "grad_norm": 0.422911137342453, "learning_rate": 0.0001508346824404942, "loss": 0.9415, "step": 10330 }, { "epoch": 1.192316566682049, "grad_norm": 0.3563242256641388, "learning_rate": 0.00015077687092281074, "loss": 0.9618, "step": 10335 }, { "epoch": 1.1928934010152283, "grad_norm": 0.38106414675712585, "learning_rate": 0.000150719036530048, "loss": 0.9488, "step": 10340 }, { "epoch": 1.193470235348408, "grad_norm": 0.39531293511390686, "learning_rate": 0.00015066117928826063, "loss": 0.9512, "step": 10345 }, { "epoch": 1.1940470696815875, "grad_norm": 1.4611742496490479, "learning_rate": 0.00015060329922351326, "loss": 0.9554, "step": 10350 }, { "epoch": 1.194623904014767, "grad_norm": 0.39308950304985046, "learning_rate": 0.0001505453963618811, "loss": 0.9498, "step": 10355 }, { "epoch": 1.1952007383479464, "grad_norm": 0.43254947662353516, "learning_rate": 0.00015048747072944944, "loss": 0.9345, "step": 10360 }, { "epoch": 1.195777572681126, "grad_norm": 0.38925978541374207, "learning_rate": 0.0001504295223523139, "loss": 0.9409, "step": 10365 }, { "epoch": 1.1963544070143055, "grad_norm": 0.3758449852466583, "learning_rate": 0.00015037155125658037, "loss": 0.925, "step": 10370 }, { "epoch": 1.196931241347485, "grad_norm": 0.4111412465572357, "learning_rate": 0.00015031355746836485, "loss": 0.9333, "step": 10375 }, { "epoch": 1.1975080756806644, "grad_norm": 0.4146568179130554, "learning_rate": 0.00015025554101379379, "loss": 0.9382, "step": 10380 }, { "epoch": 1.198084910013844, "grad_norm": 0.38229039311408997, "learning_rate": 0.00015019750191900362, "loss": 0.8596, "step": 10385 }, { "epoch": 1.1986617443470236, "grad_norm": 0.3730371296405792, "learning_rate": 0.00015013944021014105, "loss": 0.9667, "step": 10390 }, { "epoch": 1.1992385786802031, "grad_norm": 0.4008365273475647, "learning_rate": 0.0001500813559133631, "loss": 0.9299, "step": 10395 }, { "epoch": 1.1998154130133827, "grad_norm": 0.4008704721927643, "learning_rate": 0.00015002324905483673, "loss": 0.9336, "step": 10400 }, { "epoch": 1.200392247346562, "grad_norm": 0.46172234416007996, "learning_rate": 0.00014996511966073925, "loss": 0.925, "step": 10405 }, { "epoch": 1.2009690816797416, "grad_norm": 0.3420531451702118, "learning_rate": 0.00014990696775725812, "loss": 0.9524, "step": 10410 }, { "epoch": 1.2015459160129212, "grad_norm": 0.4005972445011139, "learning_rate": 0.0001498487933705908, "loss": 0.9926, "step": 10415 }, { "epoch": 1.2021227503461005, "grad_norm": 0.38163304328918457, "learning_rate": 0.00014979059652694501, "loss": 0.9485, "step": 10420 }, { "epoch": 1.20269958467928, "grad_norm": 0.37857162952423096, "learning_rate": 0.0001497323772525385, "loss": 0.9081, "step": 10425 }, { "epoch": 1.2032764190124596, "grad_norm": 0.41365325450897217, "learning_rate": 0.00014967413557359923, "loss": 0.9242, "step": 10430 }, { "epoch": 1.2038532533456392, "grad_norm": 0.4273558557033539, "learning_rate": 0.00014961587151636515, "loss": 0.9069, "step": 10435 }, { "epoch": 1.2044300876788188, "grad_norm": 0.402115136384964, "learning_rate": 0.00014955758510708434, "loss": 0.9709, "step": 10440 }, { "epoch": 1.205006922011998, "grad_norm": 0.4027498960494995, "learning_rate": 0.00014949927637201494, "loss": 0.9186, "step": 10445 }, { "epoch": 1.2055837563451777, "grad_norm": 0.39657580852508545, "learning_rate": 0.00014944094533742513, "loss": 0.9311, "step": 10450 }, { "epoch": 1.2061605906783572, "grad_norm": 0.3923443555831909, "learning_rate": 0.00014938259202959317, "loss": 0.901, "step": 10455 }, { "epoch": 1.2067374250115366, "grad_norm": 0.3821752965450287, "learning_rate": 0.00014932421647480737, "loss": 0.9169, "step": 10460 }, { "epoch": 1.2073142593447161, "grad_norm": 0.3927006721496582, "learning_rate": 0.00014926581869936597, "loss": 0.9571, "step": 10465 }, { "epoch": 1.2078910936778957, "grad_norm": 0.4481658935546875, "learning_rate": 0.00014920739872957732, "loss": 0.9038, "step": 10470 }, { "epoch": 1.2084679280110753, "grad_norm": 0.4267770051956177, "learning_rate": 0.00014914895659175973, "loss": 0.9711, "step": 10475 }, { "epoch": 1.2090447623442548, "grad_norm": 0.36944663524627686, "learning_rate": 0.0001490904923122415, "loss": 0.9154, "step": 10480 }, { "epoch": 1.2096215966774342, "grad_norm": 0.36720767617225647, "learning_rate": 0.00014903200591736087, "loss": 0.9095, "step": 10485 }, { "epoch": 1.2101984310106138, "grad_norm": 0.3741086721420288, "learning_rate": 0.00014897349743346613, "loss": 0.9757, "step": 10490 }, { "epoch": 1.2107752653437933, "grad_norm": 0.35875627398490906, "learning_rate": 0.00014891496688691539, "loss": 0.9721, "step": 10495 }, { "epoch": 1.2113520996769727, "grad_norm": 0.4409966468811035, "learning_rate": 0.00014885641430407686, "loss": 0.953, "step": 10500 }, { "epoch": 1.2119289340101522, "grad_norm": 0.40664994716644287, "learning_rate": 0.0001487978397113285, "loss": 0.9727, "step": 10505 }, { "epoch": 1.2125057683433318, "grad_norm": 0.4651268422603607, "learning_rate": 0.0001487392431350584, "loss": 0.9417, "step": 10510 }, { "epoch": 1.2130826026765114, "grad_norm": 0.38706329464912415, "learning_rate": 0.0001486806246016643, "loss": 0.9368, "step": 10515 }, { "epoch": 1.213659437009691, "grad_norm": 0.40690895915031433, "learning_rate": 0.00014862198413755401, "loss": 0.9325, "step": 10520 }, { "epoch": 1.2142362713428703, "grad_norm": 0.3677927851676941, "learning_rate": 0.00014856332176914526, "loss": 0.9543, "step": 10525 }, { "epoch": 1.2148131056760498, "grad_norm": 0.3821386396884918, "learning_rate": 0.00014850463752286543, "loss": 0.9702, "step": 10530 }, { "epoch": 1.2153899400092294, "grad_norm": 0.4116564691066742, "learning_rate": 0.00014844593142515196, "loss": 0.9497, "step": 10535 }, { "epoch": 1.2159667743424087, "grad_norm": 0.4448542892932892, "learning_rate": 0.00014838720350245205, "loss": 0.9941, "step": 10540 }, { "epoch": 1.2165436086755883, "grad_norm": 0.37002575397491455, "learning_rate": 0.00014832845378122276, "loss": 0.9204, "step": 10545 }, { "epoch": 1.2171204430087679, "grad_norm": 0.394660085439682, "learning_rate": 0.0001482696822879309, "loss": 0.9252, "step": 10550 }, { "epoch": 1.2176972773419474, "grad_norm": 0.38183853030204773, "learning_rate": 0.00014821088904905315, "loss": 0.9943, "step": 10555 }, { "epoch": 1.218274111675127, "grad_norm": 0.36868584156036377, "learning_rate": 0.00014815207409107608, "loss": 0.9442, "step": 10560 }, { "epoch": 1.2188509460083063, "grad_norm": 0.39112672209739685, "learning_rate": 0.0001480932374404958, "loss": 0.9472, "step": 10565 }, { "epoch": 1.219427780341486, "grad_norm": 0.3754420578479767, "learning_rate": 0.00014803437912381845, "loss": 0.9284, "step": 10570 }, { "epoch": 1.2200046146746655, "grad_norm": 0.43286576867103577, "learning_rate": 0.00014797549916755975, "loss": 0.893, "step": 10575 }, { "epoch": 1.220581449007845, "grad_norm": 0.40418797731399536, "learning_rate": 0.00014791659759824527, "loss": 0.9163, "step": 10580 }, { "epoch": 1.2211582833410244, "grad_norm": 0.35531437397003174, "learning_rate": 0.00014785767444241025, "loss": 0.895, "step": 10585 }, { "epoch": 1.221735117674204, "grad_norm": 0.46868211030960083, "learning_rate": 0.0001477987297265997, "loss": 0.9587, "step": 10590 }, { "epoch": 1.2223119520073835, "grad_norm": 0.4548766314983368, "learning_rate": 0.00014773976347736835, "loss": 0.9508, "step": 10595 }, { "epoch": 1.222888786340563, "grad_norm": 0.3882719576358795, "learning_rate": 0.00014768077572128058, "loss": 0.9317, "step": 10600 }, { "epoch": 1.2234656206737424, "grad_norm": 0.3923512101173401, "learning_rate": 0.0001476217664849105, "loss": 0.9394, "step": 10605 }, { "epoch": 1.224042455006922, "grad_norm": 0.37607160210609436, "learning_rate": 0.00014756273579484187, "loss": 0.9321, "step": 10610 }, { "epoch": 1.2246192893401016, "grad_norm": 0.3971920311450958, "learning_rate": 0.0001475036836776682, "loss": 0.9607, "step": 10615 }, { "epoch": 1.2251961236732811, "grad_norm": 0.3635753095149994, "learning_rate": 0.00014744461015999248, "loss": 0.9038, "step": 10620 }, { "epoch": 1.2257729580064605, "grad_norm": 0.37379416823387146, "learning_rate": 0.00014738551526842755, "loss": 0.9371, "step": 10625 }, { "epoch": 1.22634979233964, "grad_norm": 0.4032329022884369, "learning_rate": 0.00014732639902959567, "loss": 0.9379, "step": 10630 }, { "epoch": 1.2269266266728196, "grad_norm": 0.37104472517967224, "learning_rate": 0.00014726726147012889, "loss": 0.9528, "step": 10635 }, { "epoch": 1.2275034610059992, "grad_norm": 0.3992524743080139, "learning_rate": 0.0001472081026166688, "loss": 0.9536, "step": 10640 }, { "epoch": 1.2280802953391785, "grad_norm": 0.3816789388656616, "learning_rate": 0.0001471489224958665, "loss": 0.9038, "step": 10645 }, { "epoch": 1.228657129672358, "grad_norm": 0.4063278138637543, "learning_rate": 0.00014708972113438285, "loss": 0.9336, "step": 10650 }, { "epoch": 1.2292339640055376, "grad_norm": 0.3935660421848297, "learning_rate": 0.00014703049855888808, "loss": 0.92, "step": 10655 }, { "epoch": 1.2298107983387172, "grad_norm": 0.3983980119228363, "learning_rate": 0.0001469712547960622, "loss": 0.9693, "step": 10660 }, { "epoch": 1.2303876326718965, "grad_norm": 0.433312326669693, "learning_rate": 0.00014691198987259454, "loss": 0.9097, "step": 10665 }, { "epoch": 1.2309644670050761, "grad_norm": 0.4001083970069885, "learning_rate": 0.00014685270381518408, "loss": 0.8826, "step": 10670 }, { "epoch": 1.2315413013382557, "grad_norm": 0.41054749488830566, "learning_rate": 0.00014679339665053933, "loss": 0.8979, "step": 10675 }, { "epoch": 1.2321181356714352, "grad_norm": 0.3685263991355896, "learning_rate": 0.00014673406840537824, "loss": 0.9549, "step": 10680 }, { "epoch": 1.2326949700046146, "grad_norm": 0.44796431064605713, "learning_rate": 0.0001466747191064284, "loss": 0.9824, "step": 10685 }, { "epoch": 1.2332718043377942, "grad_norm": 0.3935278356075287, "learning_rate": 0.00014661534878042664, "loss": 0.9385, "step": 10690 }, { "epoch": 1.2338486386709737, "grad_norm": 0.4125763773918152, "learning_rate": 0.00014655595745411955, "loss": 0.9568, "step": 10695 }, { "epoch": 1.2344254730041533, "grad_norm": 0.39233893156051636, "learning_rate": 0.0001464965451542629, "loss": 0.9506, "step": 10700 }, { "epoch": 1.2350023073373326, "grad_norm": 0.40938007831573486, "learning_rate": 0.00014643711190762216, "loss": 0.9222, "step": 10705 }, { "epoch": 1.2355791416705122, "grad_norm": 0.3984127342700958, "learning_rate": 0.00014637765774097206, "loss": 0.907, "step": 10710 }, { "epoch": 1.2361559760036918, "grad_norm": 0.4171662926673889, "learning_rate": 0.00014631818268109688, "loss": 0.9628, "step": 10715 }, { "epoch": 1.2367328103368713, "grad_norm": 0.4212305545806885, "learning_rate": 0.00014625868675479018, "loss": 0.9216, "step": 10720 }, { "epoch": 1.2373096446700507, "grad_norm": 0.39830756187438965, "learning_rate": 0.000146199169988855, "loss": 0.9357, "step": 10725 }, { "epoch": 1.2378864790032302, "grad_norm": 0.4301896393299103, "learning_rate": 0.00014613963241010382, "loss": 0.8656, "step": 10730 }, { "epoch": 1.2384633133364098, "grad_norm": 0.3532908856868744, "learning_rate": 0.00014608007404535837, "loss": 0.9407, "step": 10735 }, { "epoch": 1.2390401476695894, "grad_norm": 0.4185565412044525, "learning_rate": 0.00014602049492144984, "loss": 0.9255, "step": 10740 }, { "epoch": 1.2396169820027687, "grad_norm": 0.36691245436668396, "learning_rate": 0.00014596089506521874, "loss": 0.931, "step": 10745 }, { "epoch": 1.2401938163359483, "grad_norm": 0.40366676449775696, "learning_rate": 0.00014590127450351493, "loss": 0.9576, "step": 10750 }, { "epoch": 1.2407706506691278, "grad_norm": 0.4176740050315857, "learning_rate": 0.00014584163326319754, "loss": 0.9434, "step": 10755 }, { "epoch": 1.2413474850023074, "grad_norm": 0.37950599193573, "learning_rate": 0.0001457819713711351, "loss": 0.8734, "step": 10760 }, { "epoch": 1.241924319335487, "grad_norm": 0.3910213112831116, "learning_rate": 0.00014572228885420543, "loss": 0.9363, "step": 10765 }, { "epoch": 1.2425011536686663, "grad_norm": 0.42837202548980713, "learning_rate": 0.00014566258573929557, "loss": 0.9179, "step": 10770 }, { "epoch": 1.2430779880018459, "grad_norm": 0.37683621048927307, "learning_rate": 0.00014560286205330197, "loss": 0.9571, "step": 10775 }, { "epoch": 1.2436548223350254, "grad_norm": 0.35878995060920715, "learning_rate": 0.00014554311782313014, "loss": 0.9394, "step": 10780 }, { "epoch": 1.2442316566682048, "grad_norm": 0.3822707533836365, "learning_rate": 0.0001454833530756951, "loss": 0.9223, "step": 10785 }, { "epoch": 1.2448084910013844, "grad_norm": 0.4296896457672119, "learning_rate": 0.00014542356783792094, "loss": 0.9011, "step": 10790 }, { "epoch": 1.245385325334564, "grad_norm": 0.3938808739185333, "learning_rate": 0.00014536376213674098, "loss": 0.9506, "step": 10795 }, { "epoch": 1.2459621596677435, "grad_norm": 0.36194875836372375, "learning_rate": 0.0001453039359990979, "loss": 0.946, "step": 10800 }, { "epoch": 1.246538994000923, "grad_norm": 0.3902686536312103, "learning_rate": 0.00014524408945194338, "loss": 0.9213, "step": 10805 }, { "epoch": 1.2471158283341024, "grad_norm": 0.4296351671218872, "learning_rate": 0.00014518422252223845, "loss": 0.9559, "step": 10810 }, { "epoch": 1.247692662667282, "grad_norm": 0.3877114951610565, "learning_rate": 0.00014512433523695332, "loss": 0.9368, "step": 10815 }, { "epoch": 1.2482694970004615, "grad_norm": 0.3983345329761505, "learning_rate": 0.00014506442762306728, "loss": 0.9569, "step": 10820 }, { "epoch": 1.2488463313336409, "grad_norm": 0.37539294362068176, "learning_rate": 0.0001450044997075689, "loss": 0.9275, "step": 10825 }, { "epoch": 1.2494231656668204, "grad_norm": 0.40253835916519165, "learning_rate": 0.0001449445515174557, "loss": 0.9245, "step": 10830 }, { "epoch": 1.25, "grad_norm": 0.4149536192417145, "learning_rate": 0.00014488458307973455, "loss": 0.9281, "step": 10835 }, { "epoch": 1.2505768343331796, "grad_norm": 0.36260801553726196, "learning_rate": 0.0001448245944214213, "loss": 0.9594, "step": 10840 }, { "epoch": 1.2511536686663591, "grad_norm": 0.3983232378959656, "learning_rate": 0.000144764585569541, "loss": 0.9249, "step": 10845 }, { "epoch": 1.2517305029995385, "grad_norm": 0.3661304712295532, "learning_rate": 0.00014470455655112772, "loss": 0.9267, "step": 10850 }, { "epoch": 1.252307337332718, "grad_norm": 0.4051198959350586, "learning_rate": 0.0001446445073932247, "loss": 0.9467, "step": 10855 }, { "epoch": 1.2528841716658976, "grad_norm": 0.38413557410240173, "learning_rate": 0.00014458443812288415, "loss": 0.9106, "step": 10860 }, { "epoch": 1.253461005999077, "grad_norm": 0.3864401876926422, "learning_rate": 0.00014452434876716737, "loss": 0.9145, "step": 10865 }, { "epoch": 1.2540378403322565, "grad_norm": 0.3908703625202179, "learning_rate": 0.0001444642393531448, "loss": 0.9148, "step": 10870 }, { "epoch": 1.254614674665436, "grad_norm": 0.3725275695323944, "learning_rate": 0.00014440410990789582, "loss": 0.9505, "step": 10875 }, { "epoch": 1.2551915089986156, "grad_norm": 0.4420537054538727, "learning_rate": 0.00014434396045850885, "loss": 0.9221, "step": 10880 }, { "epoch": 1.2557683433317952, "grad_norm": 0.392156183719635, "learning_rate": 0.00014428379103208135, "loss": 0.8979, "step": 10885 }, { "epoch": 1.2563451776649746, "grad_norm": 0.40596142411231995, "learning_rate": 0.00014422360165571976, "loss": 0.9549, "step": 10890 }, { "epoch": 1.2569220119981541, "grad_norm": 0.38918864727020264, "learning_rate": 0.00014416339235653948, "loss": 0.9563, "step": 10895 }, { "epoch": 1.2574988463313337, "grad_norm": 0.37681928277015686, "learning_rate": 0.00014410316316166498, "loss": 0.925, "step": 10900 }, { "epoch": 1.258075680664513, "grad_norm": 0.3767823278903961, "learning_rate": 0.0001440429140982296, "loss": 1.014, "step": 10905 }, { "epoch": 1.2586525149976926, "grad_norm": 0.4258878231048584, "learning_rate": 0.00014398264519337566, "loss": 0.9529, "step": 10910 }, { "epoch": 1.2592293493308722, "grad_norm": 0.4223175346851349, "learning_rate": 0.00014392235647425438, "loss": 0.9176, "step": 10915 }, { "epoch": 1.2598061836640517, "grad_norm": 0.4392840564250946, "learning_rate": 0.000143862047968026, "loss": 0.9255, "step": 10920 }, { "epoch": 1.2603830179972313, "grad_norm": 0.4153304994106293, "learning_rate": 0.0001438017197018596, "loss": 0.8901, "step": 10925 }, { "epoch": 1.2609598523304106, "grad_norm": 0.4319252073764801, "learning_rate": 0.00014374137170293318, "loss": 0.9407, "step": 10930 }, { "epoch": 1.2615366866635902, "grad_norm": 0.3682355582714081, "learning_rate": 0.00014368100399843366, "loss": 0.9164, "step": 10935 }, { "epoch": 1.2621135209967698, "grad_norm": 0.3747365474700928, "learning_rate": 0.00014362061661555675, "loss": 0.9524, "step": 10940 }, { "epoch": 1.262690355329949, "grad_norm": 0.35737344622612, "learning_rate": 0.00014356020958150714, "loss": 0.9653, "step": 10945 }, { "epoch": 1.2632671896631287, "grad_norm": 0.3945746421813965, "learning_rate": 0.00014349978292349825, "loss": 0.9456, "step": 10950 }, { "epoch": 1.2638440239963082, "grad_norm": 0.40850600600242615, "learning_rate": 0.00014343933666875245, "loss": 0.9359, "step": 10955 }, { "epoch": 1.2644208583294878, "grad_norm": 0.42189982533454895, "learning_rate": 0.00014337887084450094, "loss": 0.9438, "step": 10960 }, { "epoch": 1.2649976926626674, "grad_norm": 0.3748544454574585, "learning_rate": 0.0001433183854779836, "loss": 0.9477, "step": 10965 }, { "epoch": 1.2655745269958467, "grad_norm": 0.4351139962673187, "learning_rate": 0.0001432578805964493, "loss": 0.9571, "step": 10970 }, { "epoch": 1.2661513613290263, "grad_norm": 0.36444708704948425, "learning_rate": 0.0001431973562271555, "loss": 0.9544, "step": 10975 }, { "epoch": 1.2667281956622058, "grad_norm": 0.34223493933677673, "learning_rate": 0.00014313681239736865, "loss": 0.9266, "step": 10980 }, { "epoch": 1.2673050299953852, "grad_norm": 0.36005809903144836, "learning_rate": 0.00014307624913436378, "loss": 0.9378, "step": 10985 }, { "epoch": 1.2678818643285648, "grad_norm": 0.4061218500137329, "learning_rate": 0.00014301566646542484, "loss": 0.9565, "step": 10990 }, { "epoch": 1.2684586986617443, "grad_norm": 0.40021246671676636, "learning_rate": 0.00014295506441784435, "loss": 0.9401, "step": 10995 }, { "epoch": 1.2690355329949239, "grad_norm": 0.36139312386512756, "learning_rate": 0.0001428944430189237, "loss": 0.9554, "step": 11000 }, { "epoch": 1.2696123673281035, "grad_norm": 0.3915020525455475, "learning_rate": 0.00014283380229597296, "loss": 0.931, "step": 11005 }, { "epoch": 1.270189201661283, "grad_norm": 0.3968125581741333, "learning_rate": 0.00014277314227631086, "loss": 0.8924, "step": 11010 }, { "epoch": 1.2707660359944624, "grad_norm": 0.3936527669429779, "learning_rate": 0.00014271246298726493, "loss": 0.9559, "step": 11015 }, { "epoch": 1.271342870327642, "grad_norm": 0.37673893570899963, "learning_rate": 0.00014265176445617118, "loss": 0.9228, "step": 11020 }, { "epoch": 1.2719197046608215, "grad_norm": 0.4325665533542633, "learning_rate": 0.00014259104671037452, "loss": 0.9025, "step": 11025 }, { "epoch": 1.2724965389940008, "grad_norm": 0.3954697847366333, "learning_rate": 0.0001425303097772284, "loss": 0.9346, "step": 11030 }, { "epoch": 1.2730733733271804, "grad_norm": 0.3768031597137451, "learning_rate": 0.00014246955368409488, "loss": 0.8911, "step": 11035 }, { "epoch": 1.27365020766036, "grad_norm": 0.3749455511569977, "learning_rate": 0.00014240877845834472, "loss": 0.9224, "step": 11040 }, { "epoch": 1.2742270419935395, "grad_norm": 0.41520991921424866, "learning_rate": 0.0001423479841273573, "loss": 0.964, "step": 11045 }, { "epoch": 1.274803876326719, "grad_norm": 0.3900809586048126, "learning_rate": 0.00014228717071852057, "loss": 0.9803, "step": 11050 }, { "epoch": 1.2753807106598984, "grad_norm": 0.370792955160141, "learning_rate": 0.00014222633825923108, "loss": 0.9121, "step": 11055 }, { "epoch": 1.275957544993078, "grad_norm": 0.3843114674091339, "learning_rate": 0.000142165486776894, "loss": 0.9405, "step": 11060 }, { "epoch": 1.2765343793262576, "grad_norm": 0.4016331732273102, "learning_rate": 0.00014210461629892302, "loss": 0.9385, "step": 11065 }, { "epoch": 1.277111213659437, "grad_norm": 0.3488540053367615, "learning_rate": 0.00014204372685274039, "loss": 0.9676, "step": 11070 }, { "epoch": 1.2776880479926165, "grad_norm": 0.3962653875350952, "learning_rate": 0.00014198281846577695, "loss": 0.93, "step": 11075 }, { "epoch": 1.278264882325796, "grad_norm": 0.36145344376564026, "learning_rate": 0.00014192189116547202, "loss": 0.9463, "step": 11080 }, { "epoch": 1.2788417166589756, "grad_norm": 0.40102216601371765, "learning_rate": 0.00014186094497927352, "loss": 0.8905, "step": 11085 }, { "epoch": 1.2794185509921552, "grad_norm": 0.40777820348739624, "learning_rate": 0.00014179997993463776, "loss": 0.9455, "step": 11090 }, { "epoch": 1.2799953853253345, "grad_norm": 0.36451178789138794, "learning_rate": 0.00014173899605902967, "loss": 0.8928, "step": 11095 }, { "epoch": 1.280572219658514, "grad_norm": 0.3938768804073334, "learning_rate": 0.00014167799337992258, "loss": 0.9016, "step": 11100 }, { "epoch": 1.2811490539916937, "grad_norm": 0.3848060071468353, "learning_rate": 0.0001416169719247983, "loss": 0.9278, "step": 11105 }, { "epoch": 1.281725888324873, "grad_norm": 0.38790416717529297, "learning_rate": 0.00014155593172114714, "loss": 0.918, "step": 11110 }, { "epoch": 1.2823027226580526, "grad_norm": 0.4134730100631714, "learning_rate": 0.00014149487279646781, "loss": 0.9552, "step": 11115 }, { "epoch": 1.2828795569912321, "grad_norm": 0.397656112909317, "learning_rate": 0.0001414337951782675, "loss": 0.9243, "step": 11120 }, { "epoch": 1.2834563913244117, "grad_norm": 0.39988553524017334, "learning_rate": 0.00014137269889406175, "loss": 0.951, "step": 11125 }, { "epoch": 1.2840332256575913, "grad_norm": 0.4649205505847931, "learning_rate": 0.00014131158397137462, "loss": 0.9284, "step": 11130 }, { "epoch": 1.2846100599907706, "grad_norm": 0.39595118165016174, "learning_rate": 0.00014125045043773845, "loss": 0.9351, "step": 11135 }, { "epoch": 1.2851868943239502, "grad_norm": 0.3864995837211609, "learning_rate": 0.00014118929832069405, "loss": 0.8899, "step": 11140 }, { "epoch": 1.2857637286571297, "grad_norm": 0.44582268595695496, "learning_rate": 0.00014112812764779053, "loss": 0.9924, "step": 11145 }, { "epoch": 1.286340562990309, "grad_norm": 0.38030120730400085, "learning_rate": 0.00014106693844658544, "loss": 0.9403, "step": 11150 }, { "epoch": 1.2869173973234886, "grad_norm": 0.35102468729019165, "learning_rate": 0.00014100573074464457, "loss": 0.9364, "step": 11155 }, { "epoch": 1.2874942316566682, "grad_norm": 0.43488121032714844, "learning_rate": 0.00014094450456954218, "loss": 0.9572, "step": 11160 }, { "epoch": 1.2880710659898478, "grad_norm": 0.3914535939693451, "learning_rate": 0.00014088325994886076, "loss": 0.8916, "step": 11165 }, { "epoch": 1.2886479003230273, "grad_norm": 0.39461666345596313, "learning_rate": 0.0001408219969101911, "loss": 0.9366, "step": 11170 }, { "epoch": 1.2892247346562067, "grad_norm": 0.37475186586380005, "learning_rate": 0.00014076071548113238, "loss": 0.9289, "step": 11175 }, { "epoch": 1.2898015689893862, "grad_norm": 0.3968532383441925, "learning_rate": 0.00014069941568929192, "loss": 0.9489, "step": 11180 }, { "epoch": 1.2903784033225658, "grad_norm": 0.378654807806015, "learning_rate": 0.00014063809756228546, "loss": 0.9033, "step": 11185 }, { "epoch": 1.2909552376557452, "grad_norm": 0.37253937125205994, "learning_rate": 0.0001405767611277369, "loss": 0.8878, "step": 11190 }, { "epoch": 1.2915320719889247, "grad_norm": 0.37073519825935364, "learning_rate": 0.00014051540641327846, "loss": 0.9219, "step": 11195 }, { "epoch": 1.2921089063221043, "grad_norm": 0.47666656970977783, "learning_rate": 0.00014045403344655052, "loss": 0.9549, "step": 11200 }, { "epoch": 1.2926857406552839, "grad_norm": 0.4085308909416199, "learning_rate": 0.00014039264225520175, "loss": 0.9306, "step": 11205 }, { "epoch": 1.2932625749884634, "grad_norm": 0.3675244450569153, "learning_rate": 0.00014033123286688902, "loss": 0.8491, "step": 11210 }, { "epoch": 1.2938394093216428, "grad_norm": 0.38852930068969727, "learning_rate": 0.0001402698053092773, "loss": 0.9174, "step": 11215 }, { "epoch": 1.2944162436548223, "grad_norm": 0.3479853570461273, "learning_rate": 0.0001402083596100399, "loss": 0.8958, "step": 11220 }, { "epoch": 1.294993077988002, "grad_norm": 0.3830512762069702, "learning_rate": 0.00014014689579685817, "loss": 0.9252, "step": 11225 }, { "epoch": 1.2955699123211812, "grad_norm": 0.3972710967063904, "learning_rate": 0.00014008541389742173, "loss": 0.9099, "step": 11230 }, { "epoch": 1.2961467466543608, "grad_norm": 0.41176527738571167, "learning_rate": 0.00014002391393942826, "loss": 0.9279, "step": 11235 }, { "epoch": 1.2967235809875404, "grad_norm": 0.3941210210323334, "learning_rate": 0.0001399623959505836, "loss": 0.9368, "step": 11240 }, { "epoch": 1.29730041532072, "grad_norm": 0.39330047369003296, "learning_rate": 0.00013990085995860182, "loss": 0.9322, "step": 11245 }, { "epoch": 1.2978772496538995, "grad_norm": 0.3845579922199249, "learning_rate": 0.00013983930599120487, "loss": 0.94, "step": 11250 }, { "epoch": 1.2984540839870788, "grad_norm": 0.4267570972442627, "learning_rate": 0.00013977773407612305, "loss": 0.917, "step": 11255 }, { "epoch": 1.2990309183202584, "grad_norm": 0.43357259035110474, "learning_rate": 0.0001397161442410945, "loss": 0.9466, "step": 11260 }, { "epoch": 1.299607752653438, "grad_norm": 0.35696274042129517, "learning_rate": 0.0001396545365138657, "loss": 0.8862, "step": 11265 }, { "epoch": 1.3001845869866173, "grad_norm": 0.4099060297012329, "learning_rate": 0.00013959291092219096, "loss": 0.9313, "step": 11270 }, { "epoch": 1.3007614213197969, "grad_norm": 0.40767526626586914, "learning_rate": 0.00013953126749383272, "loss": 0.9765, "step": 11275 }, { "epoch": 1.3013382556529764, "grad_norm": 0.4065137803554535, "learning_rate": 0.00013946960625656153, "loss": 0.9726, "step": 11280 }, { "epoch": 1.301915089986156, "grad_norm": 0.3992060720920563, "learning_rate": 0.00013940792723815586, "loss": 0.8959, "step": 11285 }, { "epoch": 1.3024919243193356, "grad_norm": 0.3845786452293396, "learning_rate": 0.00013934623046640222, "loss": 0.9131, "step": 11290 }, { "epoch": 1.303068758652515, "grad_norm": 0.4595773220062256, "learning_rate": 0.00013928451596909516, "loss": 0.9231, "step": 11295 }, { "epoch": 1.3036455929856945, "grad_norm": 0.4528485834598541, "learning_rate": 0.00013922278377403714, "loss": 0.922, "step": 11300 }, { "epoch": 1.304222427318874, "grad_norm": 0.4147089421749115, "learning_rate": 0.00013916103390903864, "loss": 1.0079, "step": 11305 }, { "epoch": 1.3047992616520534, "grad_norm": 0.39060330390930176, "learning_rate": 0.00013909926640191813, "loss": 0.9211, "step": 11310 }, { "epoch": 1.305376095985233, "grad_norm": 0.428657203912735, "learning_rate": 0.00013903748128050197, "loss": 0.9599, "step": 11315 }, { "epoch": 1.3059529303184125, "grad_norm": 0.4070096015930176, "learning_rate": 0.00013897567857262447, "loss": 0.9656, "step": 11320 }, { "epoch": 1.306529764651592, "grad_norm": 0.4431822896003723, "learning_rate": 0.0001389138583061279, "loss": 0.9206, "step": 11325 }, { "epoch": 1.3071065989847717, "grad_norm": 0.3884516656398773, "learning_rate": 0.00013885202050886237, "loss": 0.948, "step": 11330 }, { "epoch": 1.307683433317951, "grad_norm": 0.3520592749118805, "learning_rate": 0.00013879016520868594, "loss": 0.9439, "step": 11335 }, { "epoch": 1.3082602676511306, "grad_norm": 0.38204270601272583, "learning_rate": 0.00013872829243346453, "loss": 0.9851, "step": 11340 }, { "epoch": 1.3088371019843101, "grad_norm": 0.3736591339111328, "learning_rate": 0.000138666402211072, "loss": 0.9526, "step": 11345 }, { "epoch": 1.3094139363174895, "grad_norm": 0.3967388868331909, "learning_rate": 0.00013860449456939, "loss": 0.934, "step": 11350 }, { "epoch": 1.309990770650669, "grad_norm": 0.38638293743133545, "learning_rate": 0.00013854256953630797, "loss": 0.9615, "step": 11355 }, { "epoch": 1.3105676049838486, "grad_norm": 0.37511980533599854, "learning_rate": 0.0001384806271397233, "loss": 0.9241, "step": 11360 }, { "epoch": 1.3111444393170282, "grad_norm": 0.4248884320259094, "learning_rate": 0.00013841866740754125, "loss": 0.9263, "step": 11365 }, { "epoch": 1.3117212736502077, "grad_norm": 0.3675033450126648, "learning_rate": 0.00013835669036767466, "loss": 0.9261, "step": 11370 }, { "epoch": 1.3122981079833873, "grad_norm": 0.4006101191043854, "learning_rate": 0.00013829469604804438, "loss": 0.9089, "step": 11375 }, { "epoch": 1.3128749423165667, "grad_norm": 0.40753471851348877, "learning_rate": 0.00013823268447657897, "loss": 0.9243, "step": 11380 }, { "epoch": 1.3134517766497462, "grad_norm": 0.38653838634490967, "learning_rate": 0.00013817065568121477, "loss": 0.9265, "step": 11385 }, { "epoch": 1.3140286109829258, "grad_norm": 0.41960474848747253, "learning_rate": 0.00013810860968989586, "loss": 0.9732, "step": 11390 }, { "epoch": 1.3146054453161051, "grad_norm": 0.3811507821083069, "learning_rate": 0.00013804654653057404, "loss": 0.9718, "step": 11395 }, { "epoch": 1.3151822796492847, "grad_norm": 0.3971553444862366, "learning_rate": 0.00013798446623120893, "loss": 0.9336, "step": 11400 }, { "epoch": 1.3157591139824643, "grad_norm": 0.4036201536655426, "learning_rate": 0.00013792236881976784, "loss": 0.9603, "step": 11405 }, { "epoch": 1.3163359483156438, "grad_norm": 0.382589727640152, "learning_rate": 0.00013786025432422573, "loss": 0.9482, "step": 11410 }, { "epoch": 1.3169127826488234, "grad_norm": 0.4491170644760132, "learning_rate": 0.00013779812277256537, "loss": 0.9335, "step": 11415 }, { "epoch": 1.3174896169820027, "grad_norm": 0.3773971498012543, "learning_rate": 0.00013773597419277703, "loss": 0.9342, "step": 11420 }, { "epoch": 1.3180664513151823, "grad_norm": 0.37888625264167786, "learning_rate": 0.0001376738086128589, "loss": 0.8975, "step": 11425 }, { "epoch": 1.3186432856483619, "grad_norm": 0.38423123955726624, "learning_rate": 0.0001376116260608166, "loss": 0.905, "step": 11430 }, { "epoch": 1.3192201199815412, "grad_norm": 0.3804115355014801, "learning_rate": 0.0001375494265646635, "loss": 0.934, "step": 11435 }, { "epoch": 1.3197969543147208, "grad_norm": 0.41067662835121155, "learning_rate": 0.00013748721015242066, "loss": 0.9475, "step": 11440 }, { "epoch": 1.3203737886479003, "grad_norm": 0.4139721095561981, "learning_rate": 0.0001374249768521166, "loss": 0.928, "step": 11445 }, { "epoch": 1.32095062298108, "grad_norm": 0.38291290402412415, "learning_rate": 0.0001373627266917876, "loss": 0.9601, "step": 11450 }, { "epoch": 1.3215274573142595, "grad_norm": 0.37222400307655334, "learning_rate": 0.00013730045969947752, "loss": 0.943, "step": 11455 }, { "epoch": 1.3221042916474388, "grad_norm": 0.38442909717559814, "learning_rate": 0.0001372381759032377, "loss": 0.9419, "step": 11460 }, { "epoch": 1.3226811259806184, "grad_norm": 0.3971159756183624, "learning_rate": 0.00013717587533112707, "loss": 0.9123, "step": 11465 }, { "epoch": 1.323257960313798, "grad_norm": 0.4297926127910614, "learning_rate": 0.00013711355801121226, "loss": 0.9524, "step": 11470 }, { "epoch": 1.3238347946469773, "grad_norm": 0.34397372603416443, "learning_rate": 0.00013705122397156727, "loss": 0.8974, "step": 11475 }, { "epoch": 1.3244116289801569, "grad_norm": 0.4033229649066925, "learning_rate": 0.00013698887324027373, "loss": 0.9497, "step": 11480 }, { "epoch": 1.3249884633133364, "grad_norm": 0.4018646776676178, "learning_rate": 0.0001369265058454208, "loss": 0.968, "step": 11485 }, { "epoch": 1.325565297646516, "grad_norm": 0.40979576110839844, "learning_rate": 0.00013686412181510504, "loss": 0.8928, "step": 11490 }, { "epoch": 1.3261421319796955, "grad_norm": 0.4052782952785492, "learning_rate": 0.00013680172117743066, "loss": 0.945, "step": 11495 }, { "epoch": 1.326718966312875, "grad_norm": 0.42259207367897034, "learning_rate": 0.0001367393039605092, "loss": 0.9269, "step": 11500 }, { "epoch": 1.3272958006460545, "grad_norm": 0.37201040983200073, "learning_rate": 0.0001366768701924598, "loss": 0.9516, "step": 11505 }, { "epoch": 1.327872634979234, "grad_norm": 0.38049280643463135, "learning_rate": 0.00013661441990140894, "loss": 0.9897, "step": 11510 }, { "epoch": 1.3284494693124134, "grad_norm": 0.3636796176433563, "learning_rate": 0.00013655195311549059, "loss": 1.0134, "step": 11515 }, { "epoch": 1.329026303645593, "grad_norm": 0.40373724699020386, "learning_rate": 0.0001364894698628462, "loss": 0.9467, "step": 11520 }, { "epoch": 1.3296031379787725, "grad_norm": 0.43449637293815613, "learning_rate": 0.0001364269701716246, "loss": 0.9938, "step": 11525 }, { "epoch": 1.330179972311952, "grad_norm": 0.47336867451667786, "learning_rate": 0.00013636445406998198, "loss": 0.9202, "step": 11530 }, { "epoch": 1.3307568066451316, "grad_norm": 0.3779871165752411, "learning_rate": 0.00013630192158608202, "loss": 0.9789, "step": 11535 }, { "epoch": 1.331333640978311, "grad_norm": 0.3905300498008728, "learning_rate": 0.00013623937274809568, "loss": 0.8662, "step": 11540 }, { "epoch": 1.3319104753114905, "grad_norm": 0.4032379388809204, "learning_rate": 0.00013617680758420134, "loss": 0.9047, "step": 11545 }, { "epoch": 1.33248730964467, "grad_norm": 0.3876950740814209, "learning_rate": 0.00013611422612258477, "loss": 0.9225, "step": 11550 }, { "epoch": 1.3330641439778494, "grad_norm": 0.3889716565608978, "learning_rate": 0.000136051628391439, "loss": 0.9556, "step": 11555 }, { "epoch": 1.333640978311029, "grad_norm": 0.38099825382232666, "learning_rate": 0.0001359890144189644, "loss": 0.9339, "step": 11560 }, { "epoch": 1.3342178126442086, "grad_norm": 0.44365194439888, "learning_rate": 0.00013592638423336875, "loss": 0.8879, "step": 11565 }, { "epoch": 1.3347946469773881, "grad_norm": 0.39158475399017334, "learning_rate": 0.00013586373786286706, "loss": 1.0019, "step": 11570 }, { "epoch": 1.3353714813105677, "grad_norm": 0.3753363788127899, "learning_rate": 0.00013580107533568163, "loss": 0.9233, "step": 11575 }, { "epoch": 1.335948315643747, "grad_norm": 0.4002339541912079, "learning_rate": 0.00013573839668004202, "loss": 0.9503, "step": 11580 }, { "epoch": 1.3365251499769266, "grad_norm": 0.38539615273475647, "learning_rate": 0.00013567570192418512, "loss": 0.9241, "step": 11585 }, { "epoch": 1.3371019843101062, "grad_norm": 0.40463823080062866, "learning_rate": 0.00013561299109635507, "loss": 0.9342, "step": 11590 }, { "epoch": 1.3376788186432855, "grad_norm": 0.39062389731407166, "learning_rate": 0.00013555026422480313, "loss": 0.922, "step": 11595 }, { "epoch": 1.338255652976465, "grad_norm": 0.3820863366127014, "learning_rate": 0.00013548752133778796, "loss": 0.9366, "step": 11600 }, { "epoch": 1.3388324873096447, "grad_norm": 0.3850560784339905, "learning_rate": 0.0001354247624635753, "loss": 0.9345, "step": 11605 }, { "epoch": 1.3394093216428242, "grad_norm": 0.40325823426246643, "learning_rate": 0.00013536198763043823, "loss": 0.8968, "step": 11610 }, { "epoch": 1.3399861559760038, "grad_norm": 0.4057595431804657, "learning_rate": 0.00013529919686665679, "loss": 0.9466, "step": 11615 }, { "epoch": 1.3405629903091831, "grad_norm": 0.3949381411075592, "learning_rate": 0.0001352363902005185, "loss": 0.9278, "step": 11620 }, { "epoch": 1.3411398246423627, "grad_norm": 0.4039568603038788, "learning_rate": 0.00013517356766031777, "loss": 0.9706, "step": 11625 }, { "epoch": 1.3417166589755423, "grad_norm": 0.3839099407196045, "learning_rate": 0.00013511072927435632, "loss": 0.9001, "step": 11630 }, { "epoch": 1.3422934933087216, "grad_norm": 0.41112446784973145, "learning_rate": 0.00013504787507094296, "loss": 0.9224, "step": 11635 }, { "epoch": 1.3428703276419012, "grad_norm": 0.40775299072265625, "learning_rate": 0.00013498500507839363, "loss": 0.894, "step": 11640 }, { "epoch": 1.3434471619750807, "grad_norm": 0.39060235023498535, "learning_rate": 0.0001349221193250314, "loss": 0.9165, "step": 11645 }, { "epoch": 1.3440239963082603, "grad_norm": 0.370009183883667, "learning_rate": 0.0001348592178391864, "loss": 0.9315, "step": 11650 }, { "epoch": 1.3446008306414399, "grad_norm": 0.5149419903755188, "learning_rate": 0.00013479630064919593, "loss": 0.9746, "step": 11655 }, { "epoch": 1.3451776649746192, "grad_norm": 0.3872247338294983, "learning_rate": 0.0001347333677834042, "loss": 0.9233, "step": 11660 }, { "epoch": 1.3457544993077988, "grad_norm": 0.4263676404953003, "learning_rate": 0.0001346704192701627, "loss": 0.9739, "step": 11665 }, { "epoch": 1.3463313336409783, "grad_norm": 0.4168122112751007, "learning_rate": 0.00013460745513782976, "loss": 0.946, "step": 11670 }, { "epoch": 1.3469081679741577, "grad_norm": 0.48353296518325806, "learning_rate": 0.0001345444754147709, "loss": 0.9424, "step": 11675 }, { "epoch": 1.3474850023073373, "grad_norm": 0.38045409321784973, "learning_rate": 0.00013448148012935865, "loss": 0.9149, "step": 11680 }, { "epoch": 1.3480618366405168, "grad_norm": 0.4222165644168854, "learning_rate": 0.0001344184693099724, "loss": 0.9751, "step": 11685 }, { "epoch": 1.3486386709736964, "grad_norm": 0.374525785446167, "learning_rate": 0.00013435544298499874, "loss": 0.9155, "step": 11690 }, { "epoch": 1.349215505306876, "grad_norm": 0.41011539101600647, "learning_rate": 0.0001342924011828311, "loss": 0.9135, "step": 11695 }, { "epoch": 1.3497923396400553, "grad_norm": 0.40688735246658325, "learning_rate": 0.00013422934393186994, "loss": 0.9194, "step": 11700 }, { "epoch": 1.3503691739732349, "grad_norm": 0.38346344232559204, "learning_rate": 0.0001341662712605227, "loss": 0.9104, "step": 11705 }, { "epoch": 1.3509460083064144, "grad_norm": 0.39127588272094727, "learning_rate": 0.00013410318319720372, "loss": 0.8961, "step": 11710 }, { "epoch": 1.351522842639594, "grad_norm": 0.36714041233062744, "learning_rate": 0.0001340400797703343, "loss": 0.8935, "step": 11715 }, { "epoch": 1.3520996769727733, "grad_norm": 0.4213907718658447, "learning_rate": 0.00013397696100834265, "loss": 0.9102, "step": 11720 }, { "epoch": 1.352676511305953, "grad_norm": 0.3632507622241974, "learning_rate": 0.00013391382693966395, "loss": 0.896, "step": 11725 }, { "epoch": 1.3532533456391325, "grad_norm": 0.39109885692596436, "learning_rate": 0.00013385067759274014, "loss": 0.9276, "step": 11730 }, { "epoch": 1.353830179972312, "grad_norm": 0.3651511073112488, "learning_rate": 0.00013378751299602016, "loss": 0.9459, "step": 11735 }, { "epoch": 1.3544070143054916, "grad_norm": 0.43356800079345703, "learning_rate": 0.00013372433317795977, "loss": 0.9392, "step": 11740 }, { "epoch": 1.354983848638671, "grad_norm": 0.39868101477622986, "learning_rate": 0.00013366113816702164, "loss": 0.9124, "step": 11745 }, { "epoch": 1.3555606829718505, "grad_norm": 0.4322625398635864, "learning_rate": 0.0001335979279916752, "loss": 0.9534, "step": 11750 }, { "epoch": 1.35613751730503, "grad_norm": 0.4016193449497223, "learning_rate": 0.0001335347026803968, "loss": 0.9087, "step": 11755 }, { "epoch": 1.3567143516382094, "grad_norm": 0.399722158908844, "learning_rate": 0.0001334714622616695, "loss": 0.9338, "step": 11760 }, { "epoch": 1.357291185971389, "grad_norm": 0.3979092538356781, "learning_rate": 0.0001334082067639833, "loss": 0.905, "step": 11765 }, { "epoch": 1.3578680203045685, "grad_norm": 0.3598368465900421, "learning_rate": 0.0001333449362158349, "loss": 0.8926, "step": 11770 }, { "epoch": 1.3584448546377481, "grad_norm": 0.39615514874458313, "learning_rate": 0.0001332816506457278, "loss": 0.9034, "step": 11775 }, { "epoch": 1.3590216889709277, "grad_norm": 0.41682448983192444, "learning_rate": 0.0001332183500821723, "loss": 0.9388, "step": 11780 }, { "epoch": 1.359598523304107, "grad_norm": 0.3829791843891144, "learning_rate": 0.00013315503455368536, "loss": 0.9496, "step": 11785 }, { "epoch": 1.3601753576372866, "grad_norm": 0.3748948872089386, "learning_rate": 0.0001330917040887908, "loss": 0.9622, "step": 11790 }, { "epoch": 1.3607521919704662, "grad_norm": 0.3581671416759491, "learning_rate": 0.00013302835871601914, "loss": 0.9801, "step": 11795 }, { "epoch": 1.3613290263036455, "grad_norm": 0.37090566754341125, "learning_rate": 0.00013296499846390756, "loss": 0.9408, "step": 11800 }, { "epoch": 1.361905860636825, "grad_norm": 0.4028850197792053, "learning_rate": 0.00013290162336099996, "loss": 0.9306, "step": 11805 }, { "epoch": 1.3624826949700046, "grad_norm": 0.38192489743232727, "learning_rate": 0.000132838233435847, "loss": 0.9256, "step": 11810 }, { "epoch": 1.3630595293031842, "grad_norm": 0.45534375309944153, "learning_rate": 0.00013277482871700588, "loss": 0.936, "step": 11815 }, { "epoch": 1.3636363636363638, "grad_norm": 0.44784387946128845, "learning_rate": 0.00013271140923304064, "loss": 0.9184, "step": 11820 }, { "epoch": 1.364213197969543, "grad_norm": 0.4402177035808563, "learning_rate": 0.00013264797501252184, "loss": 0.9465, "step": 11825 }, { "epoch": 1.3647900323027227, "grad_norm": 0.4333953261375427, "learning_rate": 0.00013258452608402673, "loss": 0.9135, "step": 11830 }, { "epoch": 1.3653668666359022, "grad_norm": 0.4207151234149933, "learning_rate": 0.00013252106247613914, "loss": 0.9334, "step": 11835 }, { "epoch": 1.3659437009690816, "grad_norm": 0.36296144127845764, "learning_rate": 0.0001324575842174496, "loss": 0.9816, "step": 11840 }, { "epoch": 1.3665205353022611, "grad_norm": 0.3740653395652771, "learning_rate": 0.00013239409133655516, "loss": 0.9217, "step": 11845 }, { "epoch": 1.3670973696354407, "grad_norm": 0.4140547513961792, "learning_rate": 0.00013233058386205948, "loss": 0.975, "step": 11850 }, { "epoch": 1.3676742039686203, "grad_norm": 0.3837771713733673, "learning_rate": 0.00013226706182257284, "loss": 0.9681, "step": 11855 }, { "epoch": 1.3682510383017998, "grad_norm": 0.38130462169647217, "learning_rate": 0.000132203525246712, "loss": 0.9067, "step": 11860 }, { "epoch": 1.3688278726349792, "grad_norm": 0.3723084628582001, "learning_rate": 0.00013213997416310034, "loss": 0.8401, "step": 11865 }, { "epoch": 1.3694047069681587, "grad_norm": 0.366595059633255, "learning_rate": 0.00013207640860036775, "loss": 0.9539, "step": 11870 }, { "epoch": 1.3699815413013383, "grad_norm": 0.39958158135414124, "learning_rate": 0.0001320128285871506, "loss": 0.9879, "step": 11875 }, { "epoch": 1.3705583756345177, "grad_norm": 0.3819301426410675, "learning_rate": 0.00013194923415209183, "loss": 0.9204, "step": 11880 }, { "epoch": 1.3711352099676972, "grad_norm": 0.34480926394462585, "learning_rate": 0.00013188562532384087, "loss": 0.8921, "step": 11885 }, { "epoch": 1.3717120443008768, "grad_norm": 0.3757553696632385, "learning_rate": 0.0001318220021310536, "loss": 0.974, "step": 11890 }, { "epoch": 1.3722888786340564, "grad_norm": 0.36825573444366455, "learning_rate": 0.00013175836460239243, "loss": 0.8998, "step": 11895 }, { "epoch": 1.372865712967236, "grad_norm": 0.38179588317871094, "learning_rate": 0.00013169471276652613, "loss": 0.9382, "step": 11900 }, { "epoch": 1.3734425473004153, "grad_norm": 0.40566423535346985, "learning_rate": 0.00013163104665213008, "loss": 0.9505, "step": 11905 }, { "epoch": 1.3740193816335948, "grad_norm": 0.4374394714832306, "learning_rate": 0.00013156736628788584, "loss": 0.9523, "step": 11910 }, { "epoch": 1.3745962159667744, "grad_norm": 0.39277151226997375, "learning_rate": 0.00013150367170248169, "loss": 0.9238, "step": 11915 }, { "epoch": 1.3751730502999537, "grad_norm": 0.39160189032554626, "learning_rate": 0.00013143996292461202, "loss": 0.8842, "step": 11920 }, { "epoch": 1.3757498846331333, "grad_norm": 0.36961859464645386, "learning_rate": 0.00013137623998297785, "loss": 0.9399, "step": 11925 }, { "epoch": 1.3763267189663129, "grad_norm": 0.3880990147590637, "learning_rate": 0.0001313125029062865, "loss": 0.8803, "step": 11930 }, { "epoch": 1.3769035532994924, "grad_norm": 0.41162627935409546, "learning_rate": 0.00013124875172325159, "loss": 0.968, "step": 11935 }, { "epoch": 1.377480387632672, "grad_norm": 0.376601904630661, "learning_rate": 0.00013118498646259323, "loss": 0.9859, "step": 11940 }, { "epoch": 1.3780572219658513, "grad_norm": 0.4360121488571167, "learning_rate": 0.0001311212071530377, "loss": 0.9316, "step": 11945 }, { "epoch": 1.378634056299031, "grad_norm": 0.4116380214691162, "learning_rate": 0.00013105741382331775, "loss": 0.9582, "step": 11950 }, { "epoch": 1.3792108906322105, "grad_norm": 0.39656034111976624, "learning_rate": 0.0001309936065021724, "loss": 0.9395, "step": 11955 }, { "epoch": 1.3797877249653898, "grad_norm": 0.3742821514606476, "learning_rate": 0.00013092978521834695, "loss": 0.9787, "step": 11960 }, { "epoch": 1.3803645592985694, "grad_norm": 0.3952317535877228, "learning_rate": 0.00013086595000059306, "loss": 0.937, "step": 11965 }, { "epoch": 1.380941393631749, "grad_norm": 0.43107977509498596, "learning_rate": 0.0001308021008776686, "loss": 0.9629, "step": 11970 }, { "epoch": 1.3815182279649285, "grad_norm": 0.3768942952156067, "learning_rate": 0.00013073823787833767, "loss": 0.9404, "step": 11975 }, { "epoch": 1.382095062298108, "grad_norm": 0.44299280643463135, "learning_rate": 0.00013067436103137074, "loss": 0.9767, "step": 11980 }, { "epoch": 1.3826718966312874, "grad_norm": 0.4110683798789978, "learning_rate": 0.00013061047036554444, "loss": 0.9309, "step": 11985 }, { "epoch": 1.383248730964467, "grad_norm": 0.3969920873641968, "learning_rate": 0.00013054656590964165, "loss": 0.9472, "step": 11990 }, { "epoch": 1.3838255652976466, "grad_norm": 0.37778428196907043, "learning_rate": 0.00013048264769245142, "loss": 0.8792, "step": 11995 }, { "epoch": 1.384402399630826, "grad_norm": 0.39364323019981384, "learning_rate": 0.00013041871574276905, "loss": 0.9477, "step": 12000 }, { "epoch": 1.3849792339640055, "grad_norm": 0.4027245044708252, "learning_rate": 0.00013035477008939598, "loss": 0.9367, "step": 12005 }, { "epoch": 1.385556068297185, "grad_norm": 0.38217535614967346, "learning_rate": 0.00013029081076113992, "loss": 0.8929, "step": 12010 }, { "epoch": 1.3861329026303646, "grad_norm": 0.47565191984176636, "learning_rate": 0.00013022683778681458, "loss": 0.9033, "step": 12015 }, { "epoch": 1.3867097369635442, "grad_norm": 0.610550582408905, "learning_rate": 0.00013016285119524002, "loss": 0.9522, "step": 12020 }, { "epoch": 1.3872865712967235, "grad_norm": 0.3981565237045288, "learning_rate": 0.00013009885101524223, "loss": 0.9176, "step": 12025 }, { "epoch": 1.387863405629903, "grad_norm": 0.39330512285232544, "learning_rate": 0.00013003483727565344, "loss": 0.9062, "step": 12030 }, { "epoch": 1.3884402399630826, "grad_norm": 0.3685097098350525, "learning_rate": 0.00012997081000531196, "loss": 0.9752, "step": 12035 }, { "epoch": 1.389017074296262, "grad_norm": 0.4128875732421875, "learning_rate": 0.00012990676923306223, "loss": 0.9488, "step": 12040 }, { "epoch": 1.3895939086294415, "grad_norm": 0.3621658980846405, "learning_rate": 0.00012984271498775473, "loss": 0.9536, "step": 12045 }, { "epoch": 1.390170742962621, "grad_norm": 0.3921271860599518, "learning_rate": 0.000129778647298246, "loss": 0.9134, "step": 12050 }, { "epoch": 1.3907475772958007, "grad_norm": 0.3891122341156006, "learning_rate": 0.0001297145661933987, "loss": 0.9275, "step": 12055 }, { "epoch": 1.3913244116289802, "grad_norm": 0.3682466149330139, "learning_rate": 0.00012965047170208145, "loss": 0.9552, "step": 12060 }, { "epoch": 1.3919012459621598, "grad_norm": 0.49361538887023926, "learning_rate": 0.00012958636385316895, "loss": 0.9195, "step": 12065 }, { "epoch": 1.3924780802953391, "grad_norm": 0.3890072703361511, "learning_rate": 0.00012952224267554193, "loss": 0.9439, "step": 12070 }, { "epoch": 1.3930549146285187, "grad_norm": 0.4101308286190033, "learning_rate": 0.00012945810819808715, "loss": 0.8946, "step": 12075 }, { "epoch": 1.3936317489616983, "grad_norm": 0.41521626710891724, "learning_rate": 0.0001293939604496972, "loss": 0.9408, "step": 12080 }, { "epoch": 1.3942085832948776, "grad_norm": 0.4195525050163269, "learning_rate": 0.00012932979945927083, "loss": 0.9213, "step": 12085 }, { "epoch": 1.3947854176280572, "grad_norm": 0.41010963916778564, "learning_rate": 0.00012926562525571273, "loss": 0.989, "step": 12090 }, { "epoch": 1.3953622519612368, "grad_norm": 0.37773555517196655, "learning_rate": 0.00012920143786793344, "loss": 0.9573, "step": 12095 }, { "epoch": 1.3959390862944163, "grad_norm": 0.3921565115451813, "learning_rate": 0.00012913723732484953, "loss": 0.919, "step": 12100 }, { "epoch": 1.3965159206275959, "grad_norm": 0.39858192205429077, "learning_rate": 0.00012907302365538348, "loss": 0.9566, "step": 12105 }, { "epoch": 1.3970927549607752, "grad_norm": 0.4024829566478729, "learning_rate": 0.00012900879688846365, "loss": 0.9249, "step": 12110 }, { "epoch": 1.3976695892939548, "grad_norm": 0.41164299845695496, "learning_rate": 0.00012894455705302432, "loss": 0.8791, "step": 12115 }, { "epoch": 1.3982464236271344, "grad_norm": 0.3956606388092041, "learning_rate": 0.0001288803041780057, "loss": 0.9589, "step": 12120 }, { "epoch": 1.3988232579603137, "grad_norm": 0.3662050664424896, "learning_rate": 0.0001288160382923538, "loss": 0.956, "step": 12125 }, { "epoch": 1.3994000922934933, "grad_norm": 0.3646704852581024, "learning_rate": 0.00012875175942502054, "loss": 0.9914, "step": 12130 }, { "epoch": 1.3999769266266728, "grad_norm": 0.3993260860443115, "learning_rate": 0.0001286874676049637, "loss": 0.9225, "step": 12135 }, { "epoch": 1.4005537609598524, "grad_norm": 0.38320133090019226, "learning_rate": 0.00012862316286114676, "loss": 0.9126, "step": 12140 }, { "epoch": 1.401130595293032, "grad_norm": 0.4064159095287323, "learning_rate": 0.00012855884522253928, "loss": 0.9135, "step": 12145 }, { "epoch": 1.4017074296262113, "grad_norm": 0.42876338958740234, "learning_rate": 0.00012849451471811643, "loss": 0.9421, "step": 12150 }, { "epoch": 1.4022842639593909, "grad_norm": 0.3715895414352417, "learning_rate": 0.0001284301713768592, "loss": 0.9415, "step": 12155 }, { "epoch": 1.4028610982925704, "grad_norm": 0.37385094165802, "learning_rate": 0.00012836581522775438, "loss": 0.9066, "step": 12160 }, { "epoch": 1.4034379326257498, "grad_norm": 0.3633922338485718, "learning_rate": 0.00012830144629979456, "loss": 0.9539, "step": 12165 }, { "epoch": 1.4040147669589293, "grad_norm": 0.38846006989479065, "learning_rate": 0.0001282370646219781, "loss": 0.9294, "step": 12170 }, { "epoch": 1.404591601292109, "grad_norm": 0.37591782212257385, "learning_rate": 0.00012817267022330903, "loss": 0.8834, "step": 12175 }, { "epoch": 1.4051684356252885, "grad_norm": 0.417021244764328, "learning_rate": 0.00012810826313279717, "loss": 0.9032, "step": 12180 }, { "epoch": 1.405745269958468, "grad_norm": 0.42905882000923157, "learning_rate": 0.00012804384337945803, "loss": 0.9289, "step": 12185 }, { "epoch": 1.4063221042916474, "grad_norm": 0.4415873885154724, "learning_rate": 0.00012797941099231284, "loss": 0.9051, "step": 12190 }, { "epoch": 1.406898938624827, "grad_norm": 0.3640197217464447, "learning_rate": 0.00012791496600038854, "loss": 0.9079, "step": 12195 }, { "epoch": 1.4074757729580065, "grad_norm": 0.3957792818546295, "learning_rate": 0.00012785050843271763, "loss": 0.9206, "step": 12200 }, { "epoch": 1.4080526072911859, "grad_norm": 0.3771734833717346, "learning_rate": 0.0001277860383183385, "loss": 0.9383, "step": 12205 }, { "epoch": 1.4086294416243654, "grad_norm": 0.3889096975326538, "learning_rate": 0.00012772155568629499, "loss": 0.9629, "step": 12210 }, { "epoch": 1.409206275957545, "grad_norm": 0.4572511911392212, "learning_rate": 0.00012765706056563667, "loss": 0.9915, "step": 12215 }, { "epoch": 1.4097831102907246, "grad_norm": 0.38007521629333496, "learning_rate": 0.00012759255298541868, "loss": 0.8918, "step": 12220 }, { "epoch": 1.4103599446239041, "grad_norm": 0.3732183873653412, "learning_rate": 0.00012752803297470187, "loss": 0.9185, "step": 12225 }, { "epoch": 1.4109367789570835, "grad_norm": 0.3802737593650818, "learning_rate": 0.00012746350056255259, "loss": 0.9226, "step": 12230 }, { "epoch": 1.411513613290263, "grad_norm": 0.3844064176082611, "learning_rate": 0.00012739895577804284, "loss": 0.8875, "step": 12235 }, { "epoch": 1.4120904476234426, "grad_norm": 0.39902496337890625, "learning_rate": 0.00012733439865025012, "loss": 0.9545, "step": 12240 }, { "epoch": 1.412667281956622, "grad_norm": 0.4251570999622345, "learning_rate": 0.00012726982920825762, "loss": 0.9587, "step": 12245 }, { "epoch": 1.4132441162898015, "grad_norm": 0.45643094182014465, "learning_rate": 0.00012720524748115395, "loss": 0.9062, "step": 12250 }, { "epoch": 1.413820950622981, "grad_norm": 0.4236997663974762, "learning_rate": 0.0001271406534980333, "loss": 0.9377, "step": 12255 }, { "epoch": 1.4143977849561606, "grad_norm": 0.3734123408794403, "learning_rate": 0.00012707604728799543, "loss": 0.9715, "step": 12260 }, { "epoch": 1.4149746192893402, "grad_norm": 0.381076455116272, "learning_rate": 0.0001270114288801455, "loss": 0.9119, "step": 12265 }, { "epoch": 1.4155514536225196, "grad_norm": 0.3949384391307831, "learning_rate": 0.0001269467983035943, "loss": 0.9251, "step": 12270 }, { "epoch": 1.4161282879556991, "grad_norm": 0.43294233083724976, "learning_rate": 0.00012688215558745794, "loss": 0.9293, "step": 12275 }, { "epoch": 1.4167051222888787, "grad_norm": 0.3953537940979004, "learning_rate": 0.00012681750076085817, "loss": 0.9745, "step": 12280 }, { "epoch": 1.417281956622058, "grad_norm": 0.4244721829891205, "learning_rate": 0.00012675283385292212, "loss": 0.9527, "step": 12285 }, { "epoch": 1.4178587909552376, "grad_norm": 0.4531431198120117, "learning_rate": 0.00012668815489278227, "loss": 0.9598, "step": 12290 }, { "epoch": 1.4184356252884172, "grad_norm": 0.5036474466323853, "learning_rate": 0.0001266234639095767, "loss": 0.9285, "step": 12295 }, { "epoch": 1.4190124596215967, "grad_norm": 0.371463418006897, "learning_rate": 0.00012655876093244878, "loss": 0.9574, "step": 12300 }, { "epoch": 1.4195892939547763, "grad_norm": 0.3933194577693939, "learning_rate": 0.00012649404599054736, "loss": 0.972, "step": 12305 }, { "epoch": 1.4201661282879556, "grad_norm": 0.3995529115200043, "learning_rate": 0.00012642931911302662, "loss": 0.9281, "step": 12310 }, { "epoch": 1.4207429626211352, "grad_norm": 0.37661126255989075, "learning_rate": 0.00012636458032904617, "loss": 0.9105, "step": 12315 }, { "epoch": 1.4213197969543148, "grad_norm": 0.4267100393772125, "learning_rate": 0.00012629982966777095, "loss": 0.9245, "step": 12320 }, { "epoch": 1.421896631287494, "grad_norm": 0.4244813621044159, "learning_rate": 0.00012623506715837122, "loss": 0.929, "step": 12325 }, { "epoch": 1.4224734656206737, "grad_norm": 0.38589537143707275, "learning_rate": 0.00012617029283002265, "loss": 0.8674, "step": 12330 }, { "epoch": 1.4230502999538532, "grad_norm": 0.3990231156349182, "learning_rate": 0.0001261055067119062, "loss": 0.9488, "step": 12335 }, { "epoch": 1.4236271342870328, "grad_norm": 0.39353740215301514, "learning_rate": 0.00012604070883320817, "loss": 0.9148, "step": 12340 }, { "epoch": 1.4242039686202124, "grad_norm": 0.4017605781555176, "learning_rate": 0.00012597589922312008, "loss": 0.9591, "step": 12345 }, { "epoch": 1.4247808029533917, "grad_norm": 0.412386417388916, "learning_rate": 0.0001259110779108388, "loss": 0.9367, "step": 12350 }, { "epoch": 1.4253576372865713, "grad_norm": 0.3855336308479309, "learning_rate": 0.0001258462449255665, "loss": 0.9365, "step": 12355 }, { "epoch": 1.4259344716197508, "grad_norm": 0.4509463310241699, "learning_rate": 0.00012578140029651053, "loss": 0.8986, "step": 12360 }, { "epoch": 1.4265113059529302, "grad_norm": 0.39673352241516113, "learning_rate": 0.0001257165440528835, "loss": 0.8817, "step": 12365 }, { "epoch": 1.4270881402861098, "grad_norm": 0.4036000072956085, "learning_rate": 0.0001256516762239033, "loss": 0.9114, "step": 12370 }, { "epoch": 1.4276649746192893, "grad_norm": 0.383042573928833, "learning_rate": 0.00012558679683879301, "loss": 0.9879, "step": 12375 }, { "epoch": 1.4282418089524689, "grad_norm": 0.36675453186035156, "learning_rate": 0.00012552190592678096, "loss": 0.9855, "step": 12380 }, { "epoch": 1.4288186432856484, "grad_norm": 0.40489956736564636, "learning_rate": 0.00012545700351710055, "loss": 0.8809, "step": 12385 }, { "epoch": 1.4293954776188278, "grad_norm": 0.4169880449771881, "learning_rate": 0.0001253920896389905, "loss": 0.9405, "step": 12390 }, { "epoch": 1.4299723119520074, "grad_norm": 0.4987186789512634, "learning_rate": 0.00012532716432169463, "loss": 0.8999, "step": 12395 }, { "epoch": 1.430549146285187, "grad_norm": 0.37798649072647095, "learning_rate": 0.0001252622275944619, "loss": 0.9265, "step": 12400 }, { "epoch": 1.4311259806183663, "grad_norm": 0.3762564957141876, "learning_rate": 0.00012519727948654642, "loss": 0.9024, "step": 12405 }, { "epoch": 1.4317028149515458, "grad_norm": 0.39836904406547546, "learning_rate": 0.00012513232002720753, "loss": 0.9163, "step": 12410 }, { "epoch": 1.4322796492847254, "grad_norm": 0.3596324920654297, "learning_rate": 0.00012506734924570947, "loss": 0.9298, "step": 12415 }, { "epoch": 1.432856483617905, "grad_norm": 0.452575147151947, "learning_rate": 0.00012500236717132178, "loss": 0.9485, "step": 12420 }, { "epoch": 1.4334333179510845, "grad_norm": 0.41744694113731384, "learning_rate": 0.000124937373833319, "loss": 0.9301, "step": 12425 }, { "epoch": 1.434010152284264, "grad_norm": 0.4188467264175415, "learning_rate": 0.00012487236926098075, "loss": 0.9236, "step": 12430 }, { "epoch": 1.4345869866174434, "grad_norm": 0.4042269289493561, "learning_rate": 0.0001248073534835917, "loss": 0.9311, "step": 12435 }, { "epoch": 1.435163820950623, "grad_norm": 0.47039151191711426, "learning_rate": 0.00012474232653044163, "loss": 0.8941, "step": 12440 }, { "epoch": 1.4357406552838026, "grad_norm": 0.37996938824653625, "learning_rate": 0.00012467728843082527, "loss": 0.9221, "step": 12445 }, { "epoch": 1.436317489616982, "grad_norm": 0.4154973328113556, "learning_rate": 0.0001246122392140424, "loss": 0.9594, "step": 12450 }, { "epoch": 1.4368943239501615, "grad_norm": 0.4697798192501068, "learning_rate": 0.00012454717890939787, "loss": 0.9577, "step": 12455 }, { "epoch": 1.437471158283341, "grad_norm": 0.3927803039550781, "learning_rate": 0.0001244821075462014, "loss": 0.9436, "step": 12460 }, { "epoch": 1.4380479926165206, "grad_norm": 0.4236595034599304, "learning_rate": 0.00012441702515376786, "loss": 0.9482, "step": 12465 }, { "epoch": 1.4386248269497002, "grad_norm": 0.4353571832180023, "learning_rate": 0.00012435193176141689, "loss": 0.9142, "step": 12470 }, { "epoch": 1.4392016612828795, "grad_norm": 0.4359499216079712, "learning_rate": 0.00012428682739847329, "loss": 0.8943, "step": 12475 }, { "epoch": 1.439778495616059, "grad_norm": 0.424065500497818, "learning_rate": 0.0001242217120942666, "loss": 0.9089, "step": 12480 }, { "epoch": 1.4403553299492386, "grad_norm": 0.4124845564365387, "learning_rate": 0.00012415658587813146, "loss": 0.9146, "step": 12485 }, { "epoch": 1.440932164282418, "grad_norm": 0.34853968024253845, "learning_rate": 0.00012409144877940738, "loss": 0.8773, "step": 12490 }, { "epoch": 1.4415089986155976, "grad_norm": 0.38272228837013245, "learning_rate": 0.00012402630082743868, "loss": 1.0033, "step": 12495 }, { "epoch": 1.4420858329487771, "grad_norm": 0.4090026319026947, "learning_rate": 0.0001239611420515747, "loss": 0.96, "step": 12500 }, { "epoch": 1.4426626672819567, "grad_norm": 0.4200041890144348, "learning_rate": 0.00012389597248116952, "loss": 0.9338, "step": 12505 }, { "epoch": 1.4432395016151363, "grad_norm": 0.4104383587837219, "learning_rate": 0.00012383079214558227, "loss": 0.931, "step": 12510 }, { "epoch": 1.4438163359483156, "grad_norm": 0.45613524317741394, "learning_rate": 0.0001237656010741767, "loss": 0.9265, "step": 12515 }, { "epoch": 1.4443931702814952, "grad_norm": 0.41018715500831604, "learning_rate": 0.0001237003992963216, "loss": 0.9479, "step": 12520 }, { "epoch": 1.4449700046146747, "grad_norm": 0.41325533390045166, "learning_rate": 0.00012363518684139043, "loss": 0.8916, "step": 12525 }, { "epoch": 1.445546838947854, "grad_norm": 0.37731215357780457, "learning_rate": 0.0001235699637387616, "loss": 0.9599, "step": 12530 }, { "epoch": 1.4461236732810336, "grad_norm": 0.38812848925590515, "learning_rate": 0.0001235047300178182, "loss": 0.9288, "step": 12535 }, { "epoch": 1.4467005076142132, "grad_norm": 0.46964332461357117, "learning_rate": 0.00012343948570794815, "loss": 0.9503, "step": 12540 }, { "epoch": 1.4472773419473928, "grad_norm": 0.39741063117980957, "learning_rate": 0.00012337423083854415, "loss": 0.9676, "step": 12545 }, { "epoch": 1.4478541762805723, "grad_norm": 0.40735316276550293, "learning_rate": 0.00012330896543900362, "loss": 0.9923, "step": 12550 }, { "epoch": 1.4484310106137517, "grad_norm": 0.3687584698200226, "learning_rate": 0.00012324368953872883, "loss": 0.9076, "step": 12555 }, { "epoch": 1.4490078449469312, "grad_norm": 0.39105790853500366, "learning_rate": 0.0001231784031671266, "loss": 0.9131, "step": 12560 }, { "epoch": 1.4495846792801108, "grad_norm": 0.4149412214756012, "learning_rate": 0.00012311310635360856, "loss": 0.9179, "step": 12565 }, { "epoch": 1.4501615136132902, "grad_norm": 0.374827116727829, "learning_rate": 0.00012304779912759118, "loss": 0.9475, "step": 12570 }, { "epoch": 1.4507383479464697, "grad_norm": 0.44704556465148926, "learning_rate": 0.00012298248151849537, "loss": 0.9788, "step": 12575 }, { "epoch": 1.4513151822796493, "grad_norm": 0.39649584889411926, "learning_rate": 0.00012291715355574692, "loss": 0.9261, "step": 12580 }, { "epoch": 1.4518920166128289, "grad_norm": 0.3798285722732544, "learning_rate": 0.00012285181526877615, "loss": 0.9477, "step": 12585 }, { "epoch": 1.4524688509460084, "grad_norm": 0.39250555634498596, "learning_rate": 0.00012278646668701813, "loss": 0.9529, "step": 12590 }, { "epoch": 1.4530456852791878, "grad_norm": 0.3975488543510437, "learning_rate": 0.00012272110783991243, "loss": 0.9347, "step": 12595 }, { "epoch": 1.4536225196123673, "grad_norm": 0.3645585775375366, "learning_rate": 0.00012265573875690344, "loss": 0.9507, "step": 12600 }, { "epoch": 1.454199353945547, "grad_norm": 0.37658560276031494, "learning_rate": 0.00012259035946744003, "loss": 0.9392, "step": 12605 }, { "epoch": 1.4547761882787262, "grad_norm": 0.44661521911621094, "learning_rate": 0.0001225249700009757, "loss": 0.9495, "step": 12610 }, { "epoch": 1.4553530226119058, "grad_norm": 0.4087098240852356, "learning_rate": 0.0001224595703869685, "loss": 0.9337, "step": 12615 }, { "epoch": 1.4559298569450854, "grad_norm": 0.4374881088733673, "learning_rate": 0.00012239416065488112, "loss": 0.9702, "step": 12620 }, { "epoch": 1.456506691278265, "grad_norm": 0.42702898383140564, "learning_rate": 0.00012232874083418078, "loss": 0.9305, "step": 12625 }, { "epoch": 1.4570835256114445, "grad_norm": 0.399262011051178, "learning_rate": 0.0001222633109543392, "loss": 0.9267, "step": 12630 }, { "epoch": 1.4576603599446238, "grad_norm": 0.38637447357177734, "learning_rate": 0.00012219787104483264, "loss": 0.9453, "step": 12635 }, { "epoch": 1.4582371942778034, "grad_norm": 0.40154480934143066, "learning_rate": 0.00012213242113514198, "loss": 0.9293, "step": 12640 }, { "epoch": 1.458814028610983, "grad_norm": 0.4145256280899048, "learning_rate": 0.00012206696125475249, "loss": 0.9248, "step": 12645 }, { "epoch": 1.4593908629441623, "grad_norm": 0.39756283164024353, "learning_rate": 0.00012200149143315403, "loss": 0.951, "step": 12650 }, { "epoch": 1.4599676972773419, "grad_norm": 0.42253583669662476, "learning_rate": 0.0001219360116998408, "loss": 1.0005, "step": 12655 }, { "epoch": 1.4605445316105214, "grad_norm": 0.4099411964416504, "learning_rate": 0.00012187052208431158, "loss": 0.9329, "step": 12660 }, { "epoch": 1.461121365943701, "grad_norm": 0.4196203351020813, "learning_rate": 0.00012180502261606958, "loss": 0.9457, "step": 12665 }, { "epoch": 1.4616982002768806, "grad_norm": 0.372347891330719, "learning_rate": 0.00012173951332462245, "loss": 0.9179, "step": 12670 }, { "epoch": 1.46227503461006, "grad_norm": 0.4399539530277252, "learning_rate": 0.0001216739942394822, "loss": 0.9634, "step": 12675 }, { "epoch": 1.4628518689432395, "grad_norm": 0.37646159529685974, "learning_rate": 0.00012160846539016535, "loss": 0.9668, "step": 12680 }, { "epoch": 1.463428703276419, "grad_norm": 0.4296335279941559, "learning_rate": 0.0001215429268061928, "loss": 0.9561, "step": 12685 }, { "epoch": 1.4640055376095984, "grad_norm": 0.40583279728889465, "learning_rate": 0.00012147737851708973, "loss": 0.9384, "step": 12690 }, { "epoch": 1.464582371942778, "grad_norm": 0.399314284324646, "learning_rate": 0.00012141182055238585, "loss": 0.9043, "step": 12695 }, { "epoch": 1.4651592062759575, "grad_norm": 0.44123348593711853, "learning_rate": 0.00012134625294161508, "loss": 0.9361, "step": 12700 }, { "epoch": 1.465736040609137, "grad_norm": 0.411359578371048, "learning_rate": 0.00012128067571431583, "loss": 0.9424, "step": 12705 }, { "epoch": 1.4663128749423167, "grad_norm": 0.4213956296443939, "learning_rate": 0.0001212150889000307, "loss": 0.9272, "step": 12710 }, { "epoch": 1.466889709275496, "grad_norm": 0.40593644976615906, "learning_rate": 0.00012114949252830674, "loss": 0.9378, "step": 12715 }, { "epoch": 1.4674665436086756, "grad_norm": 0.37980008125305176, "learning_rate": 0.00012108388662869519, "loss": 0.9346, "step": 12720 }, { "epoch": 1.4680433779418551, "grad_norm": 0.42362499237060547, "learning_rate": 0.00012101827123075167, "loss": 0.968, "step": 12725 }, { "epoch": 1.4686202122750345, "grad_norm": 0.4329698085784912, "learning_rate": 0.00012095264636403603, "loss": 0.954, "step": 12730 }, { "epoch": 1.469197046608214, "grad_norm": 0.4111020267009735, "learning_rate": 0.0001208870120581124, "loss": 0.9294, "step": 12735 }, { "epoch": 1.4697738809413936, "grad_norm": 0.4156142473220825, "learning_rate": 0.00012082136834254918, "loss": 0.909, "step": 12740 }, { "epoch": 1.4703507152745732, "grad_norm": 0.4147590696811676, "learning_rate": 0.00012075571524691895, "loss": 0.9334, "step": 12745 }, { "epoch": 1.4709275496077527, "grad_norm": 0.3994022607803345, "learning_rate": 0.00012069005280079862, "loss": 0.9461, "step": 12750 }, { "epoch": 1.471504383940932, "grad_norm": 0.36893755197525024, "learning_rate": 0.00012062438103376918, "loss": 0.9126, "step": 12755 }, { "epoch": 1.4720812182741116, "grad_norm": 0.38947948813438416, "learning_rate": 0.00012055869997541593, "loss": 0.8755, "step": 12760 }, { "epoch": 1.4726580526072912, "grad_norm": 0.36021241545677185, "learning_rate": 0.00012049300965532832, "loss": 0.9173, "step": 12765 }, { "epoch": 1.4732348869404708, "grad_norm": 0.39924636483192444, "learning_rate": 0.00012042731010309995, "loss": 0.9659, "step": 12770 }, { "epoch": 1.4738117212736501, "grad_norm": 0.3978961110115051, "learning_rate": 0.00012036160134832862, "loss": 0.9637, "step": 12775 }, { "epoch": 1.4743885556068297, "grad_norm": 0.4412531852722168, "learning_rate": 0.00012029588342061621, "loss": 0.9188, "step": 12780 }, { "epoch": 1.4749653899400093, "grad_norm": 0.39058932662010193, "learning_rate": 0.00012023015634956882, "loss": 0.9557, "step": 12785 }, { "epoch": 1.4755422242731888, "grad_norm": 0.3821706473827362, "learning_rate": 0.00012016442016479656, "loss": 0.936, "step": 12790 }, { "epoch": 1.4761190586063684, "grad_norm": 0.3862445652484894, "learning_rate": 0.00012009867489591377, "loss": 0.8954, "step": 12795 }, { "epoch": 1.4766958929395477, "grad_norm": 0.3745687007904053, "learning_rate": 0.00012003292057253883, "loss": 0.9194, "step": 12800 }, { "epoch": 1.4772727272727273, "grad_norm": 0.42594972252845764, "learning_rate": 0.00011996715722429413, "loss": 0.972, "step": 12805 }, { "epoch": 1.4778495616059069, "grad_norm": 0.40421348810195923, "learning_rate": 0.00011990138488080622, "loss": 0.9433, "step": 12810 }, { "epoch": 1.4784263959390862, "grad_norm": 0.3907431662082672, "learning_rate": 0.00011983560357170568, "loss": 0.9475, "step": 12815 }, { "epoch": 1.4790032302722658, "grad_norm": 0.42429277300834656, "learning_rate": 0.00011976981332662711, "loss": 0.9317, "step": 12820 }, { "epoch": 1.4795800646054453, "grad_norm": 0.5049063563346863, "learning_rate": 0.00011970401417520913, "loss": 0.9408, "step": 12825 }, { "epoch": 1.480156898938625, "grad_norm": 0.43257758021354675, "learning_rate": 0.0001196382061470944, "loss": 0.961, "step": 12830 }, { "epoch": 1.4807337332718045, "grad_norm": 0.3847612738609314, "learning_rate": 0.00011957238927192955, "loss": 0.895, "step": 12835 }, { "epoch": 1.4813105676049838, "grad_norm": 0.39073553681373596, "learning_rate": 0.00011950656357936525, "loss": 0.9157, "step": 12840 }, { "epoch": 1.4818874019381634, "grad_norm": 0.4299897849559784, "learning_rate": 0.00011944072909905604, "loss": 0.9503, "step": 12845 }, { "epoch": 1.482464236271343, "grad_norm": 0.41576239466667175, "learning_rate": 0.00011937488586066054, "loss": 0.9498, "step": 12850 }, { "epoch": 1.4830410706045223, "grad_norm": 0.4170142412185669, "learning_rate": 0.00011930903389384123, "loss": 0.9183, "step": 12855 }, { "epoch": 1.4836179049377018, "grad_norm": 0.39141911268234253, "learning_rate": 0.00011924317322826452, "loss": 0.898, "step": 12860 }, { "epoch": 1.4841947392708814, "grad_norm": 0.38259202241897583, "learning_rate": 0.00011917730389360085, "loss": 0.8972, "step": 12865 }, { "epoch": 1.484771573604061, "grad_norm": 0.3913986086845398, "learning_rate": 0.00011911142591952437, "loss": 0.9063, "step": 12870 }, { "epoch": 1.4853484079372405, "grad_norm": 0.3920874297618866, "learning_rate": 0.00011904553933571336, "loss": 0.907, "step": 12875 }, { "epoch": 1.4859252422704199, "grad_norm": 0.40604132413864136, "learning_rate": 0.00011897964417184975, "loss": 0.8798, "step": 12880 }, { "epoch": 1.4865020766035995, "grad_norm": 0.40422797203063965, "learning_rate": 0.0001189137404576195, "loss": 0.8955, "step": 12885 }, { "epoch": 1.487078910936779, "grad_norm": 0.41283461451530457, "learning_rate": 0.00011884782822271235, "loss": 0.9368, "step": 12890 }, { "epoch": 1.4876557452699584, "grad_norm": 0.4172614812850952, "learning_rate": 0.00011878190749682187, "loss": 0.9014, "step": 12895 }, { "epoch": 1.488232579603138, "grad_norm": 0.38606515526771545, "learning_rate": 0.00011871597830964551, "loss": 0.9087, "step": 12900 }, { "epoch": 1.4888094139363175, "grad_norm": 0.43429484963417053, "learning_rate": 0.00011865004069088446, "loss": 0.9084, "step": 12905 }, { "epoch": 1.489386248269497, "grad_norm": 0.41635558009147644, "learning_rate": 0.00011858409467024376, "loss": 0.9449, "step": 12910 }, { "epoch": 1.4899630826026766, "grad_norm": 0.40536168217658997, "learning_rate": 0.00011851814027743223, "loss": 0.9013, "step": 12915 }, { "epoch": 1.490539916935856, "grad_norm": 0.3647763729095459, "learning_rate": 0.00011845217754216245, "loss": 0.9269, "step": 12920 }, { "epoch": 1.4911167512690355, "grad_norm": 0.3644658923149109, "learning_rate": 0.00011838620649415076, "loss": 0.9173, "step": 12925 }, { "epoch": 1.491693585602215, "grad_norm": 0.39510855078697205, "learning_rate": 0.00011832022716311722, "loss": 0.9111, "step": 12930 }, { "epoch": 1.4922704199353944, "grad_norm": 0.40662682056427, "learning_rate": 0.0001182542395787857, "loss": 0.9532, "step": 12935 }, { "epoch": 1.492847254268574, "grad_norm": 0.38650259375572205, "learning_rate": 0.00011818824377088366, "loss": 0.9268, "step": 12940 }, { "epoch": 1.4934240886017536, "grad_norm": 0.3982434868812561, "learning_rate": 0.00011812223976914243, "loss": 0.9307, "step": 12945 }, { "epoch": 1.4940009229349331, "grad_norm": 0.4159766733646393, "learning_rate": 0.00011805622760329687, "loss": 0.9542, "step": 12950 }, { "epoch": 1.4945777572681127, "grad_norm": 0.3663424551486969, "learning_rate": 0.00011799020730308563, "loss": 0.8649, "step": 12955 }, { "epoch": 1.495154591601292, "grad_norm": 0.38212850689888, "learning_rate": 0.00011792417889825094, "loss": 0.8616, "step": 12960 }, { "epoch": 1.4957314259344716, "grad_norm": 0.42534342408180237, "learning_rate": 0.00011785814241853876, "loss": 0.9143, "step": 12965 }, { "epoch": 1.4963082602676512, "grad_norm": 0.35233616828918457, "learning_rate": 0.00011779209789369867, "loss": 0.9374, "step": 12970 }, { "epoch": 1.4968850946008305, "grad_norm": 0.3654002249240875, "learning_rate": 0.00011772604535348382, "loss": 0.89, "step": 12975 }, { "epoch": 1.49746192893401, "grad_norm": 0.3802456259727478, "learning_rate": 0.00011765998482765104, "loss": 0.9309, "step": 12980 }, { "epoch": 1.4980387632671897, "grad_norm": 0.42161834239959717, "learning_rate": 0.00011759391634596067, "loss": 0.9935, "step": 12985 }, { "epoch": 1.4986155976003692, "grad_norm": 0.3783545196056366, "learning_rate": 0.00011752783993817675, "loss": 0.9277, "step": 12990 }, { "epoch": 1.4991924319335488, "grad_norm": 0.38438281416893005, "learning_rate": 0.00011746175563406681, "loss": 0.9102, "step": 12995 }, { "epoch": 1.4997692662667281, "grad_norm": 0.39648398756980896, "learning_rate": 0.00011739566346340194, "loss": 0.9303, "step": 13000 }, { "epoch": 1.5003461005999077, "grad_norm": 0.36507081985473633, "learning_rate": 0.00011732956345595682, "loss": 0.9049, "step": 13005 }, { "epoch": 1.5009229349330873, "grad_norm": 0.3909667730331421, "learning_rate": 0.0001172634556415096, "loss": 0.9377, "step": 13010 }, { "epoch": 1.5014997692662666, "grad_norm": 0.40388140082359314, "learning_rate": 0.00011719734004984201, "loss": 0.9625, "step": 13015 }, { "epoch": 1.5020766035994462, "grad_norm": 0.41368624567985535, "learning_rate": 0.00011713121671073924, "loss": 0.903, "step": 13020 }, { "epoch": 1.5026534379326257, "grad_norm": 0.3890875577926636, "learning_rate": 0.00011706508565399, "loss": 0.9366, "step": 13025 }, { "epoch": 1.5032302722658053, "grad_norm": 0.3867320120334625, "learning_rate": 0.0001169989469093864, "loss": 0.9226, "step": 13030 }, { "epoch": 1.5038071065989849, "grad_norm": 0.4172716438770294, "learning_rate": 0.00011693280050672417, "loss": 0.9627, "step": 13035 }, { "epoch": 1.5043839409321644, "grad_norm": 0.38736414909362793, "learning_rate": 0.0001168666464758023, "loss": 0.9435, "step": 13040 }, { "epoch": 1.5049607752653438, "grad_norm": 0.37807396054267883, "learning_rate": 0.00011680048484642334, "loss": 0.9486, "step": 13045 }, { "epoch": 1.5055376095985233, "grad_norm": 0.3694530427455902, "learning_rate": 0.00011673431564839327, "loss": 0.9804, "step": 13050 }, { "epoch": 1.5061144439317027, "grad_norm": 0.41525396704673767, "learning_rate": 0.0001166681389115214, "loss": 0.9222, "step": 13055 }, { "epoch": 1.5066912782648822, "grad_norm": 0.4183506369590759, "learning_rate": 0.00011660195466562051, "loss": 0.9199, "step": 13060 }, { "epoch": 1.5072681125980618, "grad_norm": 0.3661426901817322, "learning_rate": 0.0001165357629405067, "loss": 0.9377, "step": 13065 }, { "epoch": 1.5078449469312414, "grad_norm": 0.3974156379699707, "learning_rate": 0.00011646956376599951, "loss": 0.8701, "step": 13070 }, { "epoch": 1.508421781264421, "grad_norm": 0.3940170109272003, "learning_rate": 0.00011640335717192172, "loss": 0.9155, "step": 13075 }, { "epoch": 1.5089986155976005, "grad_norm": 0.40755462646484375, "learning_rate": 0.00011633714318809962, "loss": 0.9542, "step": 13080 }, { "epoch": 1.5095754499307799, "grad_norm": 0.4307992160320282, "learning_rate": 0.0001162709218443627, "loss": 0.9364, "step": 13085 }, { "epoch": 1.5101522842639594, "grad_norm": 0.4064287841320038, "learning_rate": 0.0001162046931705438, "loss": 0.9736, "step": 13090 }, { "epoch": 1.5107291185971388, "grad_norm": 0.41723504662513733, "learning_rate": 0.00011613845719647909, "loss": 0.9962, "step": 13095 }, { "epoch": 1.5113059529303183, "grad_norm": 0.37900570034980774, "learning_rate": 0.00011607221395200796, "loss": 0.907, "step": 13100 }, { "epoch": 1.511882787263498, "grad_norm": 0.43227189779281616, "learning_rate": 0.00011600596346697317, "loss": 0.9348, "step": 13105 }, { "epoch": 1.5124596215966775, "grad_norm": 0.40124958753585815, "learning_rate": 0.00011593970577122067, "loss": 0.9054, "step": 13110 }, { "epoch": 1.513036455929857, "grad_norm": 0.38149794936180115, "learning_rate": 0.00011587344089459966, "loss": 0.9172, "step": 13115 }, { "epoch": 1.5136132902630366, "grad_norm": 0.49807044863700867, "learning_rate": 0.00011580716886696263, "loss": 0.9209, "step": 13120 }, { "epoch": 1.514190124596216, "grad_norm": 0.4255905747413635, "learning_rate": 0.00011574088971816523, "loss": 0.9201, "step": 13125 }, { "epoch": 1.5147669589293955, "grad_norm": 0.34574073553085327, "learning_rate": 0.00011567460347806638, "loss": 0.9229, "step": 13130 }, { "epoch": 1.5153437932625748, "grad_norm": 0.43729299306869507, "learning_rate": 0.00011560831017652813, "loss": 0.9621, "step": 13135 }, { "epoch": 1.5159206275957544, "grad_norm": 0.40615105628967285, "learning_rate": 0.00011554200984341577, "loss": 0.9251, "step": 13140 }, { "epoch": 1.516497461928934, "grad_norm": 0.37189602851867676, "learning_rate": 0.0001154757025085977, "loss": 0.9241, "step": 13145 }, { "epoch": 1.5170742962621135, "grad_norm": 0.37347811460494995, "learning_rate": 0.00011540938820194553, "loss": 0.9336, "step": 13150 }, { "epoch": 1.517651130595293, "grad_norm": 0.4073767066001892, "learning_rate": 0.00011534306695333395, "loss": 0.9168, "step": 13155 }, { "epoch": 1.5182279649284727, "grad_norm": 0.42767712473869324, "learning_rate": 0.0001152767387926408, "loss": 0.8952, "step": 13160 }, { "epoch": 1.518804799261652, "grad_norm": 0.4040094316005707, "learning_rate": 0.00011521040374974714, "loss": 0.9037, "step": 13165 }, { "epoch": 1.5193816335948316, "grad_norm": 0.36918869614601135, "learning_rate": 0.00011514406185453692, "loss": 0.9313, "step": 13170 }, { "epoch": 1.519958467928011, "grad_norm": 0.36346372961997986, "learning_rate": 0.00011507771313689739, "loss": 0.8932, "step": 13175 }, { "epoch": 1.5205353022611905, "grad_norm": 0.36180615425109863, "learning_rate": 0.00011501135762671869, "loss": 0.9271, "step": 13180 }, { "epoch": 1.52111213659437, "grad_norm": 0.4362223446369171, "learning_rate": 0.00011494499535389418, "loss": 0.9458, "step": 13185 }, { "epoch": 1.5216889709275496, "grad_norm": 0.39161691069602966, "learning_rate": 0.00011487862634832014, "loss": 0.971, "step": 13190 }, { "epoch": 1.5222658052607292, "grad_norm": 0.40720003843307495, "learning_rate": 0.00011481225063989597, "loss": 0.9547, "step": 13195 }, { "epoch": 1.5228426395939088, "grad_norm": 0.3815619945526123, "learning_rate": 0.00011474586825852405, "loss": 0.9162, "step": 13200 }, { "epoch": 1.523419473927088, "grad_norm": 0.38358306884765625, "learning_rate": 0.00011467947923410973, "loss": 0.8822, "step": 13205 }, { "epoch": 1.5239963082602677, "grad_norm": 0.37997207045555115, "learning_rate": 0.00011461308359656149, "loss": 0.9794, "step": 13210 }, { "epoch": 1.524573142593447, "grad_norm": 0.3825436532497406, "learning_rate": 0.00011454668137579059, "loss": 0.9013, "step": 13215 }, { "epoch": 1.5251499769266266, "grad_norm": 0.38891690969467163, "learning_rate": 0.00011448027260171142, "loss": 0.9753, "step": 13220 }, { "epoch": 1.5257268112598061, "grad_norm": 0.3794013559818268, "learning_rate": 0.00011441385730424123, "loss": 0.8855, "step": 13225 }, { "epoch": 1.5263036455929857, "grad_norm": 0.42887309193611145, "learning_rate": 0.00011434743551330028, "loss": 0.896, "step": 13230 }, { "epoch": 1.5268804799261653, "grad_norm": 0.391722172498703, "learning_rate": 0.00011428100725881167, "loss": 0.9659, "step": 13235 }, { "epoch": 1.5274573142593448, "grad_norm": 0.4026354253292084, "learning_rate": 0.00011421457257070148, "loss": 0.9392, "step": 13240 }, { "epoch": 1.5280341485925242, "grad_norm": 0.4350236654281616, "learning_rate": 0.00011414813147889868, "loss": 0.9289, "step": 13245 }, { "epoch": 1.5286109829257037, "grad_norm": 0.4506089687347412, "learning_rate": 0.0001140816840133351, "loss": 0.9446, "step": 13250 }, { "epoch": 1.529187817258883, "grad_norm": 0.4440595805644989, "learning_rate": 0.00011401523020394546, "loss": 0.9314, "step": 13255 }, { "epoch": 1.5297646515920627, "grad_norm": 0.3944181501865387, "learning_rate": 0.00011394877008066731, "loss": 0.9291, "step": 13260 }, { "epoch": 1.5303414859252422, "grad_norm": 0.4218878746032715, "learning_rate": 0.00011388230367344111, "loss": 0.8998, "step": 13265 }, { "epoch": 1.5309183202584218, "grad_norm": 0.4063720405101776, "learning_rate": 0.00011381583101221003, "loss": 0.961, "step": 13270 }, { "epoch": 1.5314951545916013, "grad_norm": 0.38921263813972473, "learning_rate": 0.00011374935212692018, "loss": 0.9483, "step": 13275 }, { "epoch": 1.532071988924781, "grad_norm": 0.41635677218437195, "learning_rate": 0.00011368286704752042, "loss": 0.9531, "step": 13280 }, { "epoch": 1.5326488232579605, "grad_norm": 0.4070545732975006, "learning_rate": 0.00011361637580396243, "loss": 0.9497, "step": 13285 }, { "epoch": 1.5332256575911398, "grad_norm": 0.39755600690841675, "learning_rate": 0.00011354987842620061, "loss": 0.9894, "step": 13290 }, { "epoch": 1.5338024919243194, "grad_norm": 0.41410693526268005, "learning_rate": 0.00011348337494419219, "loss": 0.888, "step": 13295 }, { "epoch": 1.5343793262574987, "grad_norm": 0.4335348904132843, "learning_rate": 0.00011341686538789708, "loss": 0.922, "step": 13300 }, { "epoch": 1.5349561605906783, "grad_norm": 0.44754913449287415, "learning_rate": 0.000113350349787278, "loss": 0.9532, "step": 13305 }, { "epoch": 1.5355329949238579, "grad_norm": 0.4027005136013031, "learning_rate": 0.00011328382817230034, "loss": 0.9525, "step": 13310 }, { "epoch": 1.5361098292570374, "grad_norm": 0.364678293466568, "learning_rate": 0.00011321730057293225, "loss": 0.9321, "step": 13315 }, { "epoch": 1.536686663590217, "grad_norm": 0.41008302569389343, "learning_rate": 0.00011315076701914449, "loss": 0.927, "step": 13320 }, { "epoch": 1.5372634979233966, "grad_norm": 0.3624386489391327, "learning_rate": 0.00011308422754091057, "loss": 0.8554, "step": 13325 }, { "epoch": 1.537840332256576, "grad_norm": 0.3806852400302887, "learning_rate": 0.0001130176821682067, "loss": 0.8788, "step": 13330 }, { "epoch": 1.5384171665897555, "grad_norm": 0.37646257877349854, "learning_rate": 0.00011295113093101162, "loss": 0.9034, "step": 13335 }, { "epoch": 1.5389940009229348, "grad_norm": 0.41849663853645325, "learning_rate": 0.00011288457385930686, "loss": 0.9242, "step": 13340 }, { "epoch": 1.5395708352561144, "grad_norm": 0.36853688955307007, "learning_rate": 0.00011281801098307647, "loss": 0.8691, "step": 13345 }, { "epoch": 1.540147669589294, "grad_norm": 0.3795246481895447, "learning_rate": 0.0001127514423323072, "loss": 0.9911, "step": 13350 }, { "epoch": 1.5407245039224735, "grad_norm": 0.4382937550544739, "learning_rate": 0.00011268486793698832, "loss": 0.9828, "step": 13355 }, { "epoch": 1.541301338255653, "grad_norm": 0.3911653161048889, "learning_rate": 0.00011261828782711173, "loss": 0.9234, "step": 13360 }, { "epoch": 1.5418781725888326, "grad_norm": 0.37677720189094543, "learning_rate": 0.00011255170203267186, "loss": 0.9197, "step": 13365 }, { "epoch": 1.542455006922012, "grad_norm": 0.38520315289497375, "learning_rate": 0.00011248511058366586, "loss": 0.8861, "step": 13370 }, { "epoch": 1.5430318412551915, "grad_norm": 0.4070914387702942, "learning_rate": 0.00011241851351009318, "loss": 0.9964, "step": 13375 }, { "epoch": 1.543608675588371, "grad_norm": 0.411170095205307, "learning_rate": 0.000112351910841956, "loss": 0.9454, "step": 13380 }, { "epoch": 1.5441855099215505, "grad_norm": 0.3714311718940735, "learning_rate": 0.00011228530260925894, "loss": 0.8964, "step": 13385 }, { "epoch": 1.54476234425473, "grad_norm": 0.3878600597381592, "learning_rate": 0.00011221868884200912, "loss": 0.9266, "step": 13390 }, { "epoch": 1.5453391785879096, "grad_norm": 0.43673795461654663, "learning_rate": 0.00011215206957021618, "loss": 0.9352, "step": 13395 }, { "epoch": 1.5459160129210892, "grad_norm": 0.40795305371284485, "learning_rate": 0.00011208544482389223, "loss": 0.9074, "step": 13400 }, { "epoch": 1.5464928472542687, "grad_norm": 0.40639349818229675, "learning_rate": 0.00011201881463305188, "loss": 0.9287, "step": 13405 }, { "epoch": 1.547069681587448, "grad_norm": 0.36830776929855347, "learning_rate": 0.00011195217902771212, "loss": 0.9482, "step": 13410 }, { "epoch": 1.5476465159206276, "grad_norm": 0.38134291768074036, "learning_rate": 0.00011188553803789244, "loss": 0.9183, "step": 13415 }, { "epoch": 1.548223350253807, "grad_norm": 0.37543559074401855, "learning_rate": 0.00011181889169361473, "loss": 0.9702, "step": 13420 }, { "epoch": 1.5488001845869865, "grad_norm": 0.3869517743587494, "learning_rate": 0.0001117522400249033, "loss": 0.8916, "step": 13425 }, { "epoch": 1.549377018920166, "grad_norm": 0.3994382619857788, "learning_rate": 0.00011168558306178483, "loss": 0.9326, "step": 13430 }, { "epoch": 1.5499538532533457, "grad_norm": 0.4107452929019928, "learning_rate": 0.00011161892083428846, "loss": 0.8895, "step": 13435 }, { "epoch": 1.5505306875865252, "grad_norm": 0.4027215838432312, "learning_rate": 0.00011155225337244562, "loss": 0.9403, "step": 13440 }, { "epoch": 1.5511075219197048, "grad_norm": 0.42710939049720764, "learning_rate": 0.00011148558070629011, "loss": 0.9366, "step": 13445 }, { "epoch": 1.5516843562528841, "grad_norm": 0.3737245500087738, "learning_rate": 0.00011141890286585819, "loss": 0.9509, "step": 13450 }, { "epoch": 1.5522611905860637, "grad_norm": 0.38691097497940063, "learning_rate": 0.00011135221988118825, "loss": 0.9097, "step": 13455 }, { "epoch": 1.552838024919243, "grad_norm": 0.42986229062080383, "learning_rate": 0.00011128553178232117, "loss": 0.9406, "step": 13460 }, { "epoch": 1.5534148592524226, "grad_norm": 0.37808936834335327, "learning_rate": 0.00011121883859930002, "loss": 0.8939, "step": 13465 }, { "epoch": 1.5539916935856022, "grad_norm": 0.41650715470314026, "learning_rate": 0.00011115214036217026, "loss": 0.9611, "step": 13470 }, { "epoch": 1.5545685279187818, "grad_norm": 0.40282508730888367, "learning_rate": 0.00011108543710097954, "loss": 0.9266, "step": 13475 }, { "epoch": 1.5551453622519613, "grad_norm": 0.39564791321754456, "learning_rate": 0.00011101872884577784, "loss": 0.9209, "step": 13480 }, { "epoch": 1.5557221965851409, "grad_norm": 0.48436239361763, "learning_rate": 0.0001109520156266173, "loss": 0.9363, "step": 13485 }, { "epoch": 1.5562990309183202, "grad_norm": 0.4382035434246063, "learning_rate": 0.0001108852974735524, "loss": 0.9322, "step": 13490 }, { "epoch": 1.5568758652514998, "grad_norm": 0.34939664602279663, "learning_rate": 0.00011081857441663983, "loss": 0.9466, "step": 13495 }, { "epoch": 1.5574526995846791, "grad_norm": 0.37167319655418396, "learning_rate": 0.00011075184648593838, "loss": 0.9381, "step": 13500 }, { "epoch": 1.5580295339178587, "grad_norm": 0.39715757966041565, "learning_rate": 0.00011068511371150918, "loss": 0.9075, "step": 13505 }, { "epoch": 1.5586063682510383, "grad_norm": 0.38853919506073, "learning_rate": 0.00011061837612341542, "loss": 0.9422, "step": 13510 }, { "epoch": 1.5591832025842178, "grad_norm": 0.37417513132095337, "learning_rate": 0.00011055163375172257, "loss": 0.9533, "step": 13515 }, { "epoch": 1.5597600369173974, "grad_norm": 0.40994784235954285, "learning_rate": 0.00011048488662649814, "loss": 0.8954, "step": 13520 }, { "epoch": 1.560336871250577, "grad_norm": 0.38082724809646606, "learning_rate": 0.00011041813477781186, "loss": 0.9343, "step": 13525 }, { "epoch": 1.5609137055837563, "grad_norm": 0.41218113899230957, "learning_rate": 0.00011035137823573561, "loss": 0.9772, "step": 13530 }, { "epoch": 1.5614905399169359, "grad_norm": 0.43310537934303284, "learning_rate": 0.0001102846170303433, "loss": 0.958, "step": 13535 }, { "epoch": 1.5620673742501152, "grad_norm": 0.4469921588897705, "learning_rate": 0.00011021785119171098, "loss": 0.9022, "step": 13540 }, { "epoch": 1.5626442085832948, "grad_norm": 0.3993799388408661, "learning_rate": 0.0001101510807499168, "loss": 0.8787, "step": 13545 }, { "epoch": 1.5632210429164743, "grad_norm": 0.3866843283176422, "learning_rate": 0.00011008430573504099, "loss": 0.9063, "step": 13550 }, { "epoch": 1.563797877249654, "grad_norm": 0.37747833132743835, "learning_rate": 0.00011001752617716579, "loss": 0.9077, "step": 13555 }, { "epoch": 1.5643747115828335, "grad_norm": 0.3823941648006439, "learning_rate": 0.00010995074210637557, "loss": 0.963, "step": 13560 }, { "epoch": 1.564951545916013, "grad_norm": 0.3883691132068634, "learning_rate": 0.00010988395355275663, "loss": 0.9022, "step": 13565 }, { "epoch": 1.5655283802491924, "grad_norm": 0.4316290020942688, "learning_rate": 0.00010981716054639735, "loss": 0.9569, "step": 13570 }, { "epoch": 1.566105214582372, "grad_norm": 0.38802143931388855, "learning_rate": 0.00010975036311738818, "loss": 0.9098, "step": 13575 }, { "epoch": 1.5666820489155513, "grad_norm": 0.36971673369407654, "learning_rate": 0.00010968356129582139, "loss": 0.9798, "step": 13580 }, { "epoch": 1.5672588832487309, "grad_norm": 0.38266512751579285, "learning_rate": 0.00010961675511179142, "loss": 0.937, "step": 13585 }, { "epoch": 1.5678357175819104, "grad_norm": 0.3889155685901642, "learning_rate": 0.00010954994459539452, "loss": 0.9518, "step": 13590 }, { "epoch": 1.56841255191509, "grad_norm": 0.3645268380641937, "learning_rate": 0.00010948312977672899, "loss": 0.9222, "step": 13595 }, { "epoch": 1.5689893862482696, "grad_norm": 0.41634300351142883, "learning_rate": 0.00010941631068589502, "loss": 0.8753, "step": 13600 }, { "epoch": 1.5695662205814491, "grad_norm": 0.3983922600746155, "learning_rate": 0.00010934948735299475, "loss": 0.9589, "step": 13605 }, { "epoch": 1.5701430549146285, "grad_norm": 0.3916638493537903, "learning_rate": 0.00010928265980813223, "loss": 0.9154, "step": 13610 }, { "epoch": 1.570719889247808, "grad_norm": 0.41058340668678284, "learning_rate": 0.0001092158280814134, "loss": 0.9635, "step": 13615 }, { "epoch": 1.5712967235809874, "grad_norm": 0.4408254027366638, "learning_rate": 0.00010914899220294607, "loss": 0.8695, "step": 13620 }, { "epoch": 1.571873557914167, "grad_norm": 0.39679133892059326, "learning_rate": 0.00010908215220283993, "loss": 0.9355, "step": 13625 }, { "epoch": 1.5724503922473465, "grad_norm": 0.38094478845596313, "learning_rate": 0.00010901530811120655, "loss": 0.9367, "step": 13630 }, { "epoch": 1.573027226580526, "grad_norm": 0.4355633556842804, "learning_rate": 0.00010894845995815928, "loss": 0.9188, "step": 13635 }, { "epoch": 1.5736040609137056, "grad_norm": 0.4333462715148926, "learning_rate": 0.00010888160777381342, "loss": 0.9727, "step": 13640 }, { "epoch": 1.5741808952468852, "grad_norm": 0.4014212489128113, "learning_rate": 0.00010881475158828592, "loss": 0.9598, "step": 13645 }, { "epoch": 1.5747577295800648, "grad_norm": 0.3798421025276184, "learning_rate": 0.00010874789143169568, "loss": 0.9769, "step": 13650 }, { "epoch": 1.5753345639132441, "grad_norm": 0.42204970121383667, "learning_rate": 0.00010868102733416332, "loss": 0.9029, "step": 13655 }, { "epoch": 1.5759113982464237, "grad_norm": 0.42524123191833496, "learning_rate": 0.00010861415932581123, "loss": 0.9278, "step": 13660 }, { "epoch": 1.576488232579603, "grad_norm": 0.43853500485420227, "learning_rate": 0.00010854728743676362, "loss": 0.944, "step": 13665 }, { "epoch": 1.5770650669127826, "grad_norm": 0.41227036714553833, "learning_rate": 0.00010848041169714635, "loss": 0.8973, "step": 13670 }, { "epoch": 1.5776419012459622, "grad_norm": 0.37476420402526855, "learning_rate": 0.00010841353213708711, "loss": 0.9499, "step": 13675 }, { "epoch": 1.5782187355791417, "grad_norm": 0.39963239431381226, "learning_rate": 0.00010834664878671525, "loss": 0.9036, "step": 13680 }, { "epoch": 1.5787955699123213, "grad_norm": 0.37388959527015686, "learning_rate": 0.00010827976167616185, "loss": 0.9154, "step": 13685 }, { "epoch": 1.5793724042455008, "grad_norm": 0.39449089765548706, "learning_rate": 0.00010821287083555971, "loss": 0.9183, "step": 13690 }, { "epoch": 1.5799492385786802, "grad_norm": 0.42294618487358093, "learning_rate": 0.00010814597629504324, "loss": 0.9375, "step": 13695 }, { "epoch": 1.5805260729118598, "grad_norm": 0.40645015239715576, "learning_rate": 0.00010807907808474862, "loss": 0.9377, "step": 13700 }, { "epoch": 1.581102907245039, "grad_norm": 0.3905043601989746, "learning_rate": 0.00010801217623481356, "loss": 0.9371, "step": 13705 }, { "epoch": 1.5816797415782187, "grad_norm": 0.46238189935684204, "learning_rate": 0.00010794527077537755, "loss": 0.9441, "step": 13710 }, { "epoch": 1.5822565759113982, "grad_norm": 0.44243723154067993, "learning_rate": 0.00010787836173658155, "loss": 0.9134, "step": 13715 }, { "epoch": 1.5828334102445778, "grad_norm": 0.4234520196914673, "learning_rate": 0.00010781144914856826, "loss": 0.968, "step": 13720 }, { "epoch": 1.5834102445777574, "grad_norm": 0.42427393794059753, "learning_rate": 0.00010774453304148192, "loss": 0.8884, "step": 13725 }, { "epoch": 1.583987078910937, "grad_norm": 0.40130579471588135, "learning_rate": 0.00010767761344546831, "loss": 0.9292, "step": 13730 }, { "epoch": 1.5845639132441163, "grad_norm": 0.3994501233100891, "learning_rate": 0.00010761069039067498, "loss": 1.0001, "step": 13735 }, { "epoch": 1.5851407475772958, "grad_norm": 0.40696844458580017, "learning_rate": 0.00010754376390725074, "loss": 0.9145, "step": 13740 }, { "epoch": 1.5857175819104752, "grad_norm": 0.42791715264320374, "learning_rate": 0.00010747683402534621, "loss": 0.9214, "step": 13745 }, { "epoch": 1.5862944162436547, "grad_norm": 0.36512282490730286, "learning_rate": 0.00010740990077511337, "loss": 0.9127, "step": 13750 }, { "epoch": 1.5868712505768343, "grad_norm": 0.4037910997867584, "learning_rate": 0.00010734296418670582, "loss": 0.9214, "step": 13755 }, { "epoch": 1.5874480849100139, "grad_norm": 0.41456806659698486, "learning_rate": 0.00010727602429027859, "loss": 0.9446, "step": 13760 }, { "epoch": 1.5880249192431934, "grad_norm": 0.3830060660839081, "learning_rate": 0.00010720908111598824, "loss": 0.9261, "step": 13765 }, { "epoch": 1.588601753576373, "grad_norm": 0.4202171862125397, "learning_rate": 0.00010714213469399283, "loss": 0.954, "step": 13770 }, { "epoch": 1.5891785879095524, "grad_norm": 0.4009062647819519, "learning_rate": 0.00010707518505445182, "loss": 0.9539, "step": 13775 }, { "epoch": 1.589755422242732, "grad_norm": 0.4225723147392273, "learning_rate": 0.00010700823222752618, "loss": 0.9219, "step": 13780 }, { "epoch": 1.5903322565759113, "grad_norm": 0.392956405878067, "learning_rate": 0.00010694127624337826, "loss": 0.9375, "step": 13785 }, { "epoch": 1.5909090909090908, "grad_norm": 0.3861681818962097, "learning_rate": 0.00010687431713217186, "loss": 0.9202, "step": 13790 }, { "epoch": 1.5914859252422704, "grad_norm": 0.39695194363594055, "learning_rate": 0.00010680735492407225, "loss": 0.8907, "step": 13795 }, { "epoch": 1.59206275957545, "grad_norm": 0.4258616864681244, "learning_rate": 0.00010674038964924597, "loss": 0.8806, "step": 13800 }, { "epoch": 1.5926395939086295, "grad_norm": 0.4228580594062805, "learning_rate": 0.00010667342133786102, "loss": 0.9581, "step": 13805 }, { "epoch": 1.593216428241809, "grad_norm": 0.37822139263153076, "learning_rate": 0.00010660645002008678, "loss": 0.8637, "step": 13810 }, { "epoch": 1.5937932625749884, "grad_norm": 0.3744790852069855, "learning_rate": 0.00010653947572609393, "loss": 0.8927, "step": 13815 }, { "epoch": 1.594370096908168, "grad_norm": 0.4195287525653839, "learning_rate": 0.00010647249848605454, "loss": 0.9231, "step": 13820 }, { "epoch": 1.5949469312413473, "grad_norm": 0.41662541031837463, "learning_rate": 0.00010640551833014196, "loss": 0.9706, "step": 13825 }, { "epoch": 1.595523765574527, "grad_norm": 0.4328131377696991, "learning_rate": 0.0001063385352885309, "loss": 1.004, "step": 13830 }, { "epoch": 1.5961005999077065, "grad_norm": 0.3658946752548218, "learning_rate": 0.00010627154939139737, "loss": 0.9176, "step": 13835 }, { "epoch": 1.596677434240886, "grad_norm": 0.4352788031101227, "learning_rate": 0.00010620456066891862, "loss": 0.9649, "step": 13840 }, { "epoch": 1.5972542685740656, "grad_norm": 0.39662760496139526, "learning_rate": 0.00010613756915127319, "loss": 0.968, "step": 13845 }, { "epoch": 1.5978311029072452, "grad_norm": 0.3719061613082886, "learning_rate": 0.00010607057486864091, "loss": 0.935, "step": 13850 }, { "epoch": 1.5984079372404245, "grad_norm": 0.4000123143196106, "learning_rate": 0.00010600357785120285, "loss": 0.925, "step": 13855 }, { "epoch": 1.598984771573604, "grad_norm": 0.3936002254486084, "learning_rate": 0.00010593657812914129, "loss": 0.9394, "step": 13860 }, { "epoch": 1.5995616059067834, "grad_norm": 0.42291298508644104, "learning_rate": 0.00010586957573263968, "loss": 0.9529, "step": 13865 }, { "epoch": 1.600138440239963, "grad_norm": 0.39400723576545715, "learning_rate": 0.00010580257069188279, "loss": 0.9482, "step": 13870 }, { "epoch": 1.6007152745731426, "grad_norm": 0.4413566291332245, "learning_rate": 0.00010573556303705652, "loss": 0.9673, "step": 13875 }, { "epoch": 1.6012921089063221, "grad_norm": 0.39717692136764526, "learning_rate": 0.00010566855279834793, "loss": 0.9588, "step": 13880 }, { "epoch": 1.6018689432395017, "grad_norm": 0.38071373105049133, "learning_rate": 0.00010560154000594524, "loss": 0.9404, "step": 13885 }, { "epoch": 1.6024457775726813, "grad_norm": 0.38415026664733887, "learning_rate": 0.00010553452469003789, "loss": 0.9299, "step": 13890 }, { "epoch": 1.6030226119058606, "grad_norm": 0.3782908022403717, "learning_rate": 0.00010546750688081638, "loss": 0.9524, "step": 13895 }, { "epoch": 1.6035994462390402, "grad_norm": 0.4405461549758911, "learning_rate": 0.00010540048660847239, "loss": 0.9121, "step": 13900 }, { "epoch": 1.6041762805722195, "grad_norm": 0.37163928151130676, "learning_rate": 0.00010533346390319867, "loss": 0.9111, "step": 13905 }, { "epoch": 1.604753114905399, "grad_norm": 0.4153117537498474, "learning_rate": 0.00010526643879518905, "loss": 0.9026, "step": 13910 }, { "epoch": 1.6053299492385786, "grad_norm": 0.39725857973098755, "learning_rate": 0.00010519941131463852, "loss": 0.9864, "step": 13915 }, { "epoch": 1.6059067835717582, "grad_norm": 0.37876853346824646, "learning_rate": 0.00010513238149174304, "loss": 0.9289, "step": 13920 }, { "epoch": 1.6064836179049378, "grad_norm": 0.4003385901451111, "learning_rate": 0.00010506534935669974, "loss": 0.9353, "step": 13925 }, { "epoch": 1.6070604522381173, "grad_norm": 0.4417904317378998, "learning_rate": 0.00010499831493970669, "loss": 0.9107, "step": 13930 }, { "epoch": 1.6076372865712967, "grad_norm": 0.3675759434700012, "learning_rate": 0.00010493127827096298, "loss": 0.9558, "step": 13935 }, { "epoch": 1.6082141209044762, "grad_norm": 0.44555363059043884, "learning_rate": 0.00010486423938066887, "loss": 0.9315, "step": 13940 }, { "epoch": 1.6087909552376556, "grad_norm": 0.36408329010009766, "learning_rate": 0.00010479719829902539, "loss": 0.9076, "step": 13945 }, { "epoch": 1.6093677895708351, "grad_norm": 0.39997726678848267, "learning_rate": 0.00010473015505623477, "loss": 0.9268, "step": 13950 }, { "epoch": 1.6099446239040147, "grad_norm": 0.3789185583591461, "learning_rate": 0.00010466310968250009, "loss": 0.9367, "step": 13955 }, { "epoch": 1.6105214582371943, "grad_norm": 0.41184380650520325, "learning_rate": 0.0001045960622080254, "loss": 0.9795, "step": 13960 }, { "epoch": 1.6110982925703738, "grad_norm": 0.4009269177913666, "learning_rate": 0.00010452901266301574, "loss": 0.9496, "step": 13965 }, { "epoch": 1.6116751269035534, "grad_norm": 0.3926098942756653, "learning_rate": 0.00010446196107767705, "loss": 0.9602, "step": 13970 }, { "epoch": 1.6122519612367328, "grad_norm": 0.3812084197998047, "learning_rate": 0.00010439490748221621, "loss": 0.9553, "step": 13975 }, { "epoch": 1.6128287955699123, "grad_norm": 0.3911786675453186, "learning_rate": 0.000104327851906841, "loss": 0.885, "step": 13980 }, { "epoch": 1.6134056299030919, "grad_norm": 0.38076895475387573, "learning_rate": 0.00010426079438176009, "loss": 0.9734, "step": 13985 }, { "epoch": 1.6139824642362712, "grad_norm": 0.4474939703941345, "learning_rate": 0.00010419373493718298, "loss": 0.9702, "step": 13990 }, { "epoch": 1.6145592985694508, "grad_norm": 0.3928569257259369, "learning_rate": 0.00010412667360332013, "loss": 0.897, "step": 13995 }, { "epoch": 1.6151361329026304, "grad_norm": 0.392778605222702, "learning_rate": 0.00010405961041038279, "loss": 0.9189, "step": 14000 }, { "epoch": 1.61571296723581, "grad_norm": 0.39536142349243164, "learning_rate": 0.00010399254538858303, "loss": 0.9164, "step": 14005 }, { "epoch": 1.6162898015689895, "grad_norm": 0.42655783891677856, "learning_rate": 0.00010392547856813384, "loss": 0.9268, "step": 14010 }, { "epoch": 1.616866635902169, "grad_norm": 0.4114214777946472, "learning_rate": 0.00010385840997924887, "loss": 0.946, "step": 14015 }, { "epoch": 1.6174434702353484, "grad_norm": 0.40836742520332336, "learning_rate": 0.00010379133965214274, "loss": 0.907, "step": 14020 }, { "epoch": 1.618020304568528, "grad_norm": 0.4120984971523285, "learning_rate": 0.00010372426761703067, "loss": 0.9156, "step": 14025 }, { "epoch": 1.6185971389017073, "grad_norm": 0.3959936201572418, "learning_rate": 0.00010365719390412882, "loss": 0.862, "step": 14030 }, { "epoch": 1.6191739732348869, "grad_norm": 0.4160866439342499, "learning_rate": 0.00010359011854365397, "loss": 0.9078, "step": 14035 }, { "epoch": 1.6197508075680664, "grad_norm": 0.39500558376312256, "learning_rate": 0.00010352304156582376, "loss": 0.9374, "step": 14040 }, { "epoch": 1.620327641901246, "grad_norm": 0.39849910140037537, "learning_rate": 0.0001034559630008564, "loss": 0.885, "step": 14045 }, { "epoch": 1.6209044762344256, "grad_norm": 0.4136286973953247, "learning_rate": 0.00010338888287897102, "loss": 0.9146, "step": 14050 }, { "epoch": 1.6214813105676051, "grad_norm": 0.4256516396999359, "learning_rate": 0.0001033218012303873, "loss": 0.9246, "step": 14055 }, { "epoch": 1.6220581449007845, "grad_norm": 0.4077073633670807, "learning_rate": 0.00010325471808532566, "loss": 0.9234, "step": 14060 }, { "epoch": 1.622634979233964, "grad_norm": 0.42370501160621643, "learning_rate": 0.00010318763347400719, "loss": 0.9254, "step": 14065 }, { "epoch": 1.6232118135671434, "grad_norm": 0.3607090711593628, "learning_rate": 0.00010312054742665362, "loss": 0.9417, "step": 14070 }, { "epoch": 1.623788647900323, "grad_norm": 0.4143436551094055, "learning_rate": 0.00010305345997348736, "loss": 0.9438, "step": 14075 }, { "epoch": 1.6243654822335025, "grad_norm": 0.35811230540275574, "learning_rate": 0.00010298637114473144, "loss": 0.9488, "step": 14080 }, { "epoch": 1.624942316566682, "grad_norm": 0.4050202965736389, "learning_rate": 0.0001029192809706095, "loss": 0.9373, "step": 14085 }, { "epoch": 1.6255191508998617, "grad_norm": 0.4086194336414337, "learning_rate": 0.00010285218948134581, "loss": 0.983, "step": 14090 }, { "epoch": 1.6260959852330412, "grad_norm": 0.37604227662086487, "learning_rate": 0.00010278509670716518, "loss": 0.8987, "step": 14095 }, { "epoch": 1.6266728195662206, "grad_norm": 0.4030003845691681, "learning_rate": 0.00010271800267829308, "loss": 0.9356, "step": 14100 }, { "epoch": 1.6272496538994001, "grad_norm": 0.4271502196788788, "learning_rate": 0.00010265090742495546, "loss": 0.953, "step": 14105 }, { "epoch": 1.6278264882325795, "grad_norm": 0.41166070103645325, "learning_rate": 0.00010258381097737892, "loss": 0.9577, "step": 14110 }, { "epoch": 1.628403322565759, "grad_norm": 0.4195760488510132, "learning_rate": 0.00010251671336579048, "loss": 0.9357, "step": 14115 }, { "epoch": 1.6289801568989386, "grad_norm": 0.40065228939056396, "learning_rate": 0.00010244961462041777, "loss": 0.9749, "step": 14120 }, { "epoch": 1.6295569912321182, "grad_norm": 0.40311920642852783, "learning_rate": 0.0001023825147714889, "loss": 0.9107, "step": 14125 }, { "epoch": 1.6301338255652977, "grad_norm": 0.420380562543869, "learning_rate": 0.00010231541384923248, "loss": 0.9697, "step": 14130 }, { "epoch": 1.6307106598984773, "grad_norm": 0.3689268231391907, "learning_rate": 0.00010224831188387765, "loss": 0.9005, "step": 14135 }, { "epoch": 1.6312874942316566, "grad_norm": 0.4116344153881073, "learning_rate": 0.0001021812089056539, "loss": 0.8997, "step": 14140 }, { "epoch": 1.6318643285648362, "grad_norm": 0.37291398644447327, "learning_rate": 0.0001021141049447913, "loss": 0.9649, "step": 14145 }, { "epoch": 1.6324411628980156, "grad_norm": 0.3837934136390686, "learning_rate": 0.0001020470000315203, "loss": 0.9648, "step": 14150 }, { "epoch": 1.6330179972311951, "grad_norm": 0.3765500485897064, "learning_rate": 0.00010197989419607184, "loss": 0.8798, "step": 14155 }, { "epoch": 1.6335948315643747, "grad_norm": 0.36903536319732666, "learning_rate": 0.00010191278746867714, "loss": 0.9312, "step": 14160 }, { "epoch": 1.6341716658975542, "grad_norm": 0.41757214069366455, "learning_rate": 0.00010184567987956797, "loss": 0.9221, "step": 14165 }, { "epoch": 1.6347485002307338, "grad_norm": 0.38226452469825745, "learning_rate": 0.00010177857145897643, "loss": 0.9661, "step": 14170 }, { "epoch": 1.6353253345639134, "grad_norm": 0.3918800354003906, "learning_rate": 0.00010171146223713496, "loss": 0.9104, "step": 14175 }, { "epoch": 1.6359021688970927, "grad_norm": 0.3708522915840149, "learning_rate": 0.00010164435224427646, "loss": 0.9644, "step": 14180 }, { "epoch": 1.6364790032302723, "grad_norm": 0.3736863434314728, "learning_rate": 0.00010157724151063406, "loss": 0.9076, "step": 14185 }, { "epoch": 1.6370558375634516, "grad_norm": 0.38738158345222473, "learning_rate": 0.00010151013006644128, "loss": 0.9173, "step": 14190 }, { "epoch": 1.6376326718966312, "grad_norm": 0.41743552684783936, "learning_rate": 0.00010144301794193197, "loss": 0.9011, "step": 14195 }, { "epoch": 1.6382095062298108, "grad_norm": 0.3966219425201416, "learning_rate": 0.00010137590516734026, "loss": 0.9458, "step": 14200 }, { "epoch": 1.6387863405629903, "grad_norm": 0.3808024525642395, "learning_rate": 0.00010130879177290061, "loss": 0.9484, "step": 14205 }, { "epoch": 1.63936317489617, "grad_norm": 0.3857729434967041, "learning_rate": 0.00010124167778884767, "loss": 0.8844, "step": 14210 }, { "epoch": 1.6399400092293495, "grad_norm": 0.3597935438156128, "learning_rate": 0.00010117456324541652, "loss": 0.9238, "step": 14215 }, { "epoch": 1.6405168435625288, "grad_norm": 0.40039053559303284, "learning_rate": 0.00010110744817284232, "loss": 0.9245, "step": 14220 }, { "epoch": 1.6410936778957084, "grad_norm": 0.4076060354709625, "learning_rate": 0.00010104033260136056, "loss": 0.9351, "step": 14225 }, { "epoch": 1.6416705122288877, "grad_norm": 0.41253358125686646, "learning_rate": 0.00010097321656120695, "loss": 0.8715, "step": 14230 }, { "epoch": 1.6422473465620673, "grad_norm": 0.45987001061439514, "learning_rate": 0.00010090610008261738, "loss": 0.9585, "step": 14235 }, { "epoch": 1.6428241808952468, "grad_norm": 0.39234888553619385, "learning_rate": 0.00010083898319582795, "loss": 0.8901, "step": 14240 }, { "epoch": 1.6434010152284264, "grad_norm": 0.41099515557289124, "learning_rate": 0.00010077186593107495, "loss": 0.9179, "step": 14245 }, { "epoch": 1.643977849561606, "grad_norm": 0.43246176838874817, "learning_rate": 0.00010070474831859486, "loss": 0.9627, "step": 14250 }, { "epoch": 1.6445546838947855, "grad_norm": 0.37442895770072937, "learning_rate": 0.00010063763038862428, "loss": 0.9208, "step": 14255 }, { "epoch": 1.6451315182279649, "grad_norm": 0.38451138138771057, "learning_rate": 0.00010057051217139997, "loss": 0.9064, "step": 14260 }, { "epoch": 1.6457083525611444, "grad_norm": 0.3907986879348755, "learning_rate": 0.0001005033936971588, "loss": 0.9274, "step": 14265 }, { "epoch": 1.6462851868943238, "grad_norm": 0.41767701506614685, "learning_rate": 0.00010043627499613778, "loss": 0.8953, "step": 14270 }, { "epoch": 1.6468620212275034, "grad_norm": 0.42702117562294006, "learning_rate": 0.00010036915609857406, "loss": 0.9665, "step": 14275 }, { "epoch": 1.647438855560683, "grad_norm": 0.5105391144752502, "learning_rate": 0.00010030203703470477, "loss": 0.9829, "step": 14280 }, { "epoch": 1.6480156898938625, "grad_norm": 0.40759360790252686, "learning_rate": 0.00010023491783476724, "loss": 0.9246, "step": 14285 }, { "epoch": 1.648592524227042, "grad_norm": 0.445537269115448, "learning_rate": 0.00010016779852899873, "loss": 0.9676, "step": 14290 }, { "epoch": 1.6491693585602216, "grad_norm": 0.3821800947189331, "learning_rate": 0.00010010067914763668, "loss": 0.9158, "step": 14295 }, { "epoch": 1.649746192893401, "grad_norm": 0.39797648787498474, "learning_rate": 0.00010003355972091848, "loss": 0.9002, "step": 14300 }, { "epoch": 1.6503230272265805, "grad_norm": 0.39020875096321106, "learning_rate": 9.996644027908154e-05, "loss": 0.9021, "step": 14305 }, { "epoch": 1.6508998615597599, "grad_norm": 0.4043421745300293, "learning_rate": 9.989932085236334e-05, "loss": 0.9118, "step": 14310 }, { "epoch": 1.6514766958929394, "grad_norm": 0.39409375190734863, "learning_rate": 9.983220147100129e-05, "loss": 0.9214, "step": 14315 }, { "epoch": 1.652053530226119, "grad_norm": 0.4032192826271057, "learning_rate": 9.976508216523278e-05, "loss": 0.9188, "step": 14320 }, { "epoch": 1.6526303645592986, "grad_norm": 0.3894086182117462, "learning_rate": 9.969796296529525e-05, "loss": 1.036, "step": 14325 }, { "epoch": 1.6532071988924781, "grad_norm": 0.382533460855484, "learning_rate": 9.963084390142595e-05, "loss": 0.9062, "step": 14330 }, { "epoch": 1.6537840332256577, "grad_norm": 0.3999064564704895, "learning_rate": 9.956372500386222e-05, "loss": 0.9372, "step": 14335 }, { "epoch": 1.6543608675588373, "grad_norm": 0.4040856957435608, "learning_rate": 9.949660630284122e-05, "loss": 0.8744, "step": 14340 }, { "epoch": 1.6549377018920166, "grad_norm": 0.4170020818710327, "learning_rate": 9.942948782860008e-05, "loss": 0.9355, "step": 14345 }, { "epoch": 1.6555145362251962, "grad_norm": 0.39724001288414, "learning_rate": 9.936236961137575e-05, "loss": 0.9466, "step": 14350 }, { "epoch": 1.6560913705583755, "grad_norm": 0.4035636782646179, "learning_rate": 9.929525168140516e-05, "loss": 0.9407, "step": 14355 }, { "epoch": 1.656668204891555, "grad_norm": 0.40529632568359375, "learning_rate": 9.922813406892508e-05, "loss": 0.9191, "step": 14360 }, { "epoch": 1.6572450392247347, "grad_norm": 0.4170621633529663, "learning_rate": 9.916101680417208e-05, "loss": 0.9501, "step": 14365 }, { "epoch": 1.6578218735579142, "grad_norm": 0.367270290851593, "learning_rate": 9.909389991738263e-05, "loss": 0.9359, "step": 14370 }, { "epoch": 1.6583987078910938, "grad_norm": 0.36950138211250305, "learning_rate": 9.902678343879308e-05, "loss": 0.9433, "step": 14375 }, { "epoch": 1.6589755422242733, "grad_norm": 0.36553606390953064, "learning_rate": 9.895966739863947e-05, "loss": 0.8903, "step": 14380 }, { "epoch": 1.6595523765574527, "grad_norm": 0.379629522562027, "learning_rate": 9.889255182715769e-05, "loss": 0.878, "step": 14385 }, { "epoch": 1.6601292108906323, "grad_norm": 0.4937252104282379, "learning_rate": 9.88254367545835e-05, "loss": 0.9175, "step": 14390 }, { "epoch": 1.6607060452238116, "grad_norm": 0.4445362985134125, "learning_rate": 9.875832221115234e-05, "loss": 1.0388, "step": 14395 }, { "epoch": 1.6612828795569912, "grad_norm": 0.3727785050868988, "learning_rate": 9.869120822709946e-05, "loss": 0.9752, "step": 14400 }, { "epoch": 1.6618597138901707, "grad_norm": 0.4096865653991699, "learning_rate": 9.862409483265975e-05, "loss": 0.9334, "step": 14405 }, { "epoch": 1.6624365482233503, "grad_norm": 0.4165703058242798, "learning_rate": 9.855698205806805e-05, "loss": 0.9634, "step": 14410 }, { "epoch": 1.6630133825565299, "grad_norm": 0.4018830955028534, "learning_rate": 9.848986993355877e-05, "loss": 0.9348, "step": 14415 }, { "epoch": 1.6635902168897094, "grad_norm": 0.3881585896015167, "learning_rate": 9.842275848936595e-05, "loss": 0.9336, "step": 14420 }, { "epoch": 1.6641670512228888, "grad_norm": 0.39819440245628357, "learning_rate": 9.835564775572356e-05, "loss": 0.9359, "step": 14425 }, { "epoch": 1.6647438855560683, "grad_norm": 0.45221155881881714, "learning_rate": 9.828853776286505e-05, "loss": 0.9702, "step": 14430 }, { "epoch": 1.6653207198892477, "grad_norm": 0.3747832179069519, "learning_rate": 9.822142854102362e-05, "loss": 0.91, "step": 14435 }, { "epoch": 1.6658975542224272, "grad_norm": 0.4133490324020386, "learning_rate": 9.815432012043204e-05, "loss": 0.9482, "step": 14440 }, { "epoch": 1.6664743885556068, "grad_norm": 0.37815743684768677, "learning_rate": 9.808721253132289e-05, "loss": 0.9418, "step": 14445 }, { "epoch": 1.6670512228887864, "grad_norm": 0.3955732583999634, "learning_rate": 9.802010580392821e-05, "loss": 0.9085, "step": 14450 }, { "epoch": 1.667628057221966, "grad_norm": 0.3749368488788605, "learning_rate": 9.79529999684797e-05, "loss": 0.9217, "step": 14455 }, { "epoch": 1.6682048915551455, "grad_norm": 0.3877532184123993, "learning_rate": 9.78858950552087e-05, "loss": 0.9568, "step": 14460 }, { "epoch": 1.6687817258883249, "grad_norm": 0.3820559084415436, "learning_rate": 9.781879109434614e-05, "loss": 0.8994, "step": 14465 }, { "epoch": 1.6693585602215044, "grad_norm": 0.38123568892478943, "learning_rate": 9.77516881161224e-05, "loss": 0.9013, "step": 14470 }, { "epoch": 1.6699353945546838, "grad_norm": 0.41897571086883545, "learning_rate": 9.768458615076751e-05, "loss": 0.9194, "step": 14475 }, { "epoch": 1.6705122288878633, "grad_norm": 0.39537757635116577, "learning_rate": 9.761748522851112e-05, "loss": 0.9243, "step": 14480 }, { "epoch": 1.671089063221043, "grad_norm": 0.41747602820396423, "learning_rate": 9.755038537958226e-05, "loss": 0.9695, "step": 14485 }, { "epoch": 1.6716658975542225, "grad_norm": 0.41853514313697815, "learning_rate": 9.748328663420952e-05, "loss": 0.8954, "step": 14490 }, { "epoch": 1.672242731887402, "grad_norm": 0.4038637578487396, "learning_rate": 9.74161890226211e-05, "loss": 0.907, "step": 14495 }, { "epoch": 1.6728195662205816, "grad_norm": 0.3900943994522095, "learning_rate": 9.734909257504455e-05, "loss": 0.952, "step": 14500 }, { "epoch": 1.673396400553761, "grad_norm": 0.3971070349216461, "learning_rate": 9.728199732170696e-05, "loss": 0.9212, "step": 14505 }, { "epoch": 1.6739732348869405, "grad_norm": 0.4064873456954956, "learning_rate": 9.721490329283483e-05, "loss": 0.9615, "step": 14510 }, { "epoch": 1.6745500692201198, "grad_norm": 0.39063361287117004, "learning_rate": 9.714781051865421e-05, "loss": 0.9701, "step": 14515 }, { "epoch": 1.6751269035532994, "grad_norm": 0.38286256790161133, "learning_rate": 9.708071902939054e-05, "loss": 0.9138, "step": 14520 }, { "epoch": 1.675703737886479, "grad_norm": 0.3789064586162567, "learning_rate": 9.701362885526856e-05, "loss": 0.9297, "step": 14525 }, { "epoch": 1.6762805722196585, "grad_norm": 0.4091351628303528, "learning_rate": 9.694654002651266e-05, "loss": 0.971, "step": 14530 }, { "epoch": 1.676857406552838, "grad_norm": 0.38930168747901917, "learning_rate": 9.687945257334641e-05, "loss": 0.9607, "step": 14535 }, { "epoch": 1.6774342408860177, "grad_norm": 0.39753562211990356, "learning_rate": 9.681236652599286e-05, "loss": 0.9611, "step": 14540 }, { "epoch": 1.678011075219197, "grad_norm": 0.39889010787010193, "learning_rate": 9.674528191467434e-05, "loss": 0.9181, "step": 14545 }, { "epoch": 1.6785879095523766, "grad_norm": 0.40444427728652954, "learning_rate": 9.667819876961272e-05, "loss": 0.9109, "step": 14550 }, { "epoch": 1.679164743885556, "grad_norm": 0.41998520493507385, "learning_rate": 9.661111712102901e-05, "loss": 0.9045, "step": 14555 }, { "epoch": 1.6797415782187355, "grad_norm": 0.43666961789131165, "learning_rate": 9.654403699914363e-05, "loss": 0.8748, "step": 14560 }, { "epoch": 1.680318412551915, "grad_norm": 0.38630232214927673, "learning_rate": 9.647695843417628e-05, "loss": 0.9359, "step": 14565 }, { "epoch": 1.6808952468850946, "grad_norm": 0.4208744466304779, "learning_rate": 9.640988145634606e-05, "loss": 0.9247, "step": 14570 }, { "epoch": 1.6814720812182742, "grad_norm": 0.40704822540283203, "learning_rate": 9.634280609587123e-05, "loss": 0.955, "step": 14575 }, { "epoch": 1.6820489155514537, "grad_norm": 0.43808555603027344, "learning_rate": 9.627573238296933e-05, "loss": 0.9144, "step": 14580 }, { "epoch": 1.682625749884633, "grad_norm": 0.3939140737056732, "learning_rate": 9.620866034785728e-05, "loss": 0.9081, "step": 14585 }, { "epoch": 1.6832025842178127, "grad_norm": 0.38371533155441284, "learning_rate": 9.614159002075114e-05, "loss": 0.9486, "step": 14590 }, { "epoch": 1.683779418550992, "grad_norm": 0.3783860504627228, "learning_rate": 9.60745214318662e-05, "loss": 0.9226, "step": 14595 }, { "epoch": 1.6843562528841716, "grad_norm": 0.40141963958740234, "learning_rate": 9.600745461141696e-05, "loss": 0.9796, "step": 14600 }, { "epoch": 1.6849330872173511, "grad_norm": 0.4136003851890564, "learning_rate": 9.594038958961725e-05, "loss": 0.9096, "step": 14605 }, { "epoch": 1.6855099215505307, "grad_norm": 0.36876803636550903, "learning_rate": 9.58733263966799e-05, "loss": 0.8963, "step": 14610 }, { "epoch": 1.6860867558837103, "grad_norm": 0.3809376657009125, "learning_rate": 9.580626506281703e-05, "loss": 0.9696, "step": 14615 }, { "epoch": 1.6866635902168898, "grad_norm": 0.40277227759361267, "learning_rate": 9.573920561823995e-05, "loss": 0.9342, "step": 14620 }, { "epoch": 1.6872404245500692, "grad_norm": 0.43503326177597046, "learning_rate": 9.567214809315903e-05, "loss": 0.9483, "step": 14625 }, { "epoch": 1.6878172588832487, "grad_norm": 0.39147305488586426, "learning_rate": 9.560509251778383e-05, "loss": 0.9396, "step": 14630 }, { "epoch": 1.688394093216428, "grad_norm": 0.4333549439907074, "learning_rate": 9.553803892232296e-05, "loss": 0.9104, "step": 14635 }, { "epoch": 1.6889709275496076, "grad_norm": 0.39406928420066833, "learning_rate": 9.547098733698428e-05, "loss": 0.9478, "step": 14640 }, { "epoch": 1.6895477618827872, "grad_norm": 0.3725603222846985, "learning_rate": 9.540393779197464e-05, "loss": 0.8539, "step": 14645 }, { "epoch": 1.6901245962159668, "grad_norm": 0.37901946902275085, "learning_rate": 9.533689031749991e-05, "loss": 0.9301, "step": 14650 }, { "epoch": 1.6907014305491463, "grad_norm": 0.3747023642063141, "learning_rate": 9.526984494376524e-05, "loss": 0.9269, "step": 14655 }, { "epoch": 1.691278264882326, "grad_norm": 0.37854355573654175, "learning_rate": 9.520280170097462e-05, "loss": 0.867, "step": 14660 }, { "epoch": 1.6918550992155053, "grad_norm": 0.38338419795036316, "learning_rate": 9.513576061933118e-05, "loss": 0.9194, "step": 14665 }, { "epoch": 1.6924319335486848, "grad_norm": 0.3833276033401489, "learning_rate": 9.5068721729037e-05, "loss": 0.9376, "step": 14670 }, { "epoch": 1.6930087678818642, "grad_norm": 0.37685397267341614, "learning_rate": 9.500168506029334e-05, "loss": 0.9075, "step": 14675 }, { "epoch": 1.6935856022150437, "grad_norm": 0.4300825595855713, "learning_rate": 9.493465064330029e-05, "loss": 0.9386, "step": 14680 }, { "epoch": 1.6941624365482233, "grad_norm": 0.3908791244029999, "learning_rate": 9.486761850825694e-05, "loss": 0.9543, "step": 14685 }, { "epoch": 1.6947392708814029, "grad_norm": 0.3841436803340912, "learning_rate": 9.480058868536149e-05, "loss": 0.9034, "step": 14690 }, { "epoch": 1.6953161052145824, "grad_norm": 0.4477061927318573, "learning_rate": 9.473356120481098e-05, "loss": 0.9531, "step": 14695 }, { "epoch": 1.695892939547762, "grad_norm": 0.38848182559013367, "learning_rate": 9.466653609680137e-05, "loss": 0.9192, "step": 14700 }, { "epoch": 1.6964697738809416, "grad_norm": 0.42426785826683044, "learning_rate": 9.459951339152762e-05, "loss": 0.8993, "step": 14705 }, { "epoch": 1.697046608214121, "grad_norm": 0.4407934844493866, "learning_rate": 9.453249311918362e-05, "loss": 0.9737, "step": 14710 }, { "epoch": 1.6976234425473005, "grad_norm": 0.4233091175556183, "learning_rate": 9.446547530996214e-05, "loss": 0.9115, "step": 14715 }, { "epoch": 1.6982002768804798, "grad_norm": 0.38162070512771606, "learning_rate": 9.439845999405478e-05, "loss": 0.9048, "step": 14720 }, { "epoch": 1.6987771112136594, "grad_norm": 0.44918137788772583, "learning_rate": 9.43314472016521e-05, "loss": 0.8878, "step": 14725 }, { "epoch": 1.699353945546839, "grad_norm": 0.40736523270606995, "learning_rate": 9.426443696294351e-05, "loss": 0.9339, "step": 14730 }, { "epoch": 1.6999307798800185, "grad_norm": 0.3635164201259613, "learning_rate": 9.419742930811722e-05, "loss": 0.9033, "step": 14735 }, { "epoch": 1.700507614213198, "grad_norm": 0.3836980164051056, "learning_rate": 9.413042426736032e-05, "loss": 0.9218, "step": 14740 }, { "epoch": 1.7010844485463776, "grad_norm": 0.3805466294288635, "learning_rate": 9.406342187085875e-05, "loss": 0.8939, "step": 14745 }, { "epoch": 1.701661282879557, "grad_norm": 0.3768046200275421, "learning_rate": 9.399642214879717e-05, "loss": 0.9541, "step": 14750 }, { "epoch": 1.7022381172127365, "grad_norm": 0.42572706937789917, "learning_rate": 9.39294251313591e-05, "loss": 0.9927, "step": 14755 }, { "epoch": 1.702814951545916, "grad_norm": 0.4072059392929077, "learning_rate": 9.386243084872682e-05, "loss": 0.9676, "step": 14760 }, { "epoch": 1.7033917858790955, "grad_norm": 0.39494749903678894, "learning_rate": 9.379543933108141e-05, "loss": 0.9201, "step": 14765 }, { "epoch": 1.703968620212275, "grad_norm": 0.42822760343551636, "learning_rate": 9.372845060860264e-05, "loss": 0.9036, "step": 14770 }, { "epoch": 1.7045454545454546, "grad_norm": 0.3813650906085968, "learning_rate": 9.36614647114691e-05, "loss": 0.8976, "step": 14775 }, { "epoch": 1.7051222888786342, "grad_norm": 0.4004404544830322, "learning_rate": 9.359448166985806e-05, "loss": 0.9066, "step": 14780 }, { "epoch": 1.7056991232118137, "grad_norm": 0.39617690443992615, "learning_rate": 9.35275015139455e-05, "loss": 0.9306, "step": 14785 }, { "epoch": 1.706275957544993, "grad_norm": 0.3876945674419403, "learning_rate": 9.34605242739061e-05, "loss": 0.9129, "step": 14790 }, { "epoch": 1.7068527918781726, "grad_norm": 0.3834667205810547, "learning_rate": 9.339354997991325e-05, "loss": 0.9551, "step": 14795 }, { "epoch": 1.707429626211352, "grad_norm": 0.3988478481769562, "learning_rate": 9.3326578662139e-05, "loss": 0.9172, "step": 14800 }, { "epoch": 1.7080064605445315, "grad_norm": 0.40340641140937805, "learning_rate": 9.325961035075405e-05, "loss": 0.9626, "step": 14805 }, { "epoch": 1.708583294877711, "grad_norm": 0.44429507851600647, "learning_rate": 9.319264507592776e-05, "loss": 0.9331, "step": 14810 }, { "epoch": 1.7091601292108907, "grad_norm": 0.41271674633026123, "learning_rate": 9.312568286782815e-05, "loss": 0.9173, "step": 14815 }, { "epoch": 1.7097369635440702, "grad_norm": 0.3779640793800354, "learning_rate": 9.305872375662176e-05, "loss": 0.9004, "step": 14820 }, { "epoch": 1.7103137978772498, "grad_norm": 0.40028929710388184, "learning_rate": 9.299176777247386e-05, "loss": 0.8877, "step": 14825 }, { "epoch": 1.7108906322104291, "grad_norm": 0.3729310631752014, "learning_rate": 9.29248149455482e-05, "loss": 0.9557, "step": 14830 }, { "epoch": 1.7114674665436087, "grad_norm": 0.37121349573135376, "learning_rate": 9.285786530600718e-05, "loss": 0.9302, "step": 14835 }, { "epoch": 1.712044300876788, "grad_norm": 0.36666813492774963, "learning_rate": 9.279091888401179e-05, "loss": 0.9732, "step": 14840 }, { "epoch": 1.7126211352099676, "grad_norm": 0.4723244309425354, "learning_rate": 9.272397570972145e-05, "loss": 1.0227, "step": 14845 }, { "epoch": 1.7131979695431472, "grad_norm": 0.44618186354637146, "learning_rate": 9.26570358132942e-05, "loss": 0.946, "step": 14850 }, { "epoch": 1.7137748038763267, "grad_norm": 0.3813045024871826, "learning_rate": 9.259009922488665e-05, "loss": 0.9072, "step": 14855 }, { "epoch": 1.7143516382095063, "grad_norm": 0.40963122248649597, "learning_rate": 9.252316597465384e-05, "loss": 0.9181, "step": 14860 }, { "epoch": 1.7149284725426859, "grad_norm": 0.43289676308631897, "learning_rate": 9.245623609274928e-05, "loss": 0.9365, "step": 14865 }, { "epoch": 1.7155053068758652, "grad_norm": 0.4129135310649872, "learning_rate": 9.238930960932506e-05, "loss": 0.9466, "step": 14870 }, { "epoch": 1.7160821412090448, "grad_norm": 0.3834366202354431, "learning_rate": 9.23223865545317e-05, "loss": 0.9291, "step": 14875 }, { "epoch": 1.7166589755422241, "grad_norm": 0.380035400390625, "learning_rate": 9.225546695851815e-05, "loss": 0.9214, "step": 14880 }, { "epoch": 1.7172358098754037, "grad_norm": 0.406563937664032, "learning_rate": 9.218855085143176e-05, "loss": 0.9089, "step": 14885 }, { "epoch": 1.7178126442085833, "grad_norm": 0.40587136149406433, "learning_rate": 9.212163826341847e-05, "loss": 0.9524, "step": 14890 }, { "epoch": 1.7183894785417628, "grad_norm": 0.39258936047554016, "learning_rate": 9.20547292246225e-05, "loss": 0.9747, "step": 14895 }, { "epoch": 1.7189663128749424, "grad_norm": 0.3985291123390198, "learning_rate": 9.198782376518642e-05, "loss": 0.9987, "step": 14900 }, { "epoch": 1.719543147208122, "grad_norm": 0.3905879557132721, "learning_rate": 9.192092191525139e-05, "loss": 0.9427, "step": 14905 }, { "epoch": 1.7201199815413013, "grad_norm": 0.42461061477661133, "learning_rate": 9.185402370495677e-05, "loss": 0.9667, "step": 14910 }, { "epoch": 1.7206968158744809, "grad_norm": 0.3873096704483032, "learning_rate": 9.178712916444033e-05, "loss": 0.9011, "step": 14915 }, { "epoch": 1.7212736502076602, "grad_norm": 0.4047510027885437, "learning_rate": 9.172023832383816e-05, "loss": 0.9815, "step": 14920 }, { "epoch": 1.7218504845408398, "grad_norm": 0.4329635500907898, "learning_rate": 9.165335121328477e-05, "loss": 0.9505, "step": 14925 }, { "epoch": 1.7224273188740193, "grad_norm": 0.37576204538345337, "learning_rate": 9.158646786291292e-05, "loss": 0.9236, "step": 14930 }, { "epoch": 1.723004153207199, "grad_norm": 0.3973287343978882, "learning_rate": 9.151958830285366e-05, "loss": 0.8911, "step": 14935 }, { "epoch": 1.7235809875403785, "grad_norm": 0.4271901249885559, "learning_rate": 9.14527125632364e-05, "loss": 0.982, "step": 14940 }, { "epoch": 1.724157821873558, "grad_norm": 0.43565037846565247, "learning_rate": 9.138584067418878e-05, "loss": 0.9617, "step": 14945 }, { "epoch": 1.7247346562067374, "grad_norm": 0.4059095084667206, "learning_rate": 9.131897266583672e-05, "loss": 0.8999, "step": 14950 }, { "epoch": 1.725311490539917, "grad_norm": 0.437881201505661, "learning_rate": 9.125210856830433e-05, "loss": 0.9047, "step": 14955 }, { "epoch": 1.7258883248730963, "grad_norm": 0.3824065029621124, "learning_rate": 9.118524841171409e-05, "loss": 0.9533, "step": 14960 }, { "epoch": 1.7264651592062759, "grad_norm": 0.39069101214408875, "learning_rate": 9.111839222618663e-05, "loss": 0.9776, "step": 14965 }, { "epoch": 1.7270419935394554, "grad_norm": 0.43811115622520447, "learning_rate": 9.105154004184071e-05, "loss": 0.8873, "step": 14970 }, { "epoch": 1.727618827872635, "grad_norm": 0.40116867423057556, "learning_rate": 9.098469188879349e-05, "loss": 0.9287, "step": 14975 }, { "epoch": 1.7281956622058146, "grad_norm": 0.3768905699253082, "learning_rate": 9.091784779716011e-05, "loss": 0.8807, "step": 14980 }, { "epoch": 1.7287724965389941, "grad_norm": 0.38657596707344055, "learning_rate": 9.085100779705398e-05, "loss": 0.9756, "step": 14985 }, { "epoch": 1.7293493308721735, "grad_norm": 0.40648481249809265, "learning_rate": 9.078417191858662e-05, "loss": 0.8996, "step": 14990 }, { "epoch": 1.729926165205353, "grad_norm": 0.37395209074020386, "learning_rate": 9.071734019186778e-05, "loss": 0.8956, "step": 14995 }, { "epoch": 1.7305029995385324, "grad_norm": 0.4580058455467224, "learning_rate": 9.065051264700527e-05, "loss": 0.9537, "step": 15000 }, { "epoch": 1.731079833871712, "grad_norm": 0.3521862030029297, "learning_rate": 9.058368931410498e-05, "loss": 0.9096, "step": 15005 }, { "epoch": 1.7316566682048915, "grad_norm": 0.4156653881072998, "learning_rate": 9.051687022327103e-05, "loss": 0.8714, "step": 15010 }, { "epoch": 1.732233502538071, "grad_norm": 0.37951210141181946, "learning_rate": 9.045005540460552e-05, "loss": 0.9564, "step": 15015 }, { "epoch": 1.7328103368712506, "grad_norm": 0.41788360476493835, "learning_rate": 9.038324488820863e-05, "loss": 0.9462, "step": 15020 }, { "epoch": 1.7333871712044302, "grad_norm": 0.45095932483673096, "learning_rate": 9.031643870417861e-05, "loss": 0.9131, "step": 15025 }, { "epoch": 1.7339640055376098, "grad_norm": 0.3977847099304199, "learning_rate": 9.024963688261186e-05, "loss": 0.9116, "step": 15030 }, { "epoch": 1.734540839870789, "grad_norm": 0.4043447971343994, "learning_rate": 9.018283945360266e-05, "loss": 0.9271, "step": 15035 }, { "epoch": 1.7351176742039687, "grad_norm": 0.44181013107299805, "learning_rate": 9.011604644724342e-05, "loss": 0.9155, "step": 15040 }, { "epoch": 1.735694508537148, "grad_norm": 0.4287402927875519, "learning_rate": 9.004925789362446e-05, "loss": 0.9138, "step": 15045 }, { "epoch": 1.7362713428703276, "grad_norm": 0.3614751994609833, "learning_rate": 8.998247382283423e-05, "loss": 0.9452, "step": 15050 }, { "epoch": 1.7368481772035071, "grad_norm": 0.41066932678222656, "learning_rate": 8.991569426495905e-05, "loss": 0.898, "step": 15055 }, { "epoch": 1.7374250115366867, "grad_norm": 0.457390159368515, "learning_rate": 8.984891925008321e-05, "loss": 0.8802, "step": 15060 }, { "epoch": 1.7380018458698663, "grad_norm": 0.35870426893234253, "learning_rate": 8.978214880828903e-05, "loss": 0.8995, "step": 15065 }, { "epoch": 1.7385786802030458, "grad_norm": 0.39848119020462036, "learning_rate": 8.971538296965674e-05, "loss": 0.9064, "step": 15070 }, { "epoch": 1.7391555145362252, "grad_norm": 0.38636332750320435, "learning_rate": 8.964862176426443e-05, "loss": 0.953, "step": 15075 }, { "epoch": 1.7397323488694048, "grad_norm": 0.41869068145751953, "learning_rate": 8.958186522218813e-05, "loss": 0.9417, "step": 15080 }, { "epoch": 1.740309183202584, "grad_norm": 0.3965960144996643, "learning_rate": 8.951511337350188e-05, "loss": 0.9319, "step": 15085 }, { "epoch": 1.7408860175357637, "grad_norm": 0.38021016120910645, "learning_rate": 8.944836624827748e-05, "loss": 0.9162, "step": 15090 }, { "epoch": 1.7414628518689432, "grad_norm": 0.41196557879447937, "learning_rate": 8.938162387658458e-05, "loss": 0.9287, "step": 15095 }, { "epoch": 1.7420396862021228, "grad_norm": 0.3578120172023773, "learning_rate": 8.931488628849085e-05, "loss": 0.96, "step": 15100 }, { "epoch": 1.7426165205353024, "grad_norm": 0.4296330213546753, "learning_rate": 8.924815351406163e-05, "loss": 0.9434, "step": 15105 }, { "epoch": 1.743193354868482, "grad_norm": 0.3744300305843353, "learning_rate": 8.918142558336022e-05, "loss": 0.9601, "step": 15110 }, { "epoch": 1.7437701892016613, "grad_norm": 0.41023698449134827, "learning_rate": 8.91147025264476e-05, "loss": 0.9312, "step": 15115 }, { "epoch": 1.7443470235348408, "grad_norm": 0.40638571977615356, "learning_rate": 8.904798437338272e-05, "loss": 0.9658, "step": 15120 }, { "epoch": 1.7449238578680202, "grad_norm": 0.45808759331703186, "learning_rate": 8.898127115422221e-05, "loss": 0.9522, "step": 15125 }, { "epoch": 1.7455006922011997, "grad_norm": 0.4006982147693634, "learning_rate": 8.891456289902046e-05, "loss": 0.8894, "step": 15130 }, { "epoch": 1.7460775265343793, "grad_norm": 0.3888961970806122, "learning_rate": 8.884785963782975e-05, "loss": 0.9107, "step": 15135 }, { "epoch": 1.7466543608675589, "grad_norm": 0.4383211135864258, "learning_rate": 8.878116140069999e-05, "loss": 0.9225, "step": 15140 }, { "epoch": 1.7472311952007384, "grad_norm": 0.38487568497657776, "learning_rate": 8.871446821767888e-05, "loss": 0.9299, "step": 15145 }, { "epoch": 1.747808029533918, "grad_norm": 0.3626287877559662, "learning_rate": 8.864778011881175e-05, "loss": 0.9082, "step": 15150 }, { "epoch": 1.7483848638670973, "grad_norm": 0.4096532166004181, "learning_rate": 8.858109713414185e-05, "loss": 0.9614, "step": 15155 }, { "epoch": 1.748961698200277, "grad_norm": 0.3614518344402313, "learning_rate": 8.85144192937099e-05, "loss": 0.9248, "step": 15160 }, { "epoch": 1.7495385325334563, "grad_norm": 0.3788556754589081, "learning_rate": 8.84477466275544e-05, "loss": 0.8654, "step": 15165 }, { "epoch": 1.7501153668666358, "grad_norm": 0.3660648465156555, "learning_rate": 8.838107916571156e-05, "loss": 0.9343, "step": 15170 }, { "epoch": 1.7506922011998154, "grad_norm": 0.4170313775539398, "learning_rate": 8.831441693821519e-05, "loss": 0.948, "step": 15175 }, { "epoch": 1.751269035532995, "grad_norm": 0.3601408004760742, "learning_rate": 8.824775997509675e-05, "loss": 0.9301, "step": 15180 }, { "epoch": 1.7518458698661745, "grad_norm": 0.38346150517463684, "learning_rate": 8.818110830638528e-05, "loss": 0.9182, "step": 15185 }, { "epoch": 1.752422704199354, "grad_norm": 0.3932247757911682, "learning_rate": 8.811446196210757e-05, "loss": 0.9271, "step": 15190 }, { "epoch": 1.7529995385325334, "grad_norm": 0.40483179688453674, "learning_rate": 8.80478209722879e-05, "loss": 0.975, "step": 15195 }, { "epoch": 1.753576372865713, "grad_norm": 0.40138062834739685, "learning_rate": 8.798118536694813e-05, "loss": 0.9006, "step": 15200 }, { "epoch": 1.7541532071988923, "grad_norm": 0.4747352600097656, "learning_rate": 8.791455517610776e-05, "loss": 0.9065, "step": 15205 }, { "epoch": 1.754730041532072, "grad_norm": 0.4437057375907898, "learning_rate": 8.784793042978384e-05, "loss": 0.9266, "step": 15210 }, { "epoch": 1.7553068758652515, "grad_norm": 0.4352372884750366, "learning_rate": 8.778131115799093e-05, "loss": 1.0089, "step": 15215 }, { "epoch": 1.755883710198431, "grad_norm": 0.4017319083213806, "learning_rate": 8.771469739074107e-05, "loss": 0.9516, "step": 15220 }, { "epoch": 1.7564605445316106, "grad_norm": 0.4051602780818939, "learning_rate": 8.764808915804401e-05, "loss": 0.9162, "step": 15225 }, { "epoch": 1.7570373788647902, "grad_norm": 0.41565215587615967, "learning_rate": 8.758148648990684e-05, "loss": 0.9296, "step": 15230 }, { "epoch": 1.7576142131979695, "grad_norm": 0.4095606505870819, "learning_rate": 8.751488941633416e-05, "loss": 0.9492, "step": 15235 }, { "epoch": 1.758191047531149, "grad_norm": 0.36888161301612854, "learning_rate": 8.744829796732812e-05, "loss": 0.9687, "step": 15240 }, { "epoch": 1.7587678818643284, "grad_norm": 0.3901865482330322, "learning_rate": 8.738171217288831e-05, "loss": 0.8896, "step": 15245 }, { "epoch": 1.759344716197508, "grad_norm": 0.39652901887893677, "learning_rate": 8.73151320630117e-05, "loss": 0.9804, "step": 15250 }, { "epoch": 1.7599215505306876, "grad_norm": 0.4261581301689148, "learning_rate": 8.724855766769282e-05, "loss": 0.9291, "step": 15255 }, { "epoch": 1.7604983848638671, "grad_norm": 0.4231615960597992, "learning_rate": 8.718198901692354e-05, "loss": 0.8976, "step": 15260 }, { "epoch": 1.7610752191970467, "grad_norm": 0.38978713750839233, "learning_rate": 8.711542614069316e-05, "loss": 0.9492, "step": 15265 }, { "epoch": 1.7616520535302262, "grad_norm": 0.4083541929721832, "learning_rate": 8.70488690689884e-05, "loss": 0.9148, "step": 15270 }, { "epoch": 1.7622288878634056, "grad_norm": 0.3902876377105713, "learning_rate": 8.698231783179334e-05, "loss": 0.9179, "step": 15275 }, { "epoch": 1.7628057221965852, "grad_norm": 0.46301478147506714, "learning_rate": 8.691577245908947e-05, "loss": 0.9431, "step": 15280 }, { "epoch": 1.7633825565297645, "grad_norm": 0.39645519852638245, "learning_rate": 8.684923298085555e-05, "loss": 0.968, "step": 15285 }, { "epoch": 1.763959390862944, "grad_norm": 0.41923776268959045, "learning_rate": 8.678269942706777e-05, "loss": 0.9646, "step": 15290 }, { "epoch": 1.7645362251961236, "grad_norm": 0.36724886298179626, "learning_rate": 8.671617182769967e-05, "loss": 0.8777, "step": 15295 }, { "epoch": 1.7651130595293032, "grad_norm": 0.39986652135849, "learning_rate": 8.6649650212722e-05, "loss": 0.8661, "step": 15300 }, { "epoch": 1.7656898938624828, "grad_norm": 0.3949691653251648, "learning_rate": 8.658313461210294e-05, "loss": 0.8907, "step": 15305 }, { "epoch": 1.7662667281956623, "grad_norm": 0.380906879901886, "learning_rate": 8.651662505580785e-05, "loss": 0.8585, "step": 15310 }, { "epoch": 1.7668435625288417, "grad_norm": 0.4579210877418518, "learning_rate": 8.645012157379941e-05, "loss": 0.9024, "step": 15315 }, { "epoch": 1.7674203968620212, "grad_norm": 0.4028342068195343, "learning_rate": 8.63836241960376e-05, "loss": 0.8992, "step": 15320 }, { "epoch": 1.7679972311952006, "grad_norm": 0.39463692903518677, "learning_rate": 8.631713295247961e-05, "loss": 0.8753, "step": 15325 }, { "epoch": 1.7685740655283801, "grad_norm": 0.397307813167572, "learning_rate": 8.625064787307986e-05, "loss": 0.9066, "step": 15330 }, { "epoch": 1.7691508998615597, "grad_norm": 0.35301893949508667, "learning_rate": 8.618416898779e-05, "loss": 0.9314, "step": 15335 }, { "epoch": 1.7697277341947393, "grad_norm": 0.36975350975990295, "learning_rate": 8.611769632655896e-05, "loss": 0.92, "step": 15340 }, { "epoch": 1.7703045685279188, "grad_norm": 0.3862159550189972, "learning_rate": 8.605122991933271e-05, "loss": 0.9481, "step": 15345 }, { "epoch": 1.7708814028610984, "grad_norm": 0.3985309600830078, "learning_rate": 8.598476979605456e-05, "loss": 0.9441, "step": 15350 }, { "epoch": 1.7714582371942778, "grad_norm": 0.3654569089412689, "learning_rate": 8.591831598666491e-05, "loss": 0.9268, "step": 15355 }, { "epoch": 1.7720350715274573, "grad_norm": 0.4199677109718323, "learning_rate": 8.585186852110134e-05, "loss": 0.909, "step": 15360 }, { "epoch": 1.7726119058606367, "grad_norm": 0.47328895330429077, "learning_rate": 8.578542742929852e-05, "loss": 0.9117, "step": 15365 }, { "epoch": 1.7731887401938162, "grad_norm": 0.4488062560558319, "learning_rate": 8.571899274118835e-05, "loss": 0.8804, "step": 15370 }, { "epoch": 1.7737655745269958, "grad_norm": 0.4089053273200989, "learning_rate": 8.565256448669976e-05, "loss": 0.9349, "step": 15375 }, { "epoch": 1.7743424088601754, "grad_norm": 0.42348453402519226, "learning_rate": 8.558614269575878e-05, "loss": 0.9348, "step": 15380 }, { "epoch": 1.774919243193355, "grad_norm": 0.36207103729248047, "learning_rate": 8.551972739828861e-05, "loss": 0.9457, "step": 15385 }, { "epoch": 1.7754960775265345, "grad_norm": 0.40925994515419006, "learning_rate": 8.545331862420944e-05, "loss": 0.9087, "step": 15390 }, { "epoch": 1.776072911859714, "grad_norm": 0.3908781111240387, "learning_rate": 8.538691640343856e-05, "loss": 0.8865, "step": 15395 }, { "epoch": 1.7766497461928934, "grad_norm": 0.41950979828834534, "learning_rate": 8.532052076589025e-05, "loss": 0.9217, "step": 15400 }, { "epoch": 1.777226580526073, "grad_norm": 0.4013879597187042, "learning_rate": 8.525413174147598e-05, "loss": 0.9121, "step": 15405 }, { "epoch": 1.7778034148592523, "grad_norm": 0.40807363390922546, "learning_rate": 8.518774936010406e-05, "loss": 0.9025, "step": 15410 }, { "epoch": 1.7783802491924319, "grad_norm": 0.41417497396469116, "learning_rate": 8.512137365167986e-05, "loss": 0.9211, "step": 15415 }, { "epoch": 1.7789570835256114, "grad_norm": 0.38941362500190735, "learning_rate": 8.505500464610584e-05, "loss": 0.9524, "step": 15420 }, { "epoch": 1.779533917858791, "grad_norm": 0.3910941183567047, "learning_rate": 8.498864237328132e-05, "loss": 0.9518, "step": 15425 }, { "epoch": 1.7801107521919706, "grad_norm": 0.4338248372077942, "learning_rate": 8.492228686310266e-05, "loss": 0.9739, "step": 15430 }, { "epoch": 1.7806875865251501, "grad_norm": 0.37239012122154236, "learning_rate": 8.485593814546307e-05, "loss": 0.9104, "step": 15435 }, { "epoch": 1.7812644208583295, "grad_norm": 0.41907015442848206, "learning_rate": 8.478959625025288e-05, "loss": 0.9151, "step": 15440 }, { "epoch": 1.781841255191509, "grad_norm": 0.3942820429801941, "learning_rate": 8.47232612073592e-05, "loss": 0.9246, "step": 15445 }, { "epoch": 1.7824180895246884, "grad_norm": 0.3966974914073944, "learning_rate": 8.465693304666606e-05, "loss": 0.8847, "step": 15450 }, { "epoch": 1.782994923857868, "grad_norm": 0.4247607886791229, "learning_rate": 8.459061179805449e-05, "loss": 0.9161, "step": 15455 }, { "epoch": 1.7835717581910475, "grad_norm": 0.3746182918548584, "learning_rate": 8.452429749140233e-05, "loss": 0.9298, "step": 15460 }, { "epoch": 1.784148592524227, "grad_norm": 0.4121777415275574, "learning_rate": 8.445799015658427e-05, "loss": 0.8803, "step": 15465 }, { "epoch": 1.7847254268574066, "grad_norm": 0.46958792209625244, "learning_rate": 8.439168982347186e-05, "loss": 0.9259, "step": 15470 }, { "epoch": 1.7853022611905862, "grad_norm": 0.3878363072872162, "learning_rate": 8.432539652193363e-05, "loss": 0.9235, "step": 15475 }, { "epoch": 1.7858790955237656, "grad_norm": 0.3856402039527893, "learning_rate": 8.425911028183479e-05, "loss": 0.8902, "step": 15480 }, { "epoch": 1.7864559298569451, "grad_norm": 0.48076337575912476, "learning_rate": 8.419283113303742e-05, "loss": 0.9705, "step": 15485 }, { "epoch": 1.7870327641901245, "grad_norm": 0.3873134255409241, "learning_rate": 8.412655910540035e-05, "loss": 0.9599, "step": 15490 }, { "epoch": 1.787609598523304, "grad_norm": 0.3456037640571594, "learning_rate": 8.406029422877937e-05, "loss": 0.9236, "step": 15495 }, { "epoch": 1.7881864328564836, "grad_norm": 0.3917859196662903, "learning_rate": 8.399403653302687e-05, "loss": 0.8963, "step": 15500 }, { "epoch": 1.7887632671896632, "grad_norm": 0.43257713317871094, "learning_rate": 8.392778604799203e-05, "loss": 0.8965, "step": 15505 }, { "epoch": 1.7893401015228427, "grad_norm": 0.4102369546890259, "learning_rate": 8.386154280352094e-05, "loss": 0.9122, "step": 15510 }, { "epoch": 1.7899169358560223, "grad_norm": 0.40290239453315735, "learning_rate": 8.379530682945622e-05, "loss": 0.9344, "step": 15515 }, { "epoch": 1.7904937701892016, "grad_norm": 0.41029873490333557, "learning_rate": 8.372907815563733e-05, "loss": 0.9048, "step": 15520 }, { "epoch": 1.7910706045223812, "grad_norm": 0.4195650815963745, "learning_rate": 8.366285681190039e-05, "loss": 0.9081, "step": 15525 }, { "epoch": 1.7916474388555605, "grad_norm": 0.40104198455810547, "learning_rate": 8.359664282807829e-05, "loss": 0.9728, "step": 15530 }, { "epoch": 1.7922242731887401, "grad_norm": 0.40213140845298767, "learning_rate": 8.353043623400054e-05, "loss": 0.9563, "step": 15535 }, { "epoch": 1.7928011075219197, "grad_norm": 0.4082624912261963, "learning_rate": 8.34642370594933e-05, "loss": 0.8982, "step": 15540 }, { "epoch": 1.7933779418550992, "grad_norm": 0.3675086498260498, "learning_rate": 8.339804533437951e-05, "loss": 0.9356, "step": 15545 }, { "epoch": 1.7939547761882788, "grad_norm": 0.42921894788742065, "learning_rate": 8.333186108847862e-05, "loss": 0.8969, "step": 15550 }, { "epoch": 1.7945316105214584, "grad_norm": 0.3704732656478882, "learning_rate": 8.326568435160677e-05, "loss": 0.9421, "step": 15555 }, { "epoch": 1.7951084448546377, "grad_norm": 0.4511631429195404, "learning_rate": 8.319951515357666e-05, "loss": 0.9853, "step": 15560 }, { "epoch": 1.7956852791878173, "grad_norm": 0.42818620800971985, "learning_rate": 8.313335352419773e-05, "loss": 0.9735, "step": 15565 }, { "epoch": 1.7962621135209966, "grad_norm": 0.3656601011753082, "learning_rate": 8.306719949327588e-05, "loss": 0.9318, "step": 15570 }, { "epoch": 1.7968389478541762, "grad_norm": 0.40848276019096375, "learning_rate": 8.300105309061358e-05, "loss": 0.9316, "step": 15575 }, { "epoch": 1.7974157821873558, "grad_norm": 0.37792646884918213, "learning_rate": 8.293491434601003e-05, "loss": 0.9263, "step": 15580 }, { "epoch": 1.7979926165205353, "grad_norm": 0.3963736891746521, "learning_rate": 8.286878328926077e-05, "loss": 0.9002, "step": 15585 }, { "epoch": 1.798569450853715, "grad_norm": 0.4018113911151886, "learning_rate": 8.280265995015802e-05, "loss": 0.9209, "step": 15590 }, { "epoch": 1.7991462851868945, "grad_norm": 0.3702740967273712, "learning_rate": 8.27365443584904e-05, "loss": 0.8576, "step": 15595 }, { "epoch": 1.7997231195200738, "grad_norm": 0.40294185280799866, "learning_rate": 8.26704365440432e-05, "loss": 0.9255, "step": 15600 }, { "epoch": 1.8002999538532534, "grad_norm": 0.4672613739967346, "learning_rate": 8.260433653659809e-05, "loss": 0.9112, "step": 15605 }, { "epoch": 1.8008767881864327, "grad_norm": 0.3775807321071625, "learning_rate": 8.25382443659332e-05, "loss": 0.9228, "step": 15610 }, { "epoch": 1.8014536225196123, "grad_norm": 0.39842814207077026, "learning_rate": 8.247216006182326e-05, "loss": 0.9351, "step": 15615 }, { "epoch": 1.8020304568527918, "grad_norm": 0.37785279750823975, "learning_rate": 8.240608365403934e-05, "loss": 0.8772, "step": 15620 }, { "epoch": 1.8026072911859714, "grad_norm": 0.4134449362754822, "learning_rate": 8.234001517234901e-05, "loss": 0.8922, "step": 15625 }, { "epoch": 1.803184125519151, "grad_norm": 0.38116446137428284, "learning_rate": 8.227395464651618e-05, "loss": 0.901, "step": 15630 }, { "epoch": 1.8037609598523305, "grad_norm": 0.4059298038482666, "learning_rate": 8.220790210630134e-05, "loss": 0.9601, "step": 15635 }, { "epoch": 1.8043377941855099, "grad_norm": 0.3819584548473358, "learning_rate": 8.214185758146126e-05, "loss": 0.9207, "step": 15640 }, { "epoch": 1.8049146285186894, "grad_norm": 0.4297388195991516, "learning_rate": 8.20758211017491e-05, "loss": 0.9235, "step": 15645 }, { "epoch": 1.8054914628518688, "grad_norm": 0.41157621145248413, "learning_rate": 8.20097926969144e-05, "loss": 0.9497, "step": 15650 }, { "epoch": 1.8060682971850484, "grad_norm": 0.43935003876686096, "learning_rate": 8.194377239670317e-05, "loss": 0.9521, "step": 15655 }, { "epoch": 1.806645131518228, "grad_norm": 0.41185909509658813, "learning_rate": 8.187776023085762e-05, "loss": 0.9502, "step": 15660 }, { "epoch": 1.8072219658514075, "grad_norm": 0.4108864367008209, "learning_rate": 8.181175622911635e-05, "loss": 0.9256, "step": 15665 }, { "epoch": 1.807798800184587, "grad_norm": 0.38821250200271606, "learning_rate": 8.174576042121433e-05, "loss": 0.9331, "step": 15670 }, { "epoch": 1.8083756345177666, "grad_norm": 0.43762829899787903, "learning_rate": 8.167977283688282e-05, "loss": 0.9446, "step": 15675 }, { "epoch": 1.808952468850946, "grad_norm": 0.3885977268218994, "learning_rate": 8.16137935058493e-05, "loss": 0.9332, "step": 15680 }, { "epoch": 1.8095293031841255, "grad_norm": 0.3965728282928467, "learning_rate": 8.154782245783756e-05, "loss": 0.8907, "step": 15685 }, { "epoch": 1.8101061375173049, "grad_norm": 0.4613015651702881, "learning_rate": 8.148185972256778e-05, "loss": 0.9267, "step": 15690 }, { "epoch": 1.8106829718504844, "grad_norm": 0.38467276096343994, "learning_rate": 8.141590532975626e-05, "loss": 0.9241, "step": 15695 }, { "epoch": 1.811259806183664, "grad_norm": 0.3891170620918274, "learning_rate": 8.134995930911555e-05, "loss": 0.9751, "step": 15700 }, { "epoch": 1.8118366405168436, "grad_norm": 0.4085995852947235, "learning_rate": 8.128402169035451e-05, "loss": 0.9202, "step": 15705 }, { "epoch": 1.8124134748500231, "grad_norm": 0.4093764126300812, "learning_rate": 8.121809250317815e-05, "loss": 0.9358, "step": 15710 }, { "epoch": 1.8129903091832027, "grad_norm": 0.434633731842041, "learning_rate": 8.115217177728766e-05, "loss": 0.9862, "step": 15715 }, { "epoch": 1.813567143516382, "grad_norm": 0.3456355333328247, "learning_rate": 8.108625954238051e-05, "loss": 0.9446, "step": 15720 }, { "epoch": 1.8141439778495616, "grad_norm": 0.3649025857448578, "learning_rate": 8.102035582815026e-05, "loss": 0.9174, "step": 15725 }, { "epoch": 1.8147208121827412, "grad_norm": 0.37535566091537476, "learning_rate": 8.095446066428666e-05, "loss": 0.9541, "step": 15730 }, { "epoch": 1.8152976465159205, "grad_norm": 0.42814260721206665, "learning_rate": 8.088857408047562e-05, "loss": 0.9571, "step": 15735 }, { "epoch": 1.8158744808491, "grad_norm": 0.41946104168891907, "learning_rate": 8.082269610639919e-05, "loss": 0.9451, "step": 15740 }, { "epoch": 1.8164513151822796, "grad_norm": 0.4270831346511841, "learning_rate": 8.07568267717355e-05, "loss": 0.8814, "step": 15745 }, { "epoch": 1.8170281495154592, "grad_norm": 0.36242547631263733, "learning_rate": 8.06909661061588e-05, "loss": 0.9423, "step": 15750 }, { "epoch": 1.8176049838486388, "grad_norm": 0.40985921025276184, "learning_rate": 8.062511413933948e-05, "loss": 0.8786, "step": 15755 }, { "epoch": 1.8181818181818183, "grad_norm": 0.36799412965774536, "learning_rate": 8.055927090094397e-05, "loss": 0.9068, "step": 15760 }, { "epoch": 1.8187586525149977, "grad_norm": 0.41538429260253906, "learning_rate": 8.049343642063477e-05, "loss": 0.9299, "step": 15765 }, { "epoch": 1.8193354868481773, "grad_norm": 0.3922020494937897, "learning_rate": 8.042761072807045e-05, "loss": 0.9473, "step": 15770 }, { "epoch": 1.8199123211813566, "grad_norm": 0.40639728307724, "learning_rate": 8.036179385290561e-05, "loss": 0.8988, "step": 15775 }, { "epoch": 1.8204891555145362, "grad_norm": 0.4399595856666565, "learning_rate": 8.029598582479088e-05, "loss": 0.9281, "step": 15780 }, { "epoch": 1.8210659898477157, "grad_norm": 0.37775760889053345, "learning_rate": 8.023018667337291e-05, "loss": 0.8989, "step": 15785 }, { "epoch": 1.8216428241808953, "grad_norm": 0.4193717837333679, "learning_rate": 8.016439642829433e-05, "loss": 0.9235, "step": 15790 }, { "epoch": 1.8222196585140749, "grad_norm": 0.38701748847961426, "learning_rate": 8.00986151191938e-05, "loss": 0.9148, "step": 15795 }, { "epoch": 1.8227964928472544, "grad_norm": 0.40565016865730286, "learning_rate": 8.003284277570588e-05, "loss": 0.9378, "step": 15800 }, { "epoch": 1.8233733271804338, "grad_norm": 0.4248996078968048, "learning_rate": 7.99670794274612e-05, "loss": 0.9135, "step": 15805 }, { "epoch": 1.8239501615136133, "grad_norm": 0.3437690734863281, "learning_rate": 7.990132510408625e-05, "loss": 0.8809, "step": 15810 }, { "epoch": 1.8245269958467927, "grad_norm": 0.4016622304916382, "learning_rate": 7.983557983520345e-05, "loss": 0.9645, "step": 15815 }, { "epoch": 1.8251038301799722, "grad_norm": 0.3806969225406647, "learning_rate": 7.976984365043123e-05, "loss": 0.8848, "step": 15820 }, { "epoch": 1.8256806645131518, "grad_norm": 0.3845742642879486, "learning_rate": 7.970411657938381e-05, "loss": 0.9128, "step": 15825 }, { "epoch": 1.8262574988463314, "grad_norm": 0.40634384751319885, "learning_rate": 7.963839865167139e-05, "loss": 0.9613, "step": 15830 }, { "epoch": 1.826834333179511, "grad_norm": 0.4079045057296753, "learning_rate": 7.957268989690007e-05, "loss": 0.9268, "step": 15835 }, { "epoch": 1.8274111675126905, "grad_norm": 0.3828427493572235, "learning_rate": 7.95069903446717e-05, "loss": 0.8936, "step": 15840 }, { "epoch": 1.8279880018458698, "grad_norm": 0.40603169798851013, "learning_rate": 7.944130002458406e-05, "loss": 0.926, "step": 15845 }, { "epoch": 1.8285648361790494, "grad_norm": 0.41030603647232056, "learning_rate": 7.937561896623084e-05, "loss": 0.9358, "step": 15850 }, { "epoch": 1.8291416705122288, "grad_norm": 0.4036436975002289, "learning_rate": 7.930994719920142e-05, "loss": 0.9531, "step": 15855 }, { "epoch": 1.8297185048454083, "grad_norm": 0.40609830617904663, "learning_rate": 7.924428475308106e-05, "loss": 0.8767, "step": 15860 }, { "epoch": 1.8302953391785879, "grad_norm": 0.371971070766449, "learning_rate": 7.917863165745084e-05, "loss": 0.8763, "step": 15865 }, { "epoch": 1.8308721735117675, "grad_norm": 0.4002048969268799, "learning_rate": 7.911298794188761e-05, "loss": 0.9454, "step": 15870 }, { "epoch": 1.831449007844947, "grad_norm": 0.3957858085632324, "learning_rate": 7.904735363596401e-05, "loss": 0.9064, "step": 15875 }, { "epoch": 1.8320258421781266, "grad_norm": 0.3692780137062073, "learning_rate": 7.898172876924833e-05, "loss": 0.9281, "step": 15880 }, { "epoch": 1.832602676511306, "grad_norm": 0.42157161235809326, "learning_rate": 7.891611337130482e-05, "loss": 0.919, "step": 15885 }, { "epoch": 1.8331795108444855, "grad_norm": 0.40753528475761414, "learning_rate": 7.88505074716933e-05, "loss": 1.0049, "step": 15890 }, { "epoch": 1.8337563451776648, "grad_norm": 0.3804478645324707, "learning_rate": 7.878491109996928e-05, "loss": 0.9284, "step": 15895 }, { "epoch": 1.8343331795108444, "grad_norm": 0.40410223603248596, "learning_rate": 7.871932428568418e-05, "loss": 0.946, "step": 15900 }, { "epoch": 1.834910013844024, "grad_norm": 0.42021000385284424, "learning_rate": 7.865374705838493e-05, "loss": 0.9146, "step": 15905 }, { "epoch": 1.8354868481772035, "grad_norm": 0.4008379876613617, "learning_rate": 7.85881794476142e-05, "loss": 0.9104, "step": 15910 }, { "epoch": 1.836063682510383, "grad_norm": 0.41672542691230774, "learning_rate": 7.852262148291028e-05, "loss": 0.9564, "step": 15915 }, { "epoch": 1.8366405168435627, "grad_norm": 0.39630943536758423, "learning_rate": 7.845707319380723e-05, "loss": 0.9127, "step": 15920 }, { "epoch": 1.837217351176742, "grad_norm": 0.4240933358669281, "learning_rate": 7.839153460983468e-05, "loss": 0.9728, "step": 15925 }, { "epoch": 1.8377941855099216, "grad_norm": 0.4285568594932556, "learning_rate": 7.832600576051779e-05, "loss": 0.8806, "step": 15930 }, { "epoch": 1.838371019843101, "grad_norm": 0.4240418076515198, "learning_rate": 7.826048667537757e-05, "loss": 0.938, "step": 15935 }, { "epoch": 1.8389478541762805, "grad_norm": 0.3835122585296631, "learning_rate": 7.819497738393044e-05, "loss": 0.9463, "step": 15940 }, { "epoch": 1.83952468850946, "grad_norm": 0.4309677183628082, "learning_rate": 7.812947791568845e-05, "loss": 0.9371, "step": 15945 }, { "epoch": 1.8401015228426396, "grad_norm": 0.3694637417793274, "learning_rate": 7.806398830015921e-05, "loss": 0.9423, "step": 15950 }, { "epoch": 1.8406783571758192, "grad_norm": 0.3995886743068695, "learning_rate": 7.7998508566846e-05, "loss": 0.9003, "step": 15955 }, { "epoch": 1.8412551915089987, "grad_norm": 0.43725675344467163, "learning_rate": 7.793303874524752e-05, "loss": 0.8792, "step": 15960 }, { "epoch": 1.841832025842178, "grad_norm": 0.4083716571331024, "learning_rate": 7.786757886485806e-05, "loss": 0.9519, "step": 15965 }, { "epoch": 1.8424088601753577, "grad_norm": 0.3987749218940735, "learning_rate": 7.780212895516737e-05, "loss": 0.901, "step": 15970 }, { "epoch": 1.842985694508537, "grad_norm": 0.41949161887168884, "learning_rate": 7.773668904566085e-05, "loss": 0.8809, "step": 15975 }, { "epoch": 1.8435625288417166, "grad_norm": 0.4237136244773865, "learning_rate": 7.767125916581928e-05, "loss": 0.9537, "step": 15980 }, { "epoch": 1.8441393631748961, "grad_norm": 0.3790094256401062, "learning_rate": 7.760583934511887e-05, "loss": 0.9282, "step": 15985 }, { "epoch": 1.8447161975080757, "grad_norm": 0.36996859312057495, "learning_rate": 7.75404296130315e-05, "loss": 0.9472, "step": 15990 }, { "epoch": 1.8452930318412553, "grad_norm": 0.40153929591178894, "learning_rate": 7.747502999902433e-05, "loss": 0.9103, "step": 15995 }, { "epoch": 1.8458698661744348, "grad_norm": 0.3847443461418152, "learning_rate": 7.740964053255999e-05, "loss": 0.9051, "step": 16000 }, { "epoch": 1.8464467005076142, "grad_norm": 0.3627074956893921, "learning_rate": 7.734426124309656e-05, "loss": 0.9618, "step": 16005 }, { "epoch": 1.8470235348407937, "grad_norm": 0.42722293734550476, "learning_rate": 7.727889216008757e-05, "loss": 0.9784, "step": 16010 }, { "epoch": 1.847600369173973, "grad_norm": 0.4196968376636505, "learning_rate": 7.721353331298192e-05, "loss": 0.9048, "step": 16015 }, { "epoch": 1.8481772035071526, "grad_norm": 0.38322052359580994, "learning_rate": 7.714818473122385e-05, "loss": 0.9251, "step": 16020 }, { "epoch": 1.8487540378403322, "grad_norm": 0.4124515652656555, "learning_rate": 7.708284644425309e-05, "loss": 0.9276, "step": 16025 }, { "epoch": 1.8493308721735118, "grad_norm": 0.43157243728637695, "learning_rate": 7.701751848150462e-05, "loss": 0.9677, "step": 16030 }, { "epoch": 1.8499077065066913, "grad_norm": 0.3826289474964142, "learning_rate": 7.695220087240885e-05, "loss": 0.9283, "step": 16035 }, { "epoch": 1.850484540839871, "grad_norm": 0.39209648966789246, "learning_rate": 7.68868936463914e-05, "loss": 0.9227, "step": 16040 }, { "epoch": 1.8510613751730502, "grad_norm": 0.42012834548950195, "learning_rate": 7.682159683287345e-05, "loss": 0.9217, "step": 16045 }, { "epoch": 1.8516382095062298, "grad_norm": 0.38320961594581604, "learning_rate": 7.675631046127123e-05, "loss": 0.9327, "step": 16050 }, { "epoch": 1.8522150438394092, "grad_norm": 0.3654974400997162, "learning_rate": 7.669103456099637e-05, "loss": 0.9499, "step": 16055 }, { "epoch": 1.8527918781725887, "grad_norm": 0.3563767671585083, "learning_rate": 7.662576916145587e-05, "loss": 0.9016, "step": 16060 }, { "epoch": 1.8533687125057683, "grad_norm": 0.38693365454673767, "learning_rate": 7.656051429205188e-05, "loss": 0.9531, "step": 16065 }, { "epoch": 1.8539455468389479, "grad_norm": 0.4085317552089691, "learning_rate": 7.649526998218185e-05, "loss": 0.9219, "step": 16070 }, { "epoch": 1.8545223811721274, "grad_norm": 0.3921666741371155, "learning_rate": 7.643003626123841e-05, "loss": 0.913, "step": 16075 }, { "epoch": 1.855099215505307, "grad_norm": 0.382941335439682, "learning_rate": 7.636481315860958e-05, "loss": 0.9308, "step": 16080 }, { "epoch": 1.8556760498384866, "grad_norm": 0.3899093270301819, "learning_rate": 7.629960070367846e-05, "loss": 0.9103, "step": 16085 }, { "epoch": 1.856252884171666, "grad_norm": 0.5074906349182129, "learning_rate": 7.623439892582331e-05, "loss": 0.9146, "step": 16090 }, { "epoch": 1.8568297185048455, "grad_norm": 0.3871719241142273, "learning_rate": 7.616920785441777e-05, "loss": 0.9272, "step": 16095 }, { "epoch": 1.8574065528380248, "grad_norm": 0.4301646947860718, "learning_rate": 7.61040275188305e-05, "loss": 0.9237, "step": 16100 }, { "epoch": 1.8579833871712044, "grad_norm": 0.386033296585083, "learning_rate": 7.603885794842536e-05, "loss": 0.869, "step": 16105 }, { "epoch": 1.858560221504384, "grad_norm": 0.42225587368011475, "learning_rate": 7.597369917256132e-05, "loss": 0.8915, "step": 16110 }, { "epoch": 1.8591370558375635, "grad_norm": 0.40398165583610535, "learning_rate": 7.590855122059265e-05, "loss": 0.9254, "step": 16115 }, { "epoch": 1.859713890170743, "grad_norm": 0.4442918002605438, "learning_rate": 7.584341412186855e-05, "loss": 0.8927, "step": 16120 }, { "epoch": 1.8602907245039226, "grad_norm": 0.4120365083217621, "learning_rate": 7.577828790573345e-05, "loss": 0.9186, "step": 16125 }, { "epoch": 1.860867558837102, "grad_norm": 0.39227908849716187, "learning_rate": 7.571317260152675e-05, "loss": 0.9066, "step": 16130 }, { "epoch": 1.8614443931702815, "grad_norm": 0.44152939319610596, "learning_rate": 7.564806823858314e-05, "loss": 0.8705, "step": 16135 }, { "epoch": 1.8620212275034609, "grad_norm": 0.4071158468723297, "learning_rate": 7.55829748462322e-05, "loss": 0.9611, "step": 16140 }, { "epoch": 1.8625980618366405, "grad_norm": 0.4083629548549652, "learning_rate": 7.55178924537986e-05, "loss": 0.9647, "step": 16145 }, { "epoch": 1.86317489616982, "grad_norm": 0.4229707717895508, "learning_rate": 7.545282109060215e-05, "loss": 0.9201, "step": 16150 }, { "epoch": 1.8637517305029996, "grad_norm": 0.38979142904281616, "learning_rate": 7.538776078595762e-05, "loss": 0.9889, "step": 16155 }, { "epoch": 1.8643285648361791, "grad_norm": 0.4165305495262146, "learning_rate": 7.532271156917478e-05, "loss": 0.8899, "step": 16160 }, { "epoch": 1.8649053991693587, "grad_norm": 0.39025261998176575, "learning_rate": 7.525767346955837e-05, "loss": 0.9509, "step": 16165 }, { "epoch": 1.865482233502538, "grad_norm": 0.3888443112373352, "learning_rate": 7.519264651640829e-05, "loss": 0.9175, "step": 16170 }, { "epoch": 1.8660590678357176, "grad_norm": 0.4504176676273346, "learning_rate": 7.512763073901927e-05, "loss": 0.9471, "step": 16175 }, { "epoch": 1.866635902168897, "grad_norm": 0.41925010085105896, "learning_rate": 7.5062626166681e-05, "loss": 0.9074, "step": 16180 }, { "epoch": 1.8672127365020765, "grad_norm": 0.40411990880966187, "learning_rate": 7.499763282867823e-05, "loss": 0.9075, "step": 16185 }, { "epoch": 1.867789570835256, "grad_norm": 0.3905486464500427, "learning_rate": 7.493265075429056e-05, "loss": 0.9207, "step": 16190 }, { "epoch": 1.8683664051684357, "grad_norm": 0.35199257731437683, "learning_rate": 7.486767997279251e-05, "loss": 0.8817, "step": 16195 }, { "epoch": 1.8689432395016152, "grad_norm": 0.42351698875427246, "learning_rate": 7.480272051345358e-05, "loss": 0.9764, "step": 16200 }, { "epoch": 1.8695200738347948, "grad_norm": 0.413447767496109, "learning_rate": 7.473777240553814e-05, "loss": 0.9348, "step": 16205 }, { "epoch": 1.8700969081679741, "grad_norm": 0.42107057571411133, "learning_rate": 7.467283567830542e-05, "loss": 0.9551, "step": 16210 }, { "epoch": 1.8706737425011537, "grad_norm": 0.41672512888908386, "learning_rate": 7.460791036100952e-05, "loss": 0.9127, "step": 16215 }, { "epoch": 1.871250576834333, "grad_norm": 0.43511131405830383, "learning_rate": 7.454299648289946e-05, "loss": 0.9656, "step": 16220 }, { "epoch": 1.8718274111675126, "grad_norm": 0.37112078070640564, "learning_rate": 7.447809407321909e-05, "loss": 0.9245, "step": 16225 }, { "epoch": 1.8724042455006922, "grad_norm": 0.4278329908847809, "learning_rate": 7.4413203161207e-05, "loss": 0.9372, "step": 16230 }, { "epoch": 1.8729810798338717, "grad_norm": 0.4026067852973938, "learning_rate": 7.434832377609671e-05, "loss": 0.9019, "step": 16235 }, { "epoch": 1.8735579141670513, "grad_norm": 0.43685853481292725, "learning_rate": 7.428345594711652e-05, "loss": 0.9051, "step": 16240 }, { "epoch": 1.8741347485002309, "grad_norm": 0.4294734001159668, "learning_rate": 7.421859970348949e-05, "loss": 0.9445, "step": 16245 }, { "epoch": 1.8747115828334102, "grad_norm": 0.42335641384124756, "learning_rate": 7.41537550744335e-05, "loss": 0.9338, "step": 16250 }, { "epoch": 1.8752884171665898, "grad_norm": 0.3663158714771271, "learning_rate": 7.408892208916118e-05, "loss": 0.8959, "step": 16255 }, { "epoch": 1.8758652514997691, "grad_norm": 0.39420464634895325, "learning_rate": 7.402410077687993e-05, "loss": 0.9231, "step": 16260 }, { "epoch": 1.8764420858329487, "grad_norm": 0.3816054165363312, "learning_rate": 7.395929116679185e-05, "loss": 0.9217, "step": 16265 }, { "epoch": 1.8770189201661283, "grad_norm": 0.4024397134780884, "learning_rate": 7.38944932880938e-05, "loss": 0.9171, "step": 16270 }, { "epoch": 1.8775957544993078, "grad_norm": 0.37566685676574707, "learning_rate": 7.382970716997736e-05, "loss": 0.9604, "step": 16275 }, { "epoch": 1.8781725888324874, "grad_norm": 0.3700317144393921, "learning_rate": 7.37649328416288e-05, "loss": 0.9155, "step": 16280 }, { "epoch": 1.878749423165667, "grad_norm": 0.40654414892196655, "learning_rate": 7.370017033222912e-05, "loss": 0.9055, "step": 16285 }, { "epoch": 1.8793262574988463, "grad_norm": 0.3641127943992615, "learning_rate": 7.363541967095387e-05, "loss": 0.912, "step": 16290 }, { "epoch": 1.8799030918320259, "grad_norm": 0.4290393888950348, "learning_rate": 7.357068088697339e-05, "loss": 0.9601, "step": 16295 }, { "epoch": 1.8804799261652052, "grad_norm": 0.4048447012901306, "learning_rate": 7.350595400945268e-05, "loss": 0.9298, "step": 16300 }, { "epoch": 1.8810567604983848, "grad_norm": 0.37648749351501465, "learning_rate": 7.344123906755124e-05, "loss": 0.9371, "step": 16305 }, { "epoch": 1.8816335948315643, "grad_norm": 0.3663206994533539, "learning_rate": 7.337653609042332e-05, "loss": 0.8938, "step": 16310 }, { "epoch": 1.882210429164744, "grad_norm": 0.4044005274772644, "learning_rate": 7.331184510721776e-05, "loss": 0.9356, "step": 16315 }, { "epoch": 1.8827872634979235, "grad_norm": 0.37954995036125183, "learning_rate": 7.324716614707793e-05, "loss": 0.9178, "step": 16320 }, { "epoch": 1.883364097831103, "grad_norm": 0.3999575972557068, "learning_rate": 7.318249923914184e-05, "loss": 0.9632, "step": 16325 }, { "epoch": 1.8839409321642824, "grad_norm": 0.3797055184841156, "learning_rate": 7.311784441254207e-05, "loss": 0.9277, "step": 16330 }, { "epoch": 1.884517766497462, "grad_norm": 0.4123266935348511, "learning_rate": 7.305320169640575e-05, "loss": 0.9324, "step": 16335 }, { "epoch": 1.8850946008306413, "grad_norm": 0.38933947682380676, "learning_rate": 7.29885711198545e-05, "loss": 0.9254, "step": 16340 }, { "epoch": 1.8856714351638209, "grad_norm": 0.3938358724117279, "learning_rate": 7.292395271200459e-05, "loss": 0.9264, "step": 16345 }, { "epoch": 1.8862482694970004, "grad_norm": 0.49041980504989624, "learning_rate": 7.285934650196672e-05, "loss": 0.947, "step": 16350 }, { "epoch": 1.88682510383018, "grad_norm": 0.37116193771362305, "learning_rate": 7.279475251884609e-05, "loss": 0.8855, "step": 16355 }, { "epoch": 1.8874019381633595, "grad_norm": 0.3848974406719208, "learning_rate": 7.273017079174239e-05, "loss": 0.9428, "step": 16360 }, { "epoch": 1.8879787724965391, "grad_norm": 0.4379478394985199, "learning_rate": 7.266560134974989e-05, "loss": 0.9608, "step": 16365 }, { "epoch": 1.8885556068297185, "grad_norm": 0.404349148273468, "learning_rate": 7.260104422195721e-05, "loss": 0.9392, "step": 16370 }, { "epoch": 1.889132441162898, "grad_norm": 0.4757440984249115, "learning_rate": 7.253649943744742e-05, "loss": 0.9663, "step": 16375 }, { "epoch": 1.8897092754960774, "grad_norm": 0.4406890571117401, "learning_rate": 7.247196702529815e-05, "loss": 0.9497, "step": 16380 }, { "epoch": 1.890286109829257, "grad_norm": 0.3876641094684601, "learning_rate": 7.240744701458134e-05, "loss": 0.9348, "step": 16385 }, { "epoch": 1.8908629441624365, "grad_norm": 0.4099477231502533, "learning_rate": 7.234293943436338e-05, "loss": 0.9521, "step": 16390 }, { "epoch": 1.891439778495616, "grad_norm": 0.37708956003189087, "learning_rate": 7.227844431370502e-05, "loss": 0.9045, "step": 16395 }, { "epoch": 1.8920166128287956, "grad_norm": 0.37302008271217346, "learning_rate": 7.221396168166152e-05, "loss": 0.9244, "step": 16400 }, { "epoch": 1.8925934471619752, "grad_norm": 0.37601548433303833, "learning_rate": 7.214949156728239e-05, "loss": 0.9299, "step": 16405 }, { "epoch": 1.8931702814951545, "grad_norm": 0.4314630329608917, "learning_rate": 7.208503399961149e-05, "loss": 0.9369, "step": 16410 }, { "epoch": 1.893747115828334, "grad_norm": 0.3835648000240326, "learning_rate": 7.202058900768718e-05, "loss": 0.9245, "step": 16415 }, { "epoch": 1.8943239501615134, "grad_norm": 0.39860522747039795, "learning_rate": 7.1956156620542e-05, "loss": 0.898, "step": 16420 }, { "epoch": 1.894900784494693, "grad_norm": 0.3779727518558502, "learning_rate": 7.189173686720287e-05, "loss": 0.9415, "step": 16425 }, { "epoch": 1.8954776188278726, "grad_norm": 0.38131847977638245, "learning_rate": 7.182732977669098e-05, "loss": 0.9132, "step": 16430 }, { "epoch": 1.8960544531610521, "grad_norm": 0.45847800374031067, "learning_rate": 7.176293537802193e-05, "loss": 0.9214, "step": 16435 }, { "epoch": 1.8966312874942317, "grad_norm": 0.40811875462532043, "learning_rate": 7.169855370020547e-05, "loss": 0.925, "step": 16440 }, { "epoch": 1.8972081218274113, "grad_norm": 0.4316577613353729, "learning_rate": 7.163418477224567e-05, "loss": 0.9443, "step": 16445 }, { "epoch": 1.8977849561605908, "grad_norm": 0.42547932267189026, "learning_rate": 7.156982862314084e-05, "loss": 0.8999, "step": 16450 }, { "epoch": 1.8983617904937702, "grad_norm": 0.39770105481147766, "learning_rate": 7.15054852818836e-05, "loss": 0.8858, "step": 16455 }, { "epoch": 1.8989386248269498, "grad_norm": 0.3987480401992798, "learning_rate": 7.144115477746074e-05, "loss": 0.9079, "step": 16460 }, { "epoch": 1.899515459160129, "grad_norm": 0.45633745193481445, "learning_rate": 7.137683713885321e-05, "loss": 0.9337, "step": 16465 }, { "epoch": 1.9000922934933087, "grad_norm": 0.4088650941848755, "learning_rate": 7.131253239503635e-05, "loss": 0.9509, "step": 16470 }, { "epoch": 1.9006691278264882, "grad_norm": 0.4616527855396271, "learning_rate": 7.124824057497949e-05, "loss": 0.9329, "step": 16475 }, { "epoch": 1.9012459621596678, "grad_norm": 0.36398735642433167, "learning_rate": 7.118396170764623e-05, "loss": 0.9264, "step": 16480 }, { "epoch": 1.9018227964928474, "grad_norm": 0.4025936424732208, "learning_rate": 7.111969582199431e-05, "loss": 0.9527, "step": 16485 }, { "epoch": 1.902399630826027, "grad_norm": 0.3752637803554535, "learning_rate": 7.105544294697569e-05, "loss": 0.986, "step": 16490 }, { "epoch": 1.9029764651592063, "grad_norm": 0.4268634617328644, "learning_rate": 7.099120311153639e-05, "loss": 0.9546, "step": 16495 }, { "epoch": 1.9035532994923858, "grad_norm": 0.4121106266975403, "learning_rate": 7.092697634461654e-05, "loss": 0.9851, "step": 16500 }, { "epoch": 1.9041301338255652, "grad_norm": 0.4355171024799347, "learning_rate": 7.086276267515048e-05, "loss": 0.9322, "step": 16505 }, { "epoch": 1.9047069681587447, "grad_norm": 0.38573938608169556, "learning_rate": 7.07985621320666e-05, "loss": 0.923, "step": 16510 }, { "epoch": 1.9052838024919243, "grad_norm": 0.4271920919418335, "learning_rate": 7.073437474428732e-05, "loss": 0.9642, "step": 16515 }, { "epoch": 1.9058606368251039, "grad_norm": 0.40393730998039246, "learning_rate": 7.067020054072916e-05, "loss": 0.9754, "step": 16520 }, { "epoch": 1.9064374711582834, "grad_norm": 0.3705683648586273, "learning_rate": 7.060603955030283e-05, "loss": 0.8778, "step": 16525 }, { "epoch": 1.907014305491463, "grad_norm": 0.401284396648407, "learning_rate": 7.05418918019129e-05, "loss": 0.8879, "step": 16530 }, { "epoch": 1.9075911398246423, "grad_norm": 0.3965059518814087, "learning_rate": 7.047775732445805e-05, "loss": 0.9206, "step": 16535 }, { "epoch": 1.908167974157822, "grad_norm": 0.406692236661911, "learning_rate": 7.041363614683106e-05, "loss": 0.9312, "step": 16540 }, { "epoch": 1.9087448084910013, "grad_norm": 0.4251668453216553, "learning_rate": 7.034952829791858e-05, "loss": 0.9195, "step": 16545 }, { "epoch": 1.9093216428241808, "grad_norm": 0.4059813916683197, "learning_rate": 7.028543380660135e-05, "loss": 1.0009, "step": 16550 }, { "epoch": 1.9098984771573604, "grad_norm": 0.37833818793296814, "learning_rate": 7.022135270175401e-05, "loss": 0.9313, "step": 16555 }, { "epoch": 1.91047531149054, "grad_norm": 0.3625733554363251, "learning_rate": 7.01572850122453e-05, "loss": 0.9346, "step": 16560 }, { "epoch": 1.9110521458237195, "grad_norm": 0.38310348987579346, "learning_rate": 7.00932307669378e-05, "loss": 0.9007, "step": 16565 }, { "epoch": 1.911628980156899, "grad_norm": 0.39671164751052856, "learning_rate": 7.002918999468804e-05, "loss": 0.9106, "step": 16570 }, { "epoch": 1.9122058144900784, "grad_norm": 0.3802867531776428, "learning_rate": 6.996516272434658e-05, "loss": 0.8741, "step": 16575 }, { "epoch": 1.912782648823258, "grad_norm": 0.44518622756004333, "learning_rate": 6.990114898475782e-05, "loss": 0.9304, "step": 16580 }, { "epoch": 1.9133594831564373, "grad_norm": 0.3733896017074585, "learning_rate": 6.983714880476002e-05, "loss": 0.9097, "step": 16585 }, { "epoch": 1.913936317489617, "grad_norm": 0.3972858786582947, "learning_rate": 6.97731622131854e-05, "loss": 0.9595, "step": 16590 }, { "epoch": 1.9145131518227965, "grad_norm": 0.38236817717552185, "learning_rate": 6.97091892388601e-05, "loss": 0.9005, "step": 16595 }, { "epoch": 1.915089986155976, "grad_norm": 0.36411792039871216, "learning_rate": 6.964522991060404e-05, "loss": 0.9103, "step": 16600 }, { "epoch": 1.9156668204891556, "grad_norm": 0.43032193183898926, "learning_rate": 6.9581284257231e-05, "loss": 0.9137, "step": 16605 }, { "epoch": 1.9162436548223352, "grad_norm": 0.38933807611465454, "learning_rate": 6.951735230754859e-05, "loss": 0.8861, "step": 16610 }, { "epoch": 1.9168204891555145, "grad_norm": 0.40578311681747437, "learning_rate": 6.945343409035839e-05, "loss": 0.9433, "step": 16615 }, { "epoch": 1.917397323488694, "grad_norm": 0.4237358868122101, "learning_rate": 6.938952963445559e-05, "loss": 0.8664, "step": 16620 }, { "epoch": 1.9179741578218734, "grad_norm": 0.3827938437461853, "learning_rate": 6.932563896862926e-05, "loss": 0.8932, "step": 16625 }, { "epoch": 1.918550992155053, "grad_norm": 0.4397215247154236, "learning_rate": 6.926176212166234e-05, "loss": 0.962, "step": 16630 }, { "epoch": 1.9191278264882325, "grad_norm": 0.4293370246887207, "learning_rate": 6.919789912233146e-05, "loss": 0.935, "step": 16635 }, { "epoch": 1.9197046608214121, "grad_norm": 0.4171515703201294, "learning_rate": 6.913404999940698e-05, "loss": 0.9105, "step": 16640 }, { "epoch": 1.9202814951545917, "grad_norm": 0.4079015552997589, "learning_rate": 6.907021478165305e-05, "loss": 0.9212, "step": 16645 }, { "epoch": 1.9208583294877712, "grad_norm": 0.46125656366348267, "learning_rate": 6.900639349782762e-05, "loss": 0.892, "step": 16650 }, { "epoch": 1.9214351638209506, "grad_norm": 0.3586021959781647, "learning_rate": 6.894258617668229e-05, "loss": 0.955, "step": 16655 }, { "epoch": 1.9220119981541302, "grad_norm": 0.3966992497444153, "learning_rate": 6.887879284696232e-05, "loss": 0.9363, "step": 16660 }, { "epoch": 1.9225888324873095, "grad_norm": 0.4333534836769104, "learning_rate": 6.88150135374068e-05, "loss": 0.928, "step": 16665 }, { "epoch": 1.923165666820489, "grad_norm": 0.41055798530578613, "learning_rate": 6.875124827674841e-05, "loss": 0.9279, "step": 16670 }, { "epoch": 1.9237425011536686, "grad_norm": 0.4172268211841583, "learning_rate": 6.868749709371354e-05, "loss": 0.941, "step": 16675 }, { "epoch": 1.9243193354868482, "grad_norm": 0.42212820053100586, "learning_rate": 6.862376001702213e-05, "loss": 0.9339, "step": 16680 }, { "epoch": 1.9248961698200278, "grad_norm": 0.41261783242225647, "learning_rate": 6.8560037075388e-05, "loss": 0.9533, "step": 16685 }, { "epoch": 1.9254730041532073, "grad_norm": 0.4174567759037018, "learning_rate": 6.849632829751838e-05, "loss": 0.93, "step": 16690 }, { "epoch": 1.9260498384863867, "grad_norm": 0.38516369462013245, "learning_rate": 6.843263371211414e-05, "loss": 0.937, "step": 16695 }, { "epoch": 1.9266266728195662, "grad_norm": 0.420776903629303, "learning_rate": 6.836895334786996e-05, "loss": 0.9415, "step": 16700 }, { "epoch": 1.9272035071527456, "grad_norm": 0.38259801268577576, "learning_rate": 6.830528723347387e-05, "loss": 0.953, "step": 16705 }, { "epoch": 1.9277803414859251, "grad_norm": 0.38586026430130005, "learning_rate": 6.824163539760759e-05, "loss": 0.9387, "step": 16710 }, { "epoch": 1.9283571758191047, "grad_norm": 0.39584457874298096, "learning_rate": 6.81779978689464e-05, "loss": 0.9517, "step": 16715 }, { "epoch": 1.9289340101522843, "grad_norm": 0.39963656663894653, "learning_rate": 6.811437467615915e-05, "loss": 0.9289, "step": 16720 }, { "epoch": 1.9295108444854638, "grad_norm": 0.4218963384628296, "learning_rate": 6.805076584790818e-05, "loss": 0.8897, "step": 16725 }, { "epoch": 1.9300876788186434, "grad_norm": 0.3993114233016968, "learning_rate": 6.798717141284942e-05, "loss": 0.9396, "step": 16730 }, { "epoch": 1.9306645131518227, "grad_norm": 0.39297980070114136, "learning_rate": 6.792359139963228e-05, "loss": 0.9502, "step": 16735 }, { "epoch": 1.9312413474850023, "grad_norm": 0.353900671005249, "learning_rate": 6.786002583689968e-05, "loss": 0.9808, "step": 16740 }, { "epoch": 1.9318181818181817, "grad_norm": 0.37047556042671204, "learning_rate": 6.7796474753288e-05, "loss": 0.8961, "step": 16745 }, { "epoch": 1.9323950161513612, "grad_norm": 0.39696457982063293, "learning_rate": 6.773293817742717e-05, "loss": 0.8609, "step": 16750 }, { "epoch": 1.9329718504845408, "grad_norm": 0.3835957646369934, "learning_rate": 6.766941613794053e-05, "loss": 0.948, "step": 16755 }, { "epoch": 1.9335486848177204, "grad_norm": 0.4246508479118347, "learning_rate": 6.760590866344486e-05, "loss": 0.9271, "step": 16760 }, { "epoch": 1.9341255191509, "grad_norm": 0.35733363032341003, "learning_rate": 6.754241578255042e-05, "loss": 0.8943, "step": 16765 }, { "epoch": 1.9347023534840795, "grad_norm": 0.38427141308784485, "learning_rate": 6.747893752386088e-05, "loss": 0.9205, "step": 16770 }, { "epoch": 1.9352791878172588, "grad_norm": 0.4096218943595886, "learning_rate": 6.74154739159733e-05, "loss": 0.9333, "step": 16775 }, { "epoch": 1.9358560221504384, "grad_norm": 0.3812349736690521, "learning_rate": 6.73520249874782e-05, "loss": 0.9194, "step": 16780 }, { "epoch": 1.936432856483618, "grad_norm": 0.42356806993484497, "learning_rate": 6.728859076695938e-05, "loss": 0.9593, "step": 16785 }, { "epoch": 1.9370096908167973, "grad_norm": 0.386717826128006, "learning_rate": 6.722517128299414e-05, "loss": 0.9022, "step": 16790 }, { "epoch": 1.9375865251499769, "grad_norm": 0.4004405438899994, "learning_rate": 6.716176656415305e-05, "loss": 0.9745, "step": 16795 }, { "epoch": 1.9381633594831564, "grad_norm": 0.398300439119339, "learning_rate": 6.709837663900007e-05, "loss": 0.9484, "step": 16800 }, { "epoch": 1.938740193816336, "grad_norm": 0.4078962504863739, "learning_rate": 6.703500153609247e-05, "loss": 0.9304, "step": 16805 }, { "epoch": 1.9393170281495156, "grad_norm": 0.40395575761795044, "learning_rate": 6.697164128398088e-05, "loss": 0.9248, "step": 16810 }, { "epoch": 1.9398938624826951, "grad_norm": 0.40404340624809265, "learning_rate": 6.690829591120922e-05, "loss": 0.9578, "step": 16815 }, { "epoch": 1.9404706968158745, "grad_norm": 0.4161837100982666, "learning_rate": 6.684496544631466e-05, "loss": 0.8986, "step": 16820 }, { "epoch": 1.941047531149054, "grad_norm": 0.4277629852294922, "learning_rate": 6.678164991782772e-05, "loss": 0.9928, "step": 16825 }, { "epoch": 1.9416243654822334, "grad_norm": 0.38067278265953064, "learning_rate": 6.671834935427222e-05, "loss": 0.9516, "step": 16830 }, { "epoch": 1.942201199815413, "grad_norm": 0.37735915184020996, "learning_rate": 6.665506378416513e-05, "loss": 0.9199, "step": 16835 }, { "epoch": 1.9427780341485925, "grad_norm": 0.39840787649154663, "learning_rate": 6.65917932360167e-05, "loss": 0.9641, "step": 16840 }, { "epoch": 1.943354868481772, "grad_norm": 0.41158953309059143, "learning_rate": 6.652853773833052e-05, "loss": 0.9537, "step": 16845 }, { "epoch": 1.9439317028149516, "grad_norm": 0.367220014333725, "learning_rate": 6.646529731960324e-05, "loss": 0.8913, "step": 16850 }, { "epoch": 1.9445085371481312, "grad_norm": 0.3907991349697113, "learning_rate": 6.640207200832482e-05, "loss": 0.9216, "step": 16855 }, { "epoch": 1.9450853714813106, "grad_norm": 0.40876129269599915, "learning_rate": 6.633886183297838e-05, "loss": 0.9338, "step": 16860 }, { "epoch": 1.9456622058144901, "grad_norm": 0.41524142026901245, "learning_rate": 6.627566682204025e-05, "loss": 0.9301, "step": 16865 }, { "epoch": 1.9462390401476695, "grad_norm": 0.40617069602012634, "learning_rate": 6.621248700397989e-05, "loss": 0.9035, "step": 16870 }, { "epoch": 1.946815874480849, "grad_norm": 0.3806745707988739, "learning_rate": 6.614932240725989e-05, "loss": 0.9121, "step": 16875 }, { "epoch": 1.9473927088140286, "grad_norm": 0.3907873332500458, "learning_rate": 6.608617306033609e-05, "loss": 0.9058, "step": 16880 }, { "epoch": 1.9479695431472082, "grad_norm": 0.39370810985565186, "learning_rate": 6.602303899165737e-05, "loss": 0.9464, "step": 16885 }, { "epoch": 1.9485463774803877, "grad_norm": 0.36397796869277954, "learning_rate": 6.595992022966571e-05, "loss": 0.8671, "step": 16890 }, { "epoch": 1.9491232118135673, "grad_norm": 0.39594727754592896, "learning_rate": 6.58968168027963e-05, "loss": 0.9762, "step": 16895 }, { "epoch": 1.9497000461467466, "grad_norm": 0.3924747109413147, "learning_rate": 6.583372873947732e-05, "loss": 0.939, "step": 16900 }, { "epoch": 1.9502768804799262, "grad_norm": 0.3910292983055115, "learning_rate": 6.577065606813011e-05, "loss": 0.9245, "step": 16905 }, { "epoch": 1.9508537148131055, "grad_norm": 0.3986966907978058, "learning_rate": 6.570759881716892e-05, "loss": 0.922, "step": 16910 }, { "epoch": 1.951430549146285, "grad_norm": 0.37212473154067993, "learning_rate": 6.56445570150013e-05, "loss": 0.8996, "step": 16915 }, { "epoch": 1.9520073834794647, "grad_norm": 0.38835760951042175, "learning_rate": 6.558153069002764e-05, "loss": 0.9552, "step": 16920 }, { "epoch": 1.9525842178126442, "grad_norm": 0.40095987915992737, "learning_rate": 6.551851987064141e-05, "loss": 0.9248, "step": 16925 }, { "epoch": 1.9531610521458238, "grad_norm": 0.42613843083381653, "learning_rate": 6.54555245852291e-05, "loss": 0.9297, "step": 16930 }, { "epoch": 1.9537378864790034, "grad_norm": 0.39653289318084717, "learning_rate": 6.539254486217026e-05, "loss": 0.9619, "step": 16935 }, { "epoch": 1.9543147208121827, "grad_norm": 0.4119310677051544, "learning_rate": 6.532958072983734e-05, "loss": 0.9605, "step": 16940 }, { "epoch": 1.9548915551453623, "grad_norm": 0.3854370415210724, "learning_rate": 6.526663221659579e-05, "loss": 0.9268, "step": 16945 }, { "epoch": 1.9554683894785416, "grad_norm": 0.4038507640361786, "learning_rate": 6.520369935080411e-05, "loss": 0.8646, "step": 16950 }, { "epoch": 1.9560452238117212, "grad_norm": 0.3981791138648987, "learning_rate": 6.51407821608136e-05, "loss": 0.9104, "step": 16955 }, { "epoch": 1.9566220581449008, "grad_norm": 0.41779494285583496, "learning_rate": 6.507788067496863e-05, "loss": 0.9362, "step": 16960 }, { "epoch": 1.9571988924780803, "grad_norm": 0.39504823088645935, "learning_rate": 6.501499492160636e-05, "loss": 0.9438, "step": 16965 }, { "epoch": 1.9577757268112599, "grad_norm": 0.421944260597229, "learning_rate": 6.495212492905707e-05, "loss": 0.918, "step": 16970 }, { "epoch": 1.9583525611444395, "grad_norm": 0.40287306904792786, "learning_rate": 6.488927072564372e-05, "loss": 0.8682, "step": 16975 }, { "epoch": 1.9589293954776188, "grad_norm": 0.43788981437683105, "learning_rate": 6.482643233968224e-05, "loss": 0.908, "step": 16980 }, { "epoch": 1.9595062298107984, "grad_norm": 0.3697527348995209, "learning_rate": 6.476360979948153e-05, "loss": 0.9286, "step": 16985 }, { "epoch": 1.9600830641439777, "grad_norm": 0.38073477149009705, "learning_rate": 6.470080313334322e-05, "loss": 0.8957, "step": 16990 }, { "epoch": 1.9606598984771573, "grad_norm": 0.44243210554122925, "learning_rate": 6.463801236956184e-05, "loss": 0.9488, "step": 16995 }, { "epoch": 1.9612367328103368, "grad_norm": 0.39931437373161316, "learning_rate": 6.457523753642469e-05, "loss": 0.9382, "step": 17000 }, { "epoch": 1.9618135671435164, "grad_norm": 0.428107351064682, "learning_rate": 6.451247866221206e-05, "loss": 0.9399, "step": 17005 }, { "epoch": 1.962390401476696, "grad_norm": 0.4132627546787262, "learning_rate": 6.44497357751969e-05, "loss": 0.9203, "step": 17010 }, { "epoch": 1.9629672358098755, "grad_norm": 0.3902876079082489, "learning_rate": 6.438700890364496e-05, "loss": 0.9191, "step": 17015 }, { "epoch": 1.9635440701430549, "grad_norm": 0.41587990522384644, "learning_rate": 6.432429807581489e-05, "loss": 0.9854, "step": 17020 }, { "epoch": 1.9641209044762344, "grad_norm": 0.5028902888298035, "learning_rate": 6.426160331995801e-05, "loss": 0.8926, "step": 17025 }, { "epoch": 1.9646977388094138, "grad_norm": 0.4033501148223877, "learning_rate": 6.419892466431842e-05, "loss": 0.9497, "step": 17030 }, { "epoch": 1.9652745731425934, "grad_norm": 0.3764554262161255, "learning_rate": 6.413626213713295e-05, "loss": 0.9535, "step": 17035 }, { "epoch": 1.965851407475773, "grad_norm": 0.4067056179046631, "learning_rate": 6.407361576663124e-05, "loss": 0.928, "step": 17040 }, { "epoch": 1.9664282418089525, "grad_norm": 0.38903966546058655, "learning_rate": 6.401098558103563e-05, "loss": 0.8632, "step": 17045 }, { "epoch": 1.967005076142132, "grad_norm": 0.48693788051605225, "learning_rate": 6.394837160856105e-05, "loss": 0.8727, "step": 17050 }, { "epoch": 1.9675819104753116, "grad_norm": 0.39712145924568176, "learning_rate": 6.388577387741524e-05, "loss": 0.9145, "step": 17055 }, { "epoch": 1.968158744808491, "grad_norm": 0.43445274233818054, "learning_rate": 6.382319241579866e-05, "loss": 0.9578, "step": 17060 }, { "epoch": 1.9687355791416705, "grad_norm": 0.4607578217983246, "learning_rate": 6.376062725190435e-05, "loss": 0.9467, "step": 17065 }, { "epoch": 1.9693124134748499, "grad_norm": 0.4164617657661438, "learning_rate": 6.369807841391798e-05, "loss": 0.9318, "step": 17070 }, { "epoch": 1.9698892478080294, "grad_norm": 0.38452988862991333, "learning_rate": 6.363554593001801e-05, "loss": 0.9329, "step": 17075 }, { "epoch": 1.970466082141209, "grad_norm": 0.41434916853904724, "learning_rate": 6.357302982837543e-05, "loss": 0.9214, "step": 17080 }, { "epoch": 1.9710429164743886, "grad_norm": 0.3847775161266327, "learning_rate": 6.351053013715383e-05, "loss": 0.9322, "step": 17085 }, { "epoch": 1.9716197508075681, "grad_norm": 0.4423973262310028, "learning_rate": 6.344804688450941e-05, "loss": 0.9484, "step": 17090 }, { "epoch": 1.9721965851407477, "grad_norm": 0.4209598898887634, "learning_rate": 6.33855800985911e-05, "loss": 0.9023, "step": 17095 }, { "epoch": 1.972773419473927, "grad_norm": 0.4027029871940613, "learning_rate": 6.332312980754025e-05, "loss": 0.9565, "step": 17100 }, { "epoch": 1.9733502538071066, "grad_norm": 0.4411013722419739, "learning_rate": 6.326069603949079e-05, "loss": 0.9358, "step": 17105 }, { "epoch": 1.973927088140286, "grad_norm": 0.3684937655925751, "learning_rate": 6.319827882256935e-05, "loss": 0.9061, "step": 17110 }, { "epoch": 1.9745039224734655, "grad_norm": 0.4084828197956085, "learning_rate": 6.313587818489497e-05, "loss": 0.9066, "step": 17115 }, { "epoch": 1.975080756806645, "grad_norm": 0.4619320333003998, "learning_rate": 6.307349415457923e-05, "loss": 0.922, "step": 17120 }, { "epoch": 1.9756575911398246, "grad_norm": 0.37400147318840027, "learning_rate": 6.301112675972627e-05, "loss": 0.9151, "step": 17125 }, { "epoch": 1.9762344254730042, "grad_norm": 0.39188510179519653, "learning_rate": 6.294877602843275e-05, "loss": 0.9913, "step": 17130 }, { "epoch": 1.9768112598061838, "grad_norm": 0.4424871802330017, "learning_rate": 6.28864419887878e-05, "loss": 0.8995, "step": 17135 }, { "epoch": 1.9773880941393633, "grad_norm": 0.39406830072402954, "learning_rate": 6.282412466887293e-05, "loss": 0.8848, "step": 17140 }, { "epoch": 1.9779649284725427, "grad_norm": 0.41170790791511536, "learning_rate": 6.276182409676234e-05, "loss": 0.9644, "step": 17145 }, { "epoch": 1.9785417628057222, "grad_norm": 0.42084214091300964, "learning_rate": 6.269954030052252e-05, "loss": 0.9029, "step": 17150 }, { "epoch": 1.9791185971389016, "grad_norm": 0.4019472599029541, "learning_rate": 6.263727330821241e-05, "loss": 0.9181, "step": 17155 }, { "epoch": 1.9796954314720812, "grad_norm": 0.36666131019592285, "learning_rate": 6.25750231478834e-05, "loss": 0.9103, "step": 17160 }, { "epoch": 1.9802722658052607, "grad_norm": 0.4180550277233124, "learning_rate": 6.251278984757938e-05, "loss": 0.9464, "step": 17165 }, { "epoch": 1.9808491001384403, "grad_norm": 0.4247073233127594, "learning_rate": 6.245057343533653e-05, "loss": 0.9364, "step": 17170 }, { "epoch": 1.9814259344716199, "grad_norm": 0.4047488868236542, "learning_rate": 6.238837393918341e-05, "loss": 0.9444, "step": 17175 }, { "epoch": 1.9820027688047994, "grad_norm": 0.37309056520462036, "learning_rate": 6.232619138714112e-05, "loss": 0.9291, "step": 17180 }, { "epoch": 1.9825796031379788, "grad_norm": 0.4210014343261719, "learning_rate": 6.226402580722298e-05, "loss": 0.8786, "step": 17185 }, { "epoch": 1.9831564374711583, "grad_norm": 0.39095330238342285, "learning_rate": 6.220187722743466e-05, "loss": 0.9254, "step": 17190 }, { "epoch": 1.9837332718043377, "grad_norm": 0.4275166988372803, "learning_rate": 6.213974567577426e-05, "loss": 0.9888, "step": 17195 }, { "epoch": 1.9843101061375172, "grad_norm": 0.39710062742233276, "learning_rate": 6.207763118023218e-05, "loss": 0.9084, "step": 17200 }, { "epoch": 1.9848869404706968, "grad_norm": 0.5015316605567932, "learning_rate": 6.201553376879108e-05, "loss": 0.9358, "step": 17205 }, { "epoch": 1.9854637748038764, "grad_norm": 0.3706842064857483, "learning_rate": 6.195345346942599e-05, "loss": 0.9353, "step": 17210 }, { "epoch": 1.986040609137056, "grad_norm": 0.40164434909820557, "learning_rate": 6.189139031010416e-05, "loss": 0.9092, "step": 17215 }, { "epoch": 1.9866174434702355, "grad_norm": 0.4197016656398773, "learning_rate": 6.182934431878526e-05, "loss": 0.8762, "step": 17220 }, { "epoch": 1.9871942778034148, "grad_norm": 0.42161640524864197, "learning_rate": 6.176731552342104e-05, "loss": 0.9427, "step": 17225 }, { "epoch": 1.9877711121365944, "grad_norm": 0.42144665122032166, "learning_rate": 6.170530395195561e-05, "loss": 0.9246, "step": 17230 }, { "epoch": 1.9883479464697738, "grad_norm": 0.40069445967674255, "learning_rate": 6.164330963232535e-05, "loss": 0.9494, "step": 17235 }, { "epoch": 1.9889247808029533, "grad_norm": 0.3807680010795593, "learning_rate": 6.158133259245877e-05, "loss": 0.9212, "step": 17240 }, { "epoch": 1.9895016151361329, "grad_norm": 0.3835570812225342, "learning_rate": 6.151937286027669e-05, "loss": 0.9311, "step": 17245 }, { "epoch": 1.9900784494693124, "grad_norm": 0.40442800521850586, "learning_rate": 6.145743046369205e-05, "loss": 0.9444, "step": 17250 }, { "epoch": 1.990655283802492, "grad_norm": 0.4020007848739624, "learning_rate": 6.139550543061006e-05, "loss": 0.9084, "step": 17255 }, { "epoch": 1.9912321181356716, "grad_norm": 0.3811594545841217, "learning_rate": 6.133359778892802e-05, "loss": 0.9033, "step": 17260 }, { "epoch": 1.991808952468851, "grad_norm": 0.4268489480018616, "learning_rate": 6.127170756653546e-05, "loss": 0.9646, "step": 17265 }, { "epoch": 1.9923857868020305, "grad_norm": 0.43692687153816223, "learning_rate": 6.120983479131411e-05, "loss": 0.9556, "step": 17270 }, { "epoch": 1.9929626211352098, "grad_norm": 0.3834821581840515, "learning_rate": 6.114797949113767e-05, "loss": 0.901, "step": 17275 }, { "epoch": 1.9935394554683894, "grad_norm": 0.4126569628715515, "learning_rate": 6.108614169387215e-05, "loss": 0.9706, "step": 17280 }, { "epoch": 1.994116289801569, "grad_norm": 0.41819560527801514, "learning_rate": 6.102432142737555e-05, "loss": 0.8937, "step": 17285 }, { "epoch": 1.9946931241347485, "grad_norm": 0.40084108710289, "learning_rate": 6.096251871949804e-05, "loss": 0.9308, "step": 17290 }, { "epoch": 1.995269958467928, "grad_norm": 0.35687753558158875, "learning_rate": 6.090073359808188e-05, "loss": 0.9226, "step": 17295 }, { "epoch": 1.9958467928011077, "grad_norm": 0.393536776304245, "learning_rate": 6.0838966090961355e-05, "loss": 0.913, "step": 17300 }, { "epoch": 1.996423627134287, "grad_norm": 0.36556947231292725, "learning_rate": 6.077721622596287e-05, "loss": 0.9291, "step": 17305 }, { "epoch": 1.9970004614674666, "grad_norm": 0.3737342357635498, "learning_rate": 6.071548403090488e-05, "loss": 0.8971, "step": 17310 }, { "epoch": 1.997577295800646, "grad_norm": 0.41196849942207336, "learning_rate": 6.0653769533597804e-05, "loss": 0.9329, "step": 17315 }, { "epoch": 1.9981541301338255, "grad_norm": 0.40694162249565125, "learning_rate": 6.059207276184416e-05, "loss": 0.9129, "step": 17320 }, { "epoch": 1.998730964467005, "grad_norm": 0.4291395843029022, "learning_rate": 6.053039374343849e-05, "loss": 0.9066, "step": 17325 }, { "epoch": 1.9993077988001846, "grad_norm": 0.45420485734939575, "learning_rate": 6.046873250616729e-05, "loss": 0.9447, "step": 17330 }, { "epoch": 1.9998846331333642, "grad_norm": 0.4170719087123871, "learning_rate": 6.040708907780907e-05, "loss": 0.9487, "step": 17335 }, { "epoch": 2.0, "eval_loss": 0.9607023000717163, "eval_runtime": 959.7881, "eval_samples_per_second": 15.993, "eval_steps_per_second": 1.0, "step": 17336 }, { "epoch": 2.0004614674665437, "grad_norm": 0.3912767767906189, "learning_rate": 6.0345463486134325e-05, "loss": 0.8925, "step": 17340 }, { "epoch": 2.0010383017997233, "grad_norm": 0.48886004090309143, "learning_rate": 6.0283855758905496e-05, "loss": 0.8874, "step": 17345 }, { "epoch": 2.0016151361329024, "grad_norm": 0.38123923540115356, "learning_rate": 6.0222265923876995e-05, "loss": 0.8842, "step": 17350 }, { "epoch": 2.002191970466082, "grad_norm": 0.3847237825393677, "learning_rate": 6.0160694008795114e-05, "loss": 0.8911, "step": 17355 }, { "epoch": 2.0027688047992616, "grad_norm": 0.3945702314376831, "learning_rate": 6.0099140041398205e-05, "loss": 0.8977, "step": 17360 }, { "epoch": 2.003345639132441, "grad_norm": 0.3811289370059967, "learning_rate": 6.0037604049416383e-05, "loss": 0.8462, "step": 17365 }, { "epoch": 2.0039224734656207, "grad_norm": 0.3992691934108734, "learning_rate": 5.9976086060571765e-05, "loss": 0.903, "step": 17370 }, { "epoch": 2.0044993077988003, "grad_norm": 0.4333883225917816, "learning_rate": 5.9914586102578284e-05, "loss": 0.9278, "step": 17375 }, { "epoch": 2.00507614213198, "grad_norm": 0.4005573093891144, "learning_rate": 5.9853104203141854e-05, "loss": 0.874, "step": 17380 }, { "epoch": 2.0056529764651594, "grad_norm": 0.4214078187942505, "learning_rate": 5.979164038996015e-05, "loss": 0.8595, "step": 17385 }, { "epoch": 2.0062298107983385, "grad_norm": 0.43147456645965576, "learning_rate": 5.973019469072272e-05, "loss": 0.8401, "step": 17390 }, { "epoch": 2.006806645131518, "grad_norm": 0.44270798563957214, "learning_rate": 5.966876713311103e-05, "loss": 0.8779, "step": 17395 }, { "epoch": 2.0073834794646976, "grad_norm": 0.424435555934906, "learning_rate": 5.960735774479826e-05, "loss": 0.8772, "step": 17400 }, { "epoch": 2.007960313797877, "grad_norm": 0.38282132148742676, "learning_rate": 5.954596655344951e-05, "loss": 0.8401, "step": 17405 }, { "epoch": 2.0085371481310568, "grad_norm": 0.40239688754081726, "learning_rate": 5.9484593586721546e-05, "loss": 0.8821, "step": 17410 }, { "epoch": 2.0091139824642363, "grad_norm": 0.4169721007347107, "learning_rate": 5.942323887226311e-05, "loss": 0.8924, "step": 17415 }, { "epoch": 2.009690816797416, "grad_norm": 0.4102388024330139, "learning_rate": 5.936190243771458e-05, "loss": 0.8869, "step": 17420 }, { "epoch": 2.0102676511305955, "grad_norm": 0.4065175950527191, "learning_rate": 5.9300584310708086e-05, "loss": 0.8357, "step": 17425 }, { "epoch": 2.0108444854637746, "grad_norm": 0.42205628752708435, "learning_rate": 5.923928451886767e-05, "loss": 0.8602, "step": 17430 }, { "epoch": 2.011421319796954, "grad_norm": 0.4142395555973053, "learning_rate": 5.917800308980892e-05, "loss": 0.8751, "step": 17435 }, { "epoch": 2.0119981541301337, "grad_norm": 0.3703550398349762, "learning_rate": 5.911674005113929e-05, "loss": 0.9026, "step": 17440 }, { "epoch": 2.0125749884633133, "grad_norm": 0.43400120735168457, "learning_rate": 5.905549543045783e-05, "loss": 0.8188, "step": 17445 }, { "epoch": 2.013151822796493, "grad_norm": 0.3862331509590149, "learning_rate": 5.899426925535545e-05, "loss": 0.8668, "step": 17450 }, { "epoch": 2.0137286571296724, "grad_norm": 0.36499011516571045, "learning_rate": 5.8933061553414614e-05, "loss": 0.8867, "step": 17455 }, { "epoch": 2.014305491462852, "grad_norm": 0.3892669975757599, "learning_rate": 5.887187235220948e-05, "loss": 0.8732, "step": 17460 }, { "epoch": 2.0148823257960315, "grad_norm": 0.4285455346107483, "learning_rate": 5.881070167930598e-05, "loss": 0.87, "step": 17465 }, { "epoch": 2.0154591601292107, "grad_norm": 0.42607060074806213, "learning_rate": 5.874954956226157e-05, "loss": 0.8865, "step": 17470 }, { "epoch": 2.0160359944623902, "grad_norm": 0.4194190502166748, "learning_rate": 5.868841602862541e-05, "loss": 0.8928, "step": 17475 }, { "epoch": 2.01661282879557, "grad_norm": 0.4271929860115051, "learning_rate": 5.862730110593824e-05, "loss": 0.8396, "step": 17480 }, { "epoch": 2.0171896631287494, "grad_norm": 0.3861948251724243, "learning_rate": 5.856620482173252e-05, "loss": 0.8878, "step": 17485 }, { "epoch": 2.017766497461929, "grad_norm": 0.39574941992759705, "learning_rate": 5.8505127203532216e-05, "loss": 0.8399, "step": 17490 }, { "epoch": 2.0183433317951085, "grad_norm": 0.40148764848709106, "learning_rate": 5.844406827885287e-05, "loss": 0.8678, "step": 17495 }, { "epoch": 2.018920166128288, "grad_norm": 0.396918922662735, "learning_rate": 5.838302807520171e-05, "loss": 0.8774, "step": 17500 }, { "epoch": 2.0194970004614676, "grad_norm": 0.45297619700431824, "learning_rate": 5.8322006620077426e-05, "loss": 0.848, "step": 17505 }, { "epoch": 2.020073834794647, "grad_norm": 0.4377535283565521, "learning_rate": 5.826100394097036e-05, "loss": 0.876, "step": 17510 }, { "epoch": 2.0206506691278263, "grad_norm": 0.423527330160141, "learning_rate": 5.8200020065362246e-05, "loss": 0.8914, "step": 17515 }, { "epoch": 2.021227503461006, "grad_norm": 0.4209001362323761, "learning_rate": 5.8139055020726494e-05, "loss": 0.8783, "step": 17520 }, { "epoch": 2.0218043377941854, "grad_norm": 0.4174181818962097, "learning_rate": 5.807810883452798e-05, "loss": 0.8553, "step": 17525 }, { "epoch": 2.022381172127365, "grad_norm": 0.4181443452835083, "learning_rate": 5.8017181534223096e-05, "loss": 0.9307, "step": 17530 }, { "epoch": 2.0229580064605446, "grad_norm": 0.4317053556442261, "learning_rate": 5.7956273147259645e-05, "loss": 0.8831, "step": 17535 }, { "epoch": 2.023534840793724, "grad_norm": 0.4345930516719818, "learning_rate": 5.789538370107701e-05, "loss": 0.8768, "step": 17540 }, { "epoch": 2.0241116751269037, "grad_norm": 0.39889758825302124, "learning_rate": 5.7834513223106004e-05, "loss": 0.8695, "step": 17545 }, { "epoch": 2.0246885094600833, "grad_norm": 0.45139816403388977, "learning_rate": 5.77736617407689e-05, "loss": 0.8829, "step": 17550 }, { "epoch": 2.0252653437932624, "grad_norm": 0.43973278999328613, "learning_rate": 5.771282928147941e-05, "loss": 0.8439, "step": 17555 }, { "epoch": 2.025842178126442, "grad_norm": 0.4227539896965027, "learning_rate": 5.765201587264271e-05, "loss": 0.8518, "step": 17560 }, { "epoch": 2.0264190124596215, "grad_norm": 0.4028456211090088, "learning_rate": 5.7591221541655285e-05, "loss": 0.8845, "step": 17565 }, { "epoch": 2.026995846792801, "grad_norm": 0.4572751820087433, "learning_rate": 5.753044631590513e-05, "loss": 0.8713, "step": 17570 }, { "epoch": 2.0275726811259807, "grad_norm": 0.42924416065216064, "learning_rate": 5.746969022277161e-05, "loss": 0.8404, "step": 17575 }, { "epoch": 2.0281495154591602, "grad_norm": 0.38023436069488525, "learning_rate": 5.74089532896255e-05, "loss": 0.8709, "step": 17580 }, { "epoch": 2.02872634979234, "grad_norm": 0.41763070225715637, "learning_rate": 5.7348235543828834e-05, "loss": 0.8754, "step": 17585 }, { "epoch": 2.0293031841255194, "grad_norm": 0.4525087773799896, "learning_rate": 5.7287537012735104e-05, "loss": 0.9014, "step": 17590 }, { "epoch": 2.0298800184586985, "grad_norm": 0.4048612117767334, "learning_rate": 5.722685772368912e-05, "loss": 0.8648, "step": 17595 }, { "epoch": 2.030456852791878, "grad_norm": 0.4235800802707672, "learning_rate": 5.716619770402707e-05, "loss": 0.8565, "step": 17600 }, { "epoch": 2.0310336871250576, "grad_norm": 0.38395988941192627, "learning_rate": 5.710555698107627e-05, "loss": 0.882, "step": 17605 }, { "epoch": 2.031610521458237, "grad_norm": 0.40724024176597595, "learning_rate": 5.704493558215567e-05, "loss": 0.8982, "step": 17610 }, { "epoch": 2.0321873557914167, "grad_norm": 0.41519051790237427, "learning_rate": 5.69843335345752e-05, "loss": 0.8696, "step": 17615 }, { "epoch": 2.0327641901245963, "grad_norm": 0.37952789664268494, "learning_rate": 5.692375086563622e-05, "loss": 0.8895, "step": 17620 }, { "epoch": 2.033341024457776, "grad_norm": 0.37834814190864563, "learning_rate": 5.6863187602631354e-05, "loss": 0.8362, "step": 17625 }, { "epoch": 2.0339178587909554, "grad_norm": 0.40140074491500854, "learning_rate": 5.680264377284451e-05, "loss": 0.8111, "step": 17630 }, { "epoch": 2.0344946931241346, "grad_norm": 0.41932836174964905, "learning_rate": 5.6742119403550733e-05, "loss": 0.906, "step": 17635 }, { "epoch": 2.035071527457314, "grad_norm": 0.3922612965106964, "learning_rate": 5.668161452201639e-05, "loss": 0.8498, "step": 17640 }, { "epoch": 2.0356483617904937, "grad_norm": 0.3891882300376892, "learning_rate": 5.6621129155499066e-05, "loss": 0.8675, "step": 17645 }, { "epoch": 2.0362251961236733, "grad_norm": 0.42834147810935974, "learning_rate": 5.6560663331247556e-05, "loss": 0.8348, "step": 17650 }, { "epoch": 2.036802030456853, "grad_norm": 0.43242147564888, "learning_rate": 5.650021707650173e-05, "loss": 0.8868, "step": 17655 }, { "epoch": 2.0373788647900324, "grad_norm": 0.4187890887260437, "learning_rate": 5.64397904184929e-05, "loss": 0.887, "step": 17660 }, { "epoch": 2.037955699123212, "grad_norm": 0.41034138202667236, "learning_rate": 5.6379383384443255e-05, "loss": 0.9019, "step": 17665 }, { "epoch": 2.0385325334563915, "grad_norm": 0.5285660624504089, "learning_rate": 5.6318996001566384e-05, "loss": 0.8967, "step": 17670 }, { "epoch": 2.0391093677895706, "grad_norm": 0.4344254434108734, "learning_rate": 5.625862829706679e-05, "loss": 0.9037, "step": 17675 }, { "epoch": 2.03968620212275, "grad_norm": 0.40379151701927185, "learning_rate": 5.6198280298140404e-05, "loss": 0.8866, "step": 17680 }, { "epoch": 2.0402630364559298, "grad_norm": 0.46100154519081116, "learning_rate": 5.613795203197401e-05, "loss": 0.8283, "step": 17685 }, { "epoch": 2.0408398707891093, "grad_norm": 0.45787981152534485, "learning_rate": 5.607764352574565e-05, "loss": 0.8646, "step": 17690 }, { "epoch": 2.041416705122289, "grad_norm": 0.4042724072933197, "learning_rate": 5.6017354806624344e-05, "loss": 0.8741, "step": 17695 }, { "epoch": 2.0419935394554685, "grad_norm": 0.3924694061279297, "learning_rate": 5.5957085901770424e-05, "loss": 0.8481, "step": 17700 }, { "epoch": 2.042570373788648, "grad_norm": 0.4196171164512634, "learning_rate": 5.589683683833502e-05, "loss": 0.9099, "step": 17705 }, { "epoch": 2.0431472081218276, "grad_norm": 0.4184753894805908, "learning_rate": 5.5836607643460504e-05, "loss": 0.8511, "step": 17710 }, { "epoch": 2.0437240424550067, "grad_norm": 0.3830004632472992, "learning_rate": 5.577639834428026e-05, "loss": 0.8416, "step": 17715 }, { "epoch": 2.0443008767881863, "grad_norm": 0.4424822926521301, "learning_rate": 5.571620896791869e-05, "loss": 0.8951, "step": 17720 }, { "epoch": 2.044877711121366, "grad_norm": 0.44922518730163574, "learning_rate": 5.565603954149118e-05, "loss": 0.8697, "step": 17725 }, { "epoch": 2.0454545454545454, "grad_norm": 0.4096304774284363, "learning_rate": 5.559589009210421e-05, "loss": 0.8638, "step": 17730 }, { "epoch": 2.046031379787725, "grad_norm": 0.41669782996177673, "learning_rate": 5.553576064685522e-05, "loss": 0.8459, "step": 17735 }, { "epoch": 2.0466082141209045, "grad_norm": 0.4168075621128082, "learning_rate": 5.547565123283267e-05, "loss": 0.8374, "step": 17740 }, { "epoch": 2.047185048454084, "grad_norm": 0.40191158652305603, "learning_rate": 5.5415561877115876e-05, "loss": 0.915, "step": 17745 }, { "epoch": 2.0477618827872637, "grad_norm": 0.40572503209114075, "learning_rate": 5.535549260677534e-05, "loss": 0.8361, "step": 17750 }, { "epoch": 2.048338717120443, "grad_norm": 0.36613893508911133, "learning_rate": 5.529544344887227e-05, "loss": 0.8305, "step": 17755 }, { "epoch": 2.0489155514536224, "grad_norm": 0.4345441460609436, "learning_rate": 5.523541443045904e-05, "loss": 0.8612, "step": 17760 }, { "epoch": 2.049492385786802, "grad_norm": 0.44153690338134766, "learning_rate": 5.517540557857869e-05, "loss": 0.8863, "step": 17765 }, { "epoch": 2.0500692201199815, "grad_norm": 0.3826152980327606, "learning_rate": 5.511541692026549e-05, "loss": 0.8733, "step": 17770 }, { "epoch": 2.050646054453161, "grad_norm": 0.44603461027145386, "learning_rate": 5.505544848254432e-05, "loss": 0.8998, "step": 17775 }, { "epoch": 2.0512228887863406, "grad_norm": 0.43969833850860596, "learning_rate": 5.4995500292431144e-05, "loss": 0.8992, "step": 17780 }, { "epoch": 2.05179972311952, "grad_norm": 0.4337633550167084, "learning_rate": 5.493557237693271e-05, "loss": 0.8735, "step": 17785 }, { "epoch": 2.0523765574526998, "grad_norm": 0.3976989984512329, "learning_rate": 5.4875664763046705e-05, "loss": 0.8607, "step": 17790 }, { "epoch": 2.052953391785879, "grad_norm": 0.44140127301216125, "learning_rate": 5.481577747776156e-05, "loss": 0.9202, "step": 17795 }, { "epoch": 2.0535302261190584, "grad_norm": 0.439375638961792, "learning_rate": 5.4755910548056666e-05, "loss": 0.8707, "step": 17800 }, { "epoch": 2.054107060452238, "grad_norm": 0.42562079429626465, "learning_rate": 5.4696064000902146e-05, "loss": 0.8643, "step": 17805 }, { "epoch": 2.0546838947854176, "grad_norm": 0.4260598421096802, "learning_rate": 5.463623786325907e-05, "loss": 0.8906, "step": 17810 }, { "epoch": 2.055260729118597, "grad_norm": 0.40893182158470154, "learning_rate": 5.457643216207907e-05, "loss": 0.8497, "step": 17815 }, { "epoch": 2.0558375634517767, "grad_norm": 0.3989853262901306, "learning_rate": 5.451664692430493e-05, "loss": 0.889, "step": 17820 }, { "epoch": 2.0564143977849563, "grad_norm": 0.38165777921676636, "learning_rate": 5.445688217686986e-05, "loss": 0.8788, "step": 17825 }, { "epoch": 2.056991232118136, "grad_norm": 0.4174046218395233, "learning_rate": 5.4397137946698106e-05, "loss": 0.8305, "step": 17830 }, { "epoch": 2.0575680664513154, "grad_norm": 0.4230709373950958, "learning_rate": 5.433741426070442e-05, "loss": 0.877, "step": 17835 }, { "epoch": 2.0581449007844945, "grad_norm": 0.44466516375541687, "learning_rate": 5.427771114579462e-05, "loss": 0.8842, "step": 17840 }, { "epoch": 2.058721735117674, "grad_norm": 0.42083120346069336, "learning_rate": 5.421802862886494e-05, "loss": 0.8768, "step": 17845 }, { "epoch": 2.0592985694508537, "grad_norm": 0.46251851320266724, "learning_rate": 5.415836673680253e-05, "loss": 0.91, "step": 17850 }, { "epoch": 2.059875403784033, "grad_norm": 0.41464483737945557, "learning_rate": 5.4098725496485116e-05, "loss": 0.8935, "step": 17855 }, { "epoch": 2.060452238117213, "grad_norm": 0.40952378511428833, "learning_rate": 5.4039104934781305e-05, "loss": 0.8705, "step": 17860 }, { "epoch": 2.0610290724503924, "grad_norm": 0.40670013427734375, "learning_rate": 5.3979505078550184e-05, "loss": 0.8536, "step": 17865 }, { "epoch": 2.061605906783572, "grad_norm": 0.4991162121295929, "learning_rate": 5.391992595464166e-05, "loss": 0.8463, "step": 17870 }, { "epoch": 2.0621827411167515, "grad_norm": 0.3962211012840271, "learning_rate": 5.38603675898962e-05, "loss": 0.8782, "step": 17875 }, { "epoch": 2.0627595754499306, "grad_norm": 0.41492053866386414, "learning_rate": 5.380083001114503e-05, "loss": 0.9583, "step": 17880 }, { "epoch": 2.06333640978311, "grad_norm": 0.44420069456100464, "learning_rate": 5.3741313245209854e-05, "loss": 0.896, "step": 17885 }, { "epoch": 2.0639132441162897, "grad_norm": 0.37794816493988037, "learning_rate": 5.368181731890316e-05, "loss": 0.9014, "step": 17890 }, { "epoch": 2.0644900784494693, "grad_norm": 0.47934192419052124, "learning_rate": 5.362234225902794e-05, "loss": 0.9153, "step": 17895 }, { "epoch": 2.065066912782649, "grad_norm": 0.4091241657733917, "learning_rate": 5.356288809237788e-05, "loss": 0.8829, "step": 17900 }, { "epoch": 2.0656437471158284, "grad_norm": 0.4090352952480316, "learning_rate": 5.350345484573709e-05, "loss": 0.8856, "step": 17905 }, { "epoch": 2.066220581449008, "grad_norm": 0.4243384003639221, "learning_rate": 5.3444042545880514e-05, "loss": 0.8999, "step": 17910 }, { "epoch": 2.0667974157821876, "grad_norm": 0.45561861991882324, "learning_rate": 5.338465121957338e-05, "loss": 0.8859, "step": 17915 }, { "epoch": 2.0673742501153667, "grad_norm": 0.4170970320701599, "learning_rate": 5.332528089357165e-05, "loss": 0.8182, "step": 17920 }, { "epoch": 2.0679510844485463, "grad_norm": 0.4319002628326416, "learning_rate": 5.3265931594621756e-05, "loss": 0.9013, "step": 17925 }, { "epoch": 2.068527918781726, "grad_norm": 0.43598082661628723, "learning_rate": 5.320660334946072e-05, "loss": 0.8559, "step": 17930 }, { "epoch": 2.0691047531149054, "grad_norm": 0.4153001308441162, "learning_rate": 5.3147296184815956e-05, "loss": 0.8746, "step": 17935 }, { "epoch": 2.069681587448085, "grad_norm": 0.42318597435951233, "learning_rate": 5.3088010127405496e-05, "loss": 0.8615, "step": 17940 }, { "epoch": 2.0702584217812645, "grad_norm": 0.41694238781929016, "learning_rate": 5.3028745203937825e-05, "loss": 0.8214, "step": 17945 }, { "epoch": 2.070835256114444, "grad_norm": 0.4185914993286133, "learning_rate": 5.296950144111195e-05, "loss": 0.8326, "step": 17950 }, { "epoch": 2.0714120904476236, "grad_norm": 0.38809898495674133, "learning_rate": 5.29102788656172e-05, "loss": 0.8329, "step": 17955 }, { "epoch": 2.0719889247808028, "grad_norm": 0.40080171823501587, "learning_rate": 5.285107750413353e-05, "loss": 0.8864, "step": 17960 }, { "epoch": 2.0725657591139823, "grad_norm": 0.4352705180644989, "learning_rate": 5.279189738333125e-05, "loss": 0.8707, "step": 17965 }, { "epoch": 2.073142593447162, "grad_norm": 0.41367441415786743, "learning_rate": 5.273273852987113e-05, "loss": 0.8819, "step": 17970 }, { "epoch": 2.0737194277803415, "grad_norm": 0.40614110231399536, "learning_rate": 5.2673600970404336e-05, "loss": 0.9156, "step": 17975 }, { "epoch": 2.074296262113521, "grad_norm": 0.41569119691848755, "learning_rate": 5.26144847315725e-05, "loss": 0.8798, "step": 17980 }, { "epoch": 2.0748730964467006, "grad_norm": 0.41030633449554443, "learning_rate": 5.255538984000753e-05, "loss": 0.8908, "step": 17985 }, { "epoch": 2.07544993077988, "grad_norm": 0.43785807490348816, "learning_rate": 5.249631632233182e-05, "loss": 0.9173, "step": 17990 }, { "epoch": 2.0760267651130597, "grad_norm": 0.40564823150634766, "learning_rate": 5.243726420515811e-05, "loss": 0.8732, "step": 17995 }, { "epoch": 2.076603599446239, "grad_norm": 0.3811871409416199, "learning_rate": 5.237823351508953e-05, "loss": 0.8702, "step": 18000 }, { "epoch": 2.0771804337794184, "grad_norm": 0.41942134499549866, "learning_rate": 5.231922427871945e-05, "loss": 0.895, "step": 18005 }, { "epoch": 2.077757268112598, "grad_norm": 0.4052973985671997, "learning_rate": 5.2260236522631665e-05, "loss": 0.8592, "step": 18010 }, { "epoch": 2.0783341024457775, "grad_norm": 0.4108360707759857, "learning_rate": 5.2201270273400296e-05, "loss": 0.8819, "step": 18015 }, { "epoch": 2.078910936778957, "grad_norm": 0.417239785194397, "learning_rate": 5.2142325557589753e-05, "loss": 0.8474, "step": 18020 }, { "epoch": 2.0794877711121367, "grad_norm": 0.42869383096694946, "learning_rate": 5.208340240175476e-05, "loss": 0.8416, "step": 18025 }, { "epoch": 2.0800646054453162, "grad_norm": 0.40606796741485596, "learning_rate": 5.202450083244026e-05, "loss": 0.8673, "step": 18030 }, { "epoch": 2.080641439778496, "grad_norm": 0.3967663049697876, "learning_rate": 5.1965620876181564e-05, "loss": 0.8634, "step": 18035 }, { "epoch": 2.081218274111675, "grad_norm": 0.4389677345752716, "learning_rate": 5.190676255950418e-05, "loss": 0.8818, "step": 18040 }, { "epoch": 2.0817951084448545, "grad_norm": 0.440441370010376, "learning_rate": 5.184792590892397e-05, "loss": 0.875, "step": 18045 }, { "epoch": 2.082371942778034, "grad_norm": 0.45011159777641296, "learning_rate": 5.178911095094685e-05, "loss": 0.8821, "step": 18050 }, { "epoch": 2.0829487771112136, "grad_norm": 0.43679890036582947, "learning_rate": 5.173031771206913e-05, "loss": 0.8342, "step": 18055 }, { "epoch": 2.083525611444393, "grad_norm": 0.41379445791244507, "learning_rate": 5.167154621877728e-05, "loss": 0.8426, "step": 18060 }, { "epoch": 2.0841024457775728, "grad_norm": 0.4258072078227997, "learning_rate": 5.161279649754796e-05, "loss": 0.9147, "step": 18065 }, { "epoch": 2.0846792801107523, "grad_norm": 0.45177289843559265, "learning_rate": 5.155406857484804e-05, "loss": 0.8421, "step": 18070 }, { "epoch": 2.085256114443932, "grad_norm": 0.3834865689277649, "learning_rate": 5.14953624771346e-05, "loss": 0.925, "step": 18075 }, { "epoch": 2.085832948777111, "grad_norm": 0.39812150597572327, "learning_rate": 5.143667823085477e-05, "loss": 0.8863, "step": 18080 }, { "epoch": 2.0864097831102906, "grad_norm": 0.3837997317314148, "learning_rate": 5.1378015862445975e-05, "loss": 0.8636, "step": 18085 }, { "epoch": 2.08698661744347, "grad_norm": 0.4225236773490906, "learning_rate": 5.131937539833571e-05, "loss": 0.9135, "step": 18090 }, { "epoch": 2.0875634517766497, "grad_norm": 0.41509759426116943, "learning_rate": 5.126075686494165e-05, "loss": 0.8911, "step": 18095 }, { "epoch": 2.0881402861098293, "grad_norm": 0.3711944818496704, "learning_rate": 5.1202160288671505e-05, "loss": 0.8855, "step": 18100 }, { "epoch": 2.088717120443009, "grad_norm": 0.40293386578559875, "learning_rate": 5.1143585695923166e-05, "loss": 0.8907, "step": 18105 }, { "epoch": 2.0892939547761884, "grad_norm": 0.3973406255245209, "learning_rate": 5.108503311308461e-05, "loss": 0.8828, "step": 18110 }, { "epoch": 2.089870789109368, "grad_norm": 0.46865829825401306, "learning_rate": 5.1026502566533917e-05, "loss": 0.8478, "step": 18115 }, { "epoch": 2.090447623442547, "grad_norm": 0.3833647668361664, "learning_rate": 5.09679940826391e-05, "loss": 0.8695, "step": 18120 }, { "epoch": 2.0910244577757267, "grad_norm": 0.44151002168655396, "learning_rate": 5.0909507687758515e-05, "loss": 0.8356, "step": 18125 }, { "epoch": 2.091601292108906, "grad_norm": 0.4126966595649719, "learning_rate": 5.085104340824027e-05, "loss": 0.8573, "step": 18130 }, { "epoch": 2.092178126442086, "grad_norm": 0.4035085141658783, "learning_rate": 5.079260127042267e-05, "loss": 0.8223, "step": 18135 }, { "epoch": 2.0927549607752653, "grad_norm": 0.4391005337238312, "learning_rate": 5.0734181300634024e-05, "loss": 0.9041, "step": 18140 }, { "epoch": 2.093331795108445, "grad_norm": 0.4382282495498657, "learning_rate": 5.067578352519267e-05, "loss": 0.8725, "step": 18145 }, { "epoch": 2.0939086294416245, "grad_norm": 0.3936323821544647, "learning_rate": 5.061740797040684e-05, "loss": 0.8838, "step": 18150 }, { "epoch": 2.094485463774804, "grad_norm": 0.4061303734779358, "learning_rate": 5.0559054662574876e-05, "loss": 0.88, "step": 18155 }, { "epoch": 2.095062298107983, "grad_norm": 0.47590842843055725, "learning_rate": 5.050072362798507e-05, "loss": 0.8818, "step": 18160 }, { "epoch": 2.0956391324411627, "grad_norm": 0.4334276020526886, "learning_rate": 5.044241489291569e-05, "loss": 0.8728, "step": 18165 }, { "epoch": 2.0962159667743423, "grad_norm": 0.4106757640838623, "learning_rate": 5.0384128483634875e-05, "loss": 0.8644, "step": 18170 }, { "epoch": 2.096792801107522, "grad_norm": 0.3826504051685333, "learning_rate": 5.032586442640077e-05, "loss": 0.8619, "step": 18175 }, { "epoch": 2.0973696354407014, "grad_norm": 0.48092758655548096, "learning_rate": 5.0267622747461487e-05, "loss": 0.8642, "step": 18180 }, { "epoch": 2.097946469773881, "grad_norm": 0.4312995672225952, "learning_rate": 5.020940347305503e-05, "loss": 0.8716, "step": 18185 }, { "epoch": 2.0985233041070606, "grad_norm": 0.391157865524292, "learning_rate": 5.0151206629409195e-05, "loss": 0.8319, "step": 18190 }, { "epoch": 2.09910013844024, "grad_norm": 0.42850786447525024, "learning_rate": 5.009303224274191e-05, "loss": 0.8552, "step": 18195 }, { "epoch": 2.0996769727734197, "grad_norm": 0.47858262062072754, "learning_rate": 5.0034880339260734e-05, "loss": 0.8636, "step": 18200 }, { "epoch": 2.100253807106599, "grad_norm": 0.38631346821784973, "learning_rate": 4.997675094516332e-05, "loss": 0.8758, "step": 18205 }, { "epoch": 2.1008306414397784, "grad_norm": 0.4535498023033142, "learning_rate": 4.991864408663692e-05, "loss": 0.855, "step": 18210 }, { "epoch": 2.101407475772958, "grad_norm": 0.41745612025260925, "learning_rate": 4.9860559789858965e-05, "loss": 0.8748, "step": 18215 }, { "epoch": 2.1019843101061375, "grad_norm": 0.4063878655433655, "learning_rate": 4.980249808099642e-05, "loss": 0.9213, "step": 18220 }, { "epoch": 2.102561144439317, "grad_norm": 0.436110258102417, "learning_rate": 4.974445898620622e-05, "loss": 0.8632, "step": 18225 }, { "epoch": 2.1031379787724966, "grad_norm": 0.39621496200561523, "learning_rate": 4.968644253163513e-05, "loss": 0.9058, "step": 18230 }, { "epoch": 2.103714813105676, "grad_norm": 0.4312610328197479, "learning_rate": 4.9628448743419675e-05, "loss": 0.8817, "step": 18235 }, { "epoch": 2.1042916474388558, "grad_norm": 0.42516598105430603, "learning_rate": 4.957047764768612e-05, "loss": 0.8902, "step": 18240 }, { "epoch": 2.104868481772035, "grad_norm": 0.4386134743690491, "learning_rate": 4.951252927055058e-05, "loss": 0.8838, "step": 18245 }, { "epoch": 2.1054453161052145, "grad_norm": 0.38619065284729004, "learning_rate": 4.945460363811891e-05, "loss": 0.8821, "step": 18250 }, { "epoch": 2.106022150438394, "grad_norm": 0.43006646633148193, "learning_rate": 4.939670077648676e-05, "loss": 0.827, "step": 18255 }, { "epoch": 2.1065989847715736, "grad_norm": 0.39681535959243774, "learning_rate": 4.933882071173939e-05, "loss": 0.8463, "step": 18260 }, { "epoch": 2.107175819104753, "grad_norm": 0.5474952459335327, "learning_rate": 4.9280963469952e-05, "loss": 0.8507, "step": 18265 }, { "epoch": 2.1077526534379327, "grad_norm": 0.39354756474494934, "learning_rate": 4.922312907718929e-05, "loss": 0.8959, "step": 18270 }, { "epoch": 2.1083294877711123, "grad_norm": 0.412794291973114, "learning_rate": 4.916531755950585e-05, "loss": 0.8846, "step": 18275 }, { "epoch": 2.108906322104292, "grad_norm": 0.4190261960029602, "learning_rate": 4.9107528942945754e-05, "loss": 0.8847, "step": 18280 }, { "epoch": 2.109483156437471, "grad_norm": 0.4136815667152405, "learning_rate": 4.9049763253543054e-05, "loss": 0.8194, "step": 18285 }, { "epoch": 2.1100599907706505, "grad_norm": 0.44577425718307495, "learning_rate": 4.8992020517321194e-05, "loss": 0.8366, "step": 18290 }, { "epoch": 2.11063682510383, "grad_norm": 0.4660280644893646, "learning_rate": 4.8934300760293396e-05, "loss": 0.8508, "step": 18295 }, { "epoch": 2.1112136594370097, "grad_norm": 0.43971607089042664, "learning_rate": 4.8876604008462554e-05, "loss": 0.9408, "step": 18300 }, { "epoch": 2.1117904937701892, "grad_norm": 0.4964000880718231, "learning_rate": 4.881893028782118e-05, "loss": 0.8984, "step": 18305 }, { "epoch": 2.112367328103369, "grad_norm": 0.4421519935131073, "learning_rate": 4.876127962435135e-05, "loss": 0.883, "step": 18310 }, { "epoch": 2.1129441624365484, "grad_norm": 0.4391144812107086, "learning_rate": 4.870365204402483e-05, "loss": 0.8759, "step": 18315 }, { "epoch": 2.113520996769728, "grad_norm": 0.43410804867744446, "learning_rate": 4.864604757280293e-05, "loss": 0.8744, "step": 18320 }, { "epoch": 2.114097831102907, "grad_norm": 0.4183378219604492, "learning_rate": 4.8588466236636656e-05, "loss": 0.846, "step": 18325 }, { "epoch": 2.1146746654360866, "grad_norm": 0.39406901597976685, "learning_rate": 4.8530908061466404e-05, "loss": 0.8236, "step": 18330 }, { "epoch": 2.115251499769266, "grad_norm": 0.4376058876514435, "learning_rate": 4.8473373073222294e-05, "loss": 0.8406, "step": 18335 }, { "epoch": 2.1158283341024458, "grad_norm": 0.4023524224758148, "learning_rate": 4.841586129782395e-05, "loss": 0.8672, "step": 18340 }, { "epoch": 2.1164051684356253, "grad_norm": 0.5077024698257446, "learning_rate": 4.835837276118058e-05, "loss": 0.8847, "step": 18345 }, { "epoch": 2.116982002768805, "grad_norm": 0.39667654037475586, "learning_rate": 4.830090748919076e-05, "loss": 0.8446, "step": 18350 }, { "epoch": 2.1175588371019844, "grad_norm": 0.4139440953731537, "learning_rate": 4.8243465507742866e-05, "loss": 0.8577, "step": 18355 }, { "epoch": 2.118135671435164, "grad_norm": 0.40672725439071655, "learning_rate": 4.8186046842714504e-05, "loss": 0.8879, "step": 18360 }, { "epoch": 2.118712505768343, "grad_norm": 0.40575653314590454, "learning_rate": 4.812865151997298e-05, "loss": 0.9089, "step": 18365 }, { "epoch": 2.1192893401015227, "grad_norm": 0.46657443046569824, "learning_rate": 4.807127956537487e-05, "loss": 0.8865, "step": 18370 }, { "epoch": 2.1198661744347023, "grad_norm": 0.49544504284858704, "learning_rate": 4.801393100476651e-05, "loss": 0.9071, "step": 18375 }, { "epoch": 2.120443008767882, "grad_norm": 0.462701678276062, "learning_rate": 4.795660586398342e-05, "loss": 0.8795, "step": 18380 }, { "epoch": 2.1210198431010614, "grad_norm": 0.41454997658729553, "learning_rate": 4.7899304168850734e-05, "loss": 0.8521, "step": 18385 }, { "epoch": 2.121596677434241, "grad_norm": 0.4203789234161377, "learning_rate": 4.784202594518298e-05, "loss": 0.8631, "step": 18390 }, { "epoch": 2.1221735117674205, "grad_norm": 0.38255804777145386, "learning_rate": 4.778477121878413e-05, "loss": 0.859, "step": 18395 }, { "epoch": 2.1227503461006, "grad_norm": 0.405504435300827, "learning_rate": 4.772754001544748e-05, "loss": 0.8923, "step": 18400 }, { "epoch": 2.123327180433779, "grad_norm": 0.48973825573921204, "learning_rate": 4.767033236095585e-05, "loss": 0.8781, "step": 18405 }, { "epoch": 2.123904014766959, "grad_norm": 0.43271970748901367, "learning_rate": 4.761314828108139e-05, "loss": 0.887, "step": 18410 }, { "epoch": 2.1244808491001383, "grad_norm": 0.49364644289016724, "learning_rate": 4.755598780158568e-05, "loss": 0.8777, "step": 18415 }, { "epoch": 2.125057683433318, "grad_norm": 0.42385977506637573, "learning_rate": 4.749885094821951e-05, "loss": 0.8959, "step": 18420 }, { "epoch": 2.1256345177664975, "grad_norm": 0.4609013795852661, "learning_rate": 4.744173774672329e-05, "loss": 0.8556, "step": 18425 }, { "epoch": 2.126211352099677, "grad_norm": 0.4490292966365814, "learning_rate": 4.738464822282652e-05, "loss": 0.8809, "step": 18430 }, { "epoch": 2.1267881864328566, "grad_norm": 0.39603060483932495, "learning_rate": 4.732758240224818e-05, "loss": 0.9149, "step": 18435 }, { "epoch": 2.127365020766036, "grad_norm": 0.41065889596939087, "learning_rate": 4.727054031069654e-05, "loss": 0.8645, "step": 18440 }, { "epoch": 2.1279418550992153, "grad_norm": 0.4732472002506256, "learning_rate": 4.7213521973869215e-05, "loss": 0.8702, "step": 18445 }, { "epoch": 2.128518689432395, "grad_norm": 0.42086437344551086, "learning_rate": 4.715652741745298e-05, "loss": 0.8654, "step": 18450 }, { "epoch": 2.1290955237655744, "grad_norm": 0.4273388981819153, "learning_rate": 4.709955666712404e-05, "loss": 0.8802, "step": 18455 }, { "epoch": 2.129672358098754, "grad_norm": 0.4001615047454834, "learning_rate": 4.704260974854784e-05, "loss": 0.8651, "step": 18460 }, { "epoch": 2.1302491924319336, "grad_norm": 0.38031452894210815, "learning_rate": 4.6985686687379103e-05, "loss": 0.8613, "step": 18465 }, { "epoch": 2.130826026765113, "grad_norm": 0.44780388474464417, "learning_rate": 4.692878750926171e-05, "loss": 0.8348, "step": 18470 }, { "epoch": 2.1314028610982927, "grad_norm": 0.44016191363334656, "learning_rate": 4.687191223982889e-05, "loss": 0.8832, "step": 18475 }, { "epoch": 2.1319796954314723, "grad_norm": 0.3917645514011383, "learning_rate": 4.6815060904703046e-05, "loss": 0.8561, "step": 18480 }, { "epoch": 2.132556529764652, "grad_norm": 0.4286423921585083, "learning_rate": 4.6758233529495846e-05, "loss": 0.8981, "step": 18485 }, { "epoch": 2.133133364097831, "grad_norm": 0.39710286259651184, "learning_rate": 4.670143013980814e-05, "loss": 0.8657, "step": 18490 }, { "epoch": 2.1337101984310105, "grad_norm": 0.49590003490448, "learning_rate": 4.664465076122991e-05, "loss": 0.8942, "step": 18495 }, { "epoch": 2.13428703276419, "grad_norm": 0.40663713216781616, "learning_rate": 4.658789541934041e-05, "loss": 0.851, "step": 18500 }, { "epoch": 2.1348638670973696, "grad_norm": 0.4643513262271881, "learning_rate": 4.653116413970803e-05, "loss": 0.8585, "step": 18505 }, { "epoch": 2.135440701430549, "grad_norm": 0.39103883504867554, "learning_rate": 4.647445694789032e-05, "loss": 0.8343, "step": 18510 }, { "epoch": 2.1360175357637288, "grad_norm": 0.49371686577796936, "learning_rate": 4.641777386943402e-05, "loss": 0.8586, "step": 18515 }, { "epoch": 2.1365943700969083, "grad_norm": 0.44037875533103943, "learning_rate": 4.6361114929874895e-05, "loss": 0.8757, "step": 18520 }, { "epoch": 2.1371712044300875, "grad_norm": 0.42881131172180176, "learning_rate": 4.630448015473794e-05, "loss": 0.9403, "step": 18525 }, { "epoch": 2.137748038763267, "grad_norm": 0.40794649720191956, "learning_rate": 4.6247869569537225e-05, "loss": 0.862, "step": 18530 }, { "epoch": 2.1383248730964466, "grad_norm": 0.41439539194107056, "learning_rate": 4.6191283199775946e-05, "loss": 0.8535, "step": 18535 }, { "epoch": 2.138901707429626, "grad_norm": 0.4385506212711334, "learning_rate": 4.613472107094641e-05, "loss": 0.8473, "step": 18540 }, { "epoch": 2.1394785417628057, "grad_norm": 0.39511799812316895, "learning_rate": 4.607818320852988e-05, "loss": 0.9115, "step": 18545 }, { "epoch": 2.1400553760959853, "grad_norm": 0.43139395117759705, "learning_rate": 4.60216696379968e-05, "loss": 0.8281, "step": 18550 }, { "epoch": 2.140632210429165, "grad_norm": 0.3915206491947174, "learning_rate": 4.596518038480667e-05, "loss": 0.851, "step": 18555 }, { "epoch": 2.1412090447623444, "grad_norm": 0.4159088134765625, "learning_rate": 4.590871547440804e-05, "loss": 0.885, "step": 18560 }, { "epoch": 2.141785879095524, "grad_norm": 0.43738165497779846, "learning_rate": 4.585227493223836e-05, "loss": 0.8766, "step": 18565 }, { "epoch": 2.142362713428703, "grad_norm": 0.4490554630756378, "learning_rate": 4.579585878372428e-05, "loss": 0.8452, "step": 18570 }, { "epoch": 2.1429395477618827, "grad_norm": 0.4256729185581207, "learning_rate": 4.573946705428136e-05, "loss": 0.8571, "step": 18575 }, { "epoch": 2.1435163820950622, "grad_norm": 0.43782299757003784, "learning_rate": 4.5683099769314185e-05, "loss": 0.9114, "step": 18580 }, { "epoch": 2.144093216428242, "grad_norm": 0.4350529909133911, "learning_rate": 4.562675695421634e-05, "loss": 0.8728, "step": 18585 }, { "epoch": 2.1446700507614214, "grad_norm": 0.4200105667114258, "learning_rate": 4.55704386343704e-05, "loss": 0.8267, "step": 18590 }, { "epoch": 2.145246885094601, "grad_norm": 0.4020532965660095, "learning_rate": 4.551414483514781e-05, "loss": 0.8791, "step": 18595 }, { "epoch": 2.1458237194277805, "grad_norm": 0.4043259918689728, "learning_rate": 4.545787558190907e-05, "loss": 0.8687, "step": 18600 }, { "epoch": 2.1464005537609596, "grad_norm": 0.4017442464828491, "learning_rate": 4.540163090000358e-05, "loss": 0.8764, "step": 18605 }, { "epoch": 2.146977388094139, "grad_norm": 0.4435402452945709, "learning_rate": 4.534541081476973e-05, "loss": 0.8345, "step": 18610 }, { "epoch": 2.1475542224273187, "grad_norm": 0.4666100740432739, "learning_rate": 4.5289215351534666e-05, "loss": 0.8801, "step": 18615 }, { "epoch": 2.1481310567604983, "grad_norm": 0.40481117367744446, "learning_rate": 4.5233044535614676e-05, "loss": 0.8678, "step": 18620 }, { "epoch": 2.148707891093678, "grad_norm": 0.4130145013332367, "learning_rate": 4.517689839231475e-05, "loss": 0.8754, "step": 18625 }, { "epoch": 2.1492847254268574, "grad_norm": 0.41234853863716125, "learning_rate": 4.512077694692888e-05, "loss": 0.8644, "step": 18630 }, { "epoch": 2.149861559760037, "grad_norm": 0.4444868862628937, "learning_rate": 4.5064680224739783e-05, "loss": 0.8812, "step": 18635 }, { "epoch": 2.1504383940932166, "grad_norm": 0.4467960596084595, "learning_rate": 4.50086082510193e-05, "loss": 0.8759, "step": 18640 }, { "epoch": 2.151015228426396, "grad_norm": 0.4122447371482849, "learning_rate": 4.495256105102784e-05, "loss": 0.8791, "step": 18645 }, { "epoch": 2.1515920627595753, "grad_norm": 0.5279197692871094, "learning_rate": 4.489653865001485e-05, "loss": 0.8049, "step": 18650 }, { "epoch": 2.152168897092755, "grad_norm": 0.3907780647277832, "learning_rate": 4.4840541073218433e-05, "loss": 0.9522, "step": 18655 }, { "epoch": 2.1527457314259344, "grad_norm": 0.4981468617916107, "learning_rate": 4.478456834586574e-05, "loss": 0.8748, "step": 18660 }, { "epoch": 2.153322565759114, "grad_norm": 0.4211672246456146, "learning_rate": 4.472862049317249e-05, "loss": 0.8337, "step": 18665 }, { "epoch": 2.1538994000922935, "grad_norm": 0.43220365047454834, "learning_rate": 4.467269754034333e-05, "loss": 0.8608, "step": 18670 }, { "epoch": 2.154476234425473, "grad_norm": 0.4173910617828369, "learning_rate": 4.4616799512571675e-05, "loss": 0.8675, "step": 18675 }, { "epoch": 2.1550530687586527, "grad_norm": 0.4267426133155823, "learning_rate": 4.456092643503972e-05, "loss": 0.8683, "step": 18680 }, { "epoch": 2.1556299030918322, "grad_norm": 0.3888731300830841, "learning_rate": 4.450507833291831e-05, "loss": 0.8461, "step": 18685 }, { "epoch": 2.1562067374250113, "grad_norm": 0.40398934483528137, "learning_rate": 4.4449255231367183e-05, "loss": 0.8115, "step": 18690 }, { "epoch": 2.156783571758191, "grad_norm": 0.4564119279384613, "learning_rate": 4.439345715553475e-05, "loss": 0.8732, "step": 18695 }, { "epoch": 2.1573604060913705, "grad_norm": 0.43610015511512756, "learning_rate": 4.433768413055818e-05, "loss": 0.8955, "step": 18700 }, { "epoch": 2.15793724042455, "grad_norm": 0.40108174085617065, "learning_rate": 4.428193618156322e-05, "loss": 0.8907, "step": 18705 }, { "epoch": 2.1585140747577296, "grad_norm": 0.43645983934402466, "learning_rate": 4.422621333366459e-05, "loss": 0.8627, "step": 18710 }, { "epoch": 2.159090909090909, "grad_norm": 0.3946572542190552, "learning_rate": 4.4170515611965415e-05, "loss": 0.8467, "step": 18715 }, { "epoch": 2.1596677434240887, "grad_norm": 0.40307918190956116, "learning_rate": 4.411484304155771e-05, "loss": 0.8717, "step": 18720 }, { "epoch": 2.1602445777572683, "grad_norm": 0.4881908893585205, "learning_rate": 4.405919564752195e-05, "loss": 0.9199, "step": 18725 }, { "epoch": 2.1608214120904474, "grad_norm": 0.40026798844337463, "learning_rate": 4.4003573454927585e-05, "loss": 0.8857, "step": 18730 }, { "epoch": 2.161398246423627, "grad_norm": 0.4110172986984253, "learning_rate": 4.394797648883236e-05, "loss": 0.8701, "step": 18735 }, { "epoch": 2.1619750807568066, "grad_norm": 0.44310736656188965, "learning_rate": 4.389240477428288e-05, "loss": 0.8547, "step": 18740 }, { "epoch": 2.162551915089986, "grad_norm": 0.4193337559700012, "learning_rate": 4.38368583363143e-05, "loss": 0.8378, "step": 18745 }, { "epoch": 2.1631287494231657, "grad_norm": 0.43070536851882935, "learning_rate": 4.378133719995044e-05, "loss": 0.9047, "step": 18750 }, { "epoch": 2.1637055837563453, "grad_norm": 0.4080751836299896, "learning_rate": 4.37258413902036e-05, "loss": 0.8502, "step": 18755 }, { "epoch": 2.164282418089525, "grad_norm": 0.538879930973053, "learning_rate": 4.367037093207481e-05, "loss": 0.8779, "step": 18760 }, { "epoch": 2.1648592524227044, "grad_norm": 0.40123456716537476, "learning_rate": 4.36149258505536e-05, "loss": 0.8619, "step": 18765 }, { "epoch": 2.1654360867558835, "grad_norm": 0.36256951093673706, "learning_rate": 4.3559506170618116e-05, "loss": 0.8444, "step": 18770 }, { "epoch": 2.166012921089063, "grad_norm": 0.4287301003932953, "learning_rate": 4.350411191723498e-05, "loss": 0.86, "step": 18775 }, { "epoch": 2.1665897554222426, "grad_norm": 0.40902701020240784, "learning_rate": 4.344874311535944e-05, "loss": 0.8722, "step": 18780 }, { "epoch": 2.167166589755422, "grad_norm": 0.4519880712032318, "learning_rate": 4.339339978993523e-05, "loss": 0.8985, "step": 18785 }, { "epoch": 2.1677434240886018, "grad_norm": 0.39673271775245667, "learning_rate": 4.333808196589468e-05, "loss": 0.9006, "step": 18790 }, { "epoch": 2.1683202584217813, "grad_norm": 0.441983163356781, "learning_rate": 4.3282789668158476e-05, "loss": 0.9035, "step": 18795 }, { "epoch": 2.168897092754961, "grad_norm": 0.41148027777671814, "learning_rate": 4.3227522921636044e-05, "loss": 0.8512, "step": 18800 }, { "epoch": 2.1694739270881405, "grad_norm": 0.5070381760597229, "learning_rate": 4.317228175122504e-05, "loss": 0.863, "step": 18805 }, { "epoch": 2.1700507614213196, "grad_norm": 0.453912615776062, "learning_rate": 4.31170661818118e-05, "loss": 0.8576, "step": 18810 }, { "epoch": 2.170627595754499, "grad_norm": 0.4003099203109741, "learning_rate": 4.306187623827095e-05, "loss": 0.8799, "step": 18815 }, { "epoch": 2.1712044300876787, "grad_norm": 0.4114973247051239, "learning_rate": 4.300671194546579e-05, "loss": 0.8716, "step": 18820 }, { "epoch": 2.1717812644208583, "grad_norm": 0.39984384179115295, "learning_rate": 4.295157332824785e-05, "loss": 0.8489, "step": 18825 }, { "epoch": 2.172358098754038, "grad_norm": 0.4752567410469055, "learning_rate": 4.289646041145721e-05, "loss": 0.8671, "step": 18830 }, { "epoch": 2.1729349330872174, "grad_norm": 0.3880119323730469, "learning_rate": 4.284137321992235e-05, "loss": 0.8626, "step": 18835 }, { "epoch": 2.173511767420397, "grad_norm": 0.4211719036102295, "learning_rate": 4.27863117784602e-05, "loss": 0.9373, "step": 18840 }, { "epoch": 2.1740886017535765, "grad_norm": 0.4246695935726166, "learning_rate": 4.273127611187595e-05, "loss": 0.8985, "step": 18845 }, { "epoch": 2.174665436086756, "grad_norm": 0.43371304869651794, "learning_rate": 4.2676266244963335e-05, "loss": 0.8991, "step": 18850 }, { "epoch": 2.1752422704199352, "grad_norm": 0.434150755405426, "learning_rate": 4.262128220250441e-05, "loss": 0.9137, "step": 18855 }, { "epoch": 2.175819104753115, "grad_norm": 0.39126837253570557, "learning_rate": 4.256632400926961e-05, "loss": 0.9046, "step": 18860 }, { "epoch": 2.1763959390862944, "grad_norm": 0.4539991617202759, "learning_rate": 4.251139169001761e-05, "loss": 0.8592, "step": 18865 }, { "epoch": 2.176972773419474, "grad_norm": 0.41316598653793335, "learning_rate": 4.245648526949567e-05, "loss": 0.8825, "step": 18870 }, { "epoch": 2.1775496077526535, "grad_norm": 0.41215020418167114, "learning_rate": 4.240160477243913e-05, "loss": 0.8553, "step": 18875 }, { "epoch": 2.178126442085833, "grad_norm": 0.4877290725708008, "learning_rate": 4.234675022357184e-05, "loss": 0.8485, "step": 18880 }, { "epoch": 2.1787032764190126, "grad_norm": 0.436401903629303, "learning_rate": 4.229192164760576e-05, "loss": 0.8947, "step": 18885 }, { "epoch": 2.1792801107521917, "grad_norm": 0.4835764467716217, "learning_rate": 4.223711906924143e-05, "loss": 0.8144, "step": 18890 }, { "epoch": 2.1798569450853713, "grad_norm": 0.36747488379478455, "learning_rate": 4.2182342513167395e-05, "loss": 0.8148, "step": 18895 }, { "epoch": 2.180433779418551, "grad_norm": 0.43172943592071533, "learning_rate": 4.212759200406065e-05, "loss": 0.9065, "step": 18900 }, { "epoch": 2.1810106137517304, "grad_norm": 0.4256831109523773, "learning_rate": 4.2072867566586394e-05, "loss": 0.8635, "step": 18905 }, { "epoch": 2.18158744808491, "grad_norm": 0.5033872127532959, "learning_rate": 4.201816922539814e-05, "loss": 0.8843, "step": 18910 }, { "epoch": 2.1821642824180896, "grad_norm": 0.47121378779411316, "learning_rate": 4.1963497005137516e-05, "loss": 0.8994, "step": 18915 }, { "epoch": 2.182741116751269, "grad_norm": 0.42762336134910583, "learning_rate": 4.1908850930434506e-05, "loss": 0.9069, "step": 18920 }, { "epoch": 2.1833179510844487, "grad_norm": 0.4158373177051544, "learning_rate": 4.185423102590726e-05, "loss": 0.8502, "step": 18925 }, { "epoch": 2.1838947854176283, "grad_norm": 0.4415193498134613, "learning_rate": 4.179963731616221e-05, "loss": 0.8994, "step": 18930 }, { "epoch": 2.1844716197508074, "grad_norm": 0.4090315103530884, "learning_rate": 4.1745069825793836e-05, "loss": 0.8678, "step": 18935 }, { "epoch": 2.185048454083987, "grad_norm": 0.48849308490753174, "learning_rate": 4.1690528579384935e-05, "loss": 0.867, "step": 18940 }, { "epoch": 2.1856252884171665, "grad_norm": 0.4298642575740814, "learning_rate": 4.163601360150646e-05, "loss": 0.8641, "step": 18945 }, { "epoch": 2.186202122750346, "grad_norm": 0.4962320625782013, "learning_rate": 4.1581524916717515e-05, "loss": 0.8793, "step": 18950 }, { "epoch": 2.1867789570835257, "grad_norm": 0.39665916562080383, "learning_rate": 4.1527062549565344e-05, "loss": 0.8694, "step": 18955 }, { "epoch": 2.187355791416705, "grad_norm": 0.4870767295360565, "learning_rate": 4.147262652458539e-05, "loss": 0.9127, "step": 18960 }, { "epoch": 2.187932625749885, "grad_norm": 0.4359199106693268, "learning_rate": 4.141821686630112e-05, "loss": 0.8448, "step": 18965 }, { "epoch": 2.1885094600830644, "grad_norm": 0.4554474651813507, "learning_rate": 4.1363833599224225e-05, "loss": 0.899, "step": 18970 }, { "epoch": 2.1890862944162435, "grad_norm": 0.3968992531299591, "learning_rate": 4.130947674785447e-05, "loss": 0.8589, "step": 18975 }, { "epoch": 2.189663128749423, "grad_norm": 0.3888143002986908, "learning_rate": 4.1255146336679764e-05, "loss": 0.8657, "step": 18980 }, { "epoch": 2.1902399630826026, "grad_norm": 0.4677968919277191, "learning_rate": 4.1200842390175985e-05, "loss": 0.8921, "step": 18985 }, { "epoch": 2.190816797415782, "grad_norm": 0.39660555124282837, "learning_rate": 4.114656493280721e-05, "loss": 0.8675, "step": 18990 }, { "epoch": 2.1913936317489617, "grad_norm": 0.4506629705429077, "learning_rate": 4.1092313989025534e-05, "loss": 0.853, "step": 18995 }, { "epoch": 2.1919704660821413, "grad_norm": 0.45868542790412903, "learning_rate": 4.103808958327111e-05, "loss": 0.8326, "step": 19000 }, { "epoch": 2.192547300415321, "grad_norm": 0.40273788571357727, "learning_rate": 4.098389173997218e-05, "loss": 0.8579, "step": 19005 }, { "epoch": 2.1931241347485004, "grad_norm": 0.4366571605205536, "learning_rate": 4.092972048354491e-05, "loss": 0.9013, "step": 19010 }, { "epoch": 2.1937009690816796, "grad_norm": 0.44341322779655457, "learning_rate": 4.0875575838393574e-05, "loss": 0.9058, "step": 19015 }, { "epoch": 2.194277803414859, "grad_norm": 0.4099046289920807, "learning_rate": 4.082145782891046e-05, "loss": 0.8404, "step": 19020 }, { "epoch": 2.1948546377480387, "grad_norm": 0.43949276208877563, "learning_rate": 4.076736647947583e-05, "loss": 0.8557, "step": 19025 }, { "epoch": 2.1954314720812182, "grad_norm": 0.39623621106147766, "learning_rate": 4.071330181445795e-05, "loss": 0.8837, "step": 19030 }, { "epoch": 2.196008306414398, "grad_norm": 0.4332854747772217, "learning_rate": 4.065926385821307e-05, "loss": 0.8099, "step": 19035 }, { "epoch": 2.1965851407475774, "grad_norm": 0.49497532844543457, "learning_rate": 4.0605252635085345e-05, "loss": 0.8714, "step": 19040 }, { "epoch": 2.197161975080757, "grad_norm": 0.5062466859817505, "learning_rate": 4.055126816940697e-05, "loss": 0.8715, "step": 19045 }, { "epoch": 2.1977388094139365, "grad_norm": 0.43262168765068054, "learning_rate": 4.049731048549804e-05, "loss": 0.916, "step": 19050 }, { "epoch": 2.1983156437471156, "grad_norm": 0.3961028456687927, "learning_rate": 4.044337960766663e-05, "loss": 0.8536, "step": 19055 }, { "epoch": 2.198892478080295, "grad_norm": 0.4137302339076996, "learning_rate": 4.0389475560208653e-05, "loss": 0.8527, "step": 19060 }, { "epoch": 2.1994693124134748, "grad_norm": 0.39934080839157104, "learning_rate": 4.033559836740801e-05, "loss": 0.8487, "step": 19065 }, { "epoch": 2.2000461467466543, "grad_norm": 0.4289592206478119, "learning_rate": 4.028174805353648e-05, "loss": 0.8969, "step": 19070 }, { "epoch": 2.200622981079834, "grad_norm": 0.4357072114944458, "learning_rate": 4.0227924642853786e-05, "loss": 0.8291, "step": 19075 }, { "epoch": 2.2011998154130135, "grad_norm": 0.41731470823287964, "learning_rate": 4.017412815960735e-05, "loss": 0.8866, "step": 19080 }, { "epoch": 2.201776649746193, "grad_norm": 0.3993309736251831, "learning_rate": 4.012035862803275e-05, "loss": 0.9265, "step": 19085 }, { "epoch": 2.2023534840793726, "grad_norm": 0.3996540307998657, "learning_rate": 4.006661607235316e-05, "loss": 0.8724, "step": 19090 }, { "epoch": 2.2029303184125517, "grad_norm": 0.4056003987789154, "learning_rate": 4.001290051677975e-05, "loss": 0.8479, "step": 19095 }, { "epoch": 2.2035071527457313, "grad_norm": 0.42227908968925476, "learning_rate": 3.9959211985511415e-05, "loss": 0.9067, "step": 19100 }, { "epoch": 2.204083987078911, "grad_norm": 0.4420729875564575, "learning_rate": 3.990555050273506e-05, "loss": 0.9141, "step": 19105 }, { "epoch": 2.2046608214120904, "grad_norm": 0.42280176281929016, "learning_rate": 3.985191609262519e-05, "loss": 0.887, "step": 19110 }, { "epoch": 2.20523765574527, "grad_norm": 0.3818375766277313, "learning_rate": 3.979830877934423e-05, "loss": 0.8428, "step": 19115 }, { "epoch": 2.2058144900784495, "grad_norm": 0.4037393629550934, "learning_rate": 3.97447285870424e-05, "loss": 0.8572, "step": 19120 }, { "epoch": 2.206391324411629, "grad_norm": 0.4422479271888733, "learning_rate": 3.969117553985772e-05, "loss": 0.8919, "step": 19125 }, { "epoch": 2.2069681587448087, "grad_norm": 0.3848304748535156, "learning_rate": 3.9637649661915844e-05, "loss": 0.9447, "step": 19130 }, { "epoch": 2.207544993077988, "grad_norm": 0.37574300169944763, "learning_rate": 3.958415097733035e-05, "loss": 0.8699, "step": 19135 }, { "epoch": 2.2081218274111674, "grad_norm": 0.4617113471031189, "learning_rate": 3.9530679510202476e-05, "loss": 0.9728, "step": 19140 }, { "epoch": 2.208698661744347, "grad_norm": 0.42802298069000244, "learning_rate": 3.947723528462126e-05, "loss": 0.9185, "step": 19145 }, { "epoch": 2.2092754960775265, "grad_norm": 0.4145731031894684, "learning_rate": 3.942381832466334e-05, "loss": 0.8572, "step": 19150 }, { "epoch": 2.209852330410706, "grad_norm": 0.4005712568759918, "learning_rate": 3.9370428654393296e-05, "loss": 0.8823, "step": 19155 }, { "epoch": 2.2104291647438856, "grad_norm": 0.4595177471637726, "learning_rate": 3.931706629786317e-05, "loss": 0.8639, "step": 19160 }, { "epoch": 2.211005999077065, "grad_norm": 0.40236470103263855, "learning_rate": 3.926373127911287e-05, "loss": 0.8691, "step": 19165 }, { "epoch": 2.2115828334102448, "grad_norm": 0.4109460413455963, "learning_rate": 3.921042362216983e-05, "loss": 0.8875, "step": 19170 }, { "epoch": 2.212159667743424, "grad_norm": 0.4146732687950134, "learning_rate": 3.9157143351049386e-05, "loss": 0.9115, "step": 19175 }, { "epoch": 2.2127365020766034, "grad_norm": 0.38754817843437195, "learning_rate": 3.910389048975431e-05, "loss": 0.9114, "step": 19180 }, { "epoch": 2.213313336409783, "grad_norm": 0.4098663330078125, "learning_rate": 3.905066506227515e-05, "loss": 0.8382, "step": 19185 }, { "epoch": 2.2138901707429626, "grad_norm": 0.43350058794021606, "learning_rate": 3.8997467092590056e-05, "loss": 0.8488, "step": 19190 }, { "epoch": 2.214467005076142, "grad_norm": 0.44083696603775024, "learning_rate": 3.8944296604664866e-05, "loss": 0.8266, "step": 19195 }, { "epoch": 2.2150438394093217, "grad_norm": 0.44283217191696167, "learning_rate": 3.8891153622452904e-05, "loss": 0.8576, "step": 19200 }, { "epoch": 2.2156206737425013, "grad_norm": 0.4850544035434723, "learning_rate": 3.883803816989523e-05, "loss": 0.8542, "step": 19205 }, { "epoch": 2.216197508075681, "grad_norm": 0.5047842264175415, "learning_rate": 3.8784950270920465e-05, "loss": 0.8928, "step": 19210 }, { "epoch": 2.2167743424088604, "grad_norm": 0.464539498090744, "learning_rate": 3.873188994944483e-05, "loss": 0.94, "step": 19215 }, { "epoch": 2.2173511767420395, "grad_norm": 0.4273037314414978, "learning_rate": 3.867885722937201e-05, "loss": 0.8738, "step": 19220 }, { "epoch": 2.217928011075219, "grad_norm": 0.4209919273853302, "learning_rate": 3.862585213459349e-05, "loss": 0.8648, "step": 19225 }, { "epoch": 2.2185048454083987, "grad_norm": 0.420584112405777, "learning_rate": 3.857287468898806e-05, "loss": 0.8516, "step": 19230 }, { "epoch": 2.219081679741578, "grad_norm": 0.4526803493499756, "learning_rate": 3.851992491642222e-05, "loss": 0.8924, "step": 19235 }, { "epoch": 2.219658514074758, "grad_norm": 0.446689635515213, "learning_rate": 3.846700284074987e-05, "loss": 0.8676, "step": 19240 }, { "epoch": 2.2202353484079373, "grad_norm": 0.4283064901828766, "learning_rate": 3.8414108485812613e-05, "loss": 0.8934, "step": 19245 }, { "epoch": 2.220812182741117, "grad_norm": 0.43577268719673157, "learning_rate": 3.836124187543938e-05, "loss": 0.8238, "step": 19250 }, { "epoch": 2.221389017074296, "grad_norm": 0.41376611590385437, "learning_rate": 3.830840303344675e-05, "loss": 0.8273, "step": 19255 }, { "epoch": 2.2219658514074756, "grad_norm": 0.42846664786338806, "learning_rate": 3.825559198363861e-05, "loss": 0.9316, "step": 19260 }, { "epoch": 2.222542685740655, "grad_norm": 0.4213767945766449, "learning_rate": 3.820280874980658e-05, "loss": 0.8694, "step": 19265 }, { "epoch": 2.2231195200738347, "grad_norm": 0.43722113966941833, "learning_rate": 3.8150053355729495e-05, "loss": 0.8672, "step": 19270 }, { "epoch": 2.2236963544070143, "grad_norm": 0.4323386251926422, "learning_rate": 3.8097325825173826e-05, "loss": 0.8544, "step": 19275 }, { "epoch": 2.224273188740194, "grad_norm": 0.4062696695327759, "learning_rate": 3.80446261818934e-05, "loss": 0.8761, "step": 19280 }, { "epoch": 2.2248500230733734, "grad_norm": 0.4705086648464203, "learning_rate": 3.799195444962956e-05, "loss": 0.8459, "step": 19285 }, { "epoch": 2.225426857406553, "grad_norm": 0.4575541317462921, "learning_rate": 3.793931065211096e-05, "loss": 0.8433, "step": 19290 }, { "epoch": 2.2260036917397326, "grad_norm": 0.4377269446849823, "learning_rate": 3.788669481305376e-05, "loss": 0.8728, "step": 19295 }, { "epoch": 2.2265805260729117, "grad_norm": 0.41368991136550903, "learning_rate": 3.783410695616149e-05, "loss": 0.8454, "step": 19300 }, { "epoch": 2.2271573604060912, "grad_norm": 0.43097037076950073, "learning_rate": 3.778154710512512e-05, "loss": 0.866, "step": 19305 }, { "epoch": 2.227734194739271, "grad_norm": 0.4154130518436432, "learning_rate": 3.772901528362287e-05, "loss": 0.8629, "step": 19310 }, { "epoch": 2.2283110290724504, "grad_norm": 0.47126176953315735, "learning_rate": 3.767651151532057e-05, "loss": 0.8565, "step": 19315 }, { "epoch": 2.22888786340563, "grad_norm": 0.411214143037796, "learning_rate": 3.762403582387114e-05, "loss": 0.9093, "step": 19320 }, { "epoch": 2.2294646977388095, "grad_norm": 0.42424070835113525, "learning_rate": 3.757158823291508e-05, "loss": 0.8569, "step": 19325 }, { "epoch": 2.230041532071989, "grad_norm": 0.43448081612586975, "learning_rate": 3.751916876608001e-05, "loss": 0.9002, "step": 19330 }, { "epoch": 2.2306183664051686, "grad_norm": 0.442730575799942, "learning_rate": 3.746677744698114e-05, "loss": 0.9348, "step": 19335 }, { "epoch": 2.2311952007383478, "grad_norm": 0.437593936920166, "learning_rate": 3.741441429922078e-05, "loss": 0.8935, "step": 19340 }, { "epoch": 2.2317720350715273, "grad_norm": 0.41157275438308716, "learning_rate": 3.736207934638864e-05, "loss": 0.8402, "step": 19345 }, { "epoch": 2.232348869404707, "grad_norm": 0.4042685329914093, "learning_rate": 3.730977261206171e-05, "loss": 0.8719, "step": 19350 }, { "epoch": 2.2329257037378865, "grad_norm": 0.42768070101737976, "learning_rate": 3.725749411980435e-05, "loss": 0.8603, "step": 19355 }, { "epoch": 2.233502538071066, "grad_norm": 0.44104745984077454, "learning_rate": 3.720524389316802e-05, "loss": 0.8971, "step": 19360 }, { "epoch": 2.2340793724042456, "grad_norm": 0.4265894293785095, "learning_rate": 3.715302195569159e-05, "loss": 0.872, "step": 19365 }, { "epoch": 2.234656206737425, "grad_norm": 0.45184656977653503, "learning_rate": 3.710082833090115e-05, "loss": 0.8893, "step": 19370 }, { "epoch": 2.2352330410706047, "grad_norm": 0.4579407572746277, "learning_rate": 3.7048663042310063e-05, "loss": 0.8494, "step": 19375 }, { "epoch": 2.235809875403784, "grad_norm": 0.44718435406684875, "learning_rate": 3.69965261134188e-05, "loss": 0.9108, "step": 19380 }, { "epoch": 2.2363867097369634, "grad_norm": 0.44501993060112, "learning_rate": 3.694441756771525e-05, "loss": 0.8604, "step": 19385 }, { "epoch": 2.236963544070143, "grad_norm": 0.4550657272338867, "learning_rate": 3.689233742867435e-05, "loss": 0.8502, "step": 19390 }, { "epoch": 2.2375403784033225, "grad_norm": 0.38818812370300293, "learning_rate": 3.684028571975836e-05, "loss": 0.8862, "step": 19395 }, { "epoch": 2.238117212736502, "grad_norm": 0.43724438548088074, "learning_rate": 3.678826246441658e-05, "loss": 0.8706, "step": 19400 }, { "epoch": 2.2386940470696817, "grad_norm": 0.4609769582748413, "learning_rate": 3.67362676860857e-05, "loss": 0.8821, "step": 19405 }, { "epoch": 2.2392708814028612, "grad_norm": 0.4721742272377014, "learning_rate": 3.6684301408189406e-05, "loss": 0.8705, "step": 19410 }, { "epoch": 2.239847715736041, "grad_norm": 0.44866010546684265, "learning_rate": 3.66323636541386e-05, "loss": 0.8934, "step": 19415 }, { "epoch": 2.24042455006922, "grad_norm": 0.4057766795158386, "learning_rate": 3.658045444733137e-05, "loss": 0.8711, "step": 19420 }, { "epoch": 2.2410013844023995, "grad_norm": 0.43573522567749023, "learning_rate": 3.652857381115293e-05, "loss": 0.8508, "step": 19425 }, { "epoch": 2.241578218735579, "grad_norm": 0.3890364468097687, "learning_rate": 3.6476721768975554e-05, "loss": 0.8354, "step": 19430 }, { "epoch": 2.2421550530687586, "grad_norm": 0.44802749156951904, "learning_rate": 3.642489834415872e-05, "loss": 0.913, "step": 19435 }, { "epoch": 2.242731887401938, "grad_norm": 0.41781002283096313, "learning_rate": 3.637310356004897e-05, "loss": 0.8546, "step": 19440 }, { "epoch": 2.2433087217351178, "grad_norm": 0.4287906885147095, "learning_rate": 3.632133743998001e-05, "loss": 0.9148, "step": 19445 }, { "epoch": 2.2438855560682973, "grad_norm": 0.4324365556240082, "learning_rate": 3.6269600007272485e-05, "loss": 0.8566, "step": 19450 }, { "epoch": 2.244462390401477, "grad_norm": 0.47438672184944153, "learning_rate": 3.6217891285234265e-05, "loss": 0.8796, "step": 19455 }, { "epoch": 2.245039224734656, "grad_norm": 0.4307745397090912, "learning_rate": 3.6166211297160215e-05, "loss": 0.8702, "step": 19460 }, { "epoch": 2.2456160590678356, "grad_norm": 0.39747270941734314, "learning_rate": 3.611456006633228e-05, "loss": 0.9056, "step": 19465 }, { "epoch": 2.246192893401015, "grad_norm": 0.3949414789676666, "learning_rate": 3.6062937616019433e-05, "loss": 0.8409, "step": 19470 }, { "epoch": 2.2467697277341947, "grad_norm": 0.45473381876945496, "learning_rate": 3.601134396947773e-05, "loss": 0.8587, "step": 19475 }, { "epoch": 2.2473465620673743, "grad_norm": 0.5197227001190186, "learning_rate": 3.595977914995014e-05, "loss": 0.8988, "step": 19480 }, { "epoch": 2.247923396400554, "grad_norm": 0.4385240972042084, "learning_rate": 3.5908243180666734e-05, "loss": 0.9216, "step": 19485 }, { "epoch": 2.2485002307337334, "grad_norm": 0.45504510402679443, "learning_rate": 3.585673608484458e-05, "loss": 0.8773, "step": 19490 }, { "epoch": 2.249077065066913, "grad_norm": 0.419172465801239, "learning_rate": 3.5805257885687726e-05, "loss": 0.883, "step": 19495 }, { "epoch": 2.2496538994000925, "grad_norm": 0.41316547989845276, "learning_rate": 3.5753808606387226e-05, "loss": 0.8475, "step": 19500 }, { "epoch": 2.2502307337332716, "grad_norm": 0.3854261338710785, "learning_rate": 3.570238827012102e-05, "loss": 0.8527, "step": 19505 }, { "epoch": 2.250807568066451, "grad_norm": 0.39473459124565125, "learning_rate": 3.565099690005411e-05, "loss": 0.8726, "step": 19510 }, { "epoch": 2.251384402399631, "grad_norm": 0.43062132596969604, "learning_rate": 3.55996345193384e-05, "loss": 0.8598, "step": 19515 }, { "epoch": 2.2519612367328103, "grad_norm": 0.37857621908187866, "learning_rate": 3.5548301151112774e-05, "loss": 0.8552, "step": 19520 }, { "epoch": 2.25253807106599, "grad_norm": 0.4191773533821106, "learning_rate": 3.549699681850294e-05, "loss": 0.8459, "step": 19525 }, { "epoch": 2.2531149053991695, "grad_norm": 0.43940702080726624, "learning_rate": 3.544572154462165e-05, "loss": 0.8592, "step": 19530 }, { "epoch": 2.253691739732349, "grad_norm": 0.4216030538082123, "learning_rate": 3.539447535256851e-05, "loss": 0.845, "step": 19535 }, { "epoch": 2.254268574065528, "grad_norm": 0.4699593186378479, "learning_rate": 3.534325826543002e-05, "loss": 0.8323, "step": 19540 }, { "epoch": 2.2548454083987077, "grad_norm": 0.4556678235530853, "learning_rate": 3.529207030627959e-05, "loss": 0.8951, "step": 19545 }, { "epoch": 2.2554222427318873, "grad_norm": 0.433390736579895, "learning_rate": 3.524091149817752e-05, "loss": 0.8777, "step": 19550 }, { "epoch": 2.255999077065067, "grad_norm": 0.421514630317688, "learning_rate": 3.518978186417089e-05, "loss": 0.9213, "step": 19555 }, { "epoch": 2.2565759113982464, "grad_norm": 0.39421340823173523, "learning_rate": 3.513868142729373e-05, "loss": 0.8982, "step": 19560 }, { "epoch": 2.257152745731426, "grad_norm": 0.4324612319469452, "learning_rate": 3.508761021056689e-05, "loss": 0.9017, "step": 19565 }, { "epoch": 2.2577295800646056, "grad_norm": 0.4661559760570526, "learning_rate": 3.503656823699809e-05, "loss": 0.9211, "step": 19570 }, { "epoch": 2.258306414397785, "grad_norm": 0.4388502538204193, "learning_rate": 3.498555552958176e-05, "loss": 0.901, "step": 19575 }, { "epoch": 2.2588832487309647, "grad_norm": 0.3959125578403473, "learning_rate": 3.4934572111299254e-05, "loss": 0.9145, "step": 19580 }, { "epoch": 2.259460083064144, "grad_norm": 0.39172422885894775, "learning_rate": 3.4883618005118724e-05, "loss": 0.8521, "step": 19585 }, { "epoch": 2.2600369173973234, "grad_norm": 0.500770628452301, "learning_rate": 3.483269323399512e-05, "loss": 0.9168, "step": 19590 }, { "epoch": 2.260613751730503, "grad_norm": 0.43640342354774475, "learning_rate": 3.4781797820870036e-05, "loss": 0.893, "step": 19595 }, { "epoch": 2.2611905860636825, "grad_norm": 0.4276639223098755, "learning_rate": 3.473093178867208e-05, "loss": 0.885, "step": 19600 }, { "epoch": 2.261767420396862, "grad_norm": 0.4681278169155121, "learning_rate": 3.468009516031644e-05, "loss": 0.9085, "step": 19605 }, { "epoch": 2.2623442547300416, "grad_norm": 0.4059450328350067, "learning_rate": 3.462928795870515e-05, "loss": 0.8732, "step": 19610 }, { "epoch": 2.262921089063221, "grad_norm": 0.42977821826934814, "learning_rate": 3.4578510206726856e-05, "loss": 0.8538, "step": 19615 }, { "epoch": 2.2634979233964003, "grad_norm": 0.4566701054573059, "learning_rate": 3.452776192725717e-05, "loss": 0.9053, "step": 19620 }, { "epoch": 2.26407475772958, "grad_norm": 0.43263301253318787, "learning_rate": 3.4477043143158204e-05, "loss": 0.8835, "step": 19625 }, { "epoch": 2.2646515920627595, "grad_norm": 0.46123167872428894, "learning_rate": 3.4426353877278885e-05, "loss": 0.8861, "step": 19630 }, { "epoch": 2.265228426395939, "grad_norm": 0.4144291281700134, "learning_rate": 3.437569415245483e-05, "loss": 0.8385, "step": 19635 }, { "epoch": 2.2658052607291186, "grad_norm": 0.4805741310119629, "learning_rate": 3.432506399150839e-05, "loss": 0.7951, "step": 19640 }, { "epoch": 2.266382095062298, "grad_norm": 0.4421154260635376, "learning_rate": 3.427446341724846e-05, "loss": 0.9346, "step": 19645 }, { "epoch": 2.2669589293954777, "grad_norm": 0.4587462246417999, "learning_rate": 3.422389245247075e-05, "loss": 0.9087, "step": 19650 }, { "epoch": 2.2675357637286573, "grad_norm": 0.40255850553512573, "learning_rate": 3.4173351119957585e-05, "loss": 0.8269, "step": 19655 }, { "epoch": 2.268112598061837, "grad_norm": 0.41475871205329895, "learning_rate": 3.412283944247794e-05, "loss": 0.8546, "step": 19660 }, { "epoch": 2.268689432395016, "grad_norm": 0.39724406599998474, "learning_rate": 3.407235744278734e-05, "loss": 0.8428, "step": 19665 }, { "epoch": 2.2692662667281955, "grad_norm": 0.4867123067378998, "learning_rate": 3.4021905143628166e-05, "loss": 0.8971, "step": 19670 }, { "epoch": 2.269843101061375, "grad_norm": 0.4419727325439453, "learning_rate": 3.397148256772916e-05, "loss": 0.8865, "step": 19675 }, { "epoch": 2.2704199353945547, "grad_norm": 0.39697927236557007, "learning_rate": 3.3921089737805866e-05, "loss": 0.8884, "step": 19680 }, { "epoch": 2.2709967697277342, "grad_norm": 0.43786725401878357, "learning_rate": 3.387072667656025e-05, "loss": 0.9046, "step": 19685 }, { "epoch": 2.271573604060914, "grad_norm": 0.3968237638473511, "learning_rate": 3.3820393406681104e-05, "loss": 0.8644, "step": 19690 }, { "epoch": 2.2721504383940934, "grad_norm": 0.405393123626709, "learning_rate": 3.3770089950843564e-05, "loss": 0.872, "step": 19695 }, { "epoch": 2.2727272727272725, "grad_norm": 0.40466177463531494, "learning_rate": 3.3719816331709465e-05, "loss": 0.8794, "step": 19700 }, { "epoch": 2.273304107060452, "grad_norm": 0.39831775426864624, "learning_rate": 3.3669572571927167e-05, "loss": 0.8519, "step": 19705 }, { "epoch": 2.2738809413936316, "grad_norm": 0.4163300395011902, "learning_rate": 3.361935869413163e-05, "loss": 0.8841, "step": 19710 }, { "epoch": 2.274457775726811, "grad_norm": 0.4293881058692932, "learning_rate": 3.356917472094422e-05, "loss": 0.8701, "step": 19715 }, { "epoch": 2.2750346100599907, "grad_norm": 0.3978329002857208, "learning_rate": 3.3519020674972966e-05, "loss": 0.8271, "step": 19720 }, { "epoch": 2.2756114443931703, "grad_norm": 0.4234081208705902, "learning_rate": 3.3468896578812344e-05, "loss": 0.876, "step": 19725 }, { "epoch": 2.27618827872635, "grad_norm": 0.4614051580429077, "learning_rate": 3.341880245504341e-05, "loss": 0.8424, "step": 19730 }, { "epoch": 2.2767651130595294, "grad_norm": 0.3951602578163147, "learning_rate": 3.3368738326233576e-05, "loss": 0.8786, "step": 19735 }, { "epoch": 2.277341947392709, "grad_norm": 0.45089635252952576, "learning_rate": 3.331870421493688e-05, "loss": 0.8766, "step": 19740 }, { "epoch": 2.277918781725888, "grad_norm": 0.41349610686302185, "learning_rate": 3.3268700143693775e-05, "loss": 0.9101, "step": 19745 }, { "epoch": 2.2784956160590677, "grad_norm": 0.5059412717819214, "learning_rate": 3.321872613503123e-05, "loss": 0.8829, "step": 19750 }, { "epoch": 2.2790724503922473, "grad_norm": 0.4013746976852417, "learning_rate": 3.316878221146253e-05, "loss": 0.8798, "step": 19755 }, { "epoch": 2.279649284725427, "grad_norm": 0.47415482997894287, "learning_rate": 3.311886839548767e-05, "loss": 0.9138, "step": 19760 }, { "epoch": 2.2802261190586064, "grad_norm": 0.43062588572502136, "learning_rate": 3.306898470959278e-05, "loss": 0.8784, "step": 19765 }, { "epoch": 2.280802953391786, "grad_norm": 0.46922576427459717, "learning_rate": 3.301913117625065e-05, "loss": 0.8598, "step": 19770 }, { "epoch": 2.2813797877249655, "grad_norm": 0.4475753903388977, "learning_rate": 3.296930781792028e-05, "loss": 0.8808, "step": 19775 }, { "epoch": 2.281956622058145, "grad_norm": 0.4148850739002228, "learning_rate": 3.2919514657047334e-05, "loss": 0.9151, "step": 19780 }, { "epoch": 2.2825334563913247, "grad_norm": 0.4225980043411255, "learning_rate": 3.286975171606362e-05, "loss": 0.8396, "step": 19785 }, { "epoch": 2.2831102907245038, "grad_norm": 0.41470444202423096, "learning_rate": 3.282001901738749e-05, "loss": 0.8742, "step": 19790 }, { "epoch": 2.2836871250576833, "grad_norm": 0.4593305289745331, "learning_rate": 3.27703165834236e-05, "loss": 0.911, "step": 19795 }, { "epoch": 2.284263959390863, "grad_norm": 0.46919670701026917, "learning_rate": 3.272064443656303e-05, "loss": 0.8615, "step": 19800 }, { "epoch": 2.2848407937240425, "grad_norm": 0.4396454095840454, "learning_rate": 3.267100259918313e-05, "loss": 0.8898, "step": 19805 }, { "epoch": 2.285417628057222, "grad_norm": 0.3871668577194214, "learning_rate": 3.262139109364766e-05, "loss": 0.8499, "step": 19810 }, { "epoch": 2.2859944623904016, "grad_norm": 0.4604567885398865, "learning_rate": 3.257180994230671e-05, "loss": 0.8922, "step": 19815 }, { "epoch": 2.286571296723581, "grad_norm": 0.3920028507709503, "learning_rate": 3.2522259167496706e-05, "loss": 0.8444, "step": 19820 }, { "epoch": 2.2871481310567603, "grad_norm": 0.4583243131637573, "learning_rate": 3.247273879154028e-05, "loss": 0.8847, "step": 19825 }, { "epoch": 2.28772496538994, "grad_norm": 0.4010569453239441, "learning_rate": 3.2423248836746575e-05, "loss": 0.8818, "step": 19830 }, { "epoch": 2.2883017997231194, "grad_norm": 0.49284353852272034, "learning_rate": 3.237378932541084e-05, "loss": 0.9396, "step": 19835 }, { "epoch": 2.288878634056299, "grad_norm": 0.4109579026699066, "learning_rate": 3.2324360279814734e-05, "loss": 0.8614, "step": 19840 }, { "epoch": 2.2894554683894786, "grad_norm": 0.3795565366744995, "learning_rate": 3.227496172222603e-05, "loss": 0.9212, "step": 19845 }, { "epoch": 2.290032302722658, "grad_norm": 0.41086894273757935, "learning_rate": 3.222559367489901e-05, "loss": 0.8607, "step": 19850 }, { "epoch": 2.2906091370558377, "grad_norm": 0.4576627314090729, "learning_rate": 3.217625616007399e-05, "loss": 0.8155, "step": 19855 }, { "epoch": 2.2911859713890173, "grad_norm": 0.45540130138397217, "learning_rate": 3.212694919997764e-05, "loss": 0.8947, "step": 19860 }, { "epoch": 2.291762805722197, "grad_norm": 0.4258350431919098, "learning_rate": 3.2077672816822836e-05, "loss": 0.8913, "step": 19865 }, { "epoch": 2.292339640055376, "grad_norm": 0.4477665424346924, "learning_rate": 3.202842703280872e-05, "loss": 0.9329, "step": 19870 }, { "epoch": 2.2929164743885555, "grad_norm": 0.40553560853004456, "learning_rate": 3.197921187012055e-05, "loss": 0.8641, "step": 19875 }, { "epoch": 2.293493308721735, "grad_norm": 0.4320368468761444, "learning_rate": 3.193002735092989e-05, "loss": 0.8611, "step": 19880 }, { "epoch": 2.2940701430549146, "grad_norm": 0.43080151081085205, "learning_rate": 3.188087349739446e-05, "loss": 0.8702, "step": 19885 }, { "epoch": 2.294646977388094, "grad_norm": 0.4172769784927368, "learning_rate": 3.1831750331658196e-05, "loss": 0.8789, "step": 19890 }, { "epoch": 2.2952238117212738, "grad_norm": 0.465230792760849, "learning_rate": 3.178265787585112e-05, "loss": 0.8691, "step": 19895 }, { "epoch": 2.2958006460544533, "grad_norm": 0.41405051946640015, "learning_rate": 3.1733596152089495e-05, "loss": 0.9003, "step": 19900 }, { "epoch": 2.2963774803876325, "grad_norm": 0.40615877509117126, "learning_rate": 3.168456518247575e-05, "loss": 0.8408, "step": 19905 }, { "epoch": 2.296954314720812, "grad_norm": 0.49301356077194214, "learning_rate": 3.163556498909843e-05, "loss": 0.8775, "step": 19910 }, { "epoch": 2.2975311490539916, "grad_norm": 0.4456724524497986, "learning_rate": 3.1586595594032154e-05, "loss": 0.898, "step": 19915 }, { "epoch": 2.298107983387171, "grad_norm": 0.415539413690567, "learning_rate": 3.153765701933784e-05, "loss": 0.8291, "step": 19920 }, { "epoch": 2.2986848177203507, "grad_norm": 0.41856181621551514, "learning_rate": 3.148874928706232e-05, "loss": 0.9107, "step": 19925 }, { "epoch": 2.2992616520535303, "grad_norm": 0.44639578461647034, "learning_rate": 3.1439872419238666e-05, "loss": 0.8456, "step": 19930 }, { "epoch": 2.29983848638671, "grad_norm": 0.4922926723957062, "learning_rate": 3.1391026437885984e-05, "loss": 0.8511, "step": 19935 }, { "epoch": 2.3004153207198894, "grad_norm": 0.39371350407600403, "learning_rate": 3.134221136500952e-05, "loss": 0.8905, "step": 19940 }, { "epoch": 2.300992155053069, "grad_norm": 0.45283186435699463, "learning_rate": 3.1293427222600504e-05, "loss": 0.9106, "step": 19945 }, { "epoch": 2.301568989386248, "grad_norm": 0.48746415972709656, "learning_rate": 3.12446740326363e-05, "loss": 0.8145, "step": 19950 }, { "epoch": 2.3021458237194277, "grad_norm": 0.39407244324684143, "learning_rate": 3.119595181708034e-05, "loss": 0.8061, "step": 19955 }, { "epoch": 2.3027226580526072, "grad_norm": 0.4312855005264282, "learning_rate": 3.114726059788206e-05, "loss": 0.9428, "step": 19960 }, { "epoch": 2.303299492385787, "grad_norm": 0.40344172716140747, "learning_rate": 3.109860039697699e-05, "loss": 0.8769, "step": 19965 }, { "epoch": 2.3038763267189664, "grad_norm": 0.47457730770111084, "learning_rate": 3.1049971236286566e-05, "loss": 0.8066, "step": 19970 }, { "epoch": 2.304453161052146, "grad_norm": 0.4646870791912079, "learning_rate": 3.100137313771838e-05, "loss": 0.8744, "step": 19975 }, { "epoch": 2.3050299953853255, "grad_norm": 0.42959192395210266, "learning_rate": 3.0952806123165945e-05, "loss": 0.8513, "step": 19980 }, { "epoch": 2.3056068297185046, "grad_norm": 0.43078503012657166, "learning_rate": 3.09042702145088e-05, "loss": 0.9291, "step": 19985 }, { "epoch": 2.306183664051684, "grad_norm": 0.40327394008636475, "learning_rate": 3.0855765433612504e-05, "loss": 0.8477, "step": 19990 }, { "epoch": 2.3067604983848637, "grad_norm": 0.43422433733940125, "learning_rate": 3.0807291802328494e-05, "loss": 0.8785, "step": 19995 }, { "epoch": 2.3073373327180433, "grad_norm": 0.48264971375465393, "learning_rate": 3.075884934249426e-05, "loss": 0.9068, "step": 20000 }, { "epoch": 2.307914167051223, "grad_norm": 0.43897566199302673, "learning_rate": 3.0710438075933225e-05, "loss": 0.9054, "step": 20005 }, { "epoch": 2.3084910013844024, "grad_norm": 0.4612114727497101, "learning_rate": 3.066205802445477e-05, "loss": 0.9348, "step": 20010 }, { "epoch": 2.309067835717582, "grad_norm": 0.4375527799129486, "learning_rate": 3.0613709209854214e-05, "loss": 0.8707, "step": 20015 }, { "epoch": 2.3096446700507616, "grad_norm": 0.420188844203949, "learning_rate": 3.056539165391275e-05, "loss": 0.883, "step": 20020 }, { "epoch": 2.310221504383941, "grad_norm": 0.44482848048210144, "learning_rate": 3.0517105378397536e-05, "loss": 0.8882, "step": 20025 }, { "epoch": 2.3107983387171203, "grad_norm": 0.439638614654541, "learning_rate": 3.0468850405061668e-05, "loss": 0.8514, "step": 20030 }, { "epoch": 2.3113751730503, "grad_norm": 0.45619815587997437, "learning_rate": 3.04206267556441e-05, "loss": 0.8587, "step": 20035 }, { "epoch": 2.3119520073834794, "grad_norm": 0.4407486915588379, "learning_rate": 3.037243445186965e-05, "loss": 0.8501, "step": 20040 }, { "epoch": 2.312528841716659, "grad_norm": 0.4541487693786621, "learning_rate": 3.0324273515449052e-05, "loss": 0.8347, "step": 20045 }, { "epoch": 2.3131056760498385, "grad_norm": 0.43526491522789, "learning_rate": 3.027614396807892e-05, "loss": 0.8815, "step": 20050 }, { "epoch": 2.313682510383018, "grad_norm": 0.4115588665008545, "learning_rate": 3.0228045831441733e-05, "loss": 0.8109, "step": 20055 }, { "epoch": 2.3142593447161977, "grad_norm": 0.4194267988204956, "learning_rate": 3.0179979127205692e-05, "loss": 0.9207, "step": 20060 }, { "epoch": 2.3148361790493768, "grad_norm": 0.38986527919769287, "learning_rate": 3.0131943877025072e-05, "loss": 0.856, "step": 20065 }, { "epoch": 2.3154130133825563, "grad_norm": 0.43422049283981323, "learning_rate": 3.0083940102539763e-05, "loss": 0.8778, "step": 20070 }, { "epoch": 2.315989847715736, "grad_norm": 0.41118425130844116, "learning_rate": 3.0035967825375577e-05, "loss": 0.8238, "step": 20075 }, { "epoch": 2.3165666820489155, "grad_norm": 0.4259694516658783, "learning_rate": 2.998802706714412e-05, "loss": 0.8975, "step": 20080 }, { "epoch": 2.317143516382095, "grad_norm": 0.39787614345550537, "learning_rate": 2.994011784944284e-05, "loss": 0.8057, "step": 20085 }, { "epoch": 2.3177203507152746, "grad_norm": 0.42141956090927124, "learning_rate": 2.9892240193854858e-05, "loss": 0.8747, "step": 20090 }, { "epoch": 2.318297185048454, "grad_norm": 0.43193697929382324, "learning_rate": 2.9844394121949182e-05, "loss": 0.8747, "step": 20095 }, { "epoch": 2.3188740193816337, "grad_norm": 0.430708646774292, "learning_rate": 2.9796579655280576e-05, "loss": 0.8928, "step": 20100 }, { "epoch": 2.3194508537148133, "grad_norm": 0.39962413907051086, "learning_rate": 2.9748796815389556e-05, "loss": 0.8059, "step": 20105 }, { "epoch": 2.3200276880479924, "grad_norm": 0.4007127285003662, "learning_rate": 2.9701045623802315e-05, "loss": 0.8972, "step": 20110 }, { "epoch": 2.320604522381172, "grad_norm": 0.39141660928726196, "learning_rate": 2.9653326102030964e-05, "loss": 0.8787, "step": 20115 }, { "epoch": 2.3211813567143516, "grad_norm": 0.48572033643722534, "learning_rate": 2.9605638271573166e-05, "loss": 0.8466, "step": 20120 }, { "epoch": 2.321758191047531, "grad_norm": 0.43861210346221924, "learning_rate": 2.9557982153912424e-05, "loss": 0.8933, "step": 20125 }, { "epoch": 2.3223350253807107, "grad_norm": 0.44550949335098267, "learning_rate": 2.9510357770517825e-05, "loss": 0.9044, "step": 20130 }, { "epoch": 2.3229118597138902, "grad_norm": 0.3988287150859833, "learning_rate": 2.9462765142844384e-05, "loss": 0.8629, "step": 20135 }, { "epoch": 2.32348869404707, "grad_norm": 0.45350053906440735, "learning_rate": 2.9415204292332565e-05, "loss": 0.8538, "step": 20140 }, { "epoch": 2.3240655283802494, "grad_norm": 0.4375722408294678, "learning_rate": 2.9367675240408654e-05, "loss": 0.8715, "step": 20145 }, { "epoch": 2.324642362713429, "grad_norm": 0.390643835067749, "learning_rate": 2.9320178008484587e-05, "loss": 0.835, "step": 20150 }, { "epoch": 2.325219197046608, "grad_norm": 0.4461374282836914, "learning_rate": 2.9272712617957997e-05, "loss": 0.8254, "step": 20155 }, { "epoch": 2.3257960313797876, "grad_norm": 0.4431370496749878, "learning_rate": 2.9225279090212067e-05, "loss": 0.904, "step": 20160 }, { "epoch": 2.326372865712967, "grad_norm": 0.4003266990184784, "learning_rate": 2.917787744661571e-05, "loss": 0.8588, "step": 20165 }, { "epoch": 2.3269497000461468, "grad_norm": 0.4321753978729248, "learning_rate": 2.913050770852348e-05, "loss": 0.9173, "step": 20170 }, { "epoch": 2.3275265343793263, "grad_norm": 0.39779239892959595, "learning_rate": 2.9083169897275552e-05, "loss": 0.8776, "step": 20175 }, { "epoch": 2.328103368712506, "grad_norm": 0.4392048716545105, "learning_rate": 2.9035864034197623e-05, "loss": 0.9053, "step": 20180 }, { "epoch": 2.3286802030456855, "grad_norm": 0.41199496388435364, "learning_rate": 2.898859014060119e-05, "loss": 0.8739, "step": 20185 }, { "epoch": 2.3292570373788646, "grad_norm": 0.42873457074165344, "learning_rate": 2.894134823778315e-05, "loss": 0.8805, "step": 20190 }, { "epoch": 2.329833871712044, "grad_norm": 0.4416216015815735, "learning_rate": 2.8894138347026125e-05, "loss": 0.9102, "step": 20195 }, { "epoch": 2.3304107060452237, "grad_norm": 0.4041585624217987, "learning_rate": 2.8846960489598173e-05, "loss": 0.8466, "step": 20200 }, { "epoch": 2.3309875403784033, "grad_norm": 0.45262736082077026, "learning_rate": 2.8799814686753134e-05, "loss": 0.8963, "step": 20205 }, { "epoch": 2.331564374711583, "grad_norm": 0.38636353611946106, "learning_rate": 2.8752700959730193e-05, "loss": 0.788, "step": 20210 }, { "epoch": 2.3321412090447624, "grad_norm": 0.4599827826023102, "learning_rate": 2.870561932975424e-05, "loss": 0.8385, "step": 20215 }, { "epoch": 2.332718043377942, "grad_norm": 0.4050966799259186, "learning_rate": 2.8658569818035542e-05, "loss": 0.8615, "step": 20220 }, { "epoch": 2.3332948777111215, "grad_norm": 0.38030165433883667, "learning_rate": 2.8611552445770127e-05, "loss": 0.8425, "step": 20225 }, { "epoch": 2.333871712044301, "grad_norm": 0.3842707574367523, "learning_rate": 2.8564567234139304e-05, "loss": 0.8763, "step": 20230 }, { "epoch": 2.3344485463774802, "grad_norm": 0.4096066653728485, "learning_rate": 2.851761420431006e-05, "loss": 0.8119, "step": 20235 }, { "epoch": 2.33502538071066, "grad_norm": 0.4566209614276886, "learning_rate": 2.8470693377434797e-05, "loss": 0.8707, "step": 20240 }, { "epoch": 2.3356022150438394, "grad_norm": 0.4076927602291107, "learning_rate": 2.8423804774651496e-05, "loss": 0.8344, "step": 20245 }, { "epoch": 2.336179049377019, "grad_norm": 0.4491892457008362, "learning_rate": 2.8376948417083483e-05, "loss": 0.841, "step": 20250 }, { "epoch": 2.3367558837101985, "grad_norm": 0.42058318853378296, "learning_rate": 2.833012432583968e-05, "loss": 0.872, "step": 20255 }, { "epoch": 2.337332718043378, "grad_norm": 0.4171206057071686, "learning_rate": 2.8283332522014427e-05, "loss": 0.916, "step": 20260 }, { "epoch": 2.3379095523765576, "grad_norm": 0.37775084376335144, "learning_rate": 2.823657302668755e-05, "loss": 0.8729, "step": 20265 }, { "epoch": 2.3384863867097367, "grad_norm": 0.4589167535305023, "learning_rate": 2.81898458609242e-05, "loss": 0.9139, "step": 20270 }, { "epoch": 2.3390632210429163, "grad_norm": 0.3988697826862335, "learning_rate": 2.8143151045775196e-05, "loss": 0.8507, "step": 20275 }, { "epoch": 2.339640055376096, "grad_norm": 0.39948388934135437, "learning_rate": 2.8096488602276528e-05, "loss": 0.9003, "step": 20280 }, { "epoch": 2.3402168897092754, "grad_norm": 0.4345165193080902, "learning_rate": 2.804985855144979e-05, "loss": 0.887, "step": 20285 }, { "epoch": 2.340793724042455, "grad_norm": 0.4340234696865082, "learning_rate": 2.800326091430182e-05, "loss": 0.8868, "step": 20290 }, { "epoch": 2.3413705583756346, "grad_norm": 0.4125678539276123, "learning_rate": 2.7956695711825075e-05, "loss": 0.8698, "step": 20295 }, { "epoch": 2.341947392708814, "grad_norm": 0.45810467004776, "learning_rate": 2.7910162964997154e-05, "loss": 0.8384, "step": 20300 }, { "epoch": 2.3425242270419937, "grad_norm": 0.43330734968185425, "learning_rate": 2.78636626947812e-05, "loss": 0.8722, "step": 20305 }, { "epoch": 2.3431010613751733, "grad_norm": 0.392790824174881, "learning_rate": 2.7817194922125666e-05, "loss": 0.8737, "step": 20310 }, { "epoch": 2.3436778957083524, "grad_norm": 0.4088856875896454, "learning_rate": 2.777075966796442e-05, "loss": 0.9155, "step": 20315 }, { "epoch": 2.344254730041532, "grad_norm": 0.41359519958496094, "learning_rate": 2.7724356953216545e-05, "loss": 0.8969, "step": 20320 }, { "epoch": 2.3448315643747115, "grad_norm": 0.41587385535240173, "learning_rate": 2.7677986798786615e-05, "loss": 0.8366, "step": 20325 }, { "epoch": 2.345408398707891, "grad_norm": 0.460124671459198, "learning_rate": 2.763164922556445e-05, "loss": 0.8809, "step": 20330 }, { "epoch": 2.3459852330410707, "grad_norm": 0.37693455815315247, "learning_rate": 2.758534425442526e-05, "loss": 0.8692, "step": 20335 }, { "epoch": 2.34656206737425, "grad_norm": 0.4221893548965454, "learning_rate": 2.753907190622944e-05, "loss": 0.8756, "step": 20340 }, { "epoch": 2.34713890170743, "grad_norm": 0.4988071322441101, "learning_rate": 2.7492832201822882e-05, "loss": 0.8969, "step": 20345 }, { "epoch": 2.347715736040609, "grad_norm": 0.46341338753700256, "learning_rate": 2.7446625162036577e-05, "loss": 0.8354, "step": 20350 }, { "epoch": 2.3482925703737885, "grad_norm": 0.43746891617774963, "learning_rate": 2.7400450807686938e-05, "loss": 0.871, "step": 20355 }, { "epoch": 2.348869404706968, "grad_norm": 0.4257577359676361, "learning_rate": 2.735430915957553e-05, "loss": 0.8351, "step": 20360 }, { "epoch": 2.3494462390401476, "grad_norm": 0.40961867570877075, "learning_rate": 2.7308200238489367e-05, "loss": 0.8615, "step": 20365 }, { "epoch": 2.350023073373327, "grad_norm": 0.4807041585445404, "learning_rate": 2.726212406520051e-05, "loss": 0.8729, "step": 20370 }, { "epoch": 2.3505999077065067, "grad_norm": 0.4345334470272064, "learning_rate": 2.7216080660466403e-05, "loss": 0.8955, "step": 20375 }, { "epoch": 2.3511767420396863, "grad_norm": 0.4259088337421417, "learning_rate": 2.717007004502968e-05, "loss": 0.8409, "step": 20380 }, { "epoch": 2.351753576372866, "grad_norm": 0.4218992590904236, "learning_rate": 2.712409223961826e-05, "loss": 0.8594, "step": 20385 }, { "epoch": 2.3523304107060454, "grad_norm": 0.44640931487083435, "learning_rate": 2.7078147264945153e-05, "loss": 0.8873, "step": 20390 }, { "epoch": 2.3529072450392245, "grad_norm": 0.4130471646785736, "learning_rate": 2.7032235141708704e-05, "loss": 0.8687, "step": 20395 }, { "epoch": 2.353484079372404, "grad_norm": 0.37854447960853577, "learning_rate": 2.698635589059242e-05, "loss": 0.8776, "step": 20400 }, { "epoch": 2.3540609137055837, "grad_norm": 0.4220699667930603, "learning_rate": 2.6940509532265003e-05, "loss": 0.8527, "step": 20405 }, { "epoch": 2.3546377480387632, "grad_norm": 0.4551481306552887, "learning_rate": 2.689469608738028e-05, "loss": 0.8891, "step": 20410 }, { "epoch": 2.355214582371943, "grad_norm": 0.4928138852119446, "learning_rate": 2.6848915576577317e-05, "loss": 0.8627, "step": 20415 }, { "epoch": 2.3557914167051224, "grad_norm": 0.40893441438674927, "learning_rate": 2.680316802048034e-05, "loss": 0.903, "step": 20420 }, { "epoch": 2.356368251038302, "grad_norm": 0.42715442180633545, "learning_rate": 2.67574534396987e-05, "loss": 0.8748, "step": 20425 }, { "epoch": 2.356945085371481, "grad_norm": 0.4622029662132263, "learning_rate": 2.6711771854826905e-05, "loss": 0.8926, "step": 20430 }, { "epoch": 2.3575219197046606, "grad_norm": 0.43145161867141724, "learning_rate": 2.6666123286444623e-05, "loss": 0.9149, "step": 20435 }, { "epoch": 2.35809875403784, "grad_norm": 0.4071784019470215, "learning_rate": 2.6620507755116574e-05, "loss": 0.8695, "step": 20440 }, { "epoch": 2.3586755883710198, "grad_norm": 0.430257648229599, "learning_rate": 2.657492528139268e-05, "loss": 0.83, "step": 20445 }, { "epoch": 2.3592524227041993, "grad_norm": 0.39387521147727966, "learning_rate": 2.6529375885807915e-05, "loss": 0.8472, "step": 20450 }, { "epoch": 2.359829257037379, "grad_norm": 0.4547233581542969, "learning_rate": 2.6483859588882408e-05, "loss": 0.8741, "step": 20455 }, { "epoch": 2.3604060913705585, "grad_norm": 0.420403391122818, "learning_rate": 2.643837641112128e-05, "loss": 0.9036, "step": 20460 }, { "epoch": 2.360982925703738, "grad_norm": 0.4206133484840393, "learning_rate": 2.6392926373014825e-05, "loss": 0.8512, "step": 20465 }, { "epoch": 2.3615597600369176, "grad_norm": 0.42972996830940247, "learning_rate": 2.634750949503837e-05, "loss": 0.8849, "step": 20470 }, { "epoch": 2.3621365943700967, "grad_norm": 0.43919214606285095, "learning_rate": 2.630212579765231e-05, "loss": 0.8254, "step": 20475 }, { "epoch": 2.3627134287032763, "grad_norm": 0.43563053011894226, "learning_rate": 2.6256775301302115e-05, "loss": 0.8661, "step": 20480 }, { "epoch": 2.363290263036456, "grad_norm": 0.4127313792705536, "learning_rate": 2.6211458026418212e-05, "loss": 0.8734, "step": 20485 }, { "epoch": 2.3638670973696354, "grad_norm": 0.42784926295280457, "learning_rate": 2.6166173993416154e-05, "loss": 0.8856, "step": 20490 }, { "epoch": 2.364443931702815, "grad_norm": 0.4503290355205536, "learning_rate": 2.612092322269648e-05, "loss": 0.8888, "step": 20495 }, { "epoch": 2.3650207660359945, "grad_norm": 0.39504754543304443, "learning_rate": 2.6075705734644796e-05, "loss": 0.8398, "step": 20500 }, { "epoch": 2.365597600369174, "grad_norm": 0.4576849937438965, "learning_rate": 2.603052154963158e-05, "loss": 0.9339, "step": 20505 }, { "epoch": 2.3661744347023537, "grad_norm": 0.41642168164253235, "learning_rate": 2.5985370688012457e-05, "loss": 0.8616, "step": 20510 }, { "epoch": 2.3667512690355332, "grad_norm": 0.4202212989330292, "learning_rate": 2.594025317012796e-05, "loss": 0.865, "step": 20515 }, { "epoch": 2.3673281033687124, "grad_norm": 0.4365369379520416, "learning_rate": 2.5895169016303623e-05, "loss": 0.8556, "step": 20520 }, { "epoch": 2.367904937701892, "grad_norm": 0.4484837055206299, "learning_rate": 2.5850118246849942e-05, "loss": 0.8848, "step": 20525 }, { "epoch": 2.3684817720350715, "grad_norm": 0.36972007155418396, "learning_rate": 2.5805100882062416e-05, "loss": 0.8821, "step": 20530 }, { "epoch": 2.369058606368251, "grad_norm": 0.4523972272872925, "learning_rate": 2.576011694222139e-05, "loss": 0.916, "step": 20535 }, { "epoch": 2.3696354407014306, "grad_norm": 0.40989214181900024, "learning_rate": 2.571516644759223e-05, "loss": 0.8602, "step": 20540 }, { "epoch": 2.37021227503461, "grad_norm": 0.4344289004802704, "learning_rate": 2.567024941842525e-05, "loss": 0.8708, "step": 20545 }, { "epoch": 2.3707891093677897, "grad_norm": 0.4548048675060272, "learning_rate": 2.5625365874955674e-05, "loss": 0.8366, "step": 20550 }, { "epoch": 2.371365943700969, "grad_norm": 0.5554631352424622, "learning_rate": 2.5580515837403563e-05, "loss": 0.8741, "step": 20555 }, { "epoch": 2.3719427780341484, "grad_norm": 0.45449256896972656, "learning_rate": 2.5535699325973983e-05, "loss": 0.8692, "step": 20560 }, { "epoch": 2.372519612367328, "grad_norm": 0.43762558698654175, "learning_rate": 2.5490916360856853e-05, "loss": 0.8752, "step": 20565 }, { "epoch": 2.3730964467005076, "grad_norm": 0.4865242540836334, "learning_rate": 2.5446166962227023e-05, "loss": 0.8788, "step": 20570 }, { "epoch": 2.373673281033687, "grad_norm": 0.43669965863227844, "learning_rate": 2.5401451150244105e-05, "loss": 0.8666, "step": 20575 }, { "epoch": 2.3742501153668667, "grad_norm": 0.4120925962924957, "learning_rate": 2.5356768945052745e-05, "loss": 0.8631, "step": 20580 }, { "epoch": 2.3748269497000463, "grad_norm": 0.4763656258583069, "learning_rate": 2.531212036678231e-05, "loss": 0.8937, "step": 20585 }, { "epoch": 2.375403784033226, "grad_norm": 0.41615408658981323, "learning_rate": 2.5267505435547078e-05, "loss": 0.8541, "step": 20590 }, { "epoch": 2.3759806183664054, "grad_norm": 0.44895505905151367, "learning_rate": 2.522292417144617e-05, "loss": 0.8887, "step": 20595 }, { "epoch": 2.3765574526995845, "grad_norm": 0.46258002519607544, "learning_rate": 2.5178376594563556e-05, "loss": 0.8727, "step": 20600 }, { "epoch": 2.377134287032764, "grad_norm": 0.4195844233036041, "learning_rate": 2.513386272496796e-05, "loss": 0.823, "step": 20605 }, { "epoch": 2.3777111213659436, "grad_norm": 0.41249099373817444, "learning_rate": 2.5089382582712994e-05, "loss": 0.836, "step": 20610 }, { "epoch": 2.378287955699123, "grad_norm": 0.39638757705688477, "learning_rate": 2.5044936187837044e-05, "loss": 0.8776, "step": 20615 }, { "epoch": 2.378864790032303, "grad_norm": 0.5457749962806702, "learning_rate": 2.5000523560363322e-05, "loss": 0.9153, "step": 20620 }, { "epoch": 2.3794416243654823, "grad_norm": 0.3871450424194336, "learning_rate": 2.4956144720299712e-05, "loss": 0.8258, "step": 20625 }, { "epoch": 2.380018458698662, "grad_norm": 0.4669581353664398, "learning_rate": 2.4911799687639102e-05, "loss": 0.8968, "step": 20630 }, { "epoch": 2.380595293031841, "grad_norm": 0.44625550508499146, "learning_rate": 2.4867488482358923e-05, "loss": 0.8696, "step": 20635 }, { "epoch": 2.3811721273650206, "grad_norm": 0.4140227437019348, "learning_rate": 2.482321112442151e-05, "loss": 0.867, "step": 20640 }, { "epoch": 2.3817489616982, "grad_norm": 0.4040051996707916, "learning_rate": 2.477896763377382e-05, "loss": 0.882, "step": 20645 }, { "epoch": 2.3823257960313797, "grad_norm": 0.41291922330856323, "learning_rate": 2.4734758030347738e-05, "loss": 0.8418, "step": 20650 }, { "epoch": 2.3829026303645593, "grad_norm": 0.42268839478492737, "learning_rate": 2.4690582334059685e-05, "loss": 0.8842, "step": 20655 }, { "epoch": 2.383479464697739, "grad_norm": 0.46346721053123474, "learning_rate": 2.464644056481098e-05, "loss": 0.8908, "step": 20660 }, { "epoch": 2.3840562990309184, "grad_norm": 0.3883967995643616, "learning_rate": 2.4602332742487476e-05, "loss": 0.8437, "step": 20665 }, { "epoch": 2.384633133364098, "grad_norm": 0.4117676913738251, "learning_rate": 2.455825888695994e-05, "loss": 0.9205, "step": 20670 }, { "epoch": 2.3852099676972776, "grad_norm": 0.3966183066368103, "learning_rate": 2.451421901808365e-05, "loss": 0.8497, "step": 20675 }, { "epoch": 2.3857868020304567, "grad_norm": 0.42475172877311707, "learning_rate": 2.4470213155698683e-05, "loss": 0.8584, "step": 20680 }, { "epoch": 2.3863636363636362, "grad_norm": 0.4242252707481384, "learning_rate": 2.4426241319629772e-05, "loss": 0.8568, "step": 20685 }, { "epoch": 2.386940470696816, "grad_norm": 0.4951821565628052, "learning_rate": 2.4382303529686324e-05, "loss": 0.8503, "step": 20690 }, { "epoch": 2.3875173050299954, "grad_norm": 0.4654747247695923, "learning_rate": 2.433839980566236e-05, "loss": 0.8316, "step": 20695 }, { "epoch": 2.388094139363175, "grad_norm": 0.4673211872577667, "learning_rate": 2.4294530167336615e-05, "loss": 0.884, "step": 20700 }, { "epoch": 2.3886709736963545, "grad_norm": 0.45259878039360046, "learning_rate": 2.425069463447245e-05, "loss": 0.9021, "step": 20705 }, { "epoch": 2.389247808029534, "grad_norm": 0.4560452997684479, "learning_rate": 2.4206893226817884e-05, "loss": 0.911, "step": 20710 }, { "epoch": 2.389824642362713, "grad_norm": 0.420181006193161, "learning_rate": 2.4163125964105448e-05, "loss": 0.882, "step": 20715 }, { "epoch": 2.3904014766958928, "grad_norm": 0.43059244751930237, "learning_rate": 2.4119392866052492e-05, "loss": 0.8786, "step": 20720 }, { "epoch": 2.3909783110290723, "grad_norm": 0.4146776497364044, "learning_rate": 2.407569395236079e-05, "loss": 0.8795, "step": 20725 }, { "epoch": 2.391555145362252, "grad_norm": 0.3845740556716919, "learning_rate": 2.4032029242716826e-05, "loss": 0.8814, "step": 20730 }, { "epoch": 2.3921319796954315, "grad_norm": 0.3898622691631317, "learning_rate": 2.398839875679155e-05, "loss": 0.8552, "step": 20735 }, { "epoch": 2.392708814028611, "grad_norm": 0.4128072261810303, "learning_rate": 2.3944802514240726e-05, "loss": 0.9095, "step": 20740 }, { "epoch": 2.3932856483617906, "grad_norm": 0.4304887652397156, "learning_rate": 2.390124053470443e-05, "loss": 0.8821, "step": 20745 }, { "epoch": 2.39386248269497, "grad_norm": 0.40331536531448364, "learning_rate": 2.3857712837807454e-05, "loss": 0.8946, "step": 20750 }, { "epoch": 2.3944393170281497, "grad_norm": 0.40375304222106934, "learning_rate": 2.3814219443159115e-05, "loss": 0.8667, "step": 20755 }, { "epoch": 2.395016151361329, "grad_norm": 0.4574378430843353, "learning_rate": 2.3770760370353294e-05, "loss": 0.8721, "step": 20760 }, { "epoch": 2.3955929856945084, "grad_norm": 0.4717426002025604, "learning_rate": 2.372733563896834e-05, "loss": 0.8803, "step": 20765 }, { "epoch": 2.396169820027688, "grad_norm": 0.49499765038490295, "learning_rate": 2.3683945268567197e-05, "loss": 0.9142, "step": 20770 }, { "epoch": 2.3967466543608675, "grad_norm": 0.40809836983680725, "learning_rate": 2.364058927869732e-05, "loss": 0.9058, "step": 20775 }, { "epoch": 2.397323488694047, "grad_norm": 0.40131765604019165, "learning_rate": 2.35972676888907e-05, "loss": 0.8766, "step": 20780 }, { "epoch": 2.3979003230272267, "grad_norm": 0.43188804388046265, "learning_rate": 2.35539805186637e-05, "loss": 0.8498, "step": 20785 }, { "epoch": 2.3984771573604062, "grad_norm": 0.4838411509990692, "learning_rate": 2.3510727787517382e-05, "loss": 0.8694, "step": 20790 }, { "epoch": 2.3990539916935854, "grad_norm": 0.43422776460647583, "learning_rate": 2.3467509514937126e-05, "loss": 0.8835, "step": 20795 }, { "epoch": 2.3996308260267654, "grad_norm": 0.4064343571662903, "learning_rate": 2.3424325720392882e-05, "loss": 0.8297, "step": 20800 }, { "epoch": 2.4002076603599445, "grad_norm": 0.42274659872055054, "learning_rate": 2.3381176423338956e-05, "loss": 0.8689, "step": 20805 }, { "epoch": 2.400784494693124, "grad_norm": 0.5127131938934326, "learning_rate": 2.3338061643214316e-05, "loss": 0.852, "step": 20810 }, { "epoch": 2.4013613290263036, "grad_norm": 0.4743903577327728, "learning_rate": 2.3294981399442138e-05, "loss": 0.8761, "step": 20815 }, { "epoch": 2.401938163359483, "grad_norm": 0.4744824767112732, "learning_rate": 2.325193571143024e-05, "loss": 0.8937, "step": 20820 }, { "epoch": 2.4025149976926627, "grad_norm": 0.4018848240375519, "learning_rate": 2.3208924598570702e-05, "loss": 0.8439, "step": 20825 }, { "epoch": 2.4030918320258423, "grad_norm": 0.44422465562820435, "learning_rate": 2.3165948080240206e-05, "loss": 0.8936, "step": 20830 }, { "epoch": 2.403668666359022, "grad_norm": 0.4371984302997589, "learning_rate": 2.31230061757997e-05, "loss": 0.848, "step": 20835 }, { "epoch": 2.404245500692201, "grad_norm": 0.438287615776062, "learning_rate": 2.3080098904594603e-05, "loss": 0.8541, "step": 20840 }, { "epoch": 2.4048223350253806, "grad_norm": 0.4366404712200165, "learning_rate": 2.303722628595474e-05, "loss": 0.8519, "step": 20845 }, { "epoch": 2.40539916935856, "grad_norm": 0.3852338194847107, "learning_rate": 2.299438833919432e-05, "loss": 0.8386, "step": 20850 }, { "epoch": 2.4059760036917397, "grad_norm": 0.42250022292137146, "learning_rate": 2.295158508361188e-05, "loss": 0.8668, "step": 20855 }, { "epoch": 2.4065528380249193, "grad_norm": 0.45582088828086853, "learning_rate": 2.2908816538490385e-05, "loss": 0.9077, "step": 20860 }, { "epoch": 2.407129672358099, "grad_norm": 0.447816401720047, "learning_rate": 2.286608272309716e-05, "loss": 0.8893, "step": 20865 }, { "epoch": 2.4077065066912784, "grad_norm": 0.5000319480895996, "learning_rate": 2.2823383656683904e-05, "loss": 0.933, "step": 20870 }, { "epoch": 2.408283341024458, "grad_norm": 0.43965062499046326, "learning_rate": 2.2780719358486524e-05, "loss": 0.8496, "step": 20875 }, { "epoch": 2.4088601753576375, "grad_norm": 0.44113633036613464, "learning_rate": 2.2738089847725497e-05, "loss": 0.8328, "step": 20880 }, { "epoch": 2.4094370096908166, "grad_norm": 0.4106186628341675, "learning_rate": 2.2695495143605416e-05, "loss": 0.8419, "step": 20885 }, { "epoch": 2.410013844023996, "grad_norm": 0.41286739706993103, "learning_rate": 2.26529352653153e-05, "loss": 0.8693, "step": 20890 }, { "epoch": 2.4105906783571758, "grad_norm": 0.42569470405578613, "learning_rate": 2.2610410232028467e-05, "loss": 0.8539, "step": 20895 }, { "epoch": 2.4111675126903553, "grad_norm": 0.39974820613861084, "learning_rate": 2.2567920062902546e-05, "loss": 0.8694, "step": 20900 }, { "epoch": 2.411744347023535, "grad_norm": 0.42251622676849365, "learning_rate": 2.2525464777079398e-05, "loss": 0.8916, "step": 20905 }, { "epoch": 2.4123211813567145, "grad_norm": 0.4560185670852661, "learning_rate": 2.2483044393685215e-05, "loss": 0.8593, "step": 20910 }, { "epoch": 2.412898015689894, "grad_norm": 0.40557077527046204, "learning_rate": 2.244065893183048e-05, "loss": 0.8338, "step": 20915 }, { "epoch": 2.413474850023073, "grad_norm": 0.41522446274757385, "learning_rate": 2.239830841060996e-05, "loss": 0.9022, "step": 20920 }, { "epoch": 2.4140516843562527, "grad_norm": 0.39065486192703247, "learning_rate": 2.235599284910258e-05, "loss": 0.8769, "step": 20925 }, { "epoch": 2.4146285186894323, "grad_norm": 0.4300009310245514, "learning_rate": 2.231371226637161e-05, "loss": 0.8506, "step": 20930 }, { "epoch": 2.415205353022612, "grad_norm": 0.4822119176387787, "learning_rate": 2.2271466681464547e-05, "loss": 0.8726, "step": 20935 }, { "epoch": 2.4157821873557914, "grad_norm": 0.4451304078102112, "learning_rate": 2.2229256113413087e-05, "loss": 0.8631, "step": 20940 }, { "epoch": 2.416359021688971, "grad_norm": 0.4610227644443512, "learning_rate": 2.21870805812332e-05, "loss": 0.9034, "step": 20945 }, { "epoch": 2.4169358560221506, "grad_norm": 0.40991950035095215, "learning_rate": 2.214494010392505e-05, "loss": 0.8939, "step": 20950 }, { "epoch": 2.41751269035533, "grad_norm": 0.4460807740688324, "learning_rate": 2.210283470047296e-05, "loss": 0.8765, "step": 20955 }, { "epoch": 2.4180895246885097, "grad_norm": 0.43716761469841003, "learning_rate": 2.2060764389845534e-05, "loss": 0.887, "step": 20960 }, { "epoch": 2.418666359021689, "grad_norm": 0.4361554980278015, "learning_rate": 2.2018729190995514e-05, "loss": 0.8302, "step": 20965 }, { "epoch": 2.4192431933548684, "grad_norm": 0.3899691700935364, "learning_rate": 2.1976729122859864e-05, "loss": 0.8283, "step": 20970 }, { "epoch": 2.419820027688048, "grad_norm": 0.4203675091266632, "learning_rate": 2.1934764204359648e-05, "loss": 0.883, "step": 20975 }, { "epoch": 2.4203968620212275, "grad_norm": 0.48073139786720276, "learning_rate": 2.1892834454400167e-05, "loss": 0.8684, "step": 20980 }, { "epoch": 2.420973696354407, "grad_norm": 0.4376397132873535, "learning_rate": 2.185093989187087e-05, "loss": 0.8898, "step": 20985 }, { "epoch": 2.4215505306875866, "grad_norm": 0.39727121591567993, "learning_rate": 2.1809080535645323e-05, "loss": 0.8615, "step": 20990 }, { "epoch": 2.422127365020766, "grad_norm": 0.4959402084350586, "learning_rate": 2.176725640458127e-05, "loss": 0.8504, "step": 20995 }, { "epoch": 2.4227041993539453, "grad_norm": 0.3901364207267761, "learning_rate": 2.1725467517520526e-05, "loss": 0.8537, "step": 21000 }, { "epoch": 2.423281033687125, "grad_norm": 0.4253600239753723, "learning_rate": 2.1683713893289094e-05, "loss": 0.8638, "step": 21005 }, { "epoch": 2.4238578680203045, "grad_norm": 0.43812429904937744, "learning_rate": 2.1641995550697038e-05, "loss": 0.8103, "step": 21010 }, { "epoch": 2.424434702353484, "grad_norm": 0.38738441467285156, "learning_rate": 2.1600312508538602e-05, "loss": 0.8374, "step": 21015 }, { "epoch": 2.4250115366866636, "grad_norm": 0.4647636115550995, "learning_rate": 2.155866478559202e-05, "loss": 0.8965, "step": 21020 }, { "epoch": 2.425588371019843, "grad_norm": 0.4338874816894531, "learning_rate": 2.1517052400619696e-05, "loss": 0.8724, "step": 21025 }, { "epoch": 2.4261652053530227, "grad_norm": 0.4465797543525696, "learning_rate": 2.1475475372368094e-05, "loss": 0.8758, "step": 21030 }, { "epoch": 2.4267420396862023, "grad_norm": 0.427768349647522, "learning_rate": 2.1433933719567745e-05, "loss": 0.8586, "step": 21035 }, { "epoch": 2.427318874019382, "grad_norm": 0.42242631316185, "learning_rate": 2.139242746093323e-05, "loss": 0.9039, "step": 21040 }, { "epoch": 2.427895708352561, "grad_norm": 0.4240041673183441, "learning_rate": 2.1350956615163254e-05, "loss": 0.8989, "step": 21045 }, { "epoch": 2.4284725426857405, "grad_norm": 0.4253004789352417, "learning_rate": 2.130952120094044e-05, "loss": 0.8653, "step": 21050 }, { "epoch": 2.42904937701892, "grad_norm": 0.4393162429332733, "learning_rate": 2.126812123693155e-05, "loss": 0.8465, "step": 21055 }, { "epoch": 2.4296262113520997, "grad_norm": 0.43222832679748535, "learning_rate": 2.1226756741787356e-05, "loss": 0.8754, "step": 21060 }, { "epoch": 2.4302030456852792, "grad_norm": 0.46488478779792786, "learning_rate": 2.1185427734142682e-05, "loss": 0.8392, "step": 21065 }, { "epoch": 2.430779880018459, "grad_norm": 0.39256176352500916, "learning_rate": 2.114413423261622e-05, "loss": 0.8815, "step": 21070 }, { "epoch": 2.4313567143516384, "grad_norm": 0.42439353466033936, "learning_rate": 2.1102876255810887e-05, "loss": 0.8672, "step": 21075 }, { "epoch": 2.4319335486848175, "grad_norm": 0.476100355386734, "learning_rate": 2.106165382231341e-05, "loss": 0.9268, "step": 21080 }, { "epoch": 2.432510383017997, "grad_norm": 0.4509223997592926, "learning_rate": 2.102046695069463e-05, "loss": 0.8451, "step": 21085 }, { "epoch": 2.4330872173511766, "grad_norm": 0.44436001777648926, "learning_rate": 2.0979315659509223e-05, "loss": 0.8915, "step": 21090 }, { "epoch": 2.433664051684356, "grad_norm": 0.4077122211456299, "learning_rate": 2.0938199967296036e-05, "loss": 0.8599, "step": 21095 }, { "epoch": 2.4342408860175357, "grad_norm": 0.4311679005622864, "learning_rate": 2.0897119892577698e-05, "loss": 0.8531, "step": 21100 }, { "epoch": 2.4348177203507153, "grad_norm": 0.4484409987926483, "learning_rate": 2.085607545386088e-05, "loss": 0.8949, "step": 21105 }, { "epoch": 2.435394554683895, "grad_norm": 0.42149195075035095, "learning_rate": 2.0815066669636174e-05, "loss": 0.8299, "step": 21110 }, { "epoch": 2.4359713890170744, "grad_norm": 0.40608465671539307, "learning_rate": 2.077409355837816e-05, "loss": 0.8374, "step": 21115 }, { "epoch": 2.436548223350254, "grad_norm": 0.41218310594558716, "learning_rate": 2.0733156138545252e-05, "loss": 0.862, "step": 21120 }, { "epoch": 2.437125057683433, "grad_norm": 0.42621365189552307, "learning_rate": 2.069225442857984e-05, "loss": 0.9075, "step": 21125 }, { "epoch": 2.4377018920166127, "grad_norm": 0.4423667788505554, "learning_rate": 2.0651388446908248e-05, "loss": 0.8844, "step": 21130 }, { "epoch": 2.4382787263497923, "grad_norm": 0.41898277401924133, "learning_rate": 2.0610558211940702e-05, "loss": 0.8856, "step": 21135 }, { "epoch": 2.438855560682972, "grad_norm": 0.39691296219825745, "learning_rate": 2.0569763742071236e-05, "loss": 0.8571, "step": 21140 }, { "epoch": 2.4394323950161514, "grad_norm": 0.43472006916999817, "learning_rate": 2.0529005055677884e-05, "loss": 0.829, "step": 21145 }, { "epoch": 2.440009229349331, "grad_norm": 0.4144620895385742, "learning_rate": 2.0488282171122498e-05, "loss": 0.8758, "step": 21150 }, { "epoch": 2.4405860636825105, "grad_norm": 0.4102288782596588, "learning_rate": 2.044759510675086e-05, "loss": 0.9259, "step": 21155 }, { "epoch": 2.44116289801569, "grad_norm": 0.4925200045108795, "learning_rate": 2.040694388089247e-05, "loss": 0.889, "step": 21160 }, { "epoch": 2.4417397323488697, "grad_norm": 0.42156094312667847, "learning_rate": 2.036632851186091e-05, "loss": 0.8579, "step": 21165 }, { "epoch": 2.4423165666820488, "grad_norm": 0.43018943071365356, "learning_rate": 2.0325749017953387e-05, "loss": 0.8924, "step": 21170 }, { "epoch": 2.4428934010152283, "grad_norm": 0.3908984959125519, "learning_rate": 2.0285205417451115e-05, "loss": 0.8549, "step": 21175 }, { "epoch": 2.443470235348408, "grad_norm": 0.4131862223148346, "learning_rate": 2.0244697728618966e-05, "loss": 0.8871, "step": 21180 }, { "epoch": 2.4440470696815875, "grad_norm": 0.42835119366645813, "learning_rate": 2.0204225969705846e-05, "loss": 0.8736, "step": 21185 }, { "epoch": 2.444623904014767, "grad_norm": 0.43167683482170105, "learning_rate": 2.0163790158944283e-05, "loss": 0.8479, "step": 21190 }, { "epoch": 2.4452007383479466, "grad_norm": 0.3958747982978821, "learning_rate": 2.0123390314550717e-05, "loss": 0.8444, "step": 21195 }, { "epoch": 2.445777572681126, "grad_norm": 0.4122403562068939, "learning_rate": 2.0083026454725364e-05, "loss": 0.8428, "step": 21200 }, { "epoch": 2.4463544070143053, "grad_norm": 0.4481377899646759, "learning_rate": 2.0042698597652222e-05, "loss": 0.8766, "step": 21205 }, { "epoch": 2.446931241347485, "grad_norm": 0.4057758152484894, "learning_rate": 2.000240676149904e-05, "loss": 0.8332, "step": 21210 }, { "epoch": 2.4475080756806644, "grad_norm": 0.4093271791934967, "learning_rate": 1.9962150964417382e-05, "loss": 0.86, "step": 21215 }, { "epoch": 2.448084910013844, "grad_norm": 0.42271873354911804, "learning_rate": 1.9921931224542567e-05, "loss": 0.8735, "step": 21220 }, { "epoch": 2.4486617443470236, "grad_norm": 0.4053228795528412, "learning_rate": 1.9881747559993703e-05, "loss": 0.8769, "step": 21225 }, { "epoch": 2.449238578680203, "grad_norm": 0.4581349790096283, "learning_rate": 1.9841599988873517e-05, "loss": 0.8737, "step": 21230 }, { "epoch": 2.4498154130133827, "grad_norm": 0.37571778893470764, "learning_rate": 1.9801488529268673e-05, "loss": 0.8509, "step": 21235 }, { "epoch": 2.4503922473465622, "grad_norm": 0.4023771286010742, "learning_rate": 1.976141319924939e-05, "loss": 0.8249, "step": 21240 }, { "epoch": 2.450969081679742, "grad_norm": 0.4219907224178314, "learning_rate": 1.972137401686973e-05, "loss": 0.872, "step": 21245 }, { "epoch": 2.451545916012921, "grad_norm": 0.4790753722190857, "learning_rate": 1.9681371000167348e-05, "loss": 0.9489, "step": 21250 }, { "epoch": 2.4521227503461005, "grad_norm": 0.4022129476070404, "learning_rate": 1.964140416716379e-05, "loss": 0.832, "step": 21255 }, { "epoch": 2.45269958467928, "grad_norm": 0.4099849462509155, "learning_rate": 1.960147353586409e-05, "loss": 0.8369, "step": 21260 }, { "epoch": 2.4532764190124596, "grad_norm": 0.45280352234840393, "learning_rate": 1.9561579124257133e-05, "loss": 0.9413, "step": 21265 }, { "epoch": 2.453853253345639, "grad_norm": 0.40902838110923767, "learning_rate": 1.9521720950315403e-05, "loss": 0.865, "step": 21270 }, { "epoch": 2.4544300876788188, "grad_norm": 0.39801719784736633, "learning_rate": 1.9481899031995133e-05, "loss": 0.8029, "step": 21275 }, { "epoch": 2.4550069220119983, "grad_norm": 0.42373040318489075, "learning_rate": 1.9442113387236105e-05, "loss": 0.9056, "step": 21280 }, { "epoch": 2.4555837563451774, "grad_norm": 0.4700776934623718, "learning_rate": 1.940236403396186e-05, "loss": 0.8751, "step": 21285 }, { "epoch": 2.456160590678357, "grad_norm": 0.47902897000312805, "learning_rate": 1.9362650990079566e-05, "loss": 0.8409, "step": 21290 }, { "epoch": 2.4567374250115366, "grad_norm": 0.41159096360206604, "learning_rate": 1.9322974273480054e-05, "loss": 0.8969, "step": 21295 }, { "epoch": 2.457314259344716, "grad_norm": 0.46974456310272217, "learning_rate": 1.9283333902037694e-05, "loss": 0.8648, "step": 21300 }, { "epoch": 2.4578910936778957, "grad_norm": 0.3993097245693207, "learning_rate": 1.9243729893610597e-05, "loss": 0.8908, "step": 21305 }, { "epoch": 2.4584679280110753, "grad_norm": 0.4157516360282898, "learning_rate": 1.9204162266040425e-05, "loss": 0.8746, "step": 21310 }, { "epoch": 2.459044762344255, "grad_norm": 0.46683579683303833, "learning_rate": 1.9164631037152513e-05, "loss": 0.8727, "step": 21315 }, { "epoch": 2.4596215966774344, "grad_norm": 0.44361960887908936, "learning_rate": 1.912513622475567e-05, "loss": 0.8974, "step": 21320 }, { "epoch": 2.460198431010614, "grad_norm": 0.46571844816207886, "learning_rate": 1.9085677846642492e-05, "loss": 0.8774, "step": 21325 }, { "epoch": 2.460775265343793, "grad_norm": 0.4342435896396637, "learning_rate": 1.9046255920588985e-05, "loss": 0.8723, "step": 21330 }, { "epoch": 2.4613520996769727, "grad_norm": 0.4246952533721924, "learning_rate": 1.9006870464354853e-05, "loss": 0.8366, "step": 21335 }, { "epoch": 2.4619289340101522, "grad_norm": 0.41374680399894714, "learning_rate": 1.896752149568323e-05, "loss": 0.8464, "step": 21340 }, { "epoch": 2.462505768343332, "grad_norm": 0.48700255155563354, "learning_rate": 1.8928209032301013e-05, "loss": 0.8893, "step": 21345 }, { "epoch": 2.4630826026765114, "grad_norm": 0.3991559147834778, "learning_rate": 1.888893309191847e-05, "loss": 0.8556, "step": 21350 }, { "epoch": 2.463659437009691, "grad_norm": 0.4696345925331116, "learning_rate": 1.884969369222952e-05, "loss": 0.8444, "step": 21355 }, { "epoch": 2.4642362713428705, "grad_norm": 0.39918190240859985, "learning_rate": 1.8810490850911577e-05, "loss": 0.8522, "step": 21360 }, { "epoch": 2.4648131056760496, "grad_norm": 0.42222800850868225, "learning_rate": 1.8771324585625627e-05, "loss": 0.8798, "step": 21365 }, { "epoch": 2.465389940009229, "grad_norm": 0.4302942454814911, "learning_rate": 1.87321949140161e-05, "loss": 0.8588, "step": 21370 }, { "epoch": 2.4659667743424087, "grad_norm": 0.463563472032547, "learning_rate": 1.8693101853711004e-05, "loss": 0.8984, "step": 21375 }, { "epoch": 2.4665436086755883, "grad_norm": 0.40896716713905334, "learning_rate": 1.8654045422321863e-05, "loss": 0.9366, "step": 21380 }, { "epoch": 2.467120443008768, "grad_norm": 0.3967675268650055, "learning_rate": 1.8615025637443673e-05, "loss": 0.8929, "step": 21385 }, { "epoch": 2.4676972773419474, "grad_norm": 0.41192832589149475, "learning_rate": 1.857604251665487e-05, "loss": 0.895, "step": 21390 }, { "epoch": 2.468274111675127, "grad_norm": 0.4389733672142029, "learning_rate": 1.8537096077517502e-05, "loss": 0.911, "step": 21395 }, { "epoch": 2.4688509460083066, "grad_norm": 0.42747360467910767, "learning_rate": 1.8498186337576972e-05, "loss": 0.8929, "step": 21400 }, { "epoch": 2.469427780341486, "grad_norm": 0.41354596614837646, "learning_rate": 1.845931331436219e-05, "loss": 0.8477, "step": 21405 }, { "epoch": 2.4700046146746653, "grad_norm": 0.45903632044792175, "learning_rate": 1.842047702538554e-05, "loss": 0.8825, "step": 21410 }, { "epoch": 2.470581449007845, "grad_norm": 0.45671504735946655, "learning_rate": 1.838167748814288e-05, "loss": 0.871, "step": 21415 }, { "epoch": 2.4711582833410244, "grad_norm": 0.42325904965400696, "learning_rate": 1.8342914720113404e-05, "loss": 0.8901, "step": 21420 }, { "epoch": 2.471735117674204, "grad_norm": 0.404392272233963, "learning_rate": 1.8304188738759864e-05, "loss": 0.9069, "step": 21425 }, { "epoch": 2.4723119520073835, "grad_norm": 0.4236079156398773, "learning_rate": 1.8265499561528377e-05, "loss": 0.8515, "step": 21430 }, { "epoch": 2.472888786340563, "grad_norm": 0.5150244832038879, "learning_rate": 1.822684720584852e-05, "loss": 0.9117, "step": 21435 }, { "epoch": 2.4734656206737426, "grad_norm": 0.40461140871047974, "learning_rate": 1.8188231689133207e-05, "loss": 0.8762, "step": 21440 }, { "epoch": 2.4740424550069218, "grad_norm": 0.4534957706928253, "learning_rate": 1.8149653028778813e-05, "loss": 0.8637, "step": 21445 }, { "epoch": 2.4746192893401013, "grad_norm": 0.42262566089630127, "learning_rate": 1.8111111242165124e-05, "loss": 0.851, "step": 21450 }, { "epoch": 2.475196123673281, "grad_norm": 0.41150304675102234, "learning_rate": 1.8072606346655274e-05, "loss": 0.8881, "step": 21455 }, { "epoch": 2.4757729580064605, "grad_norm": 0.42715293169021606, "learning_rate": 1.803413835959583e-05, "loss": 0.8618, "step": 21460 }, { "epoch": 2.47634979233964, "grad_norm": 0.4450746178627014, "learning_rate": 1.7995707298316632e-05, "loss": 0.8519, "step": 21465 }, { "epoch": 2.4769266266728196, "grad_norm": 0.3851078748703003, "learning_rate": 1.7957313180130986e-05, "loss": 0.8565, "step": 21470 }, { "epoch": 2.477503461005999, "grad_norm": 0.4541506767272949, "learning_rate": 1.79189560223355e-05, "loss": 0.8453, "step": 21475 }, { "epoch": 2.4780802953391787, "grad_norm": 0.39562171697616577, "learning_rate": 1.788063584221017e-05, "loss": 0.8445, "step": 21480 }, { "epoch": 2.4786571296723583, "grad_norm": 0.41531699895858765, "learning_rate": 1.7842352657018313e-05, "loss": 0.8581, "step": 21485 }, { "epoch": 2.4792339640055374, "grad_norm": 0.4400758743286133, "learning_rate": 1.7804106484006543e-05, "loss": 0.9219, "step": 21490 }, { "epoch": 2.479810798338717, "grad_norm": 0.4268995225429535, "learning_rate": 1.776589734040487e-05, "loss": 0.889, "step": 21495 }, { "epoch": 2.4803876326718965, "grad_norm": 0.4040692150592804, "learning_rate": 1.7727725243426564e-05, "loss": 0.849, "step": 21500 }, { "epoch": 2.480964467005076, "grad_norm": 0.40337270498275757, "learning_rate": 1.7689590210268235e-05, "loss": 0.9017, "step": 21505 }, { "epoch": 2.4815413013382557, "grad_norm": 0.40132004022598267, "learning_rate": 1.7651492258109835e-05, "loss": 0.8515, "step": 21510 }, { "epoch": 2.4821181356714352, "grad_norm": 0.3863861858844757, "learning_rate": 1.7613431404114487e-05, "loss": 0.8567, "step": 21515 }, { "epoch": 2.482694970004615, "grad_norm": 0.4050062894821167, "learning_rate": 1.7575407665428735e-05, "loss": 0.8635, "step": 21520 }, { "epoch": 2.4832718043377944, "grad_norm": 0.4430626630783081, "learning_rate": 1.7537421059182314e-05, "loss": 0.8908, "step": 21525 }, { "epoch": 2.483848638670974, "grad_norm": 0.4688493609428406, "learning_rate": 1.7499471602488316e-05, "loss": 0.8878, "step": 21530 }, { "epoch": 2.484425473004153, "grad_norm": 0.42432737350463867, "learning_rate": 1.7461559312442953e-05, "loss": 0.8621, "step": 21535 }, { "epoch": 2.4850023073373326, "grad_norm": 0.4097782373428345, "learning_rate": 1.74236842061259e-05, "loss": 0.8953, "step": 21540 }, { "epoch": 2.485579141670512, "grad_norm": 0.3850114345550537, "learning_rate": 1.7385846300599885e-05, "loss": 0.8483, "step": 21545 }, { "epoch": 2.4861559760036918, "grad_norm": 0.43694594502449036, "learning_rate": 1.7348045612910978e-05, "loss": 0.8793, "step": 21550 }, { "epoch": 2.4867328103368713, "grad_norm": 0.39885058999061584, "learning_rate": 1.7310282160088465e-05, "loss": 0.8459, "step": 21555 }, { "epoch": 2.487309644670051, "grad_norm": 0.4454430043697357, "learning_rate": 1.7272555959144888e-05, "loss": 0.9015, "step": 21560 }, { "epoch": 2.4878864790032305, "grad_norm": 0.4712061285972595, "learning_rate": 1.7234867027075906e-05, "loss": 0.8686, "step": 21565 }, { "epoch": 2.4884633133364096, "grad_norm": 0.4153970777988434, "learning_rate": 1.7197215380860497e-05, "loss": 0.8595, "step": 21570 }, { "epoch": 2.489040147669589, "grad_norm": 0.3996500074863434, "learning_rate": 1.7159601037460805e-05, "loss": 0.8391, "step": 21575 }, { "epoch": 2.4896169820027687, "grad_norm": 0.43990781903266907, "learning_rate": 1.712202401382217e-05, "loss": 0.8731, "step": 21580 }, { "epoch": 2.4901938163359483, "grad_norm": 0.46257680654525757, "learning_rate": 1.7084484326873062e-05, "loss": 0.8236, "step": 21585 }, { "epoch": 2.490770650669128, "grad_norm": 0.4220145344734192, "learning_rate": 1.704698199352527e-05, "loss": 0.869, "step": 21590 }, { "epoch": 2.4913474850023074, "grad_norm": 0.4439292550086975, "learning_rate": 1.7009517030673584e-05, "loss": 0.8021, "step": 21595 }, { "epoch": 2.491924319335487, "grad_norm": 0.4120362102985382, "learning_rate": 1.6972089455196115e-05, "loss": 0.9061, "step": 21600 }, { "epoch": 2.4925011536686665, "grad_norm": 0.39903098344802856, "learning_rate": 1.6934699283953968e-05, "loss": 0.8302, "step": 21605 }, { "epoch": 2.493077988001846, "grad_norm": 0.4350089132785797, "learning_rate": 1.6897346533791592e-05, "loss": 0.8538, "step": 21610 }, { "epoch": 2.4936548223350252, "grad_norm": 0.3748519718647003, "learning_rate": 1.6860031221536398e-05, "loss": 0.8907, "step": 21615 }, { "epoch": 2.494231656668205, "grad_norm": 0.40715354681015015, "learning_rate": 1.6822753363999066e-05, "loss": 0.8603, "step": 21620 }, { "epoch": 2.4948084910013844, "grad_norm": 0.44604524970054626, "learning_rate": 1.678551297797325e-05, "loss": 0.8541, "step": 21625 }, { "epoch": 2.495385325334564, "grad_norm": 0.45316800475120544, "learning_rate": 1.674831008023594e-05, "loss": 0.8733, "step": 21630 }, { "epoch": 2.4959621596677435, "grad_norm": 0.42586323618888855, "learning_rate": 1.671114468754702e-05, "loss": 0.8808, "step": 21635 }, { "epoch": 2.496538994000923, "grad_norm": 0.40052857995033264, "learning_rate": 1.6674016816649595e-05, "loss": 0.8482, "step": 21640 }, { "epoch": 2.4971158283341026, "grad_norm": 0.3876071572303772, "learning_rate": 1.6636926484269855e-05, "loss": 0.8543, "step": 21645 }, { "epoch": 2.4976926626672817, "grad_norm": 0.3747224509716034, "learning_rate": 1.6599873707117087e-05, "loss": 0.8397, "step": 21650 }, { "epoch": 2.4982694970004613, "grad_norm": 0.41739127039909363, "learning_rate": 1.6562858501883595e-05, "loss": 0.8634, "step": 21655 }, { "epoch": 2.498846331333641, "grad_norm": 0.4056764841079712, "learning_rate": 1.6525880885244815e-05, "loss": 0.8947, "step": 21660 }, { "epoch": 2.4994231656668204, "grad_norm": 0.3982926905155182, "learning_rate": 1.648894087385925e-05, "loss": 0.8889, "step": 21665 }, { "epoch": 2.5, "grad_norm": 0.46115100383758545, "learning_rate": 1.6452038484368447e-05, "loss": 0.8877, "step": 21670 }, { "epoch": 2.5005768343331796, "grad_norm": 0.40610867738723755, "learning_rate": 1.641517373339696e-05, "loss": 0.9315, "step": 21675 }, { "epoch": 2.501153668666359, "grad_norm": 0.4094102084636688, "learning_rate": 1.6378346637552512e-05, "loss": 0.869, "step": 21680 }, { "epoch": 2.5017305029995387, "grad_norm": 0.3844679892063141, "learning_rate": 1.6341557213425708e-05, "loss": 0.822, "step": 21685 }, { "epoch": 2.5023073373327183, "grad_norm": 0.39057260751724243, "learning_rate": 1.6304805477590312e-05, "loss": 0.8623, "step": 21690 }, { "epoch": 2.5028841716658974, "grad_norm": 0.40054717659950256, "learning_rate": 1.6268091446602973e-05, "loss": 0.8537, "step": 21695 }, { "epoch": 2.503461005999077, "grad_norm": 0.4297622740268707, "learning_rate": 1.6231415137003537e-05, "loss": 0.8425, "step": 21700 }, { "epoch": 2.5040378403322565, "grad_norm": 0.4544278383255005, "learning_rate": 1.6194776565314672e-05, "loss": 0.8456, "step": 21705 }, { "epoch": 2.504614674665436, "grad_norm": 0.42204564809799194, "learning_rate": 1.6158175748042147e-05, "loss": 0.9345, "step": 21710 }, { "epoch": 2.5051915089986156, "grad_norm": 0.40919971466064453, "learning_rate": 1.6121612701674725e-05, "loss": 0.8399, "step": 21715 }, { "epoch": 2.505768343331795, "grad_norm": 0.44898590445518494, "learning_rate": 1.6085087442684122e-05, "loss": 0.9533, "step": 21720 }, { "epoch": 2.5063451776649748, "grad_norm": 0.4511086940765381, "learning_rate": 1.6048599987525015e-05, "loss": 0.8747, "step": 21725 }, { "epoch": 2.506922011998154, "grad_norm": 0.37081825733184814, "learning_rate": 1.601215035263508e-05, "loss": 0.8409, "step": 21730 }, { "epoch": 2.507498846331334, "grad_norm": 0.4122322201728821, "learning_rate": 1.597573855443497e-05, "loss": 0.853, "step": 21735 }, { "epoch": 2.508075680664513, "grad_norm": 0.45850232243537903, "learning_rate": 1.5939364609328265e-05, "loss": 0.8752, "step": 21740 }, { "epoch": 2.5086525149976926, "grad_norm": 0.43158847093582153, "learning_rate": 1.5903028533701457e-05, "loss": 0.8965, "step": 21745 }, { "epoch": 2.509229349330872, "grad_norm": 0.41096311807632446, "learning_rate": 1.5866730343924085e-05, "loss": 0.9114, "step": 21750 }, { "epoch": 2.5098061836640517, "grad_norm": 0.4422203004360199, "learning_rate": 1.5830470056348513e-05, "loss": 0.8432, "step": 21755 }, { "epoch": 2.5103830179972313, "grad_norm": 0.42912983894348145, "learning_rate": 1.5794247687310093e-05, "loss": 0.8377, "step": 21760 }, { "epoch": 2.510959852330411, "grad_norm": 0.4608105719089508, "learning_rate": 1.575806325312702e-05, "loss": 0.8773, "step": 21765 }, { "epoch": 2.5115366866635904, "grad_norm": 0.43036070466041565, "learning_rate": 1.5721916770100532e-05, "loss": 0.8908, "step": 21770 }, { "epoch": 2.5121135209967695, "grad_norm": 0.44745394587516785, "learning_rate": 1.5685808254514634e-05, "loss": 0.8709, "step": 21775 }, { "epoch": 2.512690355329949, "grad_norm": 0.4649929702281952, "learning_rate": 1.5649737722636315e-05, "loss": 0.849, "step": 21780 }, { "epoch": 2.5132671896631287, "grad_norm": 0.4303283989429474, "learning_rate": 1.5613705190715356e-05, "loss": 0.8852, "step": 21785 }, { "epoch": 2.5138440239963082, "grad_norm": 0.5020930171012878, "learning_rate": 1.557771067498458e-05, "loss": 0.9027, "step": 21790 }, { "epoch": 2.514420858329488, "grad_norm": 0.359627902507782, "learning_rate": 1.554175419165951e-05, "loss": 0.8415, "step": 21795 }, { "epoch": 2.5149976926626674, "grad_norm": 0.41575315594673157, "learning_rate": 1.5505835756938636e-05, "loss": 0.8718, "step": 21800 }, { "epoch": 2.515574526995847, "grad_norm": 0.400100439786911, "learning_rate": 1.546995538700329e-05, "loss": 0.8759, "step": 21805 }, { "epoch": 2.516151361329026, "grad_norm": 0.40466317534446716, "learning_rate": 1.5434113098017667e-05, "loss": 0.8871, "step": 21810 }, { "epoch": 2.516728195662206, "grad_norm": 0.4189120829105377, "learning_rate": 1.5398308906128735e-05, "loss": 0.8984, "step": 21815 }, { "epoch": 2.517305029995385, "grad_norm": 0.4149659276008606, "learning_rate": 1.5362542827466387e-05, "loss": 0.8664, "step": 21820 }, { "epoch": 2.5178818643285648, "grad_norm": 0.41705700755119324, "learning_rate": 1.5326814878143304e-05, "loss": 0.8438, "step": 21825 }, { "epoch": 2.5184586986617443, "grad_norm": 0.3998613953590393, "learning_rate": 1.529112507425502e-05, "loss": 0.869, "step": 21830 }, { "epoch": 2.519035532994924, "grad_norm": 0.4276602566242218, "learning_rate": 1.5255473431879785e-05, "loss": 0.8712, "step": 21835 }, { "epoch": 2.5196123673281035, "grad_norm": 0.4247130751609802, "learning_rate": 1.5219859967078854e-05, "loss": 0.8362, "step": 21840 }, { "epoch": 2.520189201661283, "grad_norm": 0.42334526777267456, "learning_rate": 1.5184284695896056e-05, "loss": 0.8558, "step": 21845 }, { "epoch": 2.5207660359944626, "grad_norm": 0.4518727958202362, "learning_rate": 1.5148747634358185e-05, "loss": 0.8547, "step": 21850 }, { "epoch": 2.5213428703276417, "grad_norm": 0.39660370349884033, "learning_rate": 1.5113248798474689e-05, "loss": 0.8924, "step": 21855 }, { "epoch": 2.5219197046608213, "grad_norm": 0.431823194026947, "learning_rate": 1.5077788204237952e-05, "loss": 0.8683, "step": 21860 }, { "epoch": 2.522496538994001, "grad_norm": 0.4138554036617279, "learning_rate": 1.5042365867622976e-05, "loss": 0.8733, "step": 21865 }, { "epoch": 2.5230733733271804, "grad_norm": 0.4500097334384918, "learning_rate": 1.5006981804587595e-05, "loss": 0.8854, "step": 21870 }, { "epoch": 2.52365020766036, "grad_norm": 0.3916674256324768, "learning_rate": 1.4971636031072422e-05, "loss": 0.8951, "step": 21875 }, { "epoch": 2.5242270419935395, "grad_norm": 0.4572630524635315, "learning_rate": 1.4936328563000812e-05, "loss": 0.8475, "step": 21880 }, { "epoch": 2.524803876326719, "grad_norm": 0.44522368907928467, "learning_rate": 1.4901059416278806e-05, "loss": 0.8946, "step": 21885 }, { "epoch": 2.525380710659898, "grad_norm": 0.5151233673095703, "learning_rate": 1.4865828606795241e-05, "loss": 0.8551, "step": 21890 }, { "epoch": 2.5259575449930782, "grad_norm": 0.43266403675079346, "learning_rate": 1.4830636150421662e-05, "loss": 0.8539, "step": 21895 }, { "epoch": 2.5265343793262574, "grad_norm": 0.4494539499282837, "learning_rate": 1.4795482063012367e-05, "loss": 0.8631, "step": 21900 }, { "epoch": 2.527111213659437, "grad_norm": 0.44507652521133423, "learning_rate": 1.4760366360404266e-05, "loss": 0.9034, "step": 21905 }, { "epoch": 2.5276880479926165, "grad_norm": 0.44144436717033386, "learning_rate": 1.4725289058417158e-05, "loss": 0.8356, "step": 21910 }, { "epoch": 2.528264882325796, "grad_norm": 0.3664838969707489, "learning_rate": 1.4690250172853348e-05, "loss": 0.8655, "step": 21915 }, { "epoch": 2.5288417166589756, "grad_norm": 0.4298860728740692, "learning_rate": 1.4655249719497965e-05, "loss": 0.8781, "step": 21920 }, { "epoch": 2.529418550992155, "grad_norm": 0.43058478832244873, "learning_rate": 1.4620287714118764e-05, "loss": 0.9014, "step": 21925 }, { "epoch": 2.5299953853253347, "grad_norm": 0.43566563725471497, "learning_rate": 1.4585364172466231e-05, "loss": 0.8951, "step": 21930 }, { "epoch": 2.530572219658514, "grad_norm": 0.4238441586494446, "learning_rate": 1.4550479110273429e-05, "loss": 0.9008, "step": 21935 }, { "epoch": 2.5311490539916934, "grad_norm": 0.514903724193573, "learning_rate": 1.4515632543256197e-05, "loss": 0.8527, "step": 21940 }, { "epoch": 2.531725888324873, "grad_norm": 0.3885857164859772, "learning_rate": 1.4480824487112943e-05, "loss": 0.8232, "step": 21945 }, { "epoch": 2.5323027226580526, "grad_norm": 0.4228072166442871, "learning_rate": 1.4446054957524802e-05, "loss": 0.9215, "step": 21950 }, { "epoch": 2.532879556991232, "grad_norm": 0.4234387278556824, "learning_rate": 1.441132397015551e-05, "loss": 0.886, "step": 21955 }, { "epoch": 2.5334563913244117, "grad_norm": 0.39268434047698975, "learning_rate": 1.437663154065142e-05, "loss": 0.8399, "step": 21960 }, { "epoch": 2.5340332256575913, "grad_norm": 0.4078601896762848, "learning_rate": 1.4341977684641539e-05, "loss": 0.8798, "step": 21965 }, { "epoch": 2.5346100599907704, "grad_norm": 0.4169177711009979, "learning_rate": 1.4307362417737512e-05, "loss": 0.8964, "step": 21970 }, { "epoch": 2.5351868943239504, "grad_norm": 0.40286895632743835, "learning_rate": 1.4272785755533601e-05, "loss": 0.8619, "step": 21975 }, { "epoch": 2.5357637286571295, "grad_norm": 0.43225333094596863, "learning_rate": 1.4238247713606622e-05, "loss": 0.8598, "step": 21980 }, { "epoch": 2.536340562990309, "grad_norm": 0.5134694576263428, "learning_rate": 1.4203748307516052e-05, "loss": 0.862, "step": 21985 }, { "epoch": 2.5369173973234886, "grad_norm": 0.5272102355957031, "learning_rate": 1.4169287552803923e-05, "loss": 0.876, "step": 21990 }, { "epoch": 2.537494231656668, "grad_norm": 0.44927138090133667, "learning_rate": 1.4134865464994894e-05, "loss": 0.8765, "step": 21995 }, { "epoch": 2.5380710659898478, "grad_norm": 0.44614723324775696, "learning_rate": 1.4100482059596177e-05, "loss": 0.8442, "step": 22000 }, { "epoch": 2.5386479003230273, "grad_norm": 0.4107849895954132, "learning_rate": 1.4066137352097575e-05, "loss": 0.8332, "step": 22005 }, { "epoch": 2.539224734656207, "grad_norm": 0.3938974440097809, "learning_rate": 1.4031831357971414e-05, "loss": 0.8645, "step": 22010 }, { "epoch": 2.539801568989386, "grad_norm": 0.4226299822330475, "learning_rate": 1.3997564092672622e-05, "loss": 0.8903, "step": 22015 }, { "epoch": 2.540378403322566, "grad_norm": 0.4413795471191406, "learning_rate": 1.396333557163868e-05, "loss": 0.9201, "step": 22020 }, { "epoch": 2.540955237655745, "grad_norm": 0.4873966872692108, "learning_rate": 1.3929145810289612e-05, "loss": 0.884, "step": 22025 }, { "epoch": 2.5415320719889247, "grad_norm": 0.435316264629364, "learning_rate": 1.3894994824027951e-05, "loss": 0.8776, "step": 22030 }, { "epoch": 2.5421089063221043, "grad_norm": 0.4176023006439209, "learning_rate": 1.3860882628238781e-05, "loss": 0.8666, "step": 22035 }, { "epoch": 2.542685740655284, "grad_norm": 0.4190213084220886, "learning_rate": 1.3826809238289717e-05, "loss": 0.894, "step": 22040 }, { "epoch": 2.5432625749884634, "grad_norm": 0.45858487486839294, "learning_rate": 1.3792774669530917e-05, "loss": 0.8955, "step": 22045 }, { "epoch": 2.543839409321643, "grad_norm": 0.44214770197868347, "learning_rate": 1.3758778937294947e-05, "loss": 0.8868, "step": 22050 }, { "epoch": 2.5444162436548226, "grad_norm": 0.4137255549430847, "learning_rate": 1.3724822056897046e-05, "loss": 0.8208, "step": 22055 }, { "epoch": 2.5449930779880017, "grad_norm": 0.4444392919540405, "learning_rate": 1.369090404363479e-05, "loss": 0.8669, "step": 22060 }, { "epoch": 2.5455699123211812, "grad_norm": 0.433173269033432, "learning_rate": 1.365702491278833e-05, "loss": 0.8687, "step": 22065 }, { "epoch": 2.546146746654361, "grad_norm": 0.4325042963027954, "learning_rate": 1.3623184679620272e-05, "loss": 0.8709, "step": 22070 }, { "epoch": 2.5467235809875404, "grad_norm": 0.4476592540740967, "learning_rate": 1.358938335937574e-05, "loss": 0.9137, "step": 22075 }, { "epoch": 2.54730041532072, "grad_norm": 0.42596349120140076, "learning_rate": 1.3555620967282235e-05, "loss": 0.8615, "step": 22080 }, { "epoch": 2.5478772496538995, "grad_norm": 0.38326412439346313, "learning_rate": 1.352189751854982e-05, "loss": 0.8753, "step": 22085 }, { "epoch": 2.548454083987079, "grad_norm": 0.4048885405063629, "learning_rate": 1.3488213028370967e-05, "loss": 0.8891, "step": 22090 }, { "epoch": 2.549030918320258, "grad_norm": 0.39084184169769287, "learning_rate": 1.3454567511920634e-05, "loss": 0.8896, "step": 22095 }, { "epoch": 2.549607752653438, "grad_norm": 0.41996973752975464, "learning_rate": 1.3420960984356134e-05, "loss": 0.8452, "step": 22100 }, { "epoch": 2.5501845869866173, "grad_norm": 0.47061657905578613, "learning_rate": 1.33873934608173e-05, "loss": 0.8706, "step": 22105 }, { "epoch": 2.550761421319797, "grad_norm": 0.4455969035625458, "learning_rate": 1.3353864956426366e-05, "loss": 0.8736, "step": 22110 }, { "epoch": 2.5513382556529764, "grad_norm": 0.4241873025894165, "learning_rate": 1.3320375486288017e-05, "loss": 0.8226, "step": 22115 }, { "epoch": 2.551915089986156, "grad_norm": 0.3960394561290741, "learning_rate": 1.3286925065489253e-05, "loss": 0.8473, "step": 22120 }, { "epoch": 2.5524919243193356, "grad_norm": 0.43792304396629333, "learning_rate": 1.3253513709099652e-05, "loss": 0.8635, "step": 22125 }, { "epoch": 2.553068758652515, "grad_norm": 0.44686734676361084, "learning_rate": 1.3220141432171007e-05, "loss": 0.8711, "step": 22130 }, { "epoch": 2.5536455929856947, "grad_norm": 0.4718668460845947, "learning_rate": 1.3186808249737658e-05, "loss": 0.8091, "step": 22135 }, { "epoch": 2.554222427318874, "grad_norm": 0.42717134952545166, "learning_rate": 1.3153514176816195e-05, "loss": 0.8444, "step": 22140 }, { "epoch": 2.5547992616520534, "grad_norm": 0.403614342212677, "learning_rate": 1.3120259228405751e-05, "loss": 0.7916, "step": 22145 }, { "epoch": 2.555376095985233, "grad_norm": 0.4558524489402771, "learning_rate": 1.3087043419487676e-05, "loss": 0.8748, "step": 22150 }, { "epoch": 2.5559529303184125, "grad_norm": 0.47509682178497314, "learning_rate": 1.305386676502578e-05, "loss": 0.8737, "step": 22155 }, { "epoch": 2.556529764651592, "grad_norm": 0.4862813651561737, "learning_rate": 1.3020729279966215e-05, "loss": 0.9136, "step": 22160 }, { "epoch": 2.5571065989847717, "grad_norm": 0.413223534822464, "learning_rate": 1.2987630979237509e-05, "loss": 0.9088, "step": 22165 }, { "epoch": 2.5576834333179512, "grad_norm": 0.4112166464328766, "learning_rate": 1.2954571877750443e-05, "loss": 0.864, "step": 22170 }, { "epoch": 2.5582602676511303, "grad_norm": 0.4073093831539154, "learning_rate": 1.2921551990398262e-05, "loss": 0.8841, "step": 22175 }, { "epoch": 2.5588371019843104, "grad_norm": 0.4392577111721039, "learning_rate": 1.2888571332056464e-05, "loss": 0.8658, "step": 22180 }, { "epoch": 2.5594139363174895, "grad_norm": 0.3867861032485962, "learning_rate": 1.2855629917582935e-05, "loss": 0.8429, "step": 22185 }, { "epoch": 2.559990770650669, "grad_norm": 0.43121537566185, "learning_rate": 1.2822727761817776e-05, "loss": 0.8772, "step": 22190 }, { "epoch": 2.5605676049838486, "grad_norm": 0.4013991355895996, "learning_rate": 1.2789864879583557e-05, "loss": 0.8684, "step": 22195 }, { "epoch": 2.561144439317028, "grad_norm": 0.38427436351776123, "learning_rate": 1.2757041285685011e-05, "loss": 0.8633, "step": 22200 }, { "epoch": 2.5617212736502077, "grad_norm": 0.41684120893478394, "learning_rate": 1.2724256994909268e-05, "loss": 0.8734, "step": 22205 }, { "epoch": 2.5622981079833873, "grad_norm": 0.4538641571998596, "learning_rate": 1.2691512022025653e-05, "loss": 0.856, "step": 22210 }, { "epoch": 2.562874942316567, "grad_norm": 0.39726805686950684, "learning_rate": 1.2658806381785926e-05, "loss": 0.8719, "step": 22215 }, { "epoch": 2.563451776649746, "grad_norm": 0.4542260468006134, "learning_rate": 1.2626140088923987e-05, "loss": 0.8779, "step": 22220 }, { "epoch": 2.5640286109829256, "grad_norm": 0.4026002883911133, "learning_rate": 1.2593513158156089e-05, "loss": 0.8843, "step": 22225 }, { "epoch": 2.564605445316105, "grad_norm": 0.43637779355049133, "learning_rate": 1.2560925604180673e-05, "loss": 0.877, "step": 22230 }, { "epoch": 2.5651822796492847, "grad_norm": 0.4000450670719147, "learning_rate": 1.2528377441678585e-05, "loss": 0.8963, "step": 22235 }, { "epoch": 2.5657591139824643, "grad_norm": 0.41496726870536804, "learning_rate": 1.2495868685312761e-05, "loss": 0.9021, "step": 22240 }, { "epoch": 2.566335948315644, "grad_norm": 0.4522967040538788, "learning_rate": 1.2463399349728488e-05, "loss": 0.8862, "step": 22245 }, { "epoch": 2.5669127826488234, "grad_norm": 0.3863905966281891, "learning_rate": 1.2430969449553276e-05, "loss": 0.8659, "step": 22250 }, { "epoch": 2.5674896169820025, "grad_norm": 0.48074695467948914, "learning_rate": 1.2398578999396848e-05, "loss": 0.8919, "step": 22255 }, { "epoch": 2.5680664513151825, "grad_norm": 0.38571128249168396, "learning_rate": 1.2366228013851156e-05, "loss": 0.8478, "step": 22260 }, { "epoch": 2.5686432856483616, "grad_norm": 0.4104611873626709, "learning_rate": 1.2333916507490384e-05, "loss": 0.8858, "step": 22265 }, { "epoch": 2.569220119981541, "grad_norm": 0.45108452439308167, "learning_rate": 1.2301644494870934e-05, "loss": 0.8982, "step": 22270 }, { "epoch": 2.5697969543147208, "grad_norm": 0.38730573654174805, "learning_rate": 1.2269411990531421e-05, "loss": 0.8801, "step": 22275 }, { "epoch": 2.5703737886479003, "grad_norm": 0.40812236070632935, "learning_rate": 1.2237219008992607e-05, "loss": 0.8772, "step": 22280 }, { "epoch": 2.57095062298108, "grad_norm": 0.4362446367740631, "learning_rate": 1.2205065564757568e-05, "loss": 0.8175, "step": 22285 }, { "epoch": 2.5715274573142595, "grad_norm": 0.47301381826400757, "learning_rate": 1.2172951672311427e-05, "loss": 0.8879, "step": 22290 }, { "epoch": 2.572104291647439, "grad_norm": 0.4802537262439728, "learning_rate": 1.2140877346121604e-05, "loss": 0.8624, "step": 22295 }, { "epoch": 2.572681125980618, "grad_norm": 0.39373570680618286, "learning_rate": 1.2108842600637571e-05, "loss": 0.8733, "step": 22300 }, { "epoch": 2.5732579603137977, "grad_norm": 0.38379108905792236, "learning_rate": 1.207684745029114e-05, "loss": 0.8824, "step": 22305 }, { "epoch": 2.5738347946469773, "grad_norm": 0.44027572870254517, "learning_rate": 1.2044891909496126e-05, "loss": 0.8337, "step": 22310 }, { "epoch": 2.574411628980157, "grad_norm": 0.4214009642601013, "learning_rate": 1.2012975992648568e-05, "loss": 0.8312, "step": 22315 }, { "epoch": 2.5749884633133364, "grad_norm": 0.46107718348503113, "learning_rate": 1.1981099714126654e-05, "loss": 0.8527, "step": 22320 }, { "epoch": 2.575565297646516, "grad_norm": 0.4151912331581116, "learning_rate": 1.1949263088290742e-05, "loss": 0.8836, "step": 22325 }, { "epoch": 2.5761421319796955, "grad_norm": 0.4159373939037323, "learning_rate": 1.1917466129483246e-05, "loss": 0.8533, "step": 22330 }, { "epoch": 2.5767189663128747, "grad_norm": 0.4287991523742676, "learning_rate": 1.1885708852028777e-05, "loss": 0.894, "step": 22335 }, { "epoch": 2.5772958006460547, "grad_norm": 0.4311448335647583, "learning_rate": 1.1853991270234043e-05, "loss": 0.8697, "step": 22340 }, { "epoch": 2.577872634979234, "grad_norm": 0.43213003873825073, "learning_rate": 1.1822313398387919e-05, "loss": 0.8727, "step": 22345 }, { "epoch": 2.5784494693124134, "grad_norm": 0.42621949315071106, "learning_rate": 1.1790675250761263e-05, "loss": 0.8978, "step": 22350 }, { "epoch": 2.579026303645593, "grad_norm": 0.4279092848300934, "learning_rate": 1.1759076841607208e-05, "loss": 0.8577, "step": 22355 }, { "epoch": 2.5796031379787725, "grad_norm": 0.4713653326034546, "learning_rate": 1.1727518185160847e-05, "loss": 0.8832, "step": 22360 }, { "epoch": 2.580179972311952, "grad_norm": 0.43556147813796997, "learning_rate": 1.1695999295639459e-05, "loss": 0.8847, "step": 22365 }, { "epoch": 2.5807568066451316, "grad_norm": 0.4158504605293274, "learning_rate": 1.1664520187242289e-05, "loss": 0.878, "step": 22370 }, { "epoch": 2.581333640978311, "grad_norm": 0.5004168152809143, "learning_rate": 1.1633080874150826e-05, "loss": 0.8739, "step": 22375 }, { "epoch": 2.5819104753114903, "grad_norm": 0.42205938696861267, "learning_rate": 1.1601681370528484e-05, "loss": 0.8659, "step": 22380 }, { "epoch": 2.5824873096446703, "grad_norm": 0.4189571738243103, "learning_rate": 1.1570321690520824e-05, "loss": 0.923, "step": 22385 }, { "epoch": 2.5830641439778494, "grad_norm": 0.49161621928215027, "learning_rate": 1.1539001848255426e-05, "loss": 0.8671, "step": 22390 }, { "epoch": 2.583640978311029, "grad_norm": 0.42730411887168884, "learning_rate": 1.150772185784198e-05, "loss": 0.9117, "step": 22395 }, { "epoch": 2.5842178126442086, "grad_norm": 0.40977731347084045, "learning_rate": 1.1476481733372134e-05, "loss": 0.8231, "step": 22400 }, { "epoch": 2.584794646977388, "grad_norm": 0.423208624124527, "learning_rate": 1.1445281488919645e-05, "loss": 0.868, "step": 22405 }, { "epoch": 2.5853714813105677, "grad_norm": 0.4165674149990082, "learning_rate": 1.1414121138540279e-05, "loss": 0.8439, "step": 22410 }, { "epoch": 2.5859483156437473, "grad_norm": 0.5030677914619446, "learning_rate": 1.138300069627184e-05, "loss": 0.844, "step": 22415 }, { "epoch": 2.586525149976927, "grad_norm": 0.49662232398986816, "learning_rate": 1.1351920176134168e-05, "loss": 0.8879, "step": 22420 }, { "epoch": 2.587101984310106, "grad_norm": 0.40264540910720825, "learning_rate": 1.1320879592129052e-05, "loss": 0.8523, "step": 22425 }, { "epoch": 2.5876788186432855, "grad_norm": 0.4302821457386017, "learning_rate": 1.1289878958240364e-05, "loss": 0.8544, "step": 22430 }, { "epoch": 2.588255652976465, "grad_norm": 0.44366252422332764, "learning_rate": 1.125891828843394e-05, "loss": 0.8559, "step": 22435 }, { "epoch": 2.5888324873096447, "grad_norm": 0.4585106074810028, "learning_rate": 1.1227997596657636e-05, "loss": 0.8834, "step": 22440 }, { "epoch": 2.5894093216428242, "grad_norm": 0.41217270493507385, "learning_rate": 1.1197116896841297e-05, "loss": 0.8668, "step": 22445 }, { "epoch": 2.589986155976004, "grad_norm": 0.39120951294898987, "learning_rate": 1.1166276202896698e-05, "loss": 0.8655, "step": 22450 }, { "epoch": 2.5905629903091834, "grad_norm": 0.44525665044784546, "learning_rate": 1.1135475528717642e-05, "loss": 0.8944, "step": 22455 }, { "epoch": 2.5911398246423625, "grad_norm": 0.4466302990913391, "learning_rate": 1.1104714888179901e-05, "loss": 0.8592, "step": 22460 }, { "epoch": 2.5917166589755425, "grad_norm": 0.41439515352249146, "learning_rate": 1.107399429514121e-05, "loss": 0.8623, "step": 22465 }, { "epoch": 2.5922934933087216, "grad_norm": 0.41282886266708374, "learning_rate": 1.1043313763441277e-05, "loss": 0.8325, "step": 22470 }, { "epoch": 2.592870327641901, "grad_norm": 0.4122515022754669, "learning_rate": 1.1012673306901689e-05, "loss": 0.8651, "step": 22475 }, { "epoch": 2.5934471619750807, "grad_norm": 0.4221882224082947, "learning_rate": 1.098207293932605e-05, "loss": 0.8378, "step": 22480 }, { "epoch": 2.5940239963082603, "grad_norm": 0.43227624893188477, "learning_rate": 1.0951512674499898e-05, "loss": 0.8472, "step": 22485 }, { "epoch": 2.59460083064144, "grad_norm": 0.4147707223892212, "learning_rate": 1.092099252619071e-05, "loss": 0.8718, "step": 22490 }, { "epoch": 2.5951776649746194, "grad_norm": 0.45452365279197693, "learning_rate": 1.0890512508147821e-05, "loss": 0.8584, "step": 22495 }, { "epoch": 2.595754499307799, "grad_norm": 0.3966226875782013, "learning_rate": 1.0860072634102569e-05, "loss": 0.8835, "step": 22500 }, { "epoch": 2.596331333640978, "grad_norm": 0.4434650242328644, "learning_rate": 1.0829672917768175e-05, "loss": 0.8187, "step": 22505 }, { "epoch": 2.5969081679741577, "grad_norm": 0.5329386591911316, "learning_rate": 1.0799313372839759e-05, "loss": 0.9135, "step": 22510 }, { "epoch": 2.5974850023073373, "grad_norm": 0.6921077966690063, "learning_rate": 1.0768994012994371e-05, "loss": 0.8589, "step": 22515 }, { "epoch": 2.598061836640517, "grad_norm": 0.4159179627895355, "learning_rate": 1.0738714851890963e-05, "loss": 0.8675, "step": 22520 }, { "epoch": 2.5986386709736964, "grad_norm": 0.5262424945831299, "learning_rate": 1.07084759031703e-05, "loss": 0.9055, "step": 22525 }, { "epoch": 2.599215505306876, "grad_norm": 0.4461279511451721, "learning_rate": 1.0678277180455109e-05, "loss": 0.835, "step": 22530 }, { "epoch": 2.5997923396400555, "grad_norm": 0.4341142177581787, "learning_rate": 1.0648118697349996e-05, "loss": 0.8783, "step": 22535 }, { "epoch": 2.6003691739732346, "grad_norm": 0.4575253427028656, "learning_rate": 1.0618000467441402e-05, "loss": 0.9055, "step": 22540 }, { "epoch": 2.6009460083064146, "grad_norm": 0.4164150059223175, "learning_rate": 1.0587922504297642e-05, "loss": 0.868, "step": 22545 }, { "epoch": 2.6015228426395938, "grad_norm": 0.431892067193985, "learning_rate": 1.0557884821468899e-05, "loss": 0.8345, "step": 22550 }, { "epoch": 2.6020996769727733, "grad_norm": 0.48790407180786133, "learning_rate": 1.0527887432487204e-05, "loss": 0.8831, "step": 22555 }, { "epoch": 2.602676511305953, "grad_norm": 0.44928449392318726, "learning_rate": 1.049793035086647e-05, "loss": 0.838, "step": 22560 }, { "epoch": 2.6032533456391325, "grad_norm": 0.3979651927947998, "learning_rate": 1.0468013590102355e-05, "loss": 0.8959, "step": 22565 }, { "epoch": 2.603830179972312, "grad_norm": 0.4415774345397949, "learning_rate": 1.043813716367249e-05, "loss": 0.8473, "step": 22570 }, { "epoch": 2.6044070143054916, "grad_norm": 0.4301419258117676, "learning_rate": 1.040830108503622e-05, "loss": 0.8718, "step": 22575 }, { "epoch": 2.604983848638671, "grad_norm": 0.4541887640953064, "learning_rate": 1.0378505367634794e-05, "loss": 0.8461, "step": 22580 }, { "epoch": 2.6055606829718503, "grad_norm": 0.4886419177055359, "learning_rate": 1.0348750024891162e-05, "loss": 0.8319, "step": 22585 }, { "epoch": 2.60613751730503, "grad_norm": 0.4782902002334595, "learning_rate": 1.031903507021027e-05, "loss": 0.877, "step": 22590 }, { "epoch": 2.6067143516382094, "grad_norm": 0.45295077562332153, "learning_rate": 1.0289360516978686e-05, "loss": 0.856, "step": 22595 }, { "epoch": 2.607291185971389, "grad_norm": 0.4169815480709076, "learning_rate": 1.0259726378564871e-05, "loss": 0.8554, "step": 22600 }, { "epoch": 2.6078680203045685, "grad_norm": 0.46750664710998535, "learning_rate": 1.0230132668319082e-05, "loss": 0.911, "step": 22605 }, { "epoch": 2.608444854637748, "grad_norm": 0.41136133670806885, "learning_rate": 1.020057939957334e-05, "loss": 0.8669, "step": 22610 }, { "epoch": 2.6090216889709277, "grad_norm": 0.46126261353492737, "learning_rate": 1.017106658564142e-05, "loss": 0.8655, "step": 22615 }, { "epoch": 2.609598523304107, "grad_norm": 0.4138302206993103, "learning_rate": 1.014159423981893e-05, "loss": 0.8647, "step": 22620 }, { "epoch": 2.610175357637287, "grad_norm": 0.4453456401824951, "learning_rate": 1.0112162375383205e-05, "loss": 0.8721, "step": 22625 }, { "epoch": 2.610752191970466, "grad_norm": 0.4735527038574219, "learning_rate": 1.0082771005593394e-05, "loss": 0.8572, "step": 22630 }, { "epoch": 2.6113290263036455, "grad_norm": 0.4040181338787079, "learning_rate": 1.0053420143690284e-05, "loss": 0.8589, "step": 22635 }, { "epoch": 2.611905860636825, "grad_norm": 0.44362005591392517, "learning_rate": 1.0024109802896597e-05, "loss": 0.9095, "step": 22640 }, { "epoch": 2.6124826949700046, "grad_norm": 0.43080490827560425, "learning_rate": 9.994839996416628e-06, "loss": 0.8964, "step": 22645 }, { "epoch": 2.613059529303184, "grad_norm": 0.4174696207046509, "learning_rate": 9.965610737436515e-06, "loss": 0.8707, "step": 22650 }, { "epoch": 2.6136363636363638, "grad_norm": 0.40807998180389404, "learning_rate": 9.936422039124049e-06, "loss": 0.8727, "step": 22655 }, { "epoch": 2.6142131979695433, "grad_norm": 0.46339353919029236, "learning_rate": 9.907273914628879e-06, "loss": 0.8609, "step": 22660 }, { "epoch": 2.6147900323027224, "grad_norm": 0.4206438958644867, "learning_rate": 9.87816637708221e-06, "loss": 0.857, "step": 22665 }, { "epoch": 2.615366866635902, "grad_norm": 0.45879271626472473, "learning_rate": 9.849099439597087e-06, "loss": 0.8676, "step": 22670 }, { "epoch": 2.6159437009690816, "grad_norm": 0.4163302481174469, "learning_rate": 9.820073115268213e-06, "loss": 0.8893, "step": 22675 }, { "epoch": 2.616520535302261, "grad_norm": 0.4696640074253082, "learning_rate": 9.791087417172019e-06, "loss": 0.9178, "step": 22680 }, { "epoch": 2.6170973696354407, "grad_norm": 0.39774516224861145, "learning_rate": 9.762142358366588e-06, "loss": 0.8646, "step": 22685 }, { "epoch": 2.6176742039686203, "grad_norm": 0.42907288670539856, "learning_rate": 9.733237951891728e-06, "loss": 0.8773, "step": 22690 }, { "epoch": 2.6182510383018, "grad_norm": 0.4168655574321747, "learning_rate": 9.704374210768952e-06, "loss": 0.8637, "step": 22695 }, { "epoch": 2.618827872634979, "grad_norm": 0.4312590956687927, "learning_rate": 9.675551148001439e-06, "loss": 0.8503, "step": 22700 }, { "epoch": 2.619404706968159, "grad_norm": 0.4251955449581146, "learning_rate": 9.646768776574e-06, "loss": 0.8621, "step": 22705 }, { "epoch": 2.619981541301338, "grad_norm": 0.4650317430496216, "learning_rate": 9.618027109453176e-06, "loss": 0.8369, "step": 22710 }, { "epoch": 2.6205583756345177, "grad_norm": 0.39958468079566956, "learning_rate": 9.58932615958712e-06, "loss": 0.8926, "step": 22715 }, { "epoch": 2.621135209967697, "grad_norm": 0.4295012652873993, "learning_rate": 9.560665939905711e-06, "loss": 0.8615, "step": 22720 }, { "epoch": 2.621712044300877, "grad_norm": 0.3877478539943695, "learning_rate": 9.532046463320365e-06, "loss": 0.8575, "step": 22725 }, { "epoch": 2.6222888786340564, "grad_norm": 0.4100772738456726, "learning_rate": 9.503467742724292e-06, "loss": 0.9196, "step": 22730 }, { "epoch": 2.622865712967236, "grad_norm": 0.4218401312828064, "learning_rate": 9.474929790992193e-06, "loss": 0.8966, "step": 22735 }, { "epoch": 2.6234425473004155, "grad_norm": 0.4165334701538086, "learning_rate": 9.446432620980517e-06, "loss": 0.8652, "step": 22740 }, { "epoch": 2.6240193816335946, "grad_norm": 0.36938929557800293, "learning_rate": 9.417976245527238e-06, "loss": 0.8635, "step": 22745 }, { "epoch": 2.6245962159667746, "grad_norm": 0.45037856698036194, "learning_rate": 9.389560677452092e-06, "loss": 0.8718, "step": 22750 }, { "epoch": 2.6251730502999537, "grad_norm": 0.5322123765945435, "learning_rate": 9.361185929556282e-06, "loss": 0.8919, "step": 22755 }, { "epoch": 2.6257498846331333, "grad_norm": 0.4363367557525635, "learning_rate": 9.332852014622706e-06, "loss": 0.8882, "step": 22760 }, { "epoch": 2.626326718966313, "grad_norm": 0.3961765170097351, "learning_rate": 9.304558945415842e-06, "loss": 0.8816, "step": 22765 }, { "epoch": 2.6269035532994924, "grad_norm": 0.4418666660785675, "learning_rate": 9.276306734681805e-06, "loss": 0.851, "step": 22770 }, { "epoch": 2.627480387632672, "grad_norm": 0.4532480239868164, "learning_rate": 9.248095395148226e-06, "loss": 0.8243, "step": 22775 }, { "epoch": 2.6280572219658516, "grad_norm": 0.45740392804145813, "learning_rate": 9.219924939524383e-06, "loss": 0.901, "step": 22780 }, { "epoch": 2.628634056299031, "grad_norm": 0.4594760239124298, "learning_rate": 9.191795380501134e-06, "loss": 0.8952, "step": 22785 }, { "epoch": 2.6292108906322103, "grad_norm": 0.41434475779533386, "learning_rate": 9.163706730750909e-06, "loss": 0.8527, "step": 22790 }, { "epoch": 2.62978772496539, "grad_norm": 0.40205803513526917, "learning_rate": 9.135659002927643e-06, "loss": 0.875, "step": 22795 }, { "epoch": 2.6303645592985694, "grad_norm": 0.4774476885795593, "learning_rate": 9.107652209666973e-06, "loss": 0.8499, "step": 22800 }, { "epoch": 2.630941393631749, "grad_norm": 0.42177462577819824, "learning_rate": 9.079686363585949e-06, "loss": 0.8989, "step": 22805 }, { "epoch": 2.6315182279649285, "grad_norm": 0.4283266067504883, "learning_rate": 9.051761477283283e-06, "loss": 0.8996, "step": 22810 }, { "epoch": 2.632095062298108, "grad_norm": 0.38501039147377014, "learning_rate": 9.023877563339134e-06, "loss": 0.8513, "step": 22815 }, { "epoch": 2.6326718966312876, "grad_norm": 0.43188706040382385, "learning_rate": 8.996034634315332e-06, "loss": 0.9017, "step": 22820 }, { "epoch": 2.6332487309644668, "grad_norm": 0.43529829382896423, "learning_rate": 8.968232702755119e-06, "loss": 0.8545, "step": 22825 }, { "epoch": 2.6338255652976468, "grad_norm": 0.3926048278808594, "learning_rate": 8.940471781183335e-06, "loss": 0.8873, "step": 22830 }, { "epoch": 2.634402399630826, "grad_norm": 0.437465101480484, "learning_rate": 8.912751882106318e-06, "loss": 0.8341, "step": 22835 }, { "epoch": 2.6349792339640055, "grad_norm": 0.44119659066200256, "learning_rate": 8.88507301801198e-06, "loss": 0.9208, "step": 22840 }, { "epoch": 2.635556068297185, "grad_norm": 0.4317054748535156, "learning_rate": 8.857435201369645e-06, "loss": 0.8602, "step": 22845 }, { "epoch": 2.6361329026303646, "grad_norm": 0.40922558307647705, "learning_rate": 8.829838444630234e-06, "loss": 0.8744, "step": 22850 }, { "epoch": 2.636709736963544, "grad_norm": 0.4548785984516144, "learning_rate": 8.802282760226132e-06, "loss": 0.8525, "step": 22855 }, { "epoch": 2.6372865712967237, "grad_norm": 0.42294684052467346, "learning_rate": 8.774768160571257e-06, "loss": 0.8859, "step": 22860 }, { "epoch": 2.6378634056299033, "grad_norm": 0.4373004734516144, "learning_rate": 8.747294658060934e-06, "loss": 0.8366, "step": 22865 }, { "epoch": 2.6384402399630824, "grad_norm": 0.4728624224662781, "learning_rate": 8.719862265072065e-06, "loss": 0.8719, "step": 22870 }, { "epoch": 2.639017074296262, "grad_norm": 0.4086358845233917, "learning_rate": 8.692470993962987e-06, "loss": 0.8158, "step": 22875 }, { "epoch": 2.6395939086294415, "grad_norm": 0.4760802090167999, "learning_rate": 8.665120857073528e-06, "loss": 0.9017, "step": 22880 }, { "epoch": 2.640170742962621, "grad_norm": 0.39884287118911743, "learning_rate": 8.637811866724977e-06, "loss": 0.8547, "step": 22885 }, { "epoch": 2.6407475772958007, "grad_norm": 0.40861549973487854, "learning_rate": 8.610544035220103e-06, "loss": 0.8679, "step": 22890 }, { "epoch": 2.6413244116289802, "grad_norm": 0.43178048729896545, "learning_rate": 8.58331737484308e-06, "loss": 0.8428, "step": 22895 }, { "epoch": 2.64190124596216, "grad_norm": 0.46922725439071655, "learning_rate": 8.556131897859587e-06, "loss": 0.8974, "step": 22900 }, { "epoch": 2.642478080295339, "grad_norm": 0.4884260594844818, "learning_rate": 8.528987616516748e-06, "loss": 0.8894, "step": 22905 }, { "epoch": 2.643054914628519, "grad_norm": 0.3909541368484497, "learning_rate": 8.501884543043114e-06, "loss": 0.8726, "step": 22910 }, { "epoch": 2.643631748961698, "grad_norm": 0.4605487585067749, "learning_rate": 8.474822689648643e-06, "loss": 0.841, "step": 22915 }, { "epoch": 2.6442085832948776, "grad_norm": 0.3980555832386017, "learning_rate": 8.44780206852478e-06, "loss": 0.8423, "step": 22920 }, { "epoch": 2.644785417628057, "grad_norm": 0.3951432704925537, "learning_rate": 8.420822691844354e-06, "loss": 0.8701, "step": 22925 }, { "epoch": 2.6453622519612368, "grad_norm": 0.3847266137599945, "learning_rate": 8.393884571761645e-06, "loss": 0.925, "step": 22930 }, { "epoch": 2.6459390862944163, "grad_norm": 0.414840430021286, "learning_rate": 8.366987720412322e-06, "loss": 0.8733, "step": 22935 }, { "epoch": 2.646515920627596, "grad_norm": 0.5108116269111633, "learning_rate": 8.340132149913448e-06, "loss": 0.8971, "step": 22940 }, { "epoch": 2.6470927549607755, "grad_norm": 0.4190194308757782, "learning_rate": 8.313317872363524e-06, "loss": 0.855, "step": 22945 }, { "epoch": 2.6476695892939546, "grad_norm": 0.4162110984325409, "learning_rate": 8.286544899842441e-06, "loss": 0.8875, "step": 22950 }, { "epoch": 2.648246423627134, "grad_norm": 0.459574431180954, "learning_rate": 8.259813244411463e-06, "loss": 0.9131, "step": 22955 }, { "epoch": 2.6488232579603137, "grad_norm": 0.4537857472896576, "learning_rate": 8.233122918113278e-06, "loss": 0.8846, "step": 22960 }, { "epoch": 2.6494000922934933, "grad_norm": 0.42362692952156067, "learning_rate": 8.206473932971903e-06, "loss": 0.8929, "step": 22965 }, { "epoch": 2.649976926626673, "grad_norm": 0.42241352796554565, "learning_rate": 8.179866300992756e-06, "loss": 0.8383, "step": 22970 }, { "epoch": 2.6505537609598524, "grad_norm": 0.43495357036590576, "learning_rate": 8.153300034162637e-06, "loss": 0.8805, "step": 22975 }, { "epoch": 2.651130595293032, "grad_norm": 0.43217211961746216, "learning_rate": 8.126775144449705e-06, "loss": 0.8806, "step": 22980 }, { "epoch": 2.651707429626211, "grad_norm": 0.4123685956001282, "learning_rate": 8.100291643803493e-06, "loss": 0.8525, "step": 22985 }, { "epoch": 2.652284263959391, "grad_norm": 0.4632377326488495, "learning_rate": 8.07384954415482e-06, "loss": 0.9339, "step": 22990 }, { "epoch": 2.65286109829257, "grad_norm": 0.44759467244148254, "learning_rate": 8.04744885741593e-06, "loss": 0.8938, "step": 22995 }, { "epoch": 2.65343793262575, "grad_norm": 0.46450936794281006, "learning_rate": 8.021089595480391e-06, "loss": 0.8825, "step": 23000 }, { "epoch": 2.6540147669589293, "grad_norm": 0.3885645568370819, "learning_rate": 7.994771770223108e-06, "loss": 0.8601, "step": 23005 }, { "epoch": 2.654591601292109, "grad_norm": 0.4260353446006775, "learning_rate": 7.968495393500285e-06, "loss": 0.8432, "step": 23010 }, { "epoch": 2.6551684356252885, "grad_norm": 0.49735400080680847, "learning_rate": 7.9422604771495e-06, "loss": 0.8832, "step": 23015 }, { "epoch": 2.655745269958468, "grad_norm": 0.4212798476219177, "learning_rate": 7.916067032989628e-06, "loss": 0.884, "step": 23020 }, { "epoch": 2.6563221042916476, "grad_norm": 0.4518694281578064, "learning_rate": 7.889915072820874e-06, "loss": 0.8523, "step": 23025 }, { "epoch": 2.6568989386248267, "grad_norm": 0.41297948360443115, "learning_rate": 7.863804608424718e-06, "loss": 0.8713, "step": 23030 }, { "epoch": 2.6574757729580063, "grad_norm": 0.41061973571777344, "learning_rate": 7.837735651564037e-06, "loss": 0.8119, "step": 23035 }, { "epoch": 2.658052607291186, "grad_norm": 0.45814794301986694, "learning_rate": 7.811708213982883e-06, "loss": 0.8421, "step": 23040 }, { "epoch": 2.6586294416243654, "grad_norm": 0.4035818576812744, "learning_rate": 7.785722307406684e-06, "loss": 0.9158, "step": 23045 }, { "epoch": 2.659206275957545, "grad_norm": 0.46518614888191223, "learning_rate": 7.759777943542157e-06, "loss": 0.88, "step": 23050 }, { "epoch": 2.6597831102907246, "grad_norm": 0.47394055128097534, "learning_rate": 7.733875134077307e-06, "loss": 0.8324, "step": 23055 }, { "epoch": 2.660359944623904, "grad_norm": 0.44765111804008484, "learning_rate": 7.708013890681343e-06, "loss": 0.9019, "step": 23060 }, { "epoch": 2.6609367789570832, "grad_norm": 0.43634724617004395, "learning_rate": 7.682194225004845e-06, "loss": 0.8848, "step": 23065 }, { "epoch": 2.6615136132902633, "grad_norm": 0.43800708651542664, "learning_rate": 7.656416148679612e-06, "loss": 0.9194, "step": 23070 }, { "epoch": 2.6620904476234424, "grad_norm": 0.42345482110977173, "learning_rate": 7.630679673318742e-06, "loss": 0.9358, "step": 23075 }, { "epoch": 2.662667281956622, "grad_norm": 0.4679677486419678, "learning_rate": 7.604984810516514e-06, "loss": 0.9458, "step": 23080 }, { "epoch": 2.6632441162898015, "grad_norm": 0.4194246530532837, "learning_rate": 7.579331571848569e-06, "loss": 0.8814, "step": 23085 }, { "epoch": 2.663820950622981, "grad_norm": 0.3922426402568817, "learning_rate": 7.5537199688716975e-06, "loss": 0.8442, "step": 23090 }, { "epoch": 2.6643977849561606, "grad_norm": 0.42661118507385254, "learning_rate": 7.528150013124024e-06, "loss": 0.8797, "step": 23095 }, { "epoch": 2.66497461928934, "grad_norm": 0.41264429688453674, "learning_rate": 7.502621716124791e-06, "loss": 0.8877, "step": 23100 }, { "epoch": 2.6655514536225198, "grad_norm": 0.3934708833694458, "learning_rate": 7.477135089374631e-06, "loss": 0.8582, "step": 23105 }, { "epoch": 2.666128287955699, "grad_norm": 0.4392053782939911, "learning_rate": 7.451690144355261e-06, "loss": 0.8686, "step": 23110 }, { "epoch": 2.666705122288879, "grad_norm": 0.44735977053642273, "learning_rate": 7.4262868925296995e-06, "loss": 0.8693, "step": 23115 }, { "epoch": 2.667281956622058, "grad_norm": 0.41240233182907104, "learning_rate": 7.400925345342147e-06, "loss": 0.8983, "step": 23120 }, { "epoch": 2.6678587909552376, "grad_norm": 0.42448821663856506, "learning_rate": 7.375605514218065e-06, "loss": 0.8409, "step": 23125 }, { "epoch": 2.668435625288417, "grad_norm": 0.4222618341445923, "learning_rate": 7.35032741056404e-06, "loss": 0.8585, "step": 23130 }, { "epoch": 2.6690124596215967, "grad_norm": 0.4419105648994446, "learning_rate": 7.32509104576794e-06, "loss": 0.8444, "step": 23135 }, { "epoch": 2.6695892939547763, "grad_norm": 0.4208768308162689, "learning_rate": 7.299896431198772e-06, "loss": 0.8427, "step": 23140 }, { "epoch": 2.670166128287956, "grad_norm": 0.41719236969947815, "learning_rate": 7.274743578206788e-06, "loss": 0.8716, "step": 23145 }, { "epoch": 2.6707429626211354, "grad_norm": 0.4166914224624634, "learning_rate": 7.24963249812336e-06, "loss": 0.857, "step": 23150 }, { "epoch": 2.6713197969543145, "grad_norm": 0.45486098527908325, "learning_rate": 7.224563202261125e-06, "loss": 0.8793, "step": 23155 }, { "epoch": 2.671896631287494, "grad_norm": 0.39285019040107727, "learning_rate": 7.199535701913806e-06, "loss": 0.8704, "step": 23160 }, { "epoch": 2.6724734656206737, "grad_norm": 0.3915071487426758, "learning_rate": 7.174550008356374e-06, "loss": 0.9232, "step": 23165 }, { "epoch": 2.6730502999538532, "grad_norm": 0.40840792655944824, "learning_rate": 7.149606132844888e-06, "loss": 0.8931, "step": 23170 }, { "epoch": 2.673627134287033, "grad_norm": 0.42678335309028625, "learning_rate": 7.124704086616684e-06, "loss": 0.8597, "step": 23175 }, { "epoch": 2.6742039686202124, "grad_norm": 0.44222939014434814, "learning_rate": 7.0998438808901115e-06, "loss": 0.8811, "step": 23180 }, { "epoch": 2.674780802953392, "grad_norm": 0.4381917417049408, "learning_rate": 7.075025526864798e-06, "loss": 0.924, "step": 23185 }, { "epoch": 2.675357637286571, "grad_norm": 0.3970070481300354, "learning_rate": 7.05024903572139e-06, "loss": 0.8886, "step": 23190 }, { "epoch": 2.675934471619751, "grad_norm": 0.4008063077926636, "learning_rate": 7.025514418621826e-06, "loss": 0.8771, "step": 23195 }, { "epoch": 2.67651130595293, "grad_norm": 0.4886510372161865, "learning_rate": 7.000821686709036e-06, "loss": 0.8932, "step": 23200 }, { "epoch": 2.6770881402861098, "grad_norm": 0.4655059576034546, "learning_rate": 6.976170851107178e-06, "loss": 0.8692, "step": 23205 }, { "epoch": 2.6776649746192893, "grad_norm": 0.5062782168388367, "learning_rate": 6.95156192292149e-06, "loss": 0.9053, "step": 23210 }, { "epoch": 2.678241808952469, "grad_norm": 0.41502344608306885, "learning_rate": 6.9269949132383606e-06, "loss": 0.7778, "step": 23215 }, { "epoch": 2.6788186432856484, "grad_norm": 0.45568451285362244, "learning_rate": 6.902469833125236e-06, "loss": 0.8275, "step": 23220 }, { "epoch": 2.679395477618828, "grad_norm": 0.4134273827075958, "learning_rate": 6.877986693630745e-06, "loss": 0.8874, "step": 23225 }, { "epoch": 2.6799723119520076, "grad_norm": 0.39337506890296936, "learning_rate": 6.853545505784575e-06, "loss": 0.8619, "step": 23230 }, { "epoch": 2.6805491462851867, "grad_norm": 0.436361163854599, "learning_rate": 6.8291462805975535e-06, "loss": 0.8283, "step": 23235 }, { "epoch": 2.6811259806183663, "grad_norm": 0.4170958995819092, "learning_rate": 6.804789029061531e-06, "loss": 0.8482, "step": 23240 }, { "epoch": 2.681702814951546, "grad_norm": 0.3744247853755951, "learning_rate": 6.780473762149553e-06, "loss": 0.8689, "step": 23245 }, { "epoch": 2.6822796492847254, "grad_norm": 0.45442360639572144, "learning_rate": 6.756200490815645e-06, "loss": 0.8552, "step": 23250 }, { "epoch": 2.682856483617905, "grad_norm": 0.4478665888309479, "learning_rate": 6.731969225995005e-06, "loss": 0.87, "step": 23255 }, { "epoch": 2.6834333179510845, "grad_norm": 0.4316500723361969, "learning_rate": 6.707779978603823e-06, "loss": 0.8455, "step": 23260 }, { "epoch": 2.684010152284264, "grad_norm": 0.40314388275146484, "learning_rate": 6.683632759539449e-06, "loss": 0.8341, "step": 23265 }, { "epoch": 2.684586986617443, "grad_norm": 0.4224727153778076, "learning_rate": 6.659527579680203e-06, "loss": 0.8857, "step": 23270 }, { "epoch": 2.6851638209506232, "grad_norm": 0.3884515166282654, "learning_rate": 6.635464449885542e-06, "loss": 0.834, "step": 23275 }, { "epoch": 2.6857406552838023, "grad_norm": 0.464347243309021, "learning_rate": 6.611443380995963e-06, "loss": 0.8886, "step": 23280 }, { "epoch": 2.686317489616982, "grad_norm": 0.4518876075744629, "learning_rate": 6.587464383832998e-06, "loss": 0.927, "step": 23285 }, { "epoch": 2.6868943239501615, "grad_norm": 0.40290123224258423, "learning_rate": 6.563527469199205e-06, "loss": 0.8613, "step": 23290 }, { "epoch": 2.687471158283341, "grad_norm": 0.4083547294139862, "learning_rate": 6.5396326478782465e-06, "loss": 0.858, "step": 23295 }, { "epoch": 2.6880479926165206, "grad_norm": 0.45641326904296875, "learning_rate": 6.515779930634757e-06, "loss": 0.8951, "step": 23300 }, { "epoch": 2.6886248269497, "grad_norm": 0.4491538107395172, "learning_rate": 6.491969328214464e-06, "loss": 0.8756, "step": 23305 }, { "epoch": 2.6892016612828797, "grad_norm": 0.3983515202999115, "learning_rate": 6.468200851344042e-06, "loss": 0.8785, "step": 23310 }, { "epoch": 2.689778495616059, "grad_norm": 0.4467414319515228, "learning_rate": 6.444474510731302e-06, "loss": 0.8429, "step": 23315 }, { "epoch": 2.6903553299492384, "grad_norm": 0.4083499610424042, "learning_rate": 6.42079031706495e-06, "loss": 0.8667, "step": 23320 }, { "epoch": 2.690932164282418, "grad_norm": 0.43478161096572876, "learning_rate": 6.397148281014798e-06, "loss": 0.8544, "step": 23325 }, { "epoch": 2.6915089986155976, "grad_norm": 0.41683241724967957, "learning_rate": 6.373548413231589e-06, "loss": 0.8836, "step": 23330 }, { "epoch": 2.692085832948777, "grad_norm": 0.4447765350341797, "learning_rate": 6.349990724347155e-06, "loss": 0.843, "step": 23335 }, { "epoch": 2.6926626672819567, "grad_norm": 0.41929274797439575, "learning_rate": 6.326475224974249e-06, "loss": 0.8597, "step": 23340 }, { "epoch": 2.6932395016151363, "grad_norm": 0.42952585220336914, "learning_rate": 6.303001925706664e-06, "loss": 0.865, "step": 23345 }, { "epoch": 2.6938163359483154, "grad_norm": 0.43239691853523254, "learning_rate": 6.279570837119164e-06, "loss": 0.9176, "step": 23350 }, { "epoch": 2.6943931702814954, "grad_norm": 0.38875019550323486, "learning_rate": 6.256181969767505e-06, "loss": 0.8258, "step": 23355 }, { "epoch": 2.6949700046146745, "grad_norm": 0.4094666540622711, "learning_rate": 6.2328353341884025e-06, "loss": 0.8804, "step": 23360 }, { "epoch": 2.695546838947854, "grad_norm": 0.3923322260379791, "learning_rate": 6.209530940899566e-06, "loss": 0.8529, "step": 23365 }, { "epoch": 2.6961236732810336, "grad_norm": 0.39394620060920715, "learning_rate": 6.186268800399675e-06, "loss": 0.8437, "step": 23370 }, { "epoch": 2.696700507614213, "grad_norm": 0.4831443428993225, "learning_rate": 6.163048923168391e-06, "loss": 0.8895, "step": 23375 }, { "epoch": 2.6972773419473928, "grad_norm": 0.4099637269973755, "learning_rate": 6.139871319666269e-06, "loss": 0.8729, "step": 23380 }, { "epoch": 2.6978541762805723, "grad_norm": 0.41623443365097046, "learning_rate": 6.116736000334888e-06, "loss": 0.8712, "step": 23385 }, { "epoch": 2.698431010613752, "grad_norm": 0.4021301865577698, "learning_rate": 6.0936429755967475e-06, "loss": 0.8658, "step": 23390 }, { "epoch": 2.699007844946931, "grad_norm": 0.4065121114253998, "learning_rate": 6.070592255855312e-06, "loss": 0.8956, "step": 23395 }, { "epoch": 2.6995846792801106, "grad_norm": 0.42467519640922546, "learning_rate": 6.047583851494965e-06, "loss": 0.8458, "step": 23400 }, { "epoch": 2.70016151361329, "grad_norm": 0.48021507263183594, "learning_rate": 6.024617772881058e-06, "loss": 0.8981, "step": 23405 }, { "epoch": 2.7007383479464697, "grad_norm": 0.42031559348106384, "learning_rate": 6.001694030359828e-06, "loss": 0.8129, "step": 23410 }, { "epoch": 2.7013151822796493, "grad_norm": 0.43568867444992065, "learning_rate": 5.978812634258468e-06, "loss": 0.9101, "step": 23415 }, { "epoch": 2.701892016612829, "grad_norm": 0.4510698616504669, "learning_rate": 5.955973594885111e-06, "loss": 0.8683, "step": 23420 }, { "epoch": 2.7024688509460084, "grad_norm": 0.4196416139602661, "learning_rate": 5.9331769225287825e-06, "loss": 0.826, "step": 23425 }, { "epoch": 2.703045685279188, "grad_norm": 0.40882837772369385, "learning_rate": 5.910422627459411e-06, "loss": 0.8191, "step": 23430 }, { "epoch": 2.7036225196123675, "grad_norm": 0.4428280293941498, "learning_rate": 5.887710719927853e-06, "loss": 0.8544, "step": 23435 }, { "epoch": 2.7041993539455467, "grad_norm": 0.40964657068252563, "learning_rate": 5.865041210165878e-06, "loss": 0.876, "step": 23440 }, { "epoch": 2.7047761882787262, "grad_norm": 0.5035736560821533, "learning_rate": 5.842414108386151e-06, "loss": 0.8761, "step": 23445 }, { "epoch": 2.705353022611906, "grad_norm": 0.416984498500824, "learning_rate": 5.8198294247822304e-06, "loss": 0.8908, "step": 23450 }, { "epoch": 2.7059298569450854, "grad_norm": 0.4158463776111603, "learning_rate": 5.7972871695285205e-06, "loss": 0.8577, "step": 23455 }, { "epoch": 2.706506691278265, "grad_norm": 0.42650651931762695, "learning_rate": 5.774787352780387e-06, "loss": 0.905, "step": 23460 }, { "epoch": 2.7070835256114445, "grad_norm": 0.4276556670665741, "learning_rate": 5.752329984674032e-06, "loss": 0.9039, "step": 23465 }, { "epoch": 2.707660359944624, "grad_norm": 0.4451688230037689, "learning_rate": 5.729915075326531e-06, "loss": 0.8185, "step": 23470 }, { "epoch": 2.708237194277803, "grad_norm": 0.40357574820518494, "learning_rate": 5.707542634835883e-06, "loss": 0.8972, "step": 23475 }, { "epoch": 2.708814028610983, "grad_norm": 0.44075727462768555, "learning_rate": 5.685212673280871e-06, "loss": 0.8165, "step": 23480 }, { "epoch": 2.7093908629441623, "grad_norm": 0.3642112612724304, "learning_rate": 5.662925200721203e-06, "loss": 0.8017, "step": 23485 }, { "epoch": 2.709967697277342, "grad_norm": 0.4557955861091614, "learning_rate": 5.640680227197426e-06, "loss": 0.8327, "step": 23490 }, { "epoch": 2.7105445316105214, "grad_norm": 0.43093356490135193, "learning_rate": 5.618477762730956e-06, "loss": 0.8696, "step": 23495 }, { "epoch": 2.711121365943701, "grad_norm": 0.4352542757987976, "learning_rate": 5.596317817324048e-06, "loss": 0.8514, "step": 23500 }, { "epoch": 2.7116982002768806, "grad_norm": 0.4431220293045044, "learning_rate": 5.574200400959773e-06, "loss": 0.8733, "step": 23505 }, { "epoch": 2.71227503461006, "grad_norm": 0.39745551347732544, "learning_rate": 5.552125523602092e-06, "loss": 0.8409, "step": 23510 }, { "epoch": 2.7128518689432397, "grad_norm": 0.44975340366363525, "learning_rate": 5.530093195195774e-06, "loss": 0.8785, "step": 23515 }, { "epoch": 2.713428703276419, "grad_norm": 0.4235374629497528, "learning_rate": 5.5081034256664445e-06, "loss": 0.8495, "step": 23520 }, { "epoch": 2.7140055376095984, "grad_norm": 0.42339780926704407, "learning_rate": 5.4861562249204916e-06, "loss": 0.8342, "step": 23525 }, { "epoch": 2.714582371942778, "grad_norm": 0.42378342151641846, "learning_rate": 5.464251602845238e-06, "loss": 0.8429, "step": 23530 }, { "epoch": 2.7151592062759575, "grad_norm": 0.43994879722595215, "learning_rate": 5.442389569308703e-06, "loss": 0.8321, "step": 23535 }, { "epoch": 2.715736040609137, "grad_norm": 0.42166078090667725, "learning_rate": 5.420570134159797e-06, "loss": 0.8969, "step": 23540 }, { "epoch": 2.7163128749423167, "grad_norm": 0.49834781885147095, "learning_rate": 5.3987933072282e-06, "loss": 0.8835, "step": 23545 }, { "epoch": 2.7168897092754962, "grad_norm": 0.470022052526474, "learning_rate": 5.377059098324455e-06, "loss": 0.833, "step": 23550 }, { "epoch": 2.7174665436086753, "grad_norm": 0.41667890548706055, "learning_rate": 5.355367517239829e-06, "loss": 0.8216, "step": 23555 }, { "epoch": 2.7180433779418554, "grad_norm": 0.39226892590522766, "learning_rate": 5.333718573746426e-06, "loss": 0.8741, "step": 23560 }, { "epoch": 2.7186202122750345, "grad_norm": 0.44253507256507874, "learning_rate": 5.312112277597159e-06, "loss": 0.8915, "step": 23565 }, { "epoch": 2.719197046608214, "grad_norm": 0.40449512004852295, "learning_rate": 5.290548638525694e-06, "loss": 0.8604, "step": 23570 }, { "epoch": 2.7197738809413936, "grad_norm": 0.43502479791641235, "learning_rate": 5.269027666246473e-06, "loss": 0.8704, "step": 23575 }, { "epoch": 2.720350715274573, "grad_norm": 0.4082026779651642, "learning_rate": 5.247549370454763e-06, "loss": 0.8301, "step": 23580 }, { "epoch": 2.7209275496077527, "grad_norm": 0.4308987557888031, "learning_rate": 5.2261137608265675e-06, "loss": 0.868, "step": 23585 }, { "epoch": 2.7215043839409323, "grad_norm": 0.4319988191127777, "learning_rate": 5.204720847018674e-06, "loss": 0.8218, "step": 23590 }, { "epoch": 2.722081218274112, "grad_norm": 0.42464616894721985, "learning_rate": 5.183370638668616e-06, "loss": 0.9011, "step": 23595 }, { "epoch": 2.722658052607291, "grad_norm": 0.3950149416923523, "learning_rate": 5.162063145394736e-06, "loss": 0.8079, "step": 23600 }, { "epoch": 2.7232348869404706, "grad_norm": 0.4139585793018341, "learning_rate": 5.140798376796064e-06, "loss": 0.8729, "step": 23605 }, { "epoch": 2.72381172127365, "grad_norm": 0.42863261699676514, "learning_rate": 5.119576342452459e-06, "loss": 0.8763, "step": 23610 }, { "epoch": 2.7243885556068297, "grad_norm": 0.4117303788661957, "learning_rate": 5.098397051924441e-06, "loss": 0.8344, "step": 23615 }, { "epoch": 2.7249653899400093, "grad_norm": 0.49408987164497375, "learning_rate": 5.077260514753379e-06, "loss": 0.9043, "step": 23620 }, { "epoch": 2.725542224273189, "grad_norm": 0.3946788012981415, "learning_rate": 5.056166740461265e-06, "loss": 0.8488, "step": 23625 }, { "epoch": 2.7261190586063684, "grad_norm": 0.38921356201171875, "learning_rate": 5.035115738550933e-06, "loss": 0.8801, "step": 23630 }, { "epoch": 2.7266958929395475, "grad_norm": 0.4450054168701172, "learning_rate": 5.014107518505862e-06, "loss": 0.8416, "step": 23635 }, { "epoch": 2.7272727272727275, "grad_norm": 0.41446584463119507, "learning_rate": 4.993142089790337e-06, "loss": 0.8843, "step": 23640 }, { "epoch": 2.7278495616059066, "grad_norm": 0.4286811053752899, "learning_rate": 4.972219461849293e-06, "loss": 0.8531, "step": 23645 }, { "epoch": 2.728426395939086, "grad_norm": 0.45309022068977356, "learning_rate": 4.951339644108422e-06, "loss": 0.9202, "step": 23650 }, { "epoch": 2.7290032302722658, "grad_norm": 0.4311707615852356, "learning_rate": 4.9305026459741224e-06, "loss": 0.8921, "step": 23655 }, { "epoch": 2.7295800646054453, "grad_norm": 0.4332069754600525, "learning_rate": 4.909708476833519e-06, "loss": 0.8678, "step": 23660 }, { "epoch": 2.730156898938625, "grad_norm": 0.438365638256073, "learning_rate": 4.888957146054407e-06, "loss": 0.8514, "step": 23665 }, { "epoch": 2.7307337332718045, "grad_norm": 0.4583643078804016, "learning_rate": 4.8682486629852975e-06, "loss": 0.9301, "step": 23670 }, { "epoch": 2.731310567604984, "grad_norm": 0.4006592631340027, "learning_rate": 4.8475830369554056e-06, "loss": 0.8692, "step": 23675 }, { "epoch": 2.731887401938163, "grad_norm": 0.4375672936439514, "learning_rate": 4.826960277274662e-06, "loss": 0.8924, "step": 23680 }, { "epoch": 2.7324642362713427, "grad_norm": 0.4388617277145386, "learning_rate": 4.8063803932336114e-06, "loss": 0.9116, "step": 23685 }, { "epoch": 2.7330410706045223, "grad_norm": 0.3963301479816437, "learning_rate": 4.785843394103584e-06, "loss": 0.8852, "step": 23690 }, { "epoch": 2.733617904937702, "grad_norm": 0.436459481716156, "learning_rate": 4.7653492891365005e-06, "loss": 0.8846, "step": 23695 }, { "epoch": 2.7341947392708814, "grad_norm": 0.4878678321838379, "learning_rate": 4.74489808756502e-06, "loss": 0.9057, "step": 23700 }, { "epoch": 2.734771573604061, "grad_norm": 0.4217132329940796, "learning_rate": 4.7244897986024165e-06, "loss": 0.9, "step": 23705 }, { "epoch": 2.7353484079372405, "grad_norm": 0.43869632482528687, "learning_rate": 4.704124431442702e-06, "loss": 0.912, "step": 23710 }, { "epoch": 2.7359252422704197, "grad_norm": 0.4051395654678345, "learning_rate": 4.683801995260484e-06, "loss": 0.8338, "step": 23715 }, { "epoch": 2.7365020766035997, "grad_norm": 0.4011788070201874, "learning_rate": 4.663522499211081e-06, "loss": 0.8586, "step": 23720 }, { "epoch": 2.737078910936779, "grad_norm": 0.4310953617095947, "learning_rate": 4.643285952430432e-06, "loss": 0.8509, "step": 23725 }, { "epoch": 2.7376557452699584, "grad_norm": 0.457690566778183, "learning_rate": 4.623092364035153e-06, "loss": 0.8965, "step": 23730 }, { "epoch": 2.738232579603138, "grad_norm": 0.4056735038757324, "learning_rate": 4.602941743122469e-06, "loss": 0.8441, "step": 23735 }, { "epoch": 2.7388094139363175, "grad_norm": 0.48004797101020813, "learning_rate": 4.5828340987703055e-06, "loss": 0.8624, "step": 23740 }, { "epoch": 2.739386248269497, "grad_norm": 0.4055866599082947, "learning_rate": 4.562769440037174e-06, "loss": 0.8006, "step": 23745 }, { "epoch": 2.7399630826026766, "grad_norm": 0.46271732449531555, "learning_rate": 4.542747775962264e-06, "loss": 0.9206, "step": 23750 }, { "epoch": 2.740539916935856, "grad_norm": 0.4342441260814667, "learning_rate": 4.5227691155653284e-06, "loss": 0.8854, "step": 23755 }, { "epoch": 2.7411167512690353, "grad_norm": 0.40607601404190063, "learning_rate": 4.502833467846857e-06, "loss": 0.8357, "step": 23760 }, { "epoch": 2.741693585602215, "grad_norm": 0.39423415064811707, "learning_rate": 4.4829408417878526e-06, "loss": 0.8733, "step": 23765 }, { "epoch": 2.7422704199353944, "grad_norm": 0.42036357522010803, "learning_rate": 4.4630912463500045e-06, "loss": 0.8773, "step": 23770 }, { "epoch": 2.742847254268574, "grad_norm": 0.4077562689781189, "learning_rate": 4.443284690475558e-06, "loss": 0.8332, "step": 23775 }, { "epoch": 2.7434240886017536, "grad_norm": 0.4172145426273346, "learning_rate": 4.423521183087453e-06, "loss": 0.8823, "step": 23780 }, { "epoch": 2.744000922934933, "grad_norm": 0.4038824141025543, "learning_rate": 4.40380073308917e-06, "loss": 0.8898, "step": 23785 }, { "epoch": 2.7445777572681127, "grad_norm": 0.4768836498260498, "learning_rate": 4.384123349364788e-06, "loss": 0.8761, "step": 23790 }, { "epoch": 2.7451545916012923, "grad_norm": 0.41172316670417786, "learning_rate": 4.364489040779029e-06, "loss": 0.8762, "step": 23795 }, { "epoch": 2.745731425934472, "grad_norm": 0.42793089151382446, "learning_rate": 4.344897816177207e-06, "loss": 0.8787, "step": 23800 }, { "epoch": 2.746308260267651, "grad_norm": 0.39413848519325256, "learning_rate": 4.32534968438516e-06, "loss": 0.8793, "step": 23805 }, { "epoch": 2.7468850946008305, "grad_norm": 0.6779045462608337, "learning_rate": 4.30584465420939e-06, "loss": 0.8953, "step": 23810 }, { "epoch": 2.74746192893401, "grad_norm": 0.4348139762878418, "learning_rate": 4.286382734436933e-06, "loss": 0.8643, "step": 23815 }, { "epoch": 2.7480387632671897, "grad_norm": 0.45245304703712463, "learning_rate": 4.266963933835455e-06, "loss": 0.8175, "step": 23820 }, { "epoch": 2.748615597600369, "grad_norm": 0.44413793087005615, "learning_rate": 4.2475882611531235e-06, "loss": 0.8779, "step": 23825 }, { "epoch": 2.749192431933549, "grad_norm": 0.41267192363739014, "learning_rate": 4.228255725118735e-06, "loss": 0.8591, "step": 23830 }, { "epoch": 2.7497692662667284, "grad_norm": 0.40713363885879517, "learning_rate": 4.208966334441633e-06, "loss": 0.8831, "step": 23835 }, { "epoch": 2.7503461005999075, "grad_norm": 0.4391236901283264, "learning_rate": 4.189720097811745e-06, "loss": 0.8798, "step": 23840 }, { "epoch": 2.7509229349330875, "grad_norm": 0.3802451491355896, "learning_rate": 4.1705170238994894e-06, "loss": 0.9073, "step": 23845 }, { "epoch": 2.7514997692662666, "grad_norm": 0.41070911288261414, "learning_rate": 4.151357121355947e-06, "loss": 0.8754, "step": 23850 }, { "epoch": 2.752076603599446, "grad_norm": 0.4319693446159363, "learning_rate": 4.132240398812648e-06, "loss": 0.8826, "step": 23855 }, { "epoch": 2.7526534379326257, "grad_norm": 0.40889811515808105, "learning_rate": 4.113166864881723e-06, "loss": 0.941, "step": 23860 }, { "epoch": 2.7532302722658053, "grad_norm": 0.46315550804138184, "learning_rate": 4.0941365281558454e-06, "loss": 0.8335, "step": 23865 }, { "epoch": 2.753807106598985, "grad_norm": 0.4564089775085449, "learning_rate": 4.075149397208222e-06, "loss": 0.8596, "step": 23870 }, { "epoch": 2.7543839409321644, "grad_norm": 0.4790186882019043, "learning_rate": 4.056205480592579e-06, "loss": 0.8531, "step": 23875 }, { "epoch": 2.754960775265344, "grad_norm": 0.3872862458229065, "learning_rate": 4.037304786843188e-06, "loss": 0.8556, "step": 23880 }, { "epoch": 2.755537609598523, "grad_norm": 0.4773821234703064, "learning_rate": 4.018447324474861e-06, "loss": 0.8746, "step": 23885 }, { "epoch": 2.7561144439317027, "grad_norm": 0.4576581120491028, "learning_rate": 3.9996331019829245e-06, "loss": 0.8682, "step": 23890 }, { "epoch": 2.7566912782648822, "grad_norm": 0.3763803541660309, "learning_rate": 3.980862127843199e-06, "loss": 0.8513, "step": 23895 }, { "epoch": 2.757268112598062, "grad_norm": 0.426574170589447, "learning_rate": 3.962134410512064e-06, "loss": 0.8845, "step": 23900 }, { "epoch": 2.7578449469312414, "grad_norm": 0.4308461844921112, "learning_rate": 3.9434499584263705e-06, "loss": 0.8892, "step": 23905 }, { "epoch": 2.758421781264421, "grad_norm": 0.40955811738967896, "learning_rate": 3.924808780003531e-06, "loss": 0.9009, "step": 23910 }, { "epoch": 2.7589986155976005, "grad_norm": 0.38951027393341064, "learning_rate": 3.906210883641415e-06, "loss": 0.836, "step": 23915 }, { "epoch": 2.7595754499307796, "grad_norm": 0.4530879259109497, "learning_rate": 3.887656277718432e-06, "loss": 0.858, "step": 23920 }, { "epoch": 2.7601522842639596, "grad_norm": 0.4421103000640869, "learning_rate": 3.86914497059343e-06, "loss": 0.8089, "step": 23925 }, { "epoch": 2.7607291185971388, "grad_norm": 0.4470920264720917, "learning_rate": 3.850676970605815e-06, "loss": 0.8756, "step": 23930 }, { "epoch": 2.7613059529303183, "grad_norm": 0.4560685157775879, "learning_rate": 3.832252286075444e-06, "loss": 0.8843, "step": 23935 }, { "epoch": 2.761882787263498, "grad_norm": 0.44257691502571106, "learning_rate": 3.813870925302698e-06, "loss": 0.8533, "step": 23940 }, { "epoch": 2.7624596215966775, "grad_norm": 0.42879223823547363, "learning_rate": 3.7955328965683877e-06, "loss": 0.8343, "step": 23945 }, { "epoch": 2.763036455929857, "grad_norm": 0.4552403688430786, "learning_rate": 3.7772382081338377e-06, "loss": 0.9233, "step": 23950 }, { "epoch": 2.7636132902630366, "grad_norm": 0.4159514009952545, "learning_rate": 3.7589868682408434e-06, "loss": 0.8426, "step": 23955 }, { "epoch": 2.764190124596216, "grad_norm": 0.4519195854663849, "learning_rate": 3.7407788851116845e-06, "loss": 0.9192, "step": 23960 }, { "epoch": 2.7647669589293953, "grad_norm": 0.40860262513160706, "learning_rate": 3.722614266949076e-06, "loss": 0.8953, "step": 23965 }, { "epoch": 2.765343793262575, "grad_norm": 0.4030444622039795, "learning_rate": 3.7044930219362063e-06, "loss": 0.8673, "step": 23970 }, { "epoch": 2.7659206275957544, "grad_norm": 0.4095665216445923, "learning_rate": 3.6864151582367446e-06, "loss": 0.8867, "step": 23975 }, { "epoch": 2.766497461928934, "grad_norm": 0.4742783308029175, "learning_rate": 3.668380683994799e-06, "loss": 0.8197, "step": 23980 }, { "epoch": 2.7670742962621135, "grad_norm": 0.41096723079681396, "learning_rate": 3.6503896073349587e-06, "loss": 0.8778, "step": 23985 }, { "epoch": 2.767651130595293, "grad_norm": 0.38448628783226013, "learning_rate": 3.632441936362174e-06, "loss": 0.8749, "step": 23990 }, { "epoch": 2.7682279649284727, "grad_norm": 0.38678210973739624, "learning_rate": 3.614537679161989e-06, "loss": 0.8455, "step": 23995 }, { "epoch": 2.768804799261652, "grad_norm": 0.37491893768310547, "learning_rate": 3.5966768438002507e-06, "loss": 0.8531, "step": 24000 }, { "epoch": 2.769381633594832, "grad_norm": 0.41927438974380493, "learning_rate": 3.5788594383233122e-06, "loss": 0.8561, "step": 24005 }, { "epoch": 2.769958467928011, "grad_norm": 0.44681841135025024, "learning_rate": 3.5610854707579523e-06, "loss": 0.8804, "step": 24010 }, { "epoch": 2.7705353022611905, "grad_norm": 0.39751601219177246, "learning_rate": 3.5433549491113884e-06, "loss": 0.8692, "step": 24015 }, { "epoch": 2.77111213659437, "grad_norm": 0.43277502059936523, "learning_rate": 3.5256678813712417e-06, "loss": 0.8762, "step": 24020 }, { "epoch": 2.7716889709275496, "grad_norm": 0.41634657979011536, "learning_rate": 3.5080242755055726e-06, "loss": 0.8523, "step": 24025 }, { "epoch": 2.772265805260729, "grad_norm": 0.3915756344795227, "learning_rate": 3.4904241394628557e-06, "loss": 0.8583, "step": 24030 }, { "epoch": 2.7728426395939088, "grad_norm": 0.4474189579486847, "learning_rate": 3.472867481172004e-06, "loss": 0.8383, "step": 24035 }, { "epoch": 2.7734194739270883, "grad_norm": 0.4305979907512665, "learning_rate": 3.455354308542291e-06, "loss": 0.8492, "step": 24040 }, { "epoch": 2.7739963082602674, "grad_norm": 0.4478662610054016, "learning_rate": 3.4378846294634835e-06, "loss": 0.8993, "step": 24045 }, { "epoch": 2.774573142593447, "grad_norm": 0.3863767683506012, "learning_rate": 3.4204584518056747e-06, "loss": 0.861, "step": 24050 }, { "epoch": 2.7751499769266266, "grad_norm": 0.4080348312854767, "learning_rate": 3.403075783419407e-06, "loss": 0.8817, "step": 24055 }, { "epoch": 2.775726811259806, "grad_norm": 0.4075169861316681, "learning_rate": 3.3857366321355722e-06, "loss": 0.8641, "step": 24060 }, { "epoch": 2.7763036455929857, "grad_norm": 0.45938998460769653, "learning_rate": 3.3684410057655435e-06, "loss": 0.868, "step": 24065 }, { "epoch": 2.7768804799261653, "grad_norm": 0.416753351688385, "learning_rate": 3.3511889121009886e-06, "loss": 0.8145, "step": 24070 }, { "epoch": 2.777457314259345, "grad_norm": 0.39491215348243713, "learning_rate": 3.3339803589140352e-06, "loss": 0.8632, "step": 24075 }, { "epoch": 2.778034148592524, "grad_norm": 0.41789379715919495, "learning_rate": 3.316815353957159e-06, "loss": 0.8214, "step": 24080 }, { "epoch": 2.778610982925704, "grad_norm": 0.3917028307914734, "learning_rate": 3.2996939049632415e-06, "loss": 0.8763, "step": 24085 }, { "epoch": 2.779187817258883, "grad_norm": 0.47759175300598145, "learning_rate": 3.2826160196455123e-06, "loss": 0.8576, "step": 24090 }, { "epoch": 2.7797646515920627, "grad_norm": 0.40511590242385864, "learning_rate": 3.2655817056975957e-06, "loss": 0.8361, "step": 24095 }, { "epoch": 2.780341485925242, "grad_norm": 0.4438006579875946, "learning_rate": 3.248590970793486e-06, "loss": 0.8489, "step": 24100 }, { "epoch": 2.780918320258422, "grad_norm": 0.4250805974006653, "learning_rate": 3.23164382258756e-06, "loss": 0.9377, "step": 24105 }, { "epoch": 2.7814951545916013, "grad_norm": 0.41761264204978943, "learning_rate": 3.214740268714511e-06, "loss": 0.8414, "step": 24110 }, { "epoch": 2.782071988924781, "grad_norm": 0.44590649008750916, "learning_rate": 3.1978803167894365e-06, "loss": 0.8881, "step": 24115 }, { "epoch": 2.7826488232579605, "grad_norm": 0.5200037360191345, "learning_rate": 3.181063974407772e-06, "loss": 0.8552, "step": 24120 }, { "epoch": 2.7832256575911396, "grad_norm": 0.4275122284889221, "learning_rate": 3.1642912491453346e-06, "loss": 0.8344, "step": 24125 }, { "epoch": 2.7838024919243196, "grad_norm": 0.4023871123790741, "learning_rate": 3.1475621485582253e-06, "loss": 0.8803, "step": 24130 }, { "epoch": 2.7843793262574987, "grad_norm": 0.43436017632484436, "learning_rate": 3.1308766801829926e-06, "loss": 0.8767, "step": 24135 }, { "epoch": 2.7849561605906783, "grad_norm": 0.4553993046283722, "learning_rate": 3.114234851536435e-06, "loss": 0.9139, "step": 24140 }, { "epoch": 2.785532994923858, "grad_norm": 0.44283974170684814, "learning_rate": 3.0976366701157445e-06, "loss": 0.8655, "step": 24145 }, { "epoch": 2.7861098292570374, "grad_norm": 0.46823441982269287, "learning_rate": 3.081082143398395e-06, "loss": 0.8996, "step": 24150 }, { "epoch": 2.786686663590217, "grad_norm": 0.46041712164878845, "learning_rate": 3.0645712788422985e-06, "loss": 0.8655, "step": 24155 }, { "epoch": 2.7872634979233966, "grad_norm": 0.4426816999912262, "learning_rate": 3.0481040838855833e-06, "loss": 0.9124, "step": 24160 }, { "epoch": 2.787840332256576, "grad_norm": 0.48756763339042664, "learning_rate": 3.0316805659467705e-06, "loss": 0.8486, "step": 24165 }, { "epoch": 2.7884171665897552, "grad_norm": 0.40607529878616333, "learning_rate": 3.015300732424686e-06, "loss": 0.8413, "step": 24170 }, { "epoch": 2.788994000922935, "grad_norm": 0.4447746276855469, "learning_rate": 2.998964590698483e-06, "loss": 0.8516, "step": 24175 }, { "epoch": 2.7895708352561144, "grad_norm": 0.4920103847980499, "learning_rate": 2.9826721481276077e-06, "loss": 0.8483, "step": 24180 }, { "epoch": 2.790147669589294, "grad_norm": 0.6388232111930847, "learning_rate": 2.9664234120518442e-06, "loss": 0.886, "step": 24185 }, { "epoch": 2.7907245039224735, "grad_norm": 0.4191949665546417, "learning_rate": 2.950218389791293e-06, "loss": 0.8321, "step": 24190 }, { "epoch": 2.791301338255653, "grad_norm": 0.45636507868766785, "learning_rate": 2.934057088646336e-06, "loss": 0.8973, "step": 24195 }, { "epoch": 2.7918781725888326, "grad_norm": 0.3961809277534485, "learning_rate": 2.91793951589765e-06, "loss": 0.8602, "step": 24200 }, { "epoch": 2.7924550069220118, "grad_norm": 0.3874358534812927, "learning_rate": 2.9018656788062813e-06, "loss": 0.8962, "step": 24205 }, { "epoch": 2.7930318412551918, "grad_norm": 0.40755221247673035, "learning_rate": 2.8858355846134944e-06, "loss": 0.8433, "step": 24210 }, { "epoch": 2.793608675588371, "grad_norm": 0.40966683626174927, "learning_rate": 2.8698492405408783e-06, "loss": 0.8909, "step": 24215 }, { "epoch": 2.7941855099215505, "grad_norm": 0.45877885818481445, "learning_rate": 2.8539066537903057e-06, "loss": 0.832, "step": 24220 }, { "epoch": 2.79476234425473, "grad_norm": 0.42089635133743286, "learning_rate": 2.8380078315439653e-06, "loss": 0.8511, "step": 24225 }, { "epoch": 2.7953391785879096, "grad_norm": 0.41457778215408325, "learning_rate": 2.8221527809642933e-06, "loss": 0.8538, "step": 24230 }, { "epoch": 2.795916012921089, "grad_norm": 0.4614168405532837, "learning_rate": 2.8063415091940216e-06, "loss": 0.8687, "step": 24235 }, { "epoch": 2.7964928472542687, "grad_norm": 0.4669603705406189, "learning_rate": 2.790574023356163e-06, "loss": 0.902, "step": 24240 }, { "epoch": 2.7970696815874483, "grad_norm": 0.42838254570961, "learning_rate": 2.774850330554002e-06, "loss": 0.8776, "step": 24245 }, { "epoch": 2.7976465159206274, "grad_norm": 0.4778376519680023, "learning_rate": 2.7591704378710836e-06, "loss": 0.8457, "step": 24250 }, { "epoch": 2.798223350253807, "grad_norm": 0.4371931850910187, "learning_rate": 2.7435343523712242e-06, "loss": 0.8841, "step": 24255 }, { "epoch": 2.7988001845869865, "grad_norm": 0.4283929169178009, "learning_rate": 2.7279420810985335e-06, "loss": 0.8337, "step": 24260 }, { "epoch": 2.799377018920166, "grad_norm": 0.4353237450122833, "learning_rate": 2.712393631077359e-06, "loss": 0.8101, "step": 24265 }, { "epoch": 2.7999538532533457, "grad_norm": 0.4581010043621063, "learning_rate": 2.6968890093122754e-06, "loss": 0.8645, "step": 24270 }, { "epoch": 2.8005306875865252, "grad_norm": 0.43251487612724304, "learning_rate": 2.681428222788174e-06, "loss": 0.8927, "step": 24275 }, { "epoch": 2.801107521919705, "grad_norm": 0.46299970149993896, "learning_rate": 2.6660112784701706e-06, "loss": 0.9245, "step": 24280 }, { "epoch": 2.801684356252884, "grad_norm": 0.41510728001594543, "learning_rate": 2.650638183303611e-06, "loss": 0.8558, "step": 24285 }, { "epoch": 2.802261190586064, "grad_norm": 0.4396783113479614, "learning_rate": 2.63530894421411e-06, "loss": 0.8827, "step": 24290 }, { "epoch": 2.802838024919243, "grad_norm": 0.47082120180130005, "learning_rate": 2.6200235681075324e-06, "loss": 0.8778, "step": 24295 }, { "epoch": 2.8034148592524226, "grad_norm": 0.40990665555000305, "learning_rate": 2.6047820618699592e-06, "loss": 0.8656, "step": 24300 }, { "epoch": 2.803991693585602, "grad_norm": 0.4030509293079376, "learning_rate": 2.58958443236772e-06, "loss": 0.8368, "step": 24305 }, { "epoch": 2.8045685279187818, "grad_norm": 0.39093419909477234, "learning_rate": 2.57443068644736e-06, "loss": 0.8536, "step": 24310 }, { "epoch": 2.8051453622519613, "grad_norm": 0.398468017578125, "learning_rate": 2.5593208309357187e-06, "loss": 0.8371, "step": 24315 }, { "epoch": 2.805722196585141, "grad_norm": 0.41183245182037354, "learning_rate": 2.544254872639762e-06, "loss": 0.8466, "step": 24320 }, { "epoch": 2.8062990309183204, "grad_norm": 0.4442862570285797, "learning_rate": 2.5292328183467606e-06, "loss": 0.8567, "step": 24325 }, { "epoch": 2.8068758652514996, "grad_norm": 0.43638312816619873, "learning_rate": 2.514254674824168e-06, "loss": 0.8596, "step": 24330 }, { "epoch": 2.807452699584679, "grad_norm": 0.4281933307647705, "learning_rate": 2.4993204488196865e-06, "loss": 0.9114, "step": 24335 }, { "epoch": 2.8080295339178587, "grad_norm": 0.4679451882839203, "learning_rate": 2.4844301470612007e-06, "loss": 0.8452, "step": 24340 }, { "epoch": 2.8086063682510383, "grad_norm": 0.438088059425354, "learning_rate": 2.469583776256812e-06, "loss": 0.8585, "step": 24345 }, { "epoch": 2.809183202584218, "grad_norm": 0.4836006760597229, "learning_rate": 2.4547813430948473e-06, "loss": 0.8732, "step": 24350 }, { "epoch": 2.8097600369173974, "grad_norm": 0.42423558235168457, "learning_rate": 2.4400228542438396e-06, "loss": 0.8497, "step": 24355 }, { "epoch": 2.810336871250577, "grad_norm": 0.45980703830718994, "learning_rate": 2.4253083163525038e-06, "loss": 0.9032, "step": 24360 }, { "epoch": 2.810913705583756, "grad_norm": 0.39328107237815857, "learning_rate": 2.4106377360497813e-06, "loss": 0.8759, "step": 24365 }, { "epoch": 2.811490539916936, "grad_norm": 0.420552134513855, "learning_rate": 2.3960111199447854e-06, "loss": 0.8785, "step": 24370 }, { "epoch": 2.812067374250115, "grad_norm": 0.4153137505054474, "learning_rate": 2.3814284746268344e-06, "loss": 0.8768, "step": 24375 }, { "epoch": 2.812644208583295, "grad_norm": 0.4206068515777588, "learning_rate": 2.366889806665451e-06, "loss": 0.8547, "step": 24380 }, { "epoch": 2.8132210429164743, "grad_norm": 0.42058634757995605, "learning_rate": 2.352395122610329e-06, "loss": 0.8448, "step": 24385 }, { "epoch": 2.813797877249654, "grad_norm": 0.44256195425987244, "learning_rate": 2.3379444289913342e-06, "loss": 0.8852, "step": 24390 }, { "epoch": 2.8143747115828335, "grad_norm": 0.4605026841163635, "learning_rate": 2.3235377323185593e-06, "loss": 0.8693, "step": 24395 }, { "epoch": 2.814951545916013, "grad_norm": 0.4125789403915405, "learning_rate": 2.3091750390822232e-06, "loss": 0.9181, "step": 24400 }, { "epoch": 2.8155283802491926, "grad_norm": 0.429656982421875, "learning_rate": 2.2948563557527836e-06, "loss": 0.8192, "step": 24405 }, { "epoch": 2.8161052145823717, "grad_norm": 0.4208963215351105, "learning_rate": 2.280581688780792e-06, "loss": 0.9055, "step": 24410 }, { "epoch": 2.8166820489155513, "grad_norm": 0.40175744891166687, "learning_rate": 2.266351044597037e-06, "loss": 0.8848, "step": 24415 }, { "epoch": 2.817258883248731, "grad_norm": 0.39711418747901917, "learning_rate": 2.2521644296124466e-06, "loss": 0.8627, "step": 24420 }, { "epoch": 2.8178357175819104, "grad_norm": 0.4246174693107605, "learning_rate": 2.2380218502181193e-06, "loss": 0.8744, "step": 24425 }, { "epoch": 2.81841255191509, "grad_norm": 0.4455760419368744, "learning_rate": 2.2239233127853366e-06, "loss": 0.8491, "step": 24430 }, { "epoch": 2.8189893862482696, "grad_norm": 0.3960672616958618, "learning_rate": 2.209868823665473e-06, "loss": 0.839, "step": 24435 }, { "epoch": 2.819566220581449, "grad_norm": 0.44110697507858276, "learning_rate": 2.1958583891901307e-06, "loss": 0.832, "step": 24440 }, { "epoch": 2.8201430549146282, "grad_norm": 0.43871957063674927, "learning_rate": 2.1818920156710387e-06, "loss": 0.8863, "step": 24445 }, { "epoch": 2.8207198892478083, "grad_norm": 0.4036722481250763, "learning_rate": 2.1679697094000638e-06, "loss": 0.8492, "step": 24450 }, { "epoch": 2.8212967235809874, "grad_norm": 0.4256274402141571, "learning_rate": 2.1540914766492336e-06, "loss": 0.8343, "step": 24455 }, { "epoch": 2.821873557914167, "grad_norm": 0.4566933810710907, "learning_rate": 2.1402573236707357e-06, "loss": 0.9099, "step": 24460 }, { "epoch": 2.8224503922473465, "grad_norm": 0.4147023856639862, "learning_rate": 2.1264672566968736e-06, "loss": 0.9145, "step": 24465 }, { "epoch": 2.823027226580526, "grad_norm": 0.3891587257385254, "learning_rate": 2.1127212819400775e-06, "loss": 0.8941, "step": 24470 }, { "epoch": 2.8236040609137056, "grad_norm": 0.4380452334880829, "learning_rate": 2.0990194055929723e-06, "loss": 0.8465, "step": 24475 }, { "epoch": 2.824180895246885, "grad_norm": 0.4227445721626282, "learning_rate": 2.0853616338282644e-06, "loss": 0.8529, "step": 24480 }, { "epoch": 2.8247577295800648, "grad_norm": 0.46117278933525085, "learning_rate": 2.0717479727987876e-06, "loss": 0.8607, "step": 24485 }, { "epoch": 2.825334563913244, "grad_norm": 0.4497821629047394, "learning_rate": 2.0581784286375585e-06, "loss": 0.8925, "step": 24490 }, { "epoch": 2.825911398246424, "grad_norm": 0.4422582685947418, "learning_rate": 2.044653007457653e-06, "loss": 0.943, "step": 24495 }, { "epoch": 2.826488232579603, "grad_norm": 0.4594055116176605, "learning_rate": 2.03117171535232e-06, "loss": 0.9053, "step": 24500 }, { "epoch": 2.8270650669127826, "grad_norm": 0.4220978319644928, "learning_rate": 2.017734558394879e-06, "loss": 0.8149, "step": 24505 }, { "epoch": 2.827641901245962, "grad_norm": 0.43389609456062317, "learning_rate": 2.0043415426388324e-06, "loss": 0.8253, "step": 24510 }, { "epoch": 2.8282187355791417, "grad_norm": 0.4446219503879547, "learning_rate": 1.9909926741177422e-06, "loss": 0.8887, "step": 24515 }, { "epoch": 2.8287955699123213, "grad_norm": 0.4167942404747009, "learning_rate": 1.977687958845298e-06, "loss": 0.8468, "step": 24520 }, { "epoch": 2.829372404245501, "grad_norm": 0.4484202563762665, "learning_rate": 1.964427402815294e-06, "loss": 0.8895, "step": 24525 }, { "epoch": 2.8299492385786804, "grad_norm": 0.4193936884403229, "learning_rate": 1.9512110120016638e-06, "loss": 0.8567, "step": 24530 }, { "epoch": 2.8305260729118595, "grad_norm": 0.4360736310482025, "learning_rate": 1.9380387923583877e-06, "loss": 0.8866, "step": 24535 }, { "epoch": 2.831102907245039, "grad_norm": 0.4343816936016083, "learning_rate": 1.924910749819586e-06, "loss": 0.8251, "step": 24540 }, { "epoch": 2.8316797415782187, "grad_norm": 0.4194128215312958, "learning_rate": 1.9118268902994617e-06, "loss": 0.8629, "step": 24545 }, { "epoch": 2.8322565759113982, "grad_norm": 0.41894203424453735, "learning_rate": 1.898787219692344e-06, "loss": 0.9119, "step": 24550 }, { "epoch": 2.832833410244578, "grad_norm": 0.4088500738143921, "learning_rate": 1.8857917438725892e-06, "loss": 0.8716, "step": 24555 }, { "epoch": 2.8334102445777574, "grad_norm": 0.42529842257499695, "learning_rate": 1.8728404686947253e-06, "loss": 0.8761, "step": 24560 }, { "epoch": 2.833987078910937, "grad_norm": 0.4301418364048004, "learning_rate": 1.8599333999932966e-06, "loss": 0.9105, "step": 24565 }, { "epoch": 2.834563913244116, "grad_norm": 0.4569443166255951, "learning_rate": 1.8470705435829849e-06, "loss": 0.8798, "step": 24570 }, { "epoch": 2.835140747577296, "grad_norm": 0.4449009597301483, "learning_rate": 1.8342519052584995e-06, "loss": 0.9007, "step": 24575 }, { "epoch": 2.835717581910475, "grad_norm": 0.4420955181121826, "learning_rate": 1.8214774907947097e-06, "loss": 0.8373, "step": 24580 }, { "epoch": 2.8362944162436547, "grad_norm": 0.43387678265571594, "learning_rate": 1.8087473059464788e-06, "loss": 0.8486, "step": 24585 }, { "epoch": 2.8368712505768343, "grad_norm": 0.45262908935546875, "learning_rate": 1.796061356448797e-06, "loss": 0.8243, "step": 24590 }, { "epoch": 2.837448084910014, "grad_norm": 0.4249054789543152, "learning_rate": 1.783419648016682e-06, "loss": 0.9009, "step": 24595 }, { "epoch": 2.8380249192431934, "grad_norm": 0.4239564836025238, "learning_rate": 1.770822186345289e-06, "loss": 0.8845, "step": 24600 }, { "epoch": 2.838601753576373, "grad_norm": 0.4570298492908478, "learning_rate": 1.7582689771097672e-06, "loss": 0.8624, "step": 24605 }, { "epoch": 2.8391785879095526, "grad_norm": 0.49115484952926636, "learning_rate": 1.7457600259653707e-06, "loss": 0.8467, "step": 24610 }, { "epoch": 2.8397554222427317, "grad_norm": 0.375393271446228, "learning_rate": 1.7332953385474027e-06, "loss": 0.8949, "step": 24615 }, { "epoch": 2.8403322565759113, "grad_norm": 0.44062313437461853, "learning_rate": 1.7208749204712493e-06, "loss": 0.9072, "step": 24620 }, { "epoch": 2.840909090909091, "grad_norm": 0.4475950598716736, "learning_rate": 1.7084987773323123e-06, "loss": 0.8664, "step": 24625 }, { "epoch": 2.8414859252422704, "grad_norm": 0.4045661985874176, "learning_rate": 1.6961669147060765e-06, "loss": 0.8447, "step": 24630 }, { "epoch": 2.84206275957545, "grad_norm": 0.40116119384765625, "learning_rate": 1.6838793381480644e-06, "loss": 0.8533, "step": 24635 }, { "epoch": 2.8426395939086295, "grad_norm": 0.46115195751190186, "learning_rate": 1.671636053193859e-06, "loss": 0.8771, "step": 24640 }, { "epoch": 2.843216428241809, "grad_norm": 0.41099631786346436, "learning_rate": 1.6594370653590706e-06, "loss": 0.8727, "step": 24645 }, { "epoch": 2.843793262574988, "grad_norm": 0.42782846093177795, "learning_rate": 1.647282380139392e-06, "loss": 0.8315, "step": 24650 }, { "epoch": 2.844370096908168, "grad_norm": 0.49658653140068054, "learning_rate": 1.63517200301051e-06, "loss": 0.8544, "step": 24655 }, { "epoch": 2.8449469312413473, "grad_norm": 0.4295457899570465, "learning_rate": 1.6231059394281934e-06, "loss": 0.8673, "step": 24660 }, { "epoch": 2.845523765574527, "grad_norm": 0.40046048164367676, "learning_rate": 1.611084194828194e-06, "loss": 0.8988, "step": 24665 }, { "epoch": 2.8461005999077065, "grad_norm": 0.462961345911026, "learning_rate": 1.5991067746263799e-06, "loss": 0.8601, "step": 24670 }, { "epoch": 2.846677434240886, "grad_norm": 0.43810996413230896, "learning_rate": 1.587173684218557e-06, "loss": 0.9112, "step": 24675 }, { "epoch": 2.8472542685740656, "grad_norm": 0.3803338408470154, "learning_rate": 1.5752849289806248e-06, "loss": 0.821, "step": 24680 }, { "epoch": 2.847831102907245, "grad_norm": 0.47152554988861084, "learning_rate": 1.5634405142684882e-06, "loss": 0.8787, "step": 24685 }, { "epoch": 2.8484079372404247, "grad_norm": 0.43199577927589417, "learning_rate": 1.55164044541809e-06, "loss": 0.8574, "step": 24690 }, { "epoch": 2.848984771573604, "grad_norm": 0.43718257546424866, "learning_rate": 1.5398847277453776e-06, "loss": 0.8367, "step": 24695 }, { "epoch": 2.8495616059067834, "grad_norm": 0.484164834022522, "learning_rate": 1.5281733665463038e-06, "loss": 0.8878, "step": 24700 }, { "epoch": 2.850138440239963, "grad_norm": 0.42896854877471924, "learning_rate": 1.5165063670968926e-06, "loss": 0.9043, "step": 24705 }, { "epoch": 2.8507152745731426, "grad_norm": 0.4501945674419403, "learning_rate": 1.5048837346531285e-06, "loss": 0.872, "step": 24710 }, { "epoch": 2.851292108906322, "grad_norm": 0.41754597425460815, "learning_rate": 1.4933054744510344e-06, "loss": 0.871, "step": 24715 }, { "epoch": 2.8518689432395017, "grad_norm": 0.4565035402774811, "learning_rate": 1.4817715917066488e-06, "loss": 0.8715, "step": 24720 }, { "epoch": 2.8524457775726813, "grad_norm": 0.4439191222190857, "learning_rate": 1.4702820916159931e-06, "loss": 0.8738, "step": 24725 }, { "epoch": 2.8530226119058604, "grad_norm": 0.39780697226524353, "learning_rate": 1.4588369793551271e-06, "loss": 0.9057, "step": 24730 }, { "epoch": 2.8535994462390404, "grad_norm": 0.42179131507873535, "learning_rate": 1.4474362600800706e-06, "loss": 0.881, "step": 24735 }, { "epoch": 2.8541762805722195, "grad_norm": 0.468203604221344, "learning_rate": 1.436079938926904e-06, "loss": 0.8879, "step": 24740 }, { "epoch": 2.854753114905399, "grad_norm": 0.44837823510169983, "learning_rate": 1.4247680210116465e-06, "loss": 0.9177, "step": 24745 }, { "epoch": 2.8553299492385786, "grad_norm": 0.42178669571876526, "learning_rate": 1.4135005114303435e-06, "loss": 0.835, "step": 24750 }, { "epoch": 2.855906783571758, "grad_norm": 0.3851020336151123, "learning_rate": 1.4022774152590235e-06, "loss": 0.8502, "step": 24755 }, { "epoch": 2.8564836179049378, "grad_norm": 0.43105438351631165, "learning_rate": 1.3910987375537422e-06, "loss": 0.8873, "step": 24760 }, { "epoch": 2.8570604522381173, "grad_norm": 0.5082978010177612, "learning_rate": 1.379964483350482e-06, "loss": 0.8742, "step": 24765 }, { "epoch": 2.857637286571297, "grad_norm": 0.4297298491001129, "learning_rate": 1.3688746576652646e-06, "loss": 0.8741, "step": 24770 }, { "epoch": 2.858214120904476, "grad_norm": 0.4092559218406677, "learning_rate": 1.3578292654940706e-06, "loss": 0.8442, "step": 24775 }, { "epoch": 2.8587909552376556, "grad_norm": 0.3794494867324829, "learning_rate": 1.3468283118128756e-06, "loss": 0.8774, "step": 24780 }, { "epoch": 2.859367789570835, "grad_norm": 0.4544118642807007, "learning_rate": 1.3358718015776262e-06, "loss": 0.8916, "step": 24785 }, { "epoch": 2.8599446239040147, "grad_norm": 0.43125012516975403, "learning_rate": 1.324959739724263e-06, "loss": 0.8613, "step": 24790 }, { "epoch": 2.8605214582371943, "grad_norm": 0.4157480299472809, "learning_rate": 1.314092131168665e-06, "loss": 0.9128, "step": 24795 }, { "epoch": 2.861098292570374, "grad_norm": 0.4354327619075775, "learning_rate": 1.303268980806749e-06, "loss": 0.8693, "step": 24800 }, { "epoch": 2.8616751269035534, "grad_norm": 0.41278907656669617, "learning_rate": 1.2924902935143258e-06, "loss": 0.8691, "step": 24805 }, { "epoch": 2.8622519612367325, "grad_norm": 0.39481645822525024, "learning_rate": 1.2817560741472445e-06, "loss": 0.8255, "step": 24810 }, { "epoch": 2.8628287955699125, "grad_norm": 0.4812294840812683, "learning_rate": 1.2710663275412705e-06, "loss": 0.8372, "step": 24815 }, { "epoch": 2.8634056299030917, "grad_norm": 0.42143645882606506, "learning_rate": 1.2604210585121845e-06, "loss": 0.8488, "step": 24820 }, { "epoch": 2.8639824642362712, "grad_norm": 0.43948641419410706, "learning_rate": 1.2498202718556617e-06, "loss": 0.8479, "step": 24825 }, { "epoch": 2.864559298569451, "grad_norm": 0.4091070592403412, "learning_rate": 1.2392639723474153e-06, "loss": 0.8532, "step": 24830 }, { "epoch": 2.8651361329026304, "grad_norm": 0.43268927931785583, "learning_rate": 1.2287521647430521e-06, "loss": 0.8438, "step": 24835 }, { "epoch": 2.86571296723581, "grad_norm": 0.41381460428237915, "learning_rate": 1.2182848537781622e-06, "loss": 0.86, "step": 24840 }, { "epoch": 2.8662898015689895, "grad_norm": 0.5061653852462769, "learning_rate": 1.2078620441683064e-06, "loss": 0.8887, "step": 24845 }, { "epoch": 2.866866635902169, "grad_norm": 0.4368427097797394, "learning_rate": 1.1974837406089846e-06, "loss": 0.8962, "step": 24850 }, { "epoch": 2.867443470235348, "grad_norm": 0.43707120418548584, "learning_rate": 1.187149947775612e-06, "loss": 0.8111, "step": 24855 }, { "epoch": 2.868020304568528, "grad_norm": 0.4115869998931885, "learning_rate": 1.1768606703236095e-06, "loss": 0.8536, "step": 24860 }, { "epoch": 2.8685971389017073, "grad_norm": 0.3919137418270111, "learning_rate": 1.1666159128883136e-06, "loss": 0.893, "step": 24865 }, { "epoch": 2.869173973234887, "grad_norm": 0.42175352573394775, "learning_rate": 1.1564156800849879e-06, "loss": 0.8756, "step": 24870 }, { "epoch": 2.8697508075680664, "grad_norm": 0.41766905784606934, "learning_rate": 1.1462599765088788e-06, "loss": 0.875, "step": 24875 }, { "epoch": 2.870327641901246, "grad_norm": 0.43964409828186035, "learning_rate": 1.13614880673516e-06, "loss": 0.8937, "step": 24880 }, { "epoch": 2.8709044762344256, "grad_norm": 0.44723713397979736, "learning_rate": 1.1260821753188987e-06, "loss": 0.9313, "step": 24885 }, { "epoch": 2.871481310567605, "grad_norm": 0.4014928936958313, "learning_rate": 1.1160600867951455e-06, "loss": 0.8574, "step": 24890 }, { "epoch": 2.8720581449007847, "grad_norm": 0.42120859026908875, "learning_rate": 1.106082545678877e-06, "loss": 0.866, "step": 24895 }, { "epoch": 2.872634979233964, "grad_norm": 0.4354383945465088, "learning_rate": 1.0961495564650092e-06, "loss": 0.8993, "step": 24900 }, { "epoch": 2.8732118135671434, "grad_norm": 0.38240766525268555, "learning_rate": 1.0862611236283405e-06, "loss": 0.8508, "step": 24905 }, { "epoch": 2.873788647900323, "grad_norm": 0.41191530227661133, "learning_rate": 1.0764172516236515e-06, "loss": 0.829, "step": 24910 }, { "epoch": 2.8743654822335025, "grad_norm": 0.5137365460395813, "learning_rate": 1.0666179448856174e-06, "loss": 0.9256, "step": 24915 }, { "epoch": 2.874942316566682, "grad_norm": 0.4102938175201416, "learning_rate": 1.056863207828851e-06, "loss": 0.8725, "step": 24920 }, { "epoch": 2.8755191508998617, "grad_norm": 0.3814050257205963, "learning_rate": 1.0471530448478705e-06, "loss": 0.8422, "step": 24925 }, { "epoch": 2.876095985233041, "grad_norm": 0.3723147213459015, "learning_rate": 1.0374874603171326e-06, "loss": 0.8474, "step": 24930 }, { "epoch": 2.8766728195662203, "grad_norm": 0.4165055751800537, "learning_rate": 1.027866458590998e-06, "loss": 0.8373, "step": 24935 }, { "epoch": 2.8772496538994004, "grad_norm": 0.49059098958969116, "learning_rate": 1.0182900440037447e-06, "loss": 0.8712, "step": 24940 }, { "epoch": 2.8778264882325795, "grad_norm": 0.41809362173080444, "learning_rate": 1.0087582208695768e-06, "loss": 0.9039, "step": 24945 }, { "epoch": 2.878403322565759, "grad_norm": 0.44100937247276306, "learning_rate": 9.992709934825816e-07, "loss": 0.819, "step": 24950 }, { "epoch": 2.8789801568989386, "grad_norm": 0.4227614402770996, "learning_rate": 9.898283661167851e-07, "loss": 0.8547, "step": 24955 }, { "epoch": 2.879556991232118, "grad_norm": 0.40128713846206665, "learning_rate": 9.804303430261174e-07, "loss": 0.8602, "step": 24960 }, { "epoch": 2.8801338255652977, "grad_norm": 0.40818148851394653, "learning_rate": 9.71076928444381e-07, "loss": 0.8955, "step": 24965 }, { "epoch": 2.8807106598984773, "grad_norm": 0.4075045883655548, "learning_rate": 9.617681265853273e-07, "loss": 0.8676, "step": 24970 }, { "epoch": 2.881287494231657, "grad_norm": 0.4465732276439667, "learning_rate": 9.525039416425907e-07, "loss": 0.859, "step": 24975 }, { "epoch": 2.881864328564836, "grad_norm": 0.45487141609191895, "learning_rate": 9.432843777896993e-07, "loss": 0.8775, "step": 24980 }, { "epoch": 2.8824411628980156, "grad_norm": 0.40925332903862, "learning_rate": 9.341094391800753e-07, "loss": 0.8657, "step": 24985 }, { "epoch": 2.883017997231195, "grad_norm": 0.4177109897136688, "learning_rate": 9.249791299470567e-07, "loss": 0.8468, "step": 24990 }, { "epoch": 2.8835948315643747, "grad_norm": 0.4083639979362488, "learning_rate": 9.158934542038755e-07, "loss": 0.8908, "step": 24995 }, { "epoch": 2.8841716658975542, "grad_norm": 0.49375760555267334, "learning_rate": 9.068524160436242e-07, "loss": 0.8653, "step": 25000 }, { "epoch": 2.884748500230734, "grad_norm": 0.4585607945919037, "learning_rate": 8.978560195393115e-07, "loss": 0.9006, "step": 25005 }, { "epoch": 2.8853253345639134, "grad_norm": 0.4486338198184967, "learning_rate": 8.889042687438509e-07, "loss": 0.8807, "step": 25010 }, { "epoch": 2.8859021688970925, "grad_norm": 0.4402649700641632, "learning_rate": 8.799971676900165e-07, "loss": 0.8646, "step": 25015 }, { "epoch": 2.8864790032302725, "grad_norm": 0.4237249791622162, "learning_rate": 8.711347203904541e-07, "loss": 0.9027, "step": 25020 }, { "epoch": 2.8870558375634516, "grad_norm": 0.4376821517944336, "learning_rate": 8.623169308377365e-07, "loss": 0.8866, "step": 25025 }, { "epoch": 2.887632671896631, "grad_norm": 0.3880428373813629, "learning_rate": 8.535438030042863e-07, "loss": 0.8597, "step": 25030 }, { "epoch": 2.8882095062298108, "grad_norm": 0.3971506655216217, "learning_rate": 8.448153408424087e-07, "loss": 0.8553, "step": 25035 }, { "epoch": 2.8887863405629903, "grad_norm": 0.4134998321533203, "learning_rate": 8.361315482843135e-07, "loss": 0.8479, "step": 25040 }, { "epoch": 2.88936317489617, "grad_norm": 0.42676177620887756, "learning_rate": 8.274924292420494e-07, "loss": 0.9141, "step": 25045 }, { "epoch": 2.8899400092293495, "grad_norm": 0.43701738119125366, "learning_rate": 8.188979876075475e-07, "loss": 0.866, "step": 25050 }, { "epoch": 2.890516843562529, "grad_norm": 0.4272172749042511, "learning_rate": 8.103482272526441e-07, "loss": 0.887, "step": 25055 }, { "epoch": 2.891093677895708, "grad_norm": 0.4200077950954437, "learning_rate": 8.018431520290027e-07, "loss": 0.8927, "step": 25060 }, { "epoch": 2.8916705122288877, "grad_norm": 0.43398797512054443, "learning_rate": 7.933827657682025e-07, "loss": 0.8489, "step": 25065 }, { "epoch": 2.8922473465620673, "grad_norm": 0.42155030369758606, "learning_rate": 7.849670722816283e-07, "loss": 0.8899, "step": 25070 }, { "epoch": 2.892824180895247, "grad_norm": 0.4187012016773224, "learning_rate": 7.765960753605916e-07, "loss": 0.8781, "step": 25075 }, { "epoch": 2.8934010152284264, "grad_norm": 0.4618048071861267, "learning_rate": 7.682697787762317e-07, "loss": 0.9056, "step": 25080 }, { "epoch": 2.893977849561606, "grad_norm": 0.4267641603946686, "learning_rate": 7.599881862795811e-07, "loss": 0.8268, "step": 25085 }, { "epoch": 2.8945546838947855, "grad_norm": 0.4020291268825531, "learning_rate": 7.517513016014777e-07, "loss": 0.8911, "step": 25090 }, { "epoch": 2.8951315182279647, "grad_norm": 0.44923388957977295, "learning_rate": 7.435591284526866e-07, "loss": 0.8486, "step": 25095 }, { "epoch": 2.8957083525611447, "grad_norm": 0.42874395847320557, "learning_rate": 7.354116705237779e-07, "loss": 0.893, "step": 25100 }, { "epoch": 2.896285186894324, "grad_norm": 0.41941747069358826, "learning_rate": 7.273089314852155e-07, "loss": 0.8298, "step": 25105 }, { "epoch": 2.8968620212275034, "grad_norm": 0.4208540916442871, "learning_rate": 7.192509149872684e-07, "loss": 0.8906, "step": 25110 }, { "epoch": 2.897438855560683, "grad_norm": 0.474488228559494, "learning_rate": 7.112376246601215e-07, "loss": 0.9308, "step": 25115 }, { "epoch": 2.8980156898938625, "grad_norm": 0.4039963185787201, "learning_rate": 7.032690641137651e-07, "loss": 0.8741, "step": 25120 }, { "epoch": 2.898592524227042, "grad_norm": 0.4397977590560913, "learning_rate": 6.953452369380497e-07, "loss": 0.867, "step": 25125 }, { "epoch": 2.8991693585602216, "grad_norm": 0.4029087722301483, "learning_rate": 6.874661467026756e-07, "loss": 0.8548, "step": 25130 }, { "epoch": 2.899746192893401, "grad_norm": 0.41865789890289307, "learning_rate": 6.79631796957192e-07, "loss": 0.8462, "step": 25135 }, { "epoch": 2.9003230272265803, "grad_norm": 0.45501357316970825, "learning_rate": 6.718421912309758e-07, "loss": 0.8626, "step": 25140 }, { "epoch": 2.90089986155976, "grad_norm": 0.41223421692848206, "learning_rate": 6.640973330332756e-07, "loss": 0.8938, "step": 25145 }, { "epoch": 2.9014766958929394, "grad_norm": 0.4089823365211487, "learning_rate": 6.563972258531559e-07, "loss": 0.8879, "step": 25150 }, { "epoch": 2.902053530226119, "grad_norm": 0.40834125876426697, "learning_rate": 6.487418731595418e-07, "loss": 0.8813, "step": 25155 }, { "epoch": 2.9026303645592986, "grad_norm": 0.4374805986881256, "learning_rate": 6.411312784011636e-07, "loss": 0.9119, "step": 25160 }, { "epoch": 2.903207198892478, "grad_norm": 0.41101983189582825, "learning_rate": 6.335654450066341e-07, "loss": 0.845, "step": 25165 }, { "epoch": 2.9037840332256577, "grad_norm": 0.4206681251525879, "learning_rate": 6.260443763843493e-07, "loss": 0.926, "step": 25170 }, { "epoch": 2.9043608675588373, "grad_norm": 0.4462982714176178, "learning_rate": 6.185680759225876e-07, "loss": 0.8473, "step": 25175 }, { "epoch": 2.904937701892017, "grad_norm": 0.47528621554374695, "learning_rate": 6.111365469894215e-07, "loss": 0.8485, "step": 25180 }, { "epoch": 2.905514536225196, "grad_norm": 0.40770596265792847, "learning_rate": 6.037497929327839e-07, "loss": 0.8505, "step": 25185 }, { "epoch": 2.9060913705583755, "grad_norm": 0.46322932839393616, "learning_rate": 5.964078170804133e-07, "loss": 0.8557, "step": 25190 }, { "epoch": 2.906668204891555, "grad_norm": 0.4159286618232727, "learning_rate": 5.891106227398857e-07, "loss": 0.8949, "step": 25195 }, { "epoch": 2.9072450392247347, "grad_norm": 0.41801539063453674, "learning_rate": 5.818582131985939e-07, "loss": 0.8604, "step": 25200 }, { "epoch": 2.907821873557914, "grad_norm": 0.4559653103351593, "learning_rate": 5.746505917237688e-07, "loss": 0.8639, "step": 25205 }, { "epoch": 2.908398707891094, "grad_norm": 0.45020580291748047, "learning_rate": 5.674877615624686e-07, "loss": 0.8776, "step": 25210 }, { "epoch": 2.9089755422242733, "grad_norm": 0.48369133472442627, "learning_rate": 5.603697259415341e-07, "loss": 0.9128, "step": 25215 }, { "epoch": 2.9095523765574525, "grad_norm": 0.44913429021835327, "learning_rate": 5.532964880676894e-07, "loss": 0.8563, "step": 25220 }, { "epoch": 2.9101292108906325, "grad_norm": 0.4256819486618042, "learning_rate": 5.462680511274187e-07, "loss": 0.8501, "step": 25225 }, { "epoch": 2.9107060452238116, "grad_norm": 0.46557503938674927, "learning_rate": 5.392844182870449e-07, "loss": 0.8828, "step": 25230 }, { "epoch": 2.911282879556991, "grad_norm": 0.43622320890426636, "learning_rate": 5.323455926927179e-07, "loss": 0.8439, "step": 25235 }, { "epoch": 2.9118597138901707, "grad_norm": 0.4079887568950653, "learning_rate": 5.254515774703927e-07, "loss": 0.8835, "step": 25240 }, { "epoch": 2.9124365482233503, "grad_norm": 0.43824106454849243, "learning_rate": 5.186023757258407e-07, "loss": 0.9007, "step": 25245 }, { "epoch": 2.91301338255653, "grad_norm": 0.43379032611846924, "learning_rate": 5.117979905446269e-07, "loss": 0.8738, "step": 25250 }, { "epoch": 2.9135902168897094, "grad_norm": 0.43974149227142334, "learning_rate": 5.050384249921436e-07, "loss": 0.8539, "step": 25255 }, { "epoch": 2.914167051222889, "grad_norm": 0.433366596698761, "learning_rate": 4.983236821135995e-07, "loss": 0.8263, "step": 25260 }, { "epoch": 2.914743885556068, "grad_norm": 0.4105539917945862, "learning_rate": 4.916537649339858e-07, "loss": 0.8926, "step": 25265 }, { "epoch": 2.9153207198892477, "grad_norm": 0.41276198625564575, "learning_rate": 4.850286764581102e-07, "loss": 0.8533, "step": 25270 }, { "epoch": 2.9158975542224272, "grad_norm": 0.4376462697982788, "learning_rate": 4.784484196706073e-07, "loss": 0.8931, "step": 25275 }, { "epoch": 2.916474388555607, "grad_norm": 0.46353521943092346, "learning_rate": 4.719129975358838e-07, "loss": 0.8613, "step": 25280 }, { "epoch": 2.9170512228887864, "grad_norm": 0.41219547390937805, "learning_rate": 4.6542241299816216e-07, "loss": 0.8311, "step": 25285 }, { "epoch": 2.917628057221966, "grad_norm": 0.428505539894104, "learning_rate": 4.5897666898145896e-07, "loss": 0.8886, "step": 25290 }, { "epoch": 2.9182048915551455, "grad_norm": 0.4599054753780365, "learning_rate": 4.5257576838960704e-07, "loss": 0.8833, "step": 25295 }, { "epoch": 2.9187817258883246, "grad_norm": 0.428320974111557, "learning_rate": 4.4621971410619967e-07, "loss": 0.9012, "step": 25300 }, { "epoch": 2.9193585602215046, "grad_norm": 0.430586576461792, "learning_rate": 4.3990850899467975e-07, "loss": 0.8718, "step": 25305 }, { "epoch": 2.9199353945546838, "grad_norm": 0.43232461810112, "learning_rate": 4.336421558982284e-07, "loss": 0.8654, "step": 25310 }, { "epoch": 2.9205122288878633, "grad_norm": 0.41640791296958923, "learning_rate": 4.274206576398876e-07, "loss": 0.8622, "step": 25315 }, { "epoch": 2.921089063221043, "grad_norm": 0.4136120080947876, "learning_rate": 4.2124401702241524e-07, "loss": 0.8064, "step": 25320 }, { "epoch": 2.9216658975542225, "grad_norm": 0.44188201427459717, "learning_rate": 4.151122368284299e-07, "loss": 0.8416, "step": 25325 }, { "epoch": 2.922242731887402, "grad_norm": 0.3820507228374481, "learning_rate": 4.090253198202887e-07, "loss": 0.8859, "step": 25330 }, { "epoch": 2.9228195662205816, "grad_norm": 0.3827502429485321, "learning_rate": 4.029832687401758e-07, "loss": 0.8754, "step": 25335 }, { "epoch": 2.923396400553761, "grad_norm": 0.4079684317111969, "learning_rate": 3.969860863100472e-07, "loss": 0.8505, "step": 25340 }, { "epoch": 2.9239732348869403, "grad_norm": 0.40014228224754333, "learning_rate": 3.9103377523163065e-07, "loss": 0.8446, "step": 25345 }, { "epoch": 2.92455006922012, "grad_norm": 0.4371630549430847, "learning_rate": 3.851263381864589e-07, "loss": 0.8806, "step": 25350 }, { "epoch": 2.9251269035532994, "grad_norm": 0.41463762521743774, "learning_rate": 3.7926377783585874e-07, "loss": 0.9124, "step": 25355 }, { "epoch": 2.925703737886479, "grad_norm": 0.44109275937080383, "learning_rate": 3.734460968208953e-07, "loss": 0.8611, "step": 25360 }, { "epoch": 2.9262805722196585, "grad_norm": 0.42749449610710144, "learning_rate": 3.67673297762483e-07, "loss": 0.8623, "step": 25365 }, { "epoch": 2.926857406552838, "grad_norm": 0.4444100558757782, "learning_rate": 3.619453832612418e-07, "loss": 0.8512, "step": 25370 }, { "epoch": 2.9274342408860177, "grad_norm": 0.4210651218891144, "learning_rate": 3.562623558976408e-07, "loss": 0.8563, "step": 25375 }, { "epoch": 2.928011075219197, "grad_norm": 0.4154101014137268, "learning_rate": 3.506242182318653e-07, "loss": 0.866, "step": 25380 }, { "epoch": 2.928587909552377, "grad_norm": 0.40918856859207153, "learning_rate": 3.4503097280392807e-07, "loss": 0.898, "step": 25385 }, { "epoch": 2.929164743885556, "grad_norm": 0.40677812695503235, "learning_rate": 3.394826221335912e-07, "loss": 0.9173, "step": 25390 }, { "epoch": 2.9297415782187355, "grad_norm": 0.3963682949542999, "learning_rate": 3.339791687203997e-07, "loss": 0.806, "step": 25395 }, { "epoch": 2.930318412551915, "grad_norm": 0.49158430099487305, "learning_rate": 3.285206150436593e-07, "loss": 0.8529, "step": 25400 }, { "epoch": 2.9308952468850946, "grad_norm": 0.46478375792503357, "learning_rate": 3.2310696356248063e-07, "loss": 0.8591, "step": 25405 }, { "epoch": 2.931472081218274, "grad_norm": 0.4427299201488495, "learning_rate": 3.177382167156906e-07, "loss": 0.8901, "step": 25410 }, { "epoch": 2.9320489155514537, "grad_norm": 0.37393638491630554, "learning_rate": 3.1241437692196563e-07, "loss": 0.843, "step": 25415 }, { "epoch": 2.9326257498846333, "grad_norm": 0.4250434339046478, "learning_rate": 3.0713544657966497e-07, "loss": 0.8659, "step": 25420 }, { "epoch": 2.9332025842178124, "grad_norm": 0.4124998152256012, "learning_rate": 3.019014280669641e-07, "loss": 0.8527, "step": 25425 }, { "epoch": 2.933779418550992, "grad_norm": 0.5417208075523376, "learning_rate": 2.967123237418212e-07, "loss": 0.8771, "step": 25430 }, { "epoch": 2.9343562528841716, "grad_norm": 0.4095006287097931, "learning_rate": 2.91568135941922e-07, "loss": 0.8655, "step": 25435 }, { "epoch": 2.934933087217351, "grad_norm": 0.43682244420051575, "learning_rate": 2.8646886698473484e-07, "loss": 0.8607, "step": 25440 }, { "epoch": 2.9355099215505307, "grad_norm": 0.4769591987133026, "learning_rate": 2.8141451916748887e-07, "loss": 0.9018, "step": 25445 }, { "epoch": 2.9360867558837103, "grad_norm": 0.42697569727897644, "learning_rate": 2.764050947671737e-07, "loss": 0.8414, "step": 25450 }, { "epoch": 2.93666359021689, "grad_norm": 0.44473573565483093, "learning_rate": 2.7144059604055085e-07, "loss": 0.8637, "step": 25455 }, { "epoch": 2.937240424550069, "grad_norm": 0.40624740719795227, "learning_rate": 2.6652102522414233e-07, "loss": 0.8681, "step": 25460 }, { "epoch": 2.937817258883249, "grad_norm": 0.45977187156677246, "learning_rate": 2.6164638453421984e-07, "loss": 0.8526, "step": 25465 }, { "epoch": 2.938394093216428, "grad_norm": 0.42826855182647705, "learning_rate": 2.568166761668156e-07, "loss": 0.8874, "step": 25470 }, { "epoch": 2.9389709275496076, "grad_norm": 0.450840026140213, "learning_rate": 2.5203190229771136e-07, "loss": 0.8652, "step": 25475 }, { "epoch": 2.939547761882787, "grad_norm": 0.49307262897491455, "learning_rate": 2.472920650824828e-07, "loss": 0.8745, "step": 25480 }, { "epoch": 2.940124596215967, "grad_norm": 0.4430190622806549, "learning_rate": 2.4259716665641083e-07, "loss": 0.8579, "step": 25485 }, { "epoch": 2.9407014305491463, "grad_norm": 0.40895533561706543, "learning_rate": 2.3794720913458136e-07, "loss": 0.8497, "step": 25490 }, { "epoch": 2.941278264882326, "grad_norm": 0.39817044138908386, "learning_rate": 2.333421946117853e-07, "loss": 0.806, "step": 25495 }, { "epoch": 2.9418550992155055, "grad_norm": 0.43439817428588867, "learning_rate": 2.2878212516260766e-07, "loss": 0.8963, "step": 25500 }, { "epoch": 2.9424319335486846, "grad_norm": 0.4119809567928314, "learning_rate": 2.242670028413607e-07, "loss": 0.8188, "step": 25505 }, { "epoch": 2.943008767881864, "grad_norm": 0.4609575569629669, "learning_rate": 2.1979682968211733e-07, "loss": 0.8902, "step": 25510 }, { "epoch": 2.9435856022150437, "grad_norm": 0.47616538405418396, "learning_rate": 2.1537160769870002e-07, "loss": 0.8088, "step": 25515 }, { "epoch": 2.9441624365482233, "grad_norm": 0.4469929039478302, "learning_rate": 2.109913388846807e-07, "loss": 0.8527, "step": 25520 }, { "epoch": 2.944739270881403, "grad_norm": 0.42096269130706787, "learning_rate": 2.066560252133698e-07, "loss": 0.8832, "step": 25525 }, { "epoch": 2.9453161052145824, "grad_norm": 0.5134122371673584, "learning_rate": 2.0236566863784944e-07, "loss": 0.867, "step": 25530 }, { "epoch": 2.945892939547762, "grad_norm": 0.43390166759490967, "learning_rate": 1.98120271090918e-07, "loss": 0.8658, "step": 25535 }, { "epoch": 2.9464697738809416, "grad_norm": 0.47963863611221313, "learning_rate": 1.9391983448514562e-07, "loss": 0.8758, "step": 25540 }, { "epoch": 2.947046608214121, "grad_norm": 0.4790579378604889, "learning_rate": 1.8976436071284076e-07, "loss": 0.845, "step": 25545 }, { "epoch": 2.9476234425473002, "grad_norm": 0.534599244594574, "learning_rate": 1.8565385164605042e-07, "loss": 0.8906, "step": 25550 }, { "epoch": 2.94820027688048, "grad_norm": 0.3933981657028198, "learning_rate": 1.815883091365489e-07, "loss": 0.8556, "step": 25555 }, { "epoch": 2.9487771112136594, "grad_norm": 0.481326162815094, "learning_rate": 1.775677350159044e-07, "loss": 0.8605, "step": 25560 }, { "epoch": 2.949353945546839, "grad_norm": 0.41006627678871155, "learning_rate": 1.73592131095357e-07, "loss": 0.8734, "step": 25565 }, { "epoch": 2.9499307798800185, "grad_norm": 0.40375635027885437, "learning_rate": 1.6966149916595176e-07, "loss": 0.8292, "step": 25570 }, { "epoch": 2.950507614213198, "grad_norm": 0.4399477541446686, "learning_rate": 1.657758409984278e-07, "loss": 0.8711, "step": 25575 }, { "epoch": 2.9510844485463776, "grad_norm": 0.4203029274940491, "learning_rate": 1.6193515834329599e-07, "loss": 0.8731, "step": 25580 }, { "epoch": 2.9516612828795568, "grad_norm": 0.4194481372833252, "learning_rate": 1.5813945293078337e-07, "loss": 0.8465, "step": 25585 }, { "epoch": 2.9522381172127368, "grad_norm": 0.3688070774078369, "learning_rate": 1.5438872647086655e-07, "loss": 0.844, "step": 25590 }, { "epoch": 2.952814951545916, "grad_norm": 0.44683653116226196, "learning_rate": 1.5068298065324947e-07, "loss": 0.8533, "step": 25595 }, { "epoch": 2.9533917858790955, "grad_norm": 0.44640904664993286, "learning_rate": 1.470222171473856e-07, "loss": 0.8771, "step": 25600 }, { "epoch": 2.953968620212275, "grad_norm": 0.42615175247192383, "learning_rate": 1.4340643760244464e-07, "loss": 0.8491, "step": 25605 }, { "epoch": 2.9545454545454546, "grad_norm": 0.44119492173194885, "learning_rate": 1.398356436473569e-07, "loss": 0.9057, "step": 25610 }, { "epoch": 2.955122288878634, "grad_norm": 0.4439990818500519, "learning_rate": 1.3630983689075782e-07, "loss": 0.8654, "step": 25615 }, { "epoch": 2.9556991232118137, "grad_norm": 0.41365984082221985, "learning_rate": 1.328290189210435e-07, "loss": 0.861, "step": 25620 }, { "epoch": 2.9562759575449933, "grad_norm": 0.3996998071670532, "learning_rate": 1.293931913063151e-07, "loss": 0.9193, "step": 25625 }, { "epoch": 2.9568527918781724, "grad_norm": 0.4594114422798157, "learning_rate": 1.2600235559443452e-07, "loss": 0.8558, "step": 25630 }, { "epoch": 2.957429626211352, "grad_norm": 0.4400221109390259, "learning_rate": 1.2265651331296869e-07, "loss": 0.9092, "step": 25635 }, { "epoch": 2.9580064605445315, "grad_norm": 0.4317476749420166, "learning_rate": 1.1935566596923408e-07, "loss": 0.8826, "step": 25640 }, { "epoch": 2.958583294877711, "grad_norm": 0.46063005924224854, "learning_rate": 1.1609981505025236e-07, "loss": 0.8511, "step": 25645 }, { "epoch": 2.9591601292108907, "grad_norm": 0.48726972937583923, "learning_rate": 1.1288896202281685e-07, "loss": 0.8762, "step": 25650 }, { "epoch": 2.9597369635440702, "grad_norm": 0.4288542866706848, "learning_rate": 1.0972310833340382e-07, "loss": 0.8795, "step": 25655 }, { "epoch": 2.96031379787725, "grad_norm": 0.42209890484809875, "learning_rate": 1.066022554082391e-07, "loss": 0.8819, "step": 25660 }, { "epoch": 2.960890632210429, "grad_norm": 0.45075806975364685, "learning_rate": 1.0352640465327578e-07, "loss": 0.8137, "step": 25665 }, { "epoch": 2.961467466543609, "grad_norm": 0.4366128146648407, "learning_rate": 1.0049555745419436e-07, "loss": 0.8849, "step": 25670 }, { "epoch": 2.962044300876788, "grad_norm": 0.40258607268333435, "learning_rate": 9.750971517639152e-08, "loss": 0.838, "step": 25675 }, { "epoch": 2.9626211352099676, "grad_norm": 0.39733338356018066, "learning_rate": 9.456887916499125e-08, "loss": 0.8313, "step": 25680 }, { "epoch": 2.963197969543147, "grad_norm": 0.4235396683216095, "learning_rate": 9.16730507448671e-08, "loss": 0.8569, "step": 25685 }, { "epoch": 2.9637748038763267, "grad_norm": 0.41825294494628906, "learning_rate": 8.882223122056443e-08, "loss": 0.8876, "step": 25690 }, { "epoch": 2.9643516382095063, "grad_norm": 0.4579850137233734, "learning_rate": 8.601642187640036e-08, "loss": 0.8732, "step": 25695 }, { "epoch": 2.964928472542686, "grad_norm": 0.43321606516838074, "learning_rate": 8.325562397640819e-08, "loss": 0.8948, "step": 25700 }, { "epoch": 2.9655053068758654, "grad_norm": 0.4565126895904541, "learning_rate": 8.053983876431526e-08, "loss": 0.8532, "step": 25705 }, { "epoch": 2.9660821412090446, "grad_norm": 0.3763374984264374, "learning_rate": 7.786906746358735e-08, "loss": 0.858, "step": 25710 }, { "epoch": 2.966658975542224, "grad_norm": 0.40120729804039, "learning_rate": 7.524331127741757e-08, "loss": 0.8648, "step": 25715 }, { "epoch": 2.9672358098754037, "grad_norm": 0.42425310611724854, "learning_rate": 7.266257138872634e-08, "loss": 0.8882, "step": 25720 }, { "epoch": 2.9678126442085833, "grad_norm": 0.4503181576728821, "learning_rate": 7.012684896011702e-08, "loss": 0.7968, "step": 25725 }, { "epoch": 2.968389478541763, "grad_norm": 0.4242125451564789, "learning_rate": 6.763614513395356e-08, "loss": 0.8441, "step": 25730 }, { "epoch": 2.9689663128749424, "grad_norm": 0.4024699628353119, "learning_rate": 6.519046103230508e-08, "loss": 0.9132, "step": 25735 }, { "epoch": 2.969543147208122, "grad_norm": 0.4432372748851776, "learning_rate": 6.278979775694582e-08, "loss": 0.8438, "step": 25740 }, { "epoch": 2.970119981541301, "grad_norm": 0.498928040266037, "learning_rate": 6.043415638938842e-08, "loss": 0.8593, "step": 25745 }, { "epoch": 2.970696815874481, "grad_norm": 0.4619358777999878, "learning_rate": 5.8123537990850684e-08, "loss": 0.8345, "step": 25750 }, { "epoch": 2.97127365020766, "grad_norm": 0.49964797496795654, "learning_rate": 5.585794360226659e-08, "loss": 0.9009, "step": 25755 }, { "epoch": 2.9718504845408398, "grad_norm": 0.3956110179424286, "learning_rate": 5.3637374244308594e-08, "loss": 0.8223, "step": 25760 }, { "epoch": 2.9724273188740193, "grad_norm": 0.4627871513366699, "learning_rate": 5.146183091732093e-08, "loss": 0.8327, "step": 25765 }, { "epoch": 2.973004153207199, "grad_norm": 0.4105352461338043, "learning_rate": 4.9331314601408495e-08, "loss": 0.8556, "step": 25770 }, { "epoch": 2.9735809875403785, "grad_norm": 0.4358341693878174, "learning_rate": 4.7245826256370194e-08, "loss": 0.8812, "step": 25775 }, { "epoch": 2.974157821873558, "grad_norm": 0.4529142677783966, "learning_rate": 4.520536682171006e-08, "loss": 0.8748, "step": 25780 }, { "epoch": 2.9747346562067376, "grad_norm": 0.4764014780521393, "learning_rate": 4.320993721668165e-08, "loss": 0.876, "step": 25785 }, { "epoch": 2.9753114905399167, "grad_norm": 0.47748324275016785, "learning_rate": 4.1259538340210345e-08, "loss": 0.9133, "step": 25790 }, { "epoch": 2.9758883248730963, "grad_norm": 0.4135587513446808, "learning_rate": 3.9354171070959955e-08, "loss": 0.8732, "step": 25795 }, { "epoch": 2.976465159206276, "grad_norm": 0.4119958281517029, "learning_rate": 3.7493836267310514e-08, "loss": 0.8257, "step": 25800 }, { "epoch": 2.9770419935394554, "grad_norm": 0.44728052616119385, "learning_rate": 3.567853476733607e-08, "loss": 0.8718, "step": 25805 }, { "epoch": 2.977618827872635, "grad_norm": 0.4038022756576538, "learning_rate": 3.390826738883801e-08, "loss": 0.8482, "step": 25810 }, { "epoch": 2.9781956622058146, "grad_norm": 0.461870938539505, "learning_rate": 3.218303492932284e-08, "loss": 0.8751, "step": 25815 }, { "epoch": 2.978772496538994, "grad_norm": 0.4327203035354614, "learning_rate": 3.050283816601329e-08, "loss": 0.8683, "step": 25820 }, { "epoch": 2.9793493308721732, "grad_norm": 0.4000185430049896, "learning_rate": 2.8867677855837217e-08, "loss": 0.8966, "step": 25825 }, { "epoch": 2.9799261652053533, "grad_norm": 0.4935370087623596, "learning_rate": 2.7277554735449794e-08, "loss": 0.8866, "step": 25830 }, { "epoch": 2.9805029995385324, "grad_norm": 0.40175366401672363, "learning_rate": 2.573246952118913e-08, "loss": 0.89, "step": 25835 }, { "epoch": 2.981079833871712, "grad_norm": 0.4187186360359192, "learning_rate": 2.4232422909131745e-08, "loss": 0.8667, "step": 25840 }, { "epoch": 2.9816566682048915, "grad_norm": 0.46566322445869446, "learning_rate": 2.2777415575037098e-08, "loss": 0.847, "step": 25845 }, { "epoch": 2.982233502538071, "grad_norm": 0.40721821784973145, "learning_rate": 2.136744817440306e-08, "loss": 0.9071, "step": 25850 }, { "epoch": 2.9828103368712506, "grad_norm": 0.3912089765071869, "learning_rate": 2.000252134241043e-08, "loss": 0.8563, "step": 25855 }, { "epoch": 2.98338717120443, "grad_norm": 0.4236389696598053, "learning_rate": 1.8682635693978433e-08, "loss": 0.8758, "step": 25860 }, { "epoch": 2.9839640055376098, "grad_norm": 0.4218614101409912, "learning_rate": 1.7407791823698115e-08, "loss": 0.8246, "step": 25865 }, { "epoch": 2.984540839870789, "grad_norm": 0.47850501537323, "learning_rate": 1.6177990305910053e-08, "loss": 0.8596, "step": 25870 }, { "epoch": 2.985117674203969, "grad_norm": 0.440022736787796, "learning_rate": 1.499323169462663e-08, "loss": 0.8545, "step": 25875 }, { "epoch": 2.985694508537148, "grad_norm": 0.4404289126396179, "learning_rate": 1.3853516523587572e-08, "loss": 0.8757, "step": 25880 }, { "epoch": 2.9862713428703276, "grad_norm": 0.4234904646873474, "learning_rate": 1.275884530622662e-08, "loss": 0.8972, "step": 25885 }, { "epoch": 2.986848177203507, "grad_norm": 0.416761577129364, "learning_rate": 1.1709218535715938e-08, "loss": 0.8496, "step": 25890 }, { "epoch": 2.9874250115366867, "grad_norm": 0.3803170621395111, "learning_rate": 1.0704636684910618e-08, "loss": 0.9054, "step": 25895 }, { "epoch": 2.9880018458698663, "grad_norm": 0.47127583622932434, "learning_rate": 9.74510020635977e-09, "loss": 0.8522, "step": 25900 }, { "epoch": 2.988578680203046, "grad_norm": 0.4301356375217438, "learning_rate": 8.83060953235093e-09, "loss": 0.8353, "step": 25905 }, { "epoch": 2.9891555145362254, "grad_norm": 0.5385040044784546, "learning_rate": 7.96116507485456e-09, "loss": 0.9251, "step": 25910 }, { "epoch": 2.9897323488694045, "grad_norm": 0.5166802406311035, "learning_rate": 7.136767225568441e-09, "loss": 0.8873, "step": 25915 }, { "epoch": 2.990309183202584, "grad_norm": 0.421013742685318, "learning_rate": 6.357416355884382e-09, "loss": 0.8365, "step": 25920 }, { "epoch": 2.9908860175357637, "grad_norm": 0.4109059274196625, "learning_rate": 5.62311281688821e-09, "loss": 0.8723, "step": 25925 }, { "epoch": 2.9914628518689432, "grad_norm": 0.5444170236587524, "learning_rate": 4.93385693940418e-09, "loss": 0.8993, "step": 25930 }, { "epoch": 2.992039686202123, "grad_norm": 0.4387677013874054, "learning_rate": 4.289649033928367e-09, "loss": 0.8556, "step": 25935 }, { "epoch": 2.9926165205353024, "grad_norm": 0.4136563539505005, "learning_rate": 3.6904893906730687e-09, "loss": 0.8288, "step": 25940 }, { "epoch": 2.993193354868482, "grad_norm": 0.4198967218399048, "learning_rate": 3.1363782795779117e-09, "loss": 0.8665, "step": 25945 }, { "epoch": 2.993770189201661, "grad_norm": 0.4858287572860718, "learning_rate": 2.627315950265441e-09, "loss": 0.9142, "step": 25950 }, { "epoch": 2.994347023534841, "grad_norm": 0.43194618821144104, "learning_rate": 2.1633026320633244e-09, "loss": 0.8462, "step": 25955 }, { "epoch": 2.99492385786802, "grad_norm": 0.5099018812179565, "learning_rate": 1.744338534015455e-09, "loss": 0.8614, "step": 25960 }, { "epoch": 2.9955006922011997, "grad_norm": 0.409798800945282, "learning_rate": 1.3704238448708496e-09, "loss": 0.8789, "step": 25965 }, { "epoch": 2.9960775265343793, "grad_norm": 0.38359326124191284, "learning_rate": 1.041558733061443e-09, "loss": 0.88, "step": 25970 }, { "epoch": 2.996654360867559, "grad_norm": 0.44114139676094055, "learning_rate": 7.577433467576001e-10, "loss": 0.8549, "step": 25975 }, { "epoch": 2.9972311952007384, "grad_norm": 0.4402305781841278, "learning_rate": 5.189778138237067e-10, "loss": 0.8814, "step": 25980 }, { "epoch": 2.997808029533918, "grad_norm": 0.43162301182746887, "learning_rate": 3.25262241795965e-10, "loss": 0.902, "step": 25985 }, { "epoch": 2.9983848638670976, "grad_norm": 0.43010595440864563, "learning_rate": 1.7659671797121134e-10, "loss": 0.8726, "step": 25990 }, { "epoch": 2.9989616982002767, "grad_norm": 0.47699853777885437, "learning_rate": 7.298130931809865e-11, "loss": 0.8768, "step": 25995 }, { "epoch": 2.9995385325334563, "grad_norm": 0.4188362956047058, "learning_rate": 1.4416062510402839e-11, "loss": 0.8605, "step": 26000 }, { "epoch": 3.0, "eval_loss": 0.9587316513061523, "eval_runtime": 959.8828, "eval_samples_per_second": 15.992, "eval_steps_per_second": 1.0, "step": 26004 }, { "epoch": 3.0, "step": 26004, "total_flos": 3.7211673621310734e+19, "train_loss": 0.9273421984235244, "train_runtime": 99241.3322, "train_samples_per_second": 4.192, "train_steps_per_second": 0.262 } ], "logging_steps": 5, "max_steps": 26004, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.7211673621310734e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }